4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
63 from ganeti import runtime
64 from ganeti import network
66 import ganeti.masterd.instance # pylint: disable=W0611
69 #: Size of DRBD meta block device
73 INSTANCE_DOWN = [constants.ADMINST_DOWN]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
77 #: Instance status in which an instance can be marked as offline/online
78 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
79 constants.ADMINST_OFFLINE,
84 """Data container for LU results with jobs.
86 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
87 by L{mcpu._ProcessResult}. The latter will then submit the jobs
88 contained in the C{jobs} attribute and include the job IDs in the opcode
92 def __init__(self, jobs, **kwargs):
93 """Initializes this class.
95 Additional return values can be specified as keyword arguments.
97 @type jobs: list of lists of L{opcodes.OpCode}
98 @param jobs: A list of lists of opcode objects
105 class LogicalUnit(object):
106 """Logical Unit base class.
108 Subclasses must follow these rules:
109 - implement ExpandNames
110 - implement CheckPrereq (except when tasklets are used)
111 - implement Exec (except when tasklets are used)
112 - implement BuildHooksEnv
113 - implement BuildHooksNodes
114 - redefine HPATH and HTYPE
115 - optionally redefine their run requirements:
116 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
118 Note that all commands require root permissions.
120 @ivar dry_run_result: the value (if any) that will be returned to the caller
121 in dry-run mode (signalled by opcode dry_run parameter)
128 def __init__(self, processor, op, context, rpc_runner):
129 """Constructor for LogicalUnit.
131 This needs to be overridden in derived classes in order to check op
135 self.proc = processor
137 self.cfg = context.cfg
138 self.glm = context.glm
140 self.owned_locks = context.glm.list_owned
141 self.context = context
142 self.rpc = rpc_runner
143 # Dicts used to declare locking needs to mcpu
144 self.needed_locks = None
145 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
147 self.remove_locks = {}
148 # Used to force good behavior when calling helper functions
149 self.recalculate_locks = {}
151 self.Log = processor.Log # pylint: disable=C0103
152 self.LogWarning = processor.LogWarning # pylint: disable=C0103
153 self.LogInfo = processor.LogInfo # pylint: disable=C0103
154 self.LogStep = processor.LogStep # pylint: disable=C0103
155 # support for dry-run
156 self.dry_run_result = None
157 # support for generic debug attribute
158 if (not hasattr(self.op, "debug_level") or
159 not isinstance(self.op.debug_level, int)):
160 self.op.debug_level = 0
165 # Validate opcode parameters and set defaults
166 self.op.Validate(True)
168 self.CheckArguments()
170 def CheckArguments(self):
171 """Check syntactic validity for the opcode arguments.
173 This method is for doing a simple syntactic check and ensuring the
174 validity of opcode parameters, without any cluster-related
175 checks. While the same can be accomplished in ExpandNames and/or
176 CheckPrereq, doing these separately is better because:
178 - ExpandNames is left as purely a lock-related function
179 - CheckPrereq is run after we have acquired locks (and possible
182 The function is allowed to change the self.op attribute so that
183 later methods no longer need to worry about missing parameters.
188 def ExpandNames(self):
189 """Expand names for this LU.
191 This method is called before starting to execute the opcode, and it should
192 update all the parameters of the opcode to their canonical form (e.g. a
193 short node name must be fully expanded after this method has successfully
194 completed). This way locking, hooks, logging, etc. can work correctly.
196 LUs which implement this method must also populate the self.needed_locks
197 member, as a dict with lock levels as keys, and a list of needed lock names
200 - use an empty dict if you don't need any lock
201 - if you don't need any lock at a particular level omit that
202 level (note that in this case C{DeclareLocks} won't be called
203 at all for that level)
204 - if you need locks at a level, but you can't calculate it in
205 this function, initialise that level with an empty list and do
206 further processing in L{LogicalUnit.DeclareLocks} (see that
207 function's docstring)
208 - don't put anything for the BGL level
209 - if you want all locks at a level use L{locking.ALL_SET} as a value
211 If you need to share locks (rather than acquire them exclusively) at one
212 level you can modify self.share_locks, setting a true value (usually 1) for
213 that level. By default locks are not shared.
215 This function can also define a list of tasklets, which then will be
216 executed in order instead of the usual LU-level CheckPrereq and Exec
217 functions, if those are not defined by the LU.
221 # Acquire all nodes and one instance
222 self.needed_locks = {
223 locking.LEVEL_NODE: locking.ALL_SET,
224 locking.LEVEL_INSTANCE: ['instance1.example.com'],
226 # Acquire just two nodes
227 self.needed_locks = {
228 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
231 self.needed_locks = {} # No, you can't leave it to the default value None
234 # The implementation of this method is mandatory only if the new LU is
235 # concurrent, so that old LUs don't need to be changed all at the same
238 self.needed_locks = {} # Exclusive LUs don't need locks.
240 raise NotImplementedError
242 def DeclareLocks(self, level):
243 """Declare LU locking needs for a level
245 While most LUs can just declare their locking needs at ExpandNames time,
246 sometimes there's the need to calculate some locks after having acquired
247 the ones before. This function is called just before acquiring locks at a
248 particular level, but after acquiring the ones at lower levels, and permits
249 such calculations. It can be used to modify self.needed_locks, and by
250 default it does nothing.
252 This function is only called if you have something already set in
253 self.needed_locks for the level.
255 @param level: Locking level which is going to be locked
256 @type level: member of L{ganeti.locking.LEVELS}
260 def CheckPrereq(self):
261 """Check prerequisites for this LU.
263 This method should check that the prerequisites for the execution
264 of this LU are fulfilled. It can do internode communication, but
265 it should be idempotent - no cluster or system changes are
268 The method should raise errors.OpPrereqError in case something is
269 not fulfilled. Its return value is ignored.
271 This method should also update all the parameters of the opcode to
272 their canonical form if it hasn't been done by ExpandNames before.
275 if self.tasklets is not None:
276 for (idx, tl) in enumerate(self.tasklets):
277 logging.debug("Checking prerequisites for tasklet %s/%s",
278 idx + 1, len(self.tasklets))
283 def Exec(self, feedback_fn):
286 This method should implement the actual work. It should raise
287 errors.OpExecError for failures that are somewhat dealt with in
291 if self.tasklets is not None:
292 for (idx, tl) in enumerate(self.tasklets):
293 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
296 raise NotImplementedError
298 def BuildHooksEnv(self):
299 """Build hooks environment for this LU.
302 @return: Dictionary containing the environment that will be used for
303 running the hooks for this LU. The keys of the dict must not be prefixed
304 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
305 will extend the environment with additional variables. If no environment
306 should be defined, an empty dictionary should be returned (not C{None}).
307 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
311 raise NotImplementedError
313 def BuildHooksNodes(self):
314 """Build list of nodes to run LU's hooks.
316 @rtype: tuple; (list, list)
317 @return: Tuple containing a list of node names on which the hook
318 should run before the execution and a list of node names on which the
319 hook should run after the execution. No nodes should be returned as an
320 empty list (and not None).
321 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
325 raise NotImplementedError
327 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
328 """Notify the LU about the results of its hooks.
330 This method is called every time a hooks phase is executed, and notifies
331 the Logical Unit about the hooks' result. The LU can then use it to alter
332 its result based on the hooks. By default the method does nothing and the
333 previous result is passed back unchanged but any LU can define it if it
334 wants to use the local cluster hook-scripts somehow.
336 @param phase: one of L{constants.HOOKS_PHASE_POST} or
337 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
338 @param hook_results: the results of the multi-node hooks rpc call
339 @param feedback_fn: function used send feedback back to the caller
340 @param lu_result: the previous Exec result this LU had, or None
342 @return: the new Exec result, based on the previous result
346 # The API must be kept, thus we ignore the unused-argument and
347 # method-could-be-a-function warnings
348 # pylint: disable=W0613,R0201
351 def _ExpandAndLockInstance(self):
352 """Helper function to expand and lock an instance.
354 Many LUs that work on an instance take its name in self.op.instance_name
355 and need to expand it and then declare the expanded name for locking. This
356 function does it, and then updates self.op.instance_name to the expanded
357 name. It also initializes needed_locks as a dict, if this hasn't been done
361 if self.needed_locks is None:
362 self.needed_locks = {}
364 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
365 "_ExpandAndLockInstance called with instance-level locks set"
366 self.op.instance_name = _ExpandInstanceName(self.cfg,
367 self.op.instance_name)
368 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
370 def _LockInstancesNodes(self, primary_only=False,
371 level=locking.LEVEL_NODE):
372 """Helper function to declare instances' nodes for locking.
374 This function should be called after locking one or more instances to lock
375 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
376 with all primary or secondary nodes for instances already locked and
377 present in self.needed_locks[locking.LEVEL_INSTANCE].
379 It should be called from DeclareLocks, and for safety only works if
380 self.recalculate_locks[locking.LEVEL_NODE] is set.
382 In the future it may grow parameters to just lock some instance's nodes, or
383 to just lock primaries or secondary nodes, if needed.
385 It should be called from DeclareLocks in a way similar to::
387 if level == locking.LEVEL_NODE:
388 self._LockInstancesNodes()
390 @type primary_only: boolean
391 @param primary_only: only lock primary nodes of locked instances
392 @param level: Which lock level to use for locking nodes
395 assert level in self.recalculate_locks, \
396 "_LockInstancesNodes helper function called with no nodes to recalculate"
398 # TODO: check if we've really been called with the instance locks held
400 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
401 # future we might want to have different behaviors depending on the value
402 # of self.recalculate_locks[locking.LEVEL_NODE]
404 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
405 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
406 wanted_nodes.append(instance.primary_node)
408 wanted_nodes.extend(instance.secondary_nodes)
410 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
411 self.needed_locks[level] = wanted_nodes
412 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
413 self.needed_locks[level].extend(wanted_nodes)
415 raise errors.ProgrammerError("Unknown recalculation mode")
417 del self.recalculate_locks[level]
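# Illustrative sketch (not part of the original module): a hypothetical LU
# combining _ExpandAndLockInstance, needed_locks and _LockInstancesNodes in
# the way the docstrings above describe; the class name below is made up.
#
#   class LUHypotheticalInstanceOp(LogicalUnit):
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#       # Node locks cannot be computed yet; have them recalculated later
#       self.needed_locks[locking.LEVEL_NODE] = []
#       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#     def DeclareLocks(self, level):
#       if level == locking.LEVEL_NODE:
#         self._LockInstancesNodes()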
420 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
421 """Simple LU which runs no hooks.
423 This LU is intended as a parent for other LogicalUnits which will
424 run no hooks, in order to reduce duplicate code.
430 def BuildHooksEnv(self):
431 """Empty BuildHooksEnv for NoHooksLu.
433 This just raises an error.
436 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
438 def BuildHooksNodes(self):
439 """Empty BuildHooksNodes for NoHooksLU.
442 raise AssertionError("BuildHooksNodes called for NoHooksLU")
446 """Tasklet base class.
448 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
449 they can mix legacy code with tasklets. Locking needs to be done in the LU,
450 tasklets know nothing about locks.
452 Subclasses must follow these rules:
453 - Implement CheckPrereq
457 def __init__(self, lu):
464 def CheckPrereq(self):
465 """Check prerequisites for this tasklets.
467 This method should check whether the prerequisites for the execution of
468 this tasklet are fulfilled. It can do internode communication, but it
469 should be idempotent - no cluster or system changes are allowed.
471 The method should raise errors.OpPrereqError in case something is not
472 fulfilled. Its return value is ignored.
474 This method should also update all parameters to their canonical form if it
475 hasn't been done before.
480 def Exec(self, feedback_fn):
481 """Execute the tasklet.
483 This method should implement the actual work. It should raise
484 errors.OpExecError for failures that are somewhat dealt with in code, or
488 raise NotImplementedError
492 """Base for query utility classes.
495 #: Attribute holding field definitions
501 def __init__(self, qfilter, fields, use_locking):
502 """Initializes this class.
505 self.use_locking = use_locking
507 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
508 namefield=self.SORT_FIELD)
509 self.requested_data = self.query.RequestedData()
510 self.names = self.query.RequestedNames()
512 # Sort only if no names were requested
513 self.sort_by_name = not self.names
515 self.do_locking = None
518 def _GetNames(self, lu, all_names, lock_level):
519 """Helper function to determine names asked for in the query.
523 names = lu.owned_locks(lock_level)
527 if self.wanted == locking.ALL_SET:
528 assert not self.names
529 # caller didn't specify names, so ordering is not important
530 return utils.NiceSort(names)
532 # caller specified names and we must keep the same order
534 assert not self.do_locking or lu.glm.is_owned(lock_level)
536 missing = set(self.wanted).difference(names)
538 raise errors.OpExecError("Some items were removed before retrieving"
539 " their data: %s" % missing)
541 # Return expanded names
544 def ExpandNames(self, lu):
545 """Expand names for this query.
547 See L{LogicalUnit.ExpandNames}.
550 raise NotImplementedError()
552 def DeclareLocks(self, lu, level):
553 """Declare locks for this query.
555 See L{LogicalUnit.DeclareLocks}.
558 raise NotImplementedError()
560 def _GetQueryData(self, lu):
561 """Collects all data for this query.
563 @return: Query data object
566 raise NotImplementedError()
568 def NewStyleQuery(self, lu):
569 """Collect data and execute query.
572 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
573 sort_by_name=self.sort_by_name)
575 def OldStyleQuery(self, lu):
576 """Collect data and execute query.
579 return self.query.OldStyleQuery(self._GetQueryData(lu),
580 sort_by_name=self.sort_by_name)
584 """Returns a dict declaring all lock levels shared.
587 return dict.fromkeys(locking.LEVELS, 1)
590 def _MakeLegacyNodeInfo(data):
591 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
593 Converts the data into a single dictionary. This is fine for most use cases,
594 but some require information from more than one volume group or hypervisor.
597 (bootid, (vg_info, ), (hv_info, )) = data
599 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
604 def _AnnotateDiskParams(instance, devs, cfg):
605 """Little helper wrapper to the rpc annotation method.
607 @param instance: The instance object
608 @type devs: List of L{objects.Disk}
609 @param devs: The root devices (not any of its children!)
610 @param cfg: The config object
611 @return: The annotated disk copies
612 @see L{rpc.AnnotateDiskParams}
615 return rpc.AnnotateDiskParams(instance.disk_template, devs,
616 cfg.GetInstanceDiskParams(instance))
619 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
621 """Checks if node groups for locked instances are still correct.
623 @type cfg: L{config.ConfigWriter}
624 @param cfg: Cluster configuration
625 @type instances: dict; string as key, L{objects.Instance} as value
626 @param instances: Dictionary, instance name as key, instance object as value
627 @type owned_groups: iterable of string
628 @param owned_groups: List of owned groups
629 @type owned_nodes: iterable of string
630 @param owned_nodes: List of owned nodes
631 @type cur_group_uuid: string or None
632 @param cur_group_uuid: Optional group UUID to check against instance's groups
635 for (name, inst) in instances.items():
636 assert owned_nodes.issuperset(inst.all_nodes), \
637 "Instance %s's nodes changed while we kept the lock" % name
639 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
641 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
642 "Instance %s has no node in group %s" % (name, cur_group_uuid)
645 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
646 """Checks if the owned node groups are still correct for an instance.
648 @type cfg: L{config.ConfigWriter}
649 @param cfg: The cluster configuration
650 @type instance_name: string
651 @param instance_name: Instance name
652 @type owned_groups: set or frozenset
653 @param owned_groups: List of currently owned node groups
656 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
658 if not owned_groups.issuperset(inst_groups):
659 raise errors.OpPrereqError("Instance %s's node groups changed since"
660 " locks were acquired, current groups are"
661 " are '%s', owning groups '%s'; retry the"
664 utils.CommaJoin(inst_groups),
665 utils.CommaJoin(owned_groups)),
671 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
672 """Checks if the instances in a node group are still correct.
674 @type cfg: L{config.ConfigWriter}
675 @param cfg: The cluster configuration
676 @type group_uuid: string
677 @param group_uuid: Node group UUID
678 @type owned_instances: set or frozenset
679 @param owned_instances: List of currently owned instances
682 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
683 if owned_instances != wanted_instances:
684 raise errors.OpPrereqError("Instances in node group '%s' changed since"
685 " locks were acquired, wanted '%s', have '%s';"
686 " retry the operation" %
688 utils.CommaJoin(wanted_instances),
689 utils.CommaJoin(owned_instances)),
692 return wanted_instances
695 def _SupportsOob(cfg, node):
696 """Tells if node supports OOB.
698 @type cfg: L{config.ConfigWriter}
699 @param cfg: The cluster configuration
700 @type node: L{objects.Node}
701 @param node: The node
702 @return: The OOB script if supported or an empty string otherwise
705 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
708 def _CopyLockList(names):
709 """Makes a copy of a list of lock names.
711 Handles L{locking.ALL_SET} correctly.
714 if names == locking.ALL_SET:
715 return locking.ALL_SET
720 def _GetWantedNodes(lu, nodes):
721 """Returns list of checked and expanded node names.
723 @type lu: L{LogicalUnit}
724 @param lu: the logical unit on whose behalf we execute
726 @param nodes: list of node names or None for all nodes
728 @return: the list of nodes, sorted
729 @raise errors.ProgrammerError: if the nodes parameter is wrong type
733 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
735 return utils.NiceSort(lu.cfg.GetNodeList())
738 def _GetWantedInstances(lu, instances):
739 """Returns list of checked and expanded instance names.
741 @type lu: L{LogicalUnit}
742 @param lu: the logical unit on whose behalf we execute
743 @type instances: list
744 @param instances: list of instance names or None for all instances
746 @return: the list of instances, sorted
747 @raise errors.OpPrereqError: if the instances parameter is wrong type
748 @raise errors.OpPrereqError: if any of the passed instances is not found
752 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
754 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
758 def _GetUpdatedParams(old_params, update_dict,
759 use_default=True, use_none=False):
760 """Return the new version of a parameter dictionary.
762 @type old_params: dict
763 @param old_params: old parameters
764 @type update_dict: dict
765 @param update_dict: dict containing new parameter values, or
766 constants.VALUE_DEFAULT to reset the parameter to its default
768 @type use_default: boolean
769 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
770 values as 'to be deleted' values
771 @type use_none: boolean
772 @param use_none: whether to recognise C{None} values as 'to be
775 @return: the new parameter dictionary
778 params_copy = copy.deepcopy(old_params)
779 for key, val in update_dict.iteritems():
780 if ((use_default and val == constants.VALUE_DEFAULT) or
781 (use_none and val is None)):
787 params_copy[key] = val
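# Behaviour sketch for _GetUpdatedParams (illustrative only, made-up values):
# with the default use_default=True, a value of constants.VALUE_DEFAULT
# removes the key, anything else overwrites or adds it.
#
#   >>> old = {"kernel_path": "/vmlinuz", "root_path": "/dev/sda1"}
#   >>> upd = {"kernel_path": constants.VALUE_DEFAULT, "serial_console": True}
#   >>> sorted(_GetUpdatedParams(old, upd).items())
#   [('root_path', '/dev/sda1'), ('serial_console', True)]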
791 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
792 """Return the new version of a instance policy.
794 @param group_policy: whether this policy applies to a group and thus
795 we should support removal of policy entries
798 use_none = use_default = group_policy
799 ipolicy = copy.deepcopy(old_ipolicy)
800 for key, value in new_ipolicy.items():
801 if key not in constants.IPOLICY_ALL_KEYS:
802 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
804 if key in constants.IPOLICY_ISPECS:
805 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
806 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
808 use_default=use_default)
810 if (not value or value == [constants.VALUE_DEFAULT] or
811 value == constants.VALUE_DEFAULT):
815 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
816 " on the cluster'" % key,
819 if key in constants.IPOLICY_PARAMETERS:
820 # FIXME: we assume all such values are float
822 ipolicy[key] = float(value)
823 except (TypeError, ValueError), err:
824 raise errors.OpPrereqError("Invalid value for attribute"
825 " '%s': '%s', error: %s" %
826 (key, value, err), errors.ECODE_INVAL)
828 # FIXME: we assume all others are lists; this should be redone
830 ipolicy[key] = list(value)
832 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
833 except errors.ConfigurationError, err:
834 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
839 def _UpdateAndVerifySubDict(base, updates, type_check):
840 """Updates and verifies a dict with sub dicts of the same type.
842 @param base: The dict with the old data
843 @param updates: The dict with the new data
844 @param type_check: Dict suitable to ForceDictType to verify correct types
845 @returns: A new dict with updated and verified values
849 new = _GetUpdatedParams(old, value)
850 utils.ForceDictType(new, type_check)
853 ret = copy.deepcopy(base)
854 ret.update(dict((key, fn(base.get(key, {}), value))
855 for key, value in updates.items()))
859 def _MergeAndVerifyHvState(op_input, obj_input):
860 """Combines the hv state from an opcode with the one of the object
862 @param op_input: The input dict from the opcode
863 @param obj_input: The input dict from the objects
864 @return: The verified and updated dict
868 invalid_hvs = set(op_input) - constants.HYPER_TYPES
870 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
871 " %s" % utils.CommaJoin(invalid_hvs),
873 if obj_input is None:
875 type_check = constants.HVSTS_PARAMETER_TYPES
876 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
881 def _MergeAndVerifyDiskState(op_input, obj_input):
882 """Combines the disk state from an opcode with the one of the object
884 @param op_input: The input dict from the opcode
885 @param obj_input: The input dict from the objects
886 @return: The verified and updated dict
889 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
891 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
892 utils.CommaJoin(invalid_dst),
894 type_check = constants.DSS_PARAMETER_TYPES
895 if obj_input is None:
897 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
899 for key, value in op_input.items())
904 def _ReleaseLocks(lu, level, names=None, keep=None):
905 """Releases locks owned by an LU.
907 @type lu: L{LogicalUnit}
908 @param level: Lock level
909 @type names: list or None
910 @param names: Names of locks to release
911 @type keep: list or None
912 @param keep: Names of locks to retain
915 assert not (keep is not None and names is not None), \
916 "Only one of the 'names' and the 'keep' parameters can be given"
918 if names is not None:
919 should_release = names.__contains__
921 should_release = lambda name: name not in keep
923 should_release = None
925 owned = lu.owned_locks(level)
927 # Not owning any lock at this level, do nothing
934 # Determine which locks to release
936 if should_release(name):
941 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
943 # Release just some locks
944 lu.glm.release(level, names=release)
946 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
949 lu.glm.release(level)
951 assert not lu.glm.is_owned(level), "No locks should be owned"
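# Usage sketch (illustrative, hypothetical call site): after an instance has
# been placed, keep only the lock of its primary node and release the rest:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.instance.primary_node])
#
# Passing names=... instead releases exactly the listed locks; the two
# parameters are mutually exclusive, as asserted above.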
954 def _MapInstanceDisksToNodes(instances):
955 """Creates a map from (node, volume) to instance name.
957 @type instances: list of L{objects.Instance}
958 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
961 return dict(((node, vol), inst.name)
962 for inst in instances
963 for (node, vols) in inst.MapLVsByNode().items()
967 def _RunPostHook(lu, node_name):
968 """Runs the post-hook for an opcode on a single node.
971 hm = lu.proc.BuildHooksManager(lu)
973 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
974 except Exception, err: # pylint: disable=W0703
975 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
978 def _CheckOutputFields(static, dynamic, selected):
979 """Checks whether all selected fields are valid.
981 @type static: L{utils.FieldSet}
982 @param static: static fields set
983 @type dynamic: L{utils.FieldSet}
984 @param dynamic: dynamic fields set
991 delta = f.NonMatching(selected)
993 raise errors.OpPrereqError("Unknown output fields selected: %s"
994 % ",".join(delta), errors.ECODE_INVAL)
997 def _CheckGlobalHvParams(params):
998 """Validates that given hypervisor params are not global ones.
1000 This will ensure that instances don't get customised versions of global parameters.
1004 used_globals = constants.HVC_GLOBALS.intersection(params)
1006 msg = ("The following hypervisor parameters are global and cannot"
1007 " be customized at instance level, please modify them at"
1008 " cluster level: %s" % utils.CommaJoin(used_globals))
1009 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1012 def _CheckNodeOnline(lu, node, msg=None):
1013 """Ensure that a given node is online.
1015 @param lu: the LU on behalf of which we make the check
1016 @param node: the node to check
1017 @param msg: if passed, should be a message to replace the default one
1018 @raise errors.OpPrereqError: if the node is offline
1022 msg = "Can't use offline node"
1023 if lu.cfg.GetNodeInfo(node).offline:
1024 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
1027 def _CheckNodeNotDrained(lu, node):
1028 """Ensure that a given node is not drained.
1030 @param lu: the LU on behalf of which we make the check
1031 @param node: the node to check
1032 @raise errors.OpPrereqError: if the node is drained
1035 if lu.cfg.GetNodeInfo(node).drained:
1036 raise errors.OpPrereqError("Can't use drained node %s" % node,
1040 def _CheckNodeVmCapable(lu, node):
1041 """Ensure that a given node is vm capable.
1043 @param lu: the LU on behalf of which we make the check
1044 @param node: the node to check
1045 @raise errors.OpPrereqError: if the node is not vm capable
1048 if not lu.cfg.GetNodeInfo(node).vm_capable:
1049 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
1053 def _CheckNodeHasOS(lu, node, os_name, force_variant):
1054 """Ensure that a node supports a given OS.
1056 @param lu: the LU on behalf of which we make the check
1057 @param node: the node to check
1058 @param os_name: the OS to query about
1059 @param force_variant: whether to ignore variant errors
1060 @raise errors.OpPrereqError: if the node is not supporting the OS
1063 result = lu.rpc.call_os_get(node, os_name)
1064 result.Raise("OS '%s' not in supported OS list for node %s" %
1066 prereq=True, ecode=errors.ECODE_INVAL)
1067 if not force_variant:
1068 _CheckOSVariant(result.payload, os_name)
1071 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1072 """Ensure that a node has the given secondary ip.
1074 @type lu: L{LogicalUnit}
1075 @param lu: the LU on behalf of which we make the check
1077 @param node: the node to check
1078 @type secondary_ip: string
1079 @param secondary_ip: the ip to check
1080 @type prereq: boolean
1081 @param prereq: whether to throw a prerequisite or an execute error
1082 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1083 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1086 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1087 result.Raise("Failure checking secondary ip on node %s" % node,
1088 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1089 if not result.payload:
1090 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1091 " please fix and re-run this command" % secondary_ip)
1093 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1095 raise errors.OpExecError(msg)
1098 def _GetClusterDomainSecret():
1099 """Reads the cluster domain secret.
1102 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1106 def _CheckInstanceState(lu, instance, req_states, msg=None):
1107 """Ensure that an instance is in one of the required states.
1109 @param lu: the LU on behalf of which we make the check
1110 @param instance: the instance to check
1111 @param msg: if passed, should be a message to replace the default one
1112 @raise errors.OpPrereqError: if the instance is not in the required state
1116 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1117 if instance.admin_state not in req_states:
1118 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1119 (instance.name, instance.admin_state, msg),
1122 if constants.ADMINST_UP not in req_states:
1123 pnode = instance.primary_node
1124 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1125 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1126 prereq=True, ecode=errors.ECODE_ENVIRON)
1128 if instance.name in ins_l.payload:
1129 raise errors.OpPrereqError("Instance %s is running, %s" %
1130 (instance.name, msg), errors.ECODE_STATE)
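# Usage sketch (illustrative): LUs that must only act on stopped instances
# typically call this with the INSTANCE_DOWN list defined at the top of the
# module; the message below is a made-up example:
#
#   _CheckInstanceState(self, instance, INSTANCE_DOWN,
#                       msg="cannot reconfigure a running instance")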
1133 def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
1134 """Computes if value is in the desired range.
1136 @param name: name of the parameter for which we perform the check
1137 @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
1139 @param ipolicy: dictionary containing min, max and std values
1140 @param value: actual value that we want to use
1141 @return: None or element not meeting the criteria
1145 if value in [None, constants.VALUE_AUTO]:
1147 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1148 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1149 if value > max_v or min_v > value:
1151 fqn = "%s/%s" % (name, qualifier)
1154 return ("%s value %s is not in range [%s, %s]" %
1155 (fqn, value, min_v, max_v))
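# Behaviour sketch (illustrative, made-up policy values): given an ipolicy
# whose min/max memory bounds are 128 and 4096, a request of 8192 yields an
# error string, while a value inside the range (or C{None}/
# C{constants.VALUE_AUTO}) yields None:
#
#   >>> pol = {constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
#   ...        constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096}}
#   >>> _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", pol, 8192) is None
#   False
#   >>> _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, "", pol, 1024) is None
#   True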
1159 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1160 nic_count, disk_sizes, spindle_use,
1161 _compute_fn=_ComputeMinMaxSpec):
1162 """Verifies ipolicy against provided specs.
1165 @param ipolicy: The ipolicy
1167 @param mem_size: The memory size
1168 @type cpu_count: int
1169 @param cpu_count: Used cpu cores
1170 @type disk_count: int
1171 @param disk_count: Number of disks used
1172 @type nic_count: int
1173 @param nic_count: Number of nics used
1174 @type disk_sizes: list of ints
1175 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1176 @type spindle_use: int
1177 @param spindle_use: The number of spindles this instance uses
1178 @param _compute_fn: The compute function (unittest only)
1179 @return: A list of violations, or an empty list if no violations are found
1182 assert disk_count == len(disk_sizes)
1185 (constants.ISPEC_MEM_SIZE, "", mem_size),
1186 (constants.ISPEC_CPU_COUNT, "", cpu_count),
1187 (constants.ISPEC_DISK_COUNT, "", disk_count),
1188 (constants.ISPEC_NIC_COUNT, "", nic_count),
1189 (constants.ISPEC_SPINDLE_USE, "", spindle_use),
1190 ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
1191 for idx, d in enumerate(disk_sizes)]
1194 (_compute_fn(name, qualifier, ipolicy, value)
1195 for (name, qualifier, value) in test_settings))
1198 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1199 _compute_fn=_ComputeIPolicySpecViolation):
1200 """Compute if instance meets the specs of ipolicy.
1203 @param ipolicy: The ipolicy to verify against
1204 @type instance: L{objects.Instance}
1205 @param instance: The instance to verify
1206 @param _compute_fn: The function to verify ipolicy (unittest only)
1207 @see: L{_ComputeIPolicySpecViolation}
1210 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1211 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1212 spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
1213 disk_count = len(instance.disks)
1214 disk_sizes = [disk.size for disk in instance.disks]
1215 nic_count = len(instance.nics)
1217 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1218 disk_sizes, spindle_use)
1221 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1222 _compute_fn=_ComputeIPolicySpecViolation):
1223 """Compute if instance specs meets the specs of ipolicy.
1226 @param ipolicy: The ipolicy to verify against
1227 @param instance_spec: dict
1228 @param instance_spec: The instance spec to verify
1229 @param _compute_fn: The function to verify ipolicy (unittest only)
1230 @see: L{_ComputeIPolicySpecViolation}
1233 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1234 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1235 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1236 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1237 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1238 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
1240 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1241 disk_sizes, spindle_use)
1244 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1246 _compute_fn=_ComputeIPolicyInstanceViolation):
1247 """Compute if instance meets the specs of the new target group.
1249 @param ipolicy: The ipolicy to verify
1250 @param instance: The instance object to verify
1251 @param current_group: The current group of the instance
1252 @param target_group: The new group of the instance
1253 @param _compute_fn: The function to verify ipolicy (unittest only)
1254 @see: L{_ComputeIPolicySpecViolation}
1257 if current_group == target_group:
1260 return _compute_fn(ipolicy, instance)
1263 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1264 _compute_fn=_ComputeIPolicyNodeViolation):
1265 """Checks that the target node is correct in terms of instance policy.
1267 @param ipolicy: The ipolicy to verify
1268 @param instance: The instance object to verify
1269 @param node: The new node to relocate
1270 @param ignore: Ignore violations of the ipolicy
1271 @param _compute_fn: The function to verify ipolicy (unittest only)
1272 @see: L{_ComputeIPolicySpecViolation}
1275 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1276 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1279 msg = ("Instance does not meet target node group's (%s) instance"
1280 " policy: %s") % (node.group, utils.CommaJoin(res))
1284 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1287 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1288 """Computes a set of any instances that would violate the new ipolicy.
1290 @param old_ipolicy: The current (still in-place) ipolicy
1291 @param new_ipolicy: The new (to become) ipolicy
1292 @param instances: List of instances to verify
1293 @return: A list of instances which violate the new ipolicy but
1297 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1298 _ComputeViolatingInstances(old_ipolicy, instances))
1301 def _ExpandItemName(fn, name, kind):
1302 """Expand an item name.
1304 @param fn: the function to use for expansion
1305 @param name: requested item name
1306 @param kind: text description ('Node' or 'Instance')
1307 @return: the resolved (full) name
1308 @raise errors.OpPrereqError: if the item is not found
1311 full_name = fn(name)
1312 if full_name is None:
1313 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1318 def _ExpandNodeName(cfg, name):
1319 """Wrapper over L{_ExpandItemName} for nodes."""
1320 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1323 def _ExpandInstanceName(cfg, name):
1324 """Wrapper over L{_ExpandItemName} for instance."""
1325 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
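# Usage sketch (illustrative): canonicalise a user-supplied short name as
# early as possible, exactly as _ExpandAndLockInstance does above:
#
#   self.op.instance_name = _ExpandInstanceName(self.cfg, self.op.instance_name)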
1327 def _BuildNetworkHookEnv(name, network, gateway, network6, gateway6,
1328 network_type, mac_prefix, tags, serial_no):
1331 env["NETWORK_NAME"] = name
1333 env["NETWORK_SUBNET"] = network
1335 env["NETWORK_GATEWAY"] = gateway
1337 env["NETWORK_SUBNET6"] = network6
1339 env["NETWORK_GATEWAY6"] = gateway6
1341 env["NETWORK_MAC_PREFIX"] = mac_prefix
1343 env["NETWORK_TYPE"] = network_type
1345 env["NETWORK_TAGS"] = " ".join(tags)
1347 env["NETWORK_SERIAL_NO"] = serial_no
1352 def _BuildNetworkHookEnvByObject(lu, network):
1354 "name": network.name,
1355 "network": network.network,
1356 "gateway": network.gateway,
1357 "network6": network.network6,
1358 "gateway6": network.gateway6,
1359 "network_type": network.network_type,
1360 "mac_prefix": network.mac_prefix,
1361 "tags": network.tags,
1362 "serial_no": network.serial_no,
1364 return _BuildNetworkHookEnv(**args)
1367 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1368 minmem, maxmem, vcpus, nics, disk_template, disks,
1369 bep, hvp, hypervisor_name, tags, serial_no):
1370 """Builds instance related env variables for hooks
1372 This builds the hook environment from individual variables.
1375 @param name: the name of the instance
1376 @type primary_node: string
1377 @param primary_node: the name of the instance's primary node
1378 @type secondary_nodes: list
1379 @param secondary_nodes: list of secondary nodes as strings
1380 @type os_type: string
1381 @param os_type: the name of the instance's OS
1382 @type status: string
1383 @param status: the desired status of the instance
1384 @type minmem: string
1385 @param minmem: the minimum memory size of the instance
1386 @type maxmem: string
1387 @param maxmem: the maximum memory size of the instance
1389 @param vcpus: the count of VCPUs the instance has
1391 @param nics: list of tuples (ip, mac, mode, link, network, netinfo)
1392 representing the NICs the instance has
1393 @type disk_template: string
1394 @param disk_template: the disk template of the instance
1396 @param disks: the list of (size, mode) pairs
1398 @param bep: the backend parameters for the instance
1400 @param hvp: the hypervisor parameters for the instance
1401 @type hypervisor_name: string
1402 @param hypervisor_name: the hypervisor for the instance
1404 @param tags: list of instance tags as strings
1406 @return: the hook environment for this instance
1411 "INSTANCE_NAME": name,
1412 "INSTANCE_PRIMARY": primary_node,
1413 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1414 "INSTANCE_OS_TYPE": os_type,
1415 "INSTANCE_STATUS": status,
1416 "INSTANCE_MINMEM": minmem,
1417 "INSTANCE_MAXMEM": maxmem,
1418 # TODO(2.7) remove deprecated "memory" value
1419 "INSTANCE_MEMORY": maxmem,
1420 "INSTANCE_VCPUS": vcpus,
1421 "INSTANCE_DISK_TEMPLATE": disk_template,
1422 "INSTANCE_HYPERVISOR": hypervisor_name,
1423 "INSTANCE_SERIAL_NO": serial_no,
1426 nic_count = len(nics)
1427 for idx, (ip, mac, mode, link, network, netinfo) in enumerate(nics):
1430 env["INSTANCE_NIC%d_IP" % idx] = ip
1431 env["INSTANCE_NIC%d_MAC" % idx] = mac
1432 env["INSTANCE_NIC%d_MODE" % idx] = mode
1433 env["INSTANCE_NIC%d_LINK" % idx] = link
1435 env["INSTANCE_NIC%d_NETWORK" % idx] = network
1437 nobj = objects.Network.FromDict(netinfo)
1439 env["INSTANCE_NIC%d_NETWORK_SUBNET" % idx] = nobj.network
1441 env["INSTANCE_NIC%d_NETWORK_GATEWAY" % idx] = nobj.gateway
1443 env["INSTANCE_NIC%d_NETWORK_SUBNET6" % idx] = nobj.network6
1445 env["INSTANCE_NIC%d_NETWORK_GATEWAY6" % idx] = nobj.gateway6
1447 env["INSTANCE_NIC%d_NETWORK_MAC_PREFIX" % idx] = nobj.mac_prefix
1448 if nobj.network_type:
1449 env["INSTANCE_NIC%d_NETWORK_TYPE" % idx] = nobj.network_type
1451 env["INSTANCE_NIC%d_NETWORK_TAGS" % idx] = " ".join(nobj.tags)
1452 if mode == constants.NIC_MODE_BRIDGED:
1453 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1457 env["INSTANCE_NIC_COUNT"] = nic_count
1460 disk_count = len(disks)
1461 for idx, (size, mode) in enumerate(disks):
1462 env["INSTANCE_DISK%d_SIZE" % idx] = size
1463 env["INSTANCE_DISK%d_MODE" % idx] = mode
1467 env["INSTANCE_DISK_COUNT"] = disk_count
1472 env["INSTANCE_TAGS"] = " ".join(tags)
1474 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1475 for key, value in source.items():
1476 env["INSTANCE_%s_%s" % (kind, key)] = value
1480 def _NICToTuple(lu, nic):
1481 """Build a tupple of nic information.
1483 @type lu: L{LogicalUnit}
1484 @param lu: the logical unit on whose behalf we execute
1485 @type nic: L{objects.NIC}
1486 @param nic: nic to convert to hooks tuple
1489 cluster = lu.cfg.GetClusterInfo()
1492 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1493 mode = filled_params[constants.NIC_MODE]
1494 link = filled_params[constants.NIC_LINK]
1495 network = nic.network
1498 net_uuid = lu.cfg.LookupNetwork(network)
1500 nobj = lu.cfg.GetNetwork(net_uuid)
1501 netinfo = objects.Network.ToDict(nobj)
1502 return (ip, mac, mode, link, network, netinfo)
1504 def _NICListToTuple(lu, nics):
1505 """Build a list of nic information tuples.
1507 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1508 value in LUInstanceQueryData.
1510 @type lu: L{LogicalUnit}
1511 @param lu: the logical unit on whose behalf we execute
1512 @type nics: list of L{objects.NIC}
1513 @param nics: list of nics to convert to hooks tuples
1517 cluster = lu.cfg.GetClusterInfo()
1519 hooks_nics.append(_NICToTuple(lu, nic))
1522 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1523 """Builds instance related env variables for hooks from an object.
1525 @type lu: L{LogicalUnit}
1526 @param lu: the logical unit on whose behalf we execute
1527 @type instance: L{objects.Instance}
1528 @param instance: the instance for which we should build the
1530 @type override: dict
1531 @param override: dictionary with key/values that will override
1534 @return: the hook environment dictionary
1537 cluster = lu.cfg.GetClusterInfo()
1538 bep = cluster.FillBE(instance)
1539 hvp = cluster.FillHV(instance)
1541 "name": instance.name,
1542 "primary_node": instance.primary_node,
1543 "secondary_nodes": instance.secondary_nodes,
1544 "os_type": instance.os,
1545 "status": instance.admin_state,
1546 "maxmem": bep[constants.BE_MAXMEM],
1547 "minmem": bep[constants.BE_MINMEM],
1548 "vcpus": bep[constants.BE_VCPUS],
1549 "nics": _NICListToTuple(lu, instance.nics),
1550 "disk_template": instance.disk_template,
1551 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1554 "hypervisor_name": instance.hypervisor,
1555 "tags": instance.tags,
1556 "serial_no": instance.serial_no,
1559 args.update(override)
1560 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1563 def _AdjustCandidatePool(lu, exceptions):
1564 """Adjust the candidate pool after node operations.
1567 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1569 lu.LogInfo("Promoted nodes to master candidate role: %s",
1570 utils.CommaJoin(node.name for node in mod_list))
1571 for name in mod_list:
1572 lu.context.ReaddNode(name)
1573 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1575 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1579 def _DecideSelfPromotion(lu, exceptions=None):
1580 """Decide whether I should promote myself as a master candidate.
1583 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1584 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1585 # the new node will increase mc_max with one, so:
1586 mc_should = min(mc_should + 1, cp_size)
1587 return mc_now < mc_should
1590 def _CalculateGroupIPolicy(cluster, group):
1591 """Calculate instance policy for group.
1594 return cluster.SimpleFillIPolicy(group.ipolicy)
1597 def _ComputeViolatingInstances(ipolicy, instances):
1598 """Computes a set of instances who violates given ipolicy.
1600 @param ipolicy: The ipolicy to verify
1601 @type instances: object.Instance
1602 @param instances: List of instances to verify
1603 @return: A frozenset of instance names violating the ipolicy
1606 return frozenset([inst.name for inst in instances
1607 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1610 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1611 """Check that the brigdes needed by a list of nics exist.
1614 cluster = lu.cfg.GetClusterInfo()
1615 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1616 brlist = [params[constants.NIC_LINK] for params in paramslist
1617 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1619 result = lu.rpc.call_bridges_exist(target_node, brlist)
1620 result.Raise("Error checking bridges on destination node '%s'" %
1621 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1624 def _CheckInstanceBridgesExist(lu, instance, node=None):
1625 """Check that the brigdes needed by an instance exist.
1629 node = instance.primary_node
1630 _CheckNicsBridgesExist(lu, instance.nics, node)
1633 def _CheckOSVariant(os_obj, name):
1634 """Check whether an OS name conforms to the os variants specification.
1636 @type os_obj: L{objects.OS}
1637 @param os_obj: OS object to check
1639 @param name: OS name passed by the user, to check for validity
1642 variant = objects.OS.GetVariant(name)
1643 if not os_obj.supported_variants:
1645 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1646 " passed)" % (os_obj.name, variant),
1650 raise errors.OpPrereqError("OS name must include a variant",
1653 if variant not in os_obj.supported_variants:
1654 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1657 def _GetNodeInstancesInner(cfg, fn):
1658 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1661 def _GetNodeInstances(cfg, node_name):
1662 """Returns a list of all primary and secondary instances on a node.
1666 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1669 def _GetNodePrimaryInstances(cfg, node_name):
1670 """Returns primary instances on a node.
1673 return _GetNodeInstancesInner(cfg,
1674 lambda inst: node_name == inst.primary_node)
1677 def _GetNodeSecondaryInstances(cfg, node_name):
1678 """Returns secondary instances on a node.
1681 return _GetNodeInstancesInner(cfg,
1682 lambda inst: node_name in inst.secondary_nodes)
1685 def _GetStorageTypeArgs(cfg, storage_type):
1686 """Returns the arguments for a storage type.
1689 # Special case for file storage
1690 if storage_type == constants.ST_FILE:
1691 # storage.FileStorage wants a list of storage directories
1692 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1697 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1700 for dev in instance.disks:
1701 cfg.SetDiskID(dev, node_name)
1703 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
1705 result.Raise("Failed to get disk status from node %s" % node_name,
1706 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1708 for idx, bdev_status in enumerate(result.payload):
1709 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1715 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1716 """Check the sanity of iallocator and node arguments and use the
1717 cluster-wide iallocator if appropriate.
1719 Check that at most one of (iallocator, node) is specified. If none is
1720 specified, then the LU's opcode's iallocator slot is filled with the
1721 cluster-wide default iallocator.
1723 @type iallocator_slot: string
1724 @param iallocator_slot: the name of the opcode iallocator slot
1725 @type node_slot: string
1726 @param node_slot: the name of the opcode target node slot
1729 node = getattr(lu.op, node_slot, None)
1730 iallocator = getattr(lu.op, iallocator_slot, None)
1732 if node is not None and iallocator is not None:
1733 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1735 elif node is None and iallocator is None:
1736 default_iallocator = lu.cfg.GetDefaultIAllocator()
1737 if default_iallocator:
1738 setattr(lu.op, iallocator_slot, default_iallocator)
1740 raise errors.OpPrereqError("No iallocator or node given and no"
1741 " cluster-wide default iallocator found;"
1742 " please specify either an iallocator or a"
1743 " node, or set a cluster-wide default"
1747 def _GetDefaultIAllocator(cfg, iallocator):
1748 """Decides on which iallocator to use.
1750 @type cfg: L{config.ConfigWriter}
1751 @param cfg: Cluster configuration object
1752 @type iallocator: string or None
1753 @param iallocator: Iallocator specified in opcode
1755 @return: Iallocator name
1759 # Use default iallocator
1760 iallocator = cfg.GetDefaultIAllocator()
1763 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1764 " opcode nor as a cluster-wide default",
1770 def _InstanceRunning(lu, instance):
1771 """Return True if instance is running else False."""
1773 remote_info = lu.rpc.call_instance_info(instance.primary_node,
1775 instance.hypervisor)
1776 remote_info.Raise("Error checking node %s" % instance.primary_node)
1777 instance_running = bool(remote_info.payload)
1778 return instance_running
1781 def _CheckHostnameSane(lu, name):
1782 """Ensures that a given hostname resolves to a 'sane' name.
1784 The given name is required to be a prefix of the resolved hostname,
1785 to prevent accidental mismatches.
1787 @param lu: the logical unit on behalf of which we're checking
1788 @param name: the name we should resolve and check
1789 @return: the resolved hostname object
1792 hostname = netutils.GetHostname(name=name)
1793 if hostname.name != name:
1794 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
1795 if not utils.MatchNameComponent(name, [hostname.name]):
1796 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
1797 " same as given hostname '%s'") %
1798 (hostname.name, name), errors.ECODE_INVAL)
1802 class LUClusterPostInit(LogicalUnit):
1803 """Logical unit for running hooks after cluster initialization.
1806 HPATH = "cluster-init"
1807 HTYPE = constants.HTYPE_CLUSTER
1809 def BuildHooksEnv(self):
1814 "OP_TARGET": self.cfg.GetClusterName(),
1817 def BuildHooksNodes(self):
1818 """Build hooks nodes.
1821 return ([], [self.cfg.GetMasterNode()])
1823 def Exec(self, feedback_fn):
1830 class LUClusterDestroy(LogicalUnit):
1831 """Logical unit for destroying the cluster.
1834 HPATH = "cluster-destroy"
1835 HTYPE = constants.HTYPE_CLUSTER
1837 def BuildHooksEnv(self):
1842 "OP_TARGET": self.cfg.GetClusterName(),
1845 def BuildHooksNodes(self):
1846 """Build hooks nodes.
1851 def CheckPrereq(self):
1852 """Check prerequisites.
1854 This checks whether the cluster is empty.
1856 Any errors are signaled by raising errors.OpPrereqError.
1859 master = self.cfg.GetMasterNode()
1861 nodelist = self.cfg.GetNodeList()
1862 if len(nodelist) != 1 or nodelist[0] != master:
1863 raise errors.OpPrereqError("There are still %d node(s) in"
1864 " this cluster." % (len(nodelist) - 1),
1866 instancelist = self.cfg.GetInstanceList()
1868 raise errors.OpPrereqError("There are still %d instance(s) in"
1869 " this cluster." % len(instancelist),
1872 def Exec(self, feedback_fn):
1873 """Destroys the cluster.
1876 master_params = self.cfg.GetMasterNetworkParameters()
1878 # Run post hooks on master node before it's removed
1879 _RunPostHook(self, master_params.name)
1881 ems = self.cfg.GetUseExternalMipScript()
1882 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1885 self.LogWarning("Error disabling the master IP address: %s",
1888 return master_params.name
1891 def _VerifyCertificate(filename):
1892 """Verifies a certificate for L{LUClusterVerifyConfig}.
1894 @type filename: string
1895 @param filename: Path to PEM file
1899 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1900 utils.ReadFile(filename))
1901 except Exception, err: # pylint: disable=W0703
1902 return (LUClusterVerifyConfig.ETYPE_ERROR,
1903 "Failed to load X509 certificate %s: %s" % (filename, err))
1906 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1907 constants.SSL_CERT_EXPIRATION_ERROR)
1910 fnamemsg = "While verifying %s: %s" % (filename, msg)
1915 return (None, fnamemsg)
1916 elif errcode == utils.CERT_WARNING:
1917 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1918 elif errcode == utils.CERT_ERROR:
1919 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1921 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1924 def _GetAllHypervisorParameters(cluster, instances):
1925 """Compute the set of all hypervisor parameters.
1927 @type cluster: L{objects.Cluster}
1928 @param cluster: the cluster object
1929 @type instances: list of L{objects.Instance}
1930 @param instances: additional instances from which to obtain parameters
1931 @rtype: list of (origin, hypervisor, parameters)
1932 @return: a list with all parameters found, indicating the hypervisor they
1933 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1938 for hv_name in cluster.enabled_hypervisors:
1939 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1941 for os_name, os_hvp in cluster.os_hvp.items():
1942 for hv_name, hv_params in os_hvp.items():
1944 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1945 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1947 # TODO: collapse identical parameter values in a single one
1948 for instance in instances:
1949 if instance.hvparams:
1950 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1951 cluster.FillHV(instance)))
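# Illustrative sketch of the (origin, hypervisor, parameters) triples collected
# above (the hypervisor, OS and instance names are made up):
#   ("cluster", "kvm", {...cluster-level defaults...})
#   ("os debian-image", "kvm", {...defaults overridden per OS...})
#   ("instance inst1.example.com", "kvm", {...fully filled instance params...})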
1956 class _VerifyErrors(object):
1957 """Mix-in for cluster/group verify LUs.
1959 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1960 self.op and self._feedback_fn to be available.)
1964 ETYPE_FIELD = "code"
1965 ETYPE_ERROR = "ERROR"
1966 ETYPE_WARNING = "WARNING"
1968 def _Error(self, ecode, item, msg, *args, **kwargs):
1969 """Format an error message.
1971 Based on the opcode's error_codes parameter, either format a
1972 parseable error code, or a simpler error string.
1974 This must be called only from Exec and functions called from Exec.
1977 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1978 itype, etxt, _ = ecode
1979 # first complete the msg
1982 # then format the whole message
1983 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1984 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1990 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1991 # and finally report it via the feedback_fn
1992 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1994 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1995 """Log an error message if the passed condition is True.
1999 or self.op.debug_simulate_errors) # pylint: disable=E1101
2001 # If the error code is in the list of ignored errors, demote the error to a
2003 (_, etxt, _) = ecode
2004 if etxt in self.op.ignore_errors: # pylint: disable=E1101
2005 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
2008 self._Error(ecode, *args, **kwargs)
2010 # do not mark the operation as failed for WARN cases only
2011 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
2012 self.bad = self.bad or cond
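# Illustrative sketch of the two message formats emitted by _Error (the values
# are made up): with the opcode's error_codes flag set the output is the
# machine-parseable colon-separated form, otherwise the human-readable one:
#   "ERROR:ENODESSH:node:node1.example.com:ssh communication with node failed"
#   "ERROR: node node1.example.com: ssh communication with node failed"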
2015 class LUClusterVerify(NoHooksLU):
2016 """Submits all jobs necessary to verify the cluster.
2021 def ExpandNames(self):
2022 self.needed_locks = {}
2024 def Exec(self, feedback_fn):
2027 if self.op.group_name:
2028 groups = [self.op.group_name]
2029 depends_fn = lambda: None
2031 groups = self.cfg.GetNodeGroupList()
2033 # Verify global configuration
2035 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
2038 # Always depend on global verification
2039 depends_fn = lambda: [(-len(jobs), [])]
2041 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
2042 ignore_errors=self.op.ignore_errors,
2043 depends=depends_fn())]
2044 for group in groups)
2046 # Fix up all parameters
2047 for op in itertools.chain(*jobs): # pylint: disable=W0142
2048 op.debug_simulate_errors = self.op.debug_simulate_errors
2049 op.verbose = self.op.verbose
2050 op.error_codes = self.op.error_codes
2052 op.skip_checks = self.op.skip_checks
2053 except AttributeError:
2054 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
2056 return ResultWithJobs(jobs)
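# Illustrative sketch of the job list built above for a cluster with two node
# groups (group names are made up); each group verification carries a relative
# dependency pointing back at the global configuration check:
#   [[OpClusterVerifyConfig(...)],
#    [OpClusterVerifyGroup(group_name="default", depends=[(-1, [])], ...)],
#    [OpClusterVerifyGroup(group_name="other", depends=[(-2, [])], ...)]]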
2059 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
2060 """Verifies the cluster config.
2065 def _VerifyHVP(self, hvp_data):
2066 """Verifies locally the syntax of the hypervisor parameters.
2069 for item, hv_name, hv_params in hvp_data:
2070 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2073 hv_class = hypervisor.GetHypervisor(hv_name)
2074 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2075 hv_class.CheckParameterSyntax(hv_params)
2076 except errors.GenericError, err:
2077 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
2079 def ExpandNames(self):
2080 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
2081 self.share_locks = _ShareAll()
2083 def CheckPrereq(self):
2084 """Check prerequisites.
2087 # Retrieve all information
2088 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
2089 self.all_node_info = self.cfg.GetAllNodesInfo()
2090 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2092 def Exec(self, feedback_fn):
2093 """Verify integrity of cluster, performing various test on nodes.
2097 self._feedback_fn = feedback_fn
2099 feedback_fn("* Verifying cluster config")
2101 for msg in self.cfg.VerifyConfig():
2102 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
2104 feedback_fn("* Verifying cluster certificate files")
2106 for cert_filename in constants.ALL_CERT_FILES:
2107 (errcode, msg) = _VerifyCertificate(cert_filename)
2108 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
2110 feedback_fn("* Verifying hypervisor parameters")
2112 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
2113 self.all_inst_info.values()))
2115 feedback_fn("* Verifying all nodes belong to an existing group")
2117 # We do this verification here because, should this bogus circumstance
2118 # occur, it would never be caught by VerifyGroup, which only acts on
2119 # nodes/instances reachable from existing node groups.
2121 dangling_nodes = set(node.name for node in self.all_node_info.values()
2122 if node.group not in self.all_group_info)
2124 dangling_instances = {}
2125 no_node_instances = []
2127 for inst in self.all_inst_info.values():
2128 if inst.primary_node in dangling_nodes:
2129 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
2130 elif inst.primary_node not in self.all_node_info:
2131 no_node_instances.append(inst.name)
2136 utils.CommaJoin(dangling_instances.get(node.name,
2138 for node in dangling_nodes]
2140 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
2142 "the following nodes (and their instances) belong to a non"
2143 " existing group: %s", utils.CommaJoin(pretty_dangling))
2145 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
2147 "the following instances have a non-existing primary-node:"
2148 " %s", utils.CommaJoin(no_node_instances))
2153 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2154 """Verifies the status of a node group.
2157 HPATH = "cluster-verify"
2158 HTYPE = constants.HTYPE_CLUSTER
2161 _HOOKS_INDENT_RE = re.compile("^", re.M)
2163 class NodeImage(object):
2164 """A class representing the logical and physical status of a node.
2167 @ivar name: the node name to which this object refers
2168 @ivar volumes: a structure as returned from
2169 L{ganeti.backend.GetVolumeList} (runtime)
2170 @ivar instances: a list of running instances (runtime)
2171 @ivar pinst: list of configured primary instances (config)
2172 @ivar sinst: list of configured secondary instances (config)
2173 @ivar sbp: dictionary of {primary-node: list of instances} for all
2174 instances for which this node is secondary (config)
2175 @ivar mfree: free memory, as reported by hypervisor (runtime)
2176 @ivar dfree: free disk, as reported by the node (runtime)
2177 @ivar offline: the offline status (config)
2178 @type rpc_fail: boolean
2179 @ivar rpc_fail: whether the RPC verify call was successful (overall,
2180 not whether the individual keys were correct) (runtime)
2181 @type lvm_fail: boolean
2182 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2183 @type hyp_fail: boolean
2184 @ivar hyp_fail: whether the RPC call didn't return the instance list
2185 @type ghost: boolean
2186 @ivar ghost: whether this is a known node or not (config)
2187 @type os_fail: boolean
2188 @ivar os_fail: whether the RPC call didn't return valid OS data
2190 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2191 @type vm_capable: boolean
2192 @ivar vm_capable: whether the node can host instances
2195 def __init__(self, offline=False, name=None, vm_capable=True):
2204 self.offline = offline
2205 self.vm_capable = vm_capable
2206 self.rpc_fail = False
2207 self.lvm_fail = False
2208 self.hyp_fail = False
2210 self.os_fail = False
2213 def ExpandNames(self):
2214 # This raises errors.OpPrereqError on its own:
2215 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2217 # Get instances in node group; this is unsafe and needs verification later
2219 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2221 self.needed_locks = {
2222 locking.LEVEL_INSTANCE: inst_names,
2223 locking.LEVEL_NODEGROUP: [self.group_uuid],
2224 locking.LEVEL_NODE: [],
2227 self.share_locks = _ShareAll()
2229 def DeclareLocks(self, level):
2230 if level == locking.LEVEL_NODE:
2231 # Get members of node group; this is unsafe and needs verification later
2232 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2234 all_inst_info = self.cfg.GetAllInstancesInfo()
2236 # In Exec(), we warn about mirrored instances that have primary and
2237 # secondary living in separate node groups. To fully verify that
2238 # volumes for these instances are healthy, we will need to do an
2239 # extra call to their secondaries. We ensure here those nodes will
2241 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2242 # Important: access only the instances whose lock is owned
2243 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2244 nodes.update(all_inst_info[inst].secondary_nodes)
2246 self.needed_locks[locking.LEVEL_NODE] = nodes
2248 def CheckPrereq(self):
2249 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2250 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2252 group_nodes = set(self.group_info.members)
2254 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2257 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2259 unlocked_instances = \
2260 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2263 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2264 utils.CommaJoin(unlocked_nodes),
2267 if unlocked_instances:
2268 raise errors.OpPrereqError("Missing lock for instances: %s" %
2269 utils.CommaJoin(unlocked_instances),
2272 self.all_node_info = self.cfg.GetAllNodesInfo()
2273 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2275 self.my_node_names = utils.NiceSort(group_nodes)
2276 self.my_inst_names = utils.NiceSort(group_instances)
2278 self.my_node_info = dict((name, self.all_node_info[name])
2279 for name in self.my_node_names)
2281 self.my_inst_info = dict((name, self.all_inst_info[name])
2282 for name in self.my_inst_names)
2284 # We detect here the nodes that will need the extra RPC calls for verifying
2285 # split LV volumes; they should be locked.
2286 extra_lv_nodes = set()
2288 for inst in self.my_inst_info.values():
2289 if inst.disk_template in constants.DTS_INT_MIRROR:
2290 for nname in inst.all_nodes:
2291 if self.all_node_info[nname].group != self.group_uuid:
2292 extra_lv_nodes.add(nname)
2294 unlocked_lv_nodes = \
2295 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2297 if unlocked_lv_nodes:
2298 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2299 utils.CommaJoin(unlocked_lv_nodes),
2301 self.extra_lv_nodes = list(extra_lv_nodes)
2303 def _VerifyNode(self, ninfo, nresult):
2304 """Perform some basic validation on data returned from a node.
2306 - check the result data structure is well formed and has all the
2308 - check ganeti version
2310 @type ninfo: L{objects.Node}
2311 @param ninfo: the node to check
2312 @param nresult: the results from the node
2314 @return: whether overall this call was successful (and we can expect
2315 reasonable values in the response)
2319 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2321 # main result, nresult should be a non-empty dict
2322 test = not nresult or not isinstance(nresult, dict)
2323 _ErrorIf(test, constants.CV_ENODERPC, node,
2324 "unable to verify node: no data returned")
2328 # compares ganeti version
2329 local_version = constants.PROTOCOL_VERSION
2330 remote_version = nresult.get("version", None)
2331 test = not (remote_version and
2332 isinstance(remote_version, (list, tuple)) and
2333 len(remote_version) == 2)
2334 _ErrorIf(test, constants.CV_ENODERPC, node,
2335 "connection to node returned invalid data")
2339 test = local_version != remote_version[0]
2340 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2341 "incompatible protocol versions: master %s,"
2342 " node %s", local_version, remote_version[0])
2346 # node seems compatible, we can actually try to look into its results
2348 # full package version
2349 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2350 constants.CV_ENODEVERSION, node,
2351 "software version mismatch: master %s, node %s",
2352 constants.RELEASE_VERSION, remote_version[1],
2353 code=self.ETYPE_WARNING)
2355 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2356 if ninfo.vm_capable and isinstance(hyp_result, dict):
2357 for hv_name, hv_result in hyp_result.iteritems():
2358 test = hv_result is not None
2359 _ErrorIf(test, constants.CV_ENODEHV, node,
2360 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2362 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2363 if ninfo.vm_capable and isinstance(hvp_result, list):
2364 for item, hv_name, hv_result in hvp_result:
2365 _ErrorIf(True, constants.CV_ENODEHV, node,
2366 "hypervisor %s parameter verify failure (source %s): %s",
2367 hv_name, item, hv_result)
2369 test = nresult.get(constants.NV_NODESETUP,
2370 ["Missing NODESETUP results"])
2371 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2376 def _VerifyNodeTime(self, ninfo, nresult,
2377 nvinfo_starttime, nvinfo_endtime):
2378 """Check the node time.
2380 @type ninfo: L{objects.Node}
2381 @param ninfo: the node to check
2382 @param nresult: the remote results for the node
2383 @param nvinfo_starttime: the start time of the RPC call
2384 @param nvinfo_endtime: the end time of the RPC call
2388 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2390 ntime = nresult.get(constants.NV_TIME, None)
2392 ntime_merged = utils.MergeTime(ntime)
2393 except (ValueError, TypeError):
2394 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2397 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2398 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2399 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2400 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2404 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2405 "Node time diverges by at least %s from master node time",
2408 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2409 """Check the node LVM results.
2411 @type ninfo: L{objects.Node}
2412 @param ninfo: the node to check
2413 @param nresult: the remote results for the node
2414 @param vg_name: the configured VG name
2421 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2423 # checks vg existence and size > 20G
2424 vglist = nresult.get(constants.NV_VGLIST, None)
2426 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2428 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2429 constants.MIN_VG_SIZE)
2430 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2433 pvlist = nresult.get(constants.NV_PVLIST, None)
2434 test = pvlist is None
2435 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2437 # check that ':' is not present in PV names, since it's a
2438 # special character for lvcreate (denotes the range of PEs to
2440 for _, pvname, owner_vg in pvlist:
2441 test = ":" in pvname
2442 _ErrorIf(test, constants.CV_ENODELVM, node,
2443 "Invalid character ':' in PV '%s' of VG '%s'",
2446 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2447 """Check the node bridges.
2449 @type ninfo: L{objects.Node}
2450 @param ninfo: the node to check
2451 @param nresult: the remote results for the node
2452 @param bridges: the expected list of bridges
2459 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2461 missing = nresult.get(constants.NV_BRIDGES, None)
2462 test = not isinstance(missing, list)
2463 _ErrorIf(test, constants.CV_ENODENET, node,
2464 "did not return valid bridge information")
2466 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2467 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2469 def _VerifyNodeUserScripts(self, ninfo, nresult):
2470 """Check the results of user scripts presence and executability on the node
2472 @type ninfo: L{objects.Node}
2473 @param ninfo: the node to check
2474 @param nresult: the remote results for the node
2479 test = constants.NV_USERSCRIPTS not in nresult
2480 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2481 "did not return user scripts information")
2483 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2485 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2486 "user scripts not present or not executable: %s" %
2487 utils.CommaJoin(sorted(broken_scripts)))
2489 def _VerifyNodeNetwork(self, ninfo, nresult):
2490 """Check the node network connectivity results.
2492 @type ninfo: L{objects.Node}
2493 @param ninfo: the node to check
2494 @param nresult: the remote results for the node
2498 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2500 test = constants.NV_NODELIST not in nresult
2501 _ErrorIf(test, constants.CV_ENODESSH, node,
2502 "node hasn't returned node ssh connectivity data")
2504 if nresult[constants.NV_NODELIST]:
2505 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2506 _ErrorIf(True, constants.CV_ENODESSH, node,
2507 "ssh communication with node '%s': %s", a_node, a_msg)
2509 test = constants.NV_NODENETTEST not in nresult
2510 _ErrorIf(test, constants.CV_ENODENET, node,
2511 "node hasn't returned node tcp connectivity data")
2513 if nresult[constants.NV_NODENETTEST]:
2514 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2516 _ErrorIf(True, constants.CV_ENODENET, node,
2517 "tcp communication with node '%s': %s",
2518 anode, nresult[constants.NV_NODENETTEST][anode])
2520 test = constants.NV_MASTERIP not in nresult
2521 _ErrorIf(test, constants.CV_ENODENET, node,
2522 "node hasn't returned node master IP reachability data")
2524 if not nresult[constants.NV_MASTERIP]:
2525 if node == self.master_node:
2526 msg = "the master node cannot reach the master IP (not configured?)"
2528 msg = "cannot reach the master IP"
2529 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2531 def _VerifyInstance(self, instance, instanceconfig, node_image,
2533 """Verify an instance.
2535 This function checks to see if the required block devices are
2536 available on the instance's node.
2539 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2540 node_current = instanceconfig.primary_node
2542 node_vol_should = {}
2543 instanceconfig.MapLVsByNode(node_vol_should)
2545 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2546 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2547 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2549 for node in node_vol_should:
2550 n_img = node_image[node]
2551 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2552 # ignore missing volumes on offline or broken nodes
2554 for volume in node_vol_should[node]:
2555 test = volume not in n_img.volumes
2556 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2557 "volume %s missing on node %s", volume, node)
2559 if instanceconfig.admin_state == constants.ADMINST_UP:
2560 pri_img = node_image[node_current]
2561 test = instance not in pri_img.instances and not pri_img.offline
2562 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2563 "instance not running on its primary node %s",
2566 diskdata = [(nname, success, status, idx)
2567 for (nname, disks) in diskstatus.items()
2568 for idx, (success, status) in enumerate(disks)]
2570 for nname, success, bdev_status, idx in diskdata:
2571 # the 'ghost node' construction in Exec() ensures that we have a
2573 snode = node_image[nname]
2574 bad_snode = snode.ghost or snode.offline
2575 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2576 not success and not bad_snode,
2577 constants.CV_EINSTANCEFAULTYDISK, instance,
2578 "couldn't retrieve status for disk/%s on %s: %s",
2579 idx, nname, bdev_status)
2580 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2581 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2582 constants.CV_EINSTANCEFAULTYDISK, instance,
2583 "disk/%s on %s is faulty", idx, nname)
2585 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2586 """Verify if there are any unknown volumes in the cluster.
2588 The .os, .swap and backup volumes are ignored. All other volumes are
2589 reported as unknown.
2591 @type reserved: L{ganeti.utils.FieldSet}
2592 @param reserved: a FieldSet of reserved volume names
2595 for node, n_img in node_image.items():
2596 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2597 self.all_node_info[node].group != self.group_uuid):
2598 # skip non-healthy nodes
2600 for volume in n_img.volumes:
2601 test = ((node not in node_vol_should or
2602 volume not in node_vol_should[node]) and
2603 not reserved.Matches(volume))
2604 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2605 "volume %s is unknown", volume)
2607 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2608 """Verify N+1 Memory Resilience.
2610 Check that if one single node dies we can still start all the
2611 instances it was primary for.
2614 cluster_info = self.cfg.GetClusterInfo()
2615 for node, n_img in node_image.items():
2616 # This code checks that every node which is now listed as
2617 # secondary has enough memory to host all instances it is
2618 # supposed to, should a single other node in the cluster fail.
2619 # FIXME: not ready for failover to an arbitrary node
2620 # FIXME: does not support file-backed instances
2621 # WARNING: we currently take into account down instances as well
2622 # as up ones, considering that even if they're down someone
2623 # might want to start them even in the event of a node failure.
2624 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2625 # we're skipping nodes marked offline and nodes in other groups from
2626 # the N+1 warning, since most likely we don't have good memory
2627 # information from them; we already list instances living on such
2628 # nodes, and that's enough warning
2630 #TODO(dynmem): also consider ballooning out other instances
2631 for prinode, instances in n_img.sbp.items():
2633 for instance in instances:
2634 bep = cluster_info.FillBE(instance_cfg[instance])
2635 if bep[constants.BE_AUTO_BALANCE]:
2636 needed_mem += bep[constants.BE_MINMEM]
2637 test = n_img.mfree < needed_mem
2638 self._ErrorIf(test, constants.CV_ENODEN1, node,
2639 "not enough memory to accomodate instance failovers"
2640 " should node %s fail (%dMiB needed, %dMiB available)",
2641 prinode, needed_mem, n_img.mfree)
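# Illustrative sketch of the N+1 computation above (numbers are made up): if a
# node is secondary for two auto-balanced instances of primary node P with
# BE_MINMEM of 1024 and 2048 MiB, then needed_mem for P is 3072 MiB; a
# reported mfree of 2048 MiB on the secondary raises CV_ENODEN1.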
2644 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2645 (files_all, files_opt, files_mc, files_vm)):
2646 """Verifies file checksums collected from all nodes.
2648 @param errorif: Callback for reporting errors
2649 @param nodeinfo: List of L{objects.Node} objects
2650 @param master_node: Name of master node
2651 @param all_nvinfo: RPC results
2654 # Define functions determining which nodes to consider for a file
2657 (files_mc, lambda node: (node.master_candidate or
2658 node.name == master_node)),
2659 (files_vm, lambda node: node.vm_capable),
2662 # Build mapping from filename to list of nodes which should have the file
2664 for (files, fn) in files2nodefn:
2666 filenodes = nodeinfo
2668 filenodes = filter(fn, nodeinfo)
2669 nodefiles.update((filename,
2670 frozenset(map(operator.attrgetter("name"), filenodes)))
2671 for filename in files)
2673 assert set(nodefiles) == (files_all | files_mc | files_vm)
2675 fileinfo = dict((filename, {}) for filename in nodefiles)
2676 ignore_nodes = set()
2678 for node in nodeinfo:
2680 ignore_nodes.add(node.name)
2683 nresult = all_nvinfo[node.name]
2685 if nresult.fail_msg or not nresult.payload:
2688 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2690 test = not (node_files and isinstance(node_files, dict))
2691 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2692 "Node did not return file checksum data")
2694 ignore_nodes.add(node.name)
2697 # Build per-checksum mapping from filename to nodes having it
2698 for (filename, checksum) in node_files.items():
2699 assert filename in nodefiles
2700 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2702 for (filename, checksums) in fileinfo.items():
2703 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2705 # Nodes having the file
2706 with_file = frozenset(node_name
2707 for nodes in fileinfo[filename].values()
2708 for node_name in nodes) - ignore_nodes
2710 expected_nodes = nodefiles[filename] - ignore_nodes
2712 # Nodes missing file
2713 missing_file = expected_nodes - with_file
2715 if filename in files_opt:
2717 errorif(missing_file and missing_file != expected_nodes,
2718 constants.CV_ECLUSTERFILECHECK, None,
2719 "File %s is optional, but it must exist on all or no"
2720 " nodes (not found on %s)",
2721 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2723 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2724 "File %s is missing from node(s) %s", filename,
2725 utils.CommaJoin(utils.NiceSort(missing_file)))
2727 # Warn if a node has a file it shouldn't
2728 unexpected = with_file - expected_nodes
2730 constants.CV_ECLUSTERFILECHECK, None,
2731 "File %s should not exist on node(s) %s",
2732 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2734 # See if there are multiple versions of the file
2735 test = len(checksums) > 1
2737 variants = ["variant %s on %s" %
2738 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2739 for (idx, (checksum, nodes)) in
2740 enumerate(sorted(checksums.items()))]
2744 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2745 "File %s found with %s different checksums (%s)",
2746 filename, len(checksums), "; ".join(variants))
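# Illustrative sketch of the per-file checksum map built above (file names,
# node names and checksums are made up):
#   fileinfo["/var/lib/ganeti/some_file"] = {
#     "1df9ab3c...": set(["node1", "node2"]),
#     "b44e02ff...": set(["node3"]),
#   }
# Two checksums for the same non-optional file would be reported as a
# CV_ECLUSTERFILECHECK error listing both variants.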
2748 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2750 """Verifies and the node DRBD status.
2752 @type ninfo: L{objects.Node}
2753 @param ninfo: the node to check
2754 @param nresult: the remote results for the node
2755 @param instanceinfo: the dict of instances
2756 @param drbd_helper: the configured DRBD usermode helper
2757 @param drbd_map: the DRBD map as returned by
2758 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2762 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2765 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2766 test = (helper_result is None)
2767 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2768 "no drbd usermode helper returned")
2770 status, payload = helper_result
2772 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2773 "drbd usermode helper check unsuccessful: %s", payload)
2774 test = status and (payload != drbd_helper)
2775 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2776 "wrong drbd usermode helper: %s", payload)
2778 # compute the DRBD minors
2780 for minor, instance in drbd_map[node].items():
2781 test = instance not in instanceinfo
2782 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2783 "ghost instance '%s' in temporary DRBD map", instance)
2784 # ghost instance should not be running, but otherwise we
2785 # don't give double warnings (both ghost instance and
2786 # unallocated minor in use)
2788 node_drbd[minor] = (instance, False)
2790 instance = instanceinfo[instance]
2791 node_drbd[minor] = (instance.name,
2792 instance.admin_state == constants.ADMINST_UP)
2794 # and now check them
2795 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2796 test = not isinstance(used_minors, (tuple, list))
2797 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2798 "cannot parse drbd status file: %s", str(used_minors))
2800 # we cannot check drbd status
2803 for minor, (iname, must_exist) in node_drbd.items():
2804 test = minor not in used_minors and must_exist
2805 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2806 "drbd minor %d of instance %s is not active", minor, iname)
2807 for minor in used_minors:
2808 test = minor not in node_drbd
2809 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2810 "unallocated drbd minor %d is in use", minor)
2812 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2813 """Builds the node OS structures.
2815 @type ninfo: L{objects.Node}
2816 @param ninfo: the node to check
2817 @param nresult: the remote results for the node
2818 @param nimg: the node image object
2822 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2824 remote_os = nresult.get(constants.NV_OSLIST, None)
2825 test = (not isinstance(remote_os, list) or
2826 not compat.all(isinstance(v, list) and len(v) == 7
2827 for v in remote_os))
2829 _ErrorIf(test, constants.CV_ENODEOS, node,
2830 "node hasn't returned valid OS data")
2839 for (name, os_path, status, diagnose,
2840 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2842 if name not in os_dict:
2845 # parameters is a list of lists instead of list of tuples due to
2846 # JSON lacking a real tuple type, fix it:
2847 parameters = [tuple(v) for v in parameters]
2848 os_dict[name].append((os_path, status, diagnose,
2849 set(variants), set(parameters), set(api_ver)))
2851 nimg.oslist = os_dict
2853 def _VerifyNodeOS(self, ninfo, nimg, base):
2854 """Verifies the node OS list.
2856 @type ninfo: L{objects.Node}
2857 @param ninfo: the node to check
2858 @param nimg: the node image object
2859 @param base: the 'template' node we match against (e.g. from the master)
2863 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2865 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2867 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2868 for os_name, os_data in nimg.oslist.items():
2869 assert os_data, "Empty OS status for OS %s?!" % os_name
2870 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2871 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2872 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2873 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2874 "OS '%s' has multiple entries (first one shadows the rest): %s",
2875 os_name, utils.CommaJoin([v[0] for v in os_data]))
2876 # comparisons with the 'base' image
2877 test = os_name not in base.oslist
2878 _ErrorIf(test, constants.CV_ENODEOS, node,
2879 "Extra OS %s not present on reference node (%s)",
2883 assert base.oslist[os_name], "Base node has empty OS status?"
2884 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2886 # base OS is invalid, skipping
2888 for kind, a, b in [("API version", f_api, b_api),
2889 ("variants list", f_var, b_var),
2890 ("parameters", beautify_params(f_param),
2891 beautify_params(b_param))]:
2892 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2893 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2894 kind, os_name, base.name,
2895 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2897 # check any missing OSes
2898 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2899 _ErrorIf(missing, constants.CV_ENODEOS, node,
2900 "OSes present on reference node %s but missing on this node: %s",
2901 base.name, utils.CommaJoin(missing))
2903 def _VerifyOob(self, ninfo, nresult):
2904 """Verifies out of band functionality of a node.
2906 @type ninfo: L{objects.Node}
2907 @param ninfo: the node to check
2908 @param nresult: the remote results for the node
2912 # We just have to verify the paths on master and/or master candidates
2913 # as the oob helper is invoked on the master
2914 if ((ninfo.master_candidate or ninfo.master_capable) and
2915 constants.NV_OOB_PATHS in nresult):
2916 for path_result in nresult[constants.NV_OOB_PATHS]:
2917 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2919 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2920 """Verifies and updates the node volume data.
2922 This function will update a L{NodeImage}'s internal structures
2923 with data from the remote call.
2925 @type ninfo: L{objects.Node}
2926 @param ninfo: the node to check
2927 @param nresult: the remote results for the node
2928 @param nimg: the node image object
2929 @param vg_name: the configured VG name
2933 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2935 nimg.lvm_fail = True
2936 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2939 elif isinstance(lvdata, basestring):
2940 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2941 utils.SafeEncode(lvdata))
2942 elif not isinstance(lvdata, dict):
2943 _ErrorIf(True, constants.CV_ENODELVM, node,
2944 "rpc call to node failed (lvlist)")
2946 nimg.volumes = lvdata
2947 nimg.lvm_fail = False
2949 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2950 """Verifies and updates the node instance list.
2952 If the listing was successful, then updates this node's instance
2953 list. Otherwise, it marks the RPC call as failed for the instance
2956 @type ninfo: L{objects.Node}
2957 @param ninfo: the node to check
2958 @param nresult: the remote results for the node
2959 @param nimg: the node image object
2962 idata = nresult.get(constants.NV_INSTANCELIST, None)
2963 test = not isinstance(idata, list)
2964 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2965 "rpc call to node failed (instancelist): %s",
2966 utils.SafeEncode(str(idata)))
2968 nimg.hyp_fail = True
2970 nimg.instances = idata
2972 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2973 """Verifies and computes a node information map
2975 @type ninfo: L{objects.Node}
2976 @param ninfo: the node to check
2977 @param nresult: the remote results for the node
2978 @param nimg: the node image object
2979 @param vg_name: the configured VG name
2983 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2985 # try to read free memory (from the hypervisor)
2986 hv_info = nresult.get(constants.NV_HVINFO, None)
2987 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2988 _ErrorIf(test, constants.CV_ENODEHV, node,
2989 "rpc call to node failed (hvinfo)")
2992 nimg.mfree = int(hv_info["memory_free"])
2993 except (ValueError, TypeError):
2994 _ErrorIf(True, constants.CV_ENODERPC, node,
2995 "node returned invalid nodeinfo, check hypervisor")
2997 # FIXME: devise a free space model for file based instances as well
2998 if vg_name is not None:
2999 test = (constants.NV_VGLIST not in nresult or
3000 vg_name not in nresult[constants.NV_VGLIST])
3001 _ErrorIf(test, constants.CV_ENODELVM, node,
3002 "node didn't return data for the volume group '%s'"
3003 " - it is either missing or broken", vg_name)
3006 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
3007 except (ValueError, TypeError):
3008 _ErrorIf(True, constants.CV_ENODERPC, node,
3009 "node returned invalid LVM info, check LVM status")
3011 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
3012 """Gets per-disk status information for all instances.
3014 @type nodelist: list of strings
3015 @param nodelist: Node names
3016 @type node_image: dict of (name, L{objects.Node})
3017 @param node_image: Node objects
3018 @type instanceinfo: dict of (name, L{objects.Instance})
3019 @param instanceinfo: Instance objects
3020 @rtype: {instance: {node: [(success, payload)]}}
3021 @return: a dictionary of per-instance dictionaries with nodes as
3022 keys and disk information as values; the disk information is a
3023 list of tuples (success, payload)
3026 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3029 node_disks_devonly = {}
3030 diskless_instances = set()
3031 diskless = constants.DT_DISKLESS
3033 for nname in nodelist:
3034 node_instances = list(itertools.chain(node_image[nname].pinst,
3035 node_image[nname].sinst))
3036 diskless_instances.update(inst for inst in node_instances
3037 if instanceinfo[inst].disk_template == diskless)
3038 disks = [(inst, disk)
3039 for inst in node_instances
3040 for disk in instanceinfo[inst].disks]
3043 # No need to collect data
3046 node_disks[nname] = disks
3048 # _AnnotateDiskParams already makes copies of the disks
3050 for (inst, dev) in disks:
3051 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
3052 self.cfg.SetDiskID(anno_disk, nname)
3053 devonly.append(anno_disk)
3055 node_disks_devonly[nname] = devonly
3057 assert len(node_disks) == len(node_disks_devonly)
3059 # Collect data from all nodes with disks
3060 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
3063 assert len(result) == len(node_disks)
3067 for (nname, nres) in result.items():
3068 disks = node_disks[nname]
3071 # No data from this node
3072 data = len(disks) * [(False, "node offline")]
3075 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3076 "while getting disk information: %s", msg)
3078 # No data from this node
3079 data = len(disks) * [(False, msg)]
3082 for idx, i in enumerate(nres.payload):
3083 if isinstance(i, (tuple, list)) and len(i) == 2:
3086 logging.warning("Invalid result from node %s, entry %d: %s",
3088 data.append((False, "Invalid result from the remote node"))
3090 for ((inst, _), status) in zip(disks, data):
3091 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3093 # Add empty entries for diskless instances.
3094 for inst in diskless_instances:
3095 assert inst not in instdisk
3098 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3099 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3100 compat.all(isinstance(s, (tuple, list)) and
3101 len(s) == 2 for s in statuses)
3102 for inst, nnames in instdisk.items()
3103 for nname, statuses in nnames.items())
3104 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
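# Illustrative sketch of the instdisk structure returned above (instance and
# node names are made up):
#   instdisk = {
#     "inst1.example.com": {
#       "node1.example.com": [(True, status0), (True, status1)],
#       "node2.example.com": [(False, "node offline"), (False, "node offline")],
#     },
#   }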
3109 def _SshNodeSelector(group_uuid, all_nodes):
3110 """Create endless iterators for all potential SSH check hosts.
3113 nodes = [node for node in all_nodes
3114 if (node.group != group_uuid and
3116 keyfunc = operator.attrgetter("group")
3118 return map(itertools.cycle,
3119 [sorted(map(operator.attrgetter("name"), names))
3120 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3124 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3125 """Choose which nodes should talk to which other nodes.
3127 We will make nodes contact all nodes in their group, and one node from
3130 @warning: This algorithm has a known issue if one node group is much
3131 smaller than others (e.g. just one node). In such a case all other
3132 nodes will talk to the single node.
3135 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3136 sel = cls._SshNodeSelector(group_uuid, all_nodes)
3138 return (online_nodes,
3139 dict((name, sorted([i.next() for i in sel]))
3140 for name in online_nodes))
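# Illustrative example of the SSH check selection above (node and group names
# are made up): verifying a group [node1, node2] in a cluster that also has a
# group [node3, node4] could yield
#   (["node1", "node2"], {"node1": ["node3"], "node2": ["node4"]})
# i.e. every online node of the verified group is asked to contact one
# round-robin node from each other group.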
3142 def BuildHooksEnv(self):
3145 Cluster-Verify hooks run only in the post phase; if they fail, their
3146 output is logged in the verify output and the verification fails.
3150 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
3153 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3154 for node in self.my_node_info.values())
3158 def BuildHooksNodes(self):
3159 """Build hooks nodes.
3162 return ([], self.my_node_names)
3164 def Exec(self, feedback_fn):
3165 """Verify integrity of the node group, performing various test on nodes.
3168 # This method has too many local variables. pylint: disable=R0914
3169 feedback_fn("* Verifying group '%s'" % self.group_info.name)
3171 if not self.my_node_names:
3173 feedback_fn("* Empty node group, skipping verification")
3177 _ErrorIf = self._ErrorIf # pylint: disable=C0103
3178 verbose = self.op.verbose
3179 self._feedback_fn = feedback_fn
3181 vg_name = self.cfg.GetVGName()
3182 drbd_helper = self.cfg.GetDRBDHelper()
3183 cluster = self.cfg.GetClusterInfo()
3184 groupinfo = self.cfg.GetAllNodeGroupsInfo()
3185 hypervisors = cluster.enabled_hypervisors
3186 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
3188 i_non_redundant = [] # Non redundant instances
3189 i_non_a_balanced = [] # Non auto-balanced instances
3190 i_offline = 0 # Count of offline instances
3191 n_offline = 0 # Count of offline nodes
3192 n_drained = 0 # Count of nodes being drained
3193 node_vol_should = {}
3195 # FIXME: verify OS list
3198 filemap = _ComputeAncillaryFiles(cluster, False)
3200 # do local checksums
3201 master_node = self.master_node = self.cfg.GetMasterNode()
3202 master_ip = self.cfg.GetMasterIP()
3204 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3207 if self.cfg.GetUseExternalMipScript():
3208 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3210 node_verify_param = {
3211 constants.NV_FILELIST:
3212 utils.UniqueSequence(filename
3213 for files in filemap
3214 for filename in files),
3215 constants.NV_NODELIST:
3216 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3217 self.all_node_info.values()),
3218 constants.NV_HYPERVISOR: hypervisors,
3219 constants.NV_HVPARAMS:
3220 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3221 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3222 for node in node_data_list
3223 if not node.offline],
3224 constants.NV_INSTANCELIST: hypervisors,
3225 constants.NV_VERSION: None,
3226 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3227 constants.NV_NODESETUP: None,
3228 constants.NV_TIME: None,
3229 constants.NV_MASTERIP: (master_node, master_ip),
3230 constants.NV_OSLIST: None,
3231 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3232 constants.NV_USERSCRIPTS: user_scripts,
3235 if vg_name is not None:
3236 node_verify_param[constants.NV_VGLIST] = None
3237 node_verify_param[constants.NV_LVLIST] = vg_name
3238 node_verify_param[constants.NV_PVLIST] = [vg_name]
3241 node_verify_param[constants.NV_DRBDLIST] = None
3242 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3245 # FIXME: this needs to be changed per node-group, not cluster-wide
3247 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3248 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3249 bridges.add(default_nicpp[constants.NIC_LINK])
3250 for instance in self.my_inst_info.values():
3251 for nic in instance.nics:
3252 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3253 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3254 bridges.add(full_nic[constants.NIC_LINK])
3257 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3259 # Build our expected cluster state
3260 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3262 vm_capable=node.vm_capable))
3263 for node in node_data_list)
3267 for node in self.all_node_info.values():
3268 path = _SupportsOob(self.cfg, node)
3269 if path and path not in oob_paths:
3270 oob_paths.append(path)
3273 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3275 for instance in self.my_inst_names:
3276 inst_config = self.my_inst_info[instance]
3277 if inst_config.admin_state == constants.ADMINST_OFFLINE:
3280 for nname in inst_config.all_nodes:
3281 if nname not in node_image:
3282 gnode = self.NodeImage(name=nname)
3283 gnode.ghost = (nname not in self.all_node_info)
3284 node_image[nname] = gnode
3286 inst_config.MapLVsByNode(node_vol_should)
3288 pnode = inst_config.primary_node
3289 node_image[pnode].pinst.append(instance)
3291 for snode in inst_config.secondary_nodes:
3292 nimg = node_image[snode]
3293 nimg.sinst.append(instance)
3294 if pnode not in nimg.sbp:
3295 nimg.sbp[pnode] = []
3296 nimg.sbp[pnode].append(instance)
3298 # At this point, we have the in-memory data structures complete,
3299 # except for the runtime information, which we'll gather next
3301 # Due to the way our RPC system works, exact response times cannot be
3302 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3303 # time before and after executing the request, we can at least have a time
3305 nvinfo_starttime = time.time()
3306 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3308 self.cfg.GetClusterName())
3309 nvinfo_endtime = time.time()
3311 if self.extra_lv_nodes and vg_name is not None:
3313 self.rpc.call_node_verify(self.extra_lv_nodes,
3314 {constants.NV_LVLIST: vg_name},
3315 self.cfg.GetClusterName())
3317 extra_lv_nvinfo = {}
3319 all_drbd_map = self.cfg.ComputeDRBDMap()
3321 feedback_fn("* Gathering disk information (%s nodes)" %
3322 len(self.my_node_names))
3323 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3326 feedback_fn("* Verifying configuration file consistency")
3328 # If not all nodes are being checked, we need to make sure the master node
3329 # and a non-checked vm_capable node are in the list.
3330 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3332 vf_nvinfo = all_nvinfo.copy()
3333 vf_node_info = list(self.my_node_info.values())
3334 additional_nodes = []
3335 if master_node not in self.my_node_info:
3336 additional_nodes.append(master_node)
3337 vf_node_info.append(self.all_node_info[master_node])
3338 # Add the first vm_capable node we find which is not included,
3339 # excluding the master node (which we already have)
3340 for node in absent_nodes:
3341 nodeinfo = self.all_node_info[node]
3342 if (nodeinfo.vm_capable and not nodeinfo.offline and
3343 node != master_node):
3344 additional_nodes.append(node)
3345 vf_node_info.append(self.all_node_info[node])
3347 key = constants.NV_FILELIST
3348 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3349 {key: node_verify_param[key]},
3350 self.cfg.GetClusterName()))
3352 vf_nvinfo = all_nvinfo
3353 vf_node_info = self.my_node_info.values()
3355 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3357 feedback_fn("* Verifying node status")
3361 for node_i in node_data_list:
3363 nimg = node_image[node]
3367 feedback_fn("* Skipping offline node %s" % (node,))
3371 if node == master_node:
3373 elif node_i.master_candidate:
3374 ntype = "master candidate"
3375 elif node_i.drained:
3381 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3383 msg = all_nvinfo[node].fail_msg
3384 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3387 nimg.rpc_fail = True
3390 nresult = all_nvinfo[node].payload
3392 nimg.call_ok = self._VerifyNode(node_i, nresult)
3393 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3394 self._VerifyNodeNetwork(node_i, nresult)
3395 self._VerifyNodeUserScripts(node_i, nresult)
3396 self._VerifyOob(node_i, nresult)
3399 self._VerifyNodeLVM(node_i, nresult, vg_name)
3400 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3403 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3404 self._UpdateNodeInstances(node_i, nresult, nimg)
3405 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3406 self._UpdateNodeOS(node_i, nresult, nimg)
3408 if not nimg.os_fail:
3409 if refos_img is None:
3411 self._VerifyNodeOS(node_i, nimg, refos_img)
3412 self._VerifyNodeBridges(node_i, nresult, bridges)
3414 # Check whether all running instances are primary for the node. (This
3415 # can no longer be done from _VerifyInstance below, since some of the
3416 # wrong instances could be from other node groups.)
3417 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3419 for inst in non_primary_inst:
3420 test = inst in self.all_inst_info
3421 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3422 "instance should not run on node %s", node_i.name)
3423 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3424 "node is running unknown instance %s", inst)
3426 for node, result in extra_lv_nvinfo.items():
3427 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3428 node_image[node], vg_name)
3430 feedback_fn("* Verifying instance status")
3431 for instance in self.my_inst_names:
3433 feedback_fn("* Verifying instance %s" % instance)
3434 inst_config = self.my_inst_info[instance]
3435 self._VerifyInstance(instance, inst_config, node_image,
3437 inst_nodes_offline = []
3439 pnode = inst_config.primary_node
3440 pnode_img = node_image[pnode]
3441 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3442 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3443 " primary node failed", instance)
3445 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3447 constants.CV_EINSTANCEBADNODE, instance,
3448 "instance is marked as running and lives on offline node %s",
3449 inst_config.primary_node)
3451 # If the instance is non-redundant we cannot survive losing its primary
3452 # node, so we are not N+1 compliant.
3453 if inst_config.disk_template not in constants.DTS_MIRRORED:
3454 i_non_redundant.append(instance)
3456 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3457 constants.CV_EINSTANCELAYOUT,
3458 instance, "instance has multiple secondary nodes: %s",
3459 utils.CommaJoin(inst_config.secondary_nodes),
3460 code=self.ETYPE_WARNING)
3462 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3463 pnode = inst_config.primary_node
3464 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3465 instance_groups = {}
3467 for node in instance_nodes:
3468 instance_groups.setdefault(self.all_node_info[node].group,
3472 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3473 # Sort so that we always list the primary node first.
3474 for group, nodes in sorted(instance_groups.items(),
3475 key=lambda (_, nodes): pnode in nodes,
3478 self._ErrorIf(len(instance_groups) > 1,
3479 constants.CV_EINSTANCESPLITGROUPS,
3480 instance, "instance has primary and secondary nodes in"
3481 " different groups: %s", utils.CommaJoin(pretty_list),
3482 code=self.ETYPE_WARNING)
3484 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3485 i_non_a_balanced.append(instance)
3487 for snode in inst_config.secondary_nodes:
3488 s_img = node_image[snode]
3489 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3490 snode, "instance %s, connection to secondary node failed",
3494 inst_nodes_offline.append(snode)
3496 # warn that the instance lives on offline nodes
3497 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3498 "instance has offline secondary node(s) %s",
3499 utils.CommaJoin(inst_nodes_offline))
3500 # ... or ghost/non-vm_capable nodes
3501 for node in inst_config.all_nodes:
3502 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3503 instance, "instance lives on ghost node %s", node)
3504 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3505 instance, "instance lives on non-vm_capable node %s", node)
3507 feedback_fn("* Verifying orphan volumes")
3508 reserved = utils.FieldSet(*cluster.reserved_lvs)
3510 # We will get spurious "unknown volume" warnings if any node of this group
3511 # is secondary for an instance whose primary is in another group. To avoid
3512 # them, we find these instances and add their volumes to node_vol_should.
3513 for inst in self.all_inst_info.values():
3514 for secondary in inst.secondary_nodes:
3515 if (secondary in self.my_node_info
3516 and inst.name not in self.my_inst_info):
3517 inst.MapLVsByNode(node_vol_should)
3520 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3522 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3523 feedback_fn("* Verifying N+1 Memory redundancy")
3524 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3526 feedback_fn("* Other Notes")
3528 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3529 % len(i_non_redundant))
3531 if i_non_a_balanced:
3532 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3533 % len(i_non_a_balanced))
3536 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3539 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3542 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3546 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3547 """Analyze the post-hooks' result
3549 This method analyses the hook result, handles it, and sends some
3550 nicely-formatted feedback back to the user.
3552 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3553 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3554 @param hooks_results: the results of the multi-node hooks rpc call
3555 @param feedback_fn: function used to send feedback back to the caller
3556 @param lu_result: previous Exec result
3557 @return: the new Exec result, based on the previous result
3561 # We only really run POST phase hooks, only for non-empty groups,
3562 # and are only interested in their results
3563 if not self.my_node_names:
3566 elif phase == constants.HOOKS_PHASE_POST:
3567 # Used to change hooks' output to proper indentation
3568 feedback_fn("* Hooks Results")
3569 assert hooks_results, "invalid result from hooks"
3571 for node_name in hooks_results:
3572 res = hooks_results[node_name]
3574 test = msg and not res.offline
3575 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3576 "Communication failure in hooks execution: %s", msg)
3577 if res.offline or msg:
3578 # No need to investigate payload if node is offline or gave
3581 for script, hkr, output in res.payload:
3582 test = hkr == constants.HKR_FAIL
3583 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3584 "Script %s failed, output:", script)
3586 output = self._HOOKS_INDENT_RE.sub(" ", output)
3587 feedback_fn("%s" % output)
3593 class LUClusterVerifyDisks(NoHooksLU):
3594 """Verifies the cluster disks status.
3599 def ExpandNames(self):
3600 self.share_locks = _ShareAll()
3601 self.needed_locks = {
3602 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3605 def Exec(self, feedback_fn):
3606 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3608 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3609 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3610 for group in group_names])
3613 class LUGroupVerifyDisks(NoHooksLU):
3614 """Verifies the status of all disks in a node group.
3619 def ExpandNames(self):
3620 # Raises errors.OpPrereqError on its own if group can't be found
3621 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3623 self.share_locks = _ShareAll()
3624 self.needed_locks = {
3625 locking.LEVEL_INSTANCE: [],
3626 locking.LEVEL_NODEGROUP: [],
3627 locking.LEVEL_NODE: [],
3630 def DeclareLocks(self, level):
3631 if level == locking.LEVEL_INSTANCE:
3632 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3634 # Lock instances optimistically, needs verification once node and group
3635 # locks have been acquired
3636 self.needed_locks[locking.LEVEL_INSTANCE] = \
3637 self.cfg.GetNodeGroupInstances(self.group_uuid)
3639 elif level == locking.LEVEL_NODEGROUP:
3640 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3642 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3643 set([self.group_uuid] +
3644 # Lock all groups used by instances optimistically; this requires
3645 # going via the node before it's locked, requiring verification
3648 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3649 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3651 elif level == locking.LEVEL_NODE:
3652 # This will only lock the nodes in the group to be verified which contain
3654 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3655 self._LockInstancesNodes()
3657 # Lock all nodes in group to be verified
3658 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3659 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3660 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3662 def CheckPrereq(self):
3663 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3664 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3665 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3667 assert self.group_uuid in owned_groups
3669 # Check if locked instances are still correct
3670 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3672 # Get instance information
3673 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3675 # Check if node groups for locked instances are still correct
3676 _CheckInstancesNodeGroups(self.cfg, self.instances,
3677 owned_groups, owned_nodes, self.group_uuid)
3679 def Exec(self, feedback_fn):
3680 """Verify integrity of cluster disks.
3682 @rtype: tuple of three items
3683 @return: a tuple of (dict of node-to-node_error, list of instances
3684 which need activate-disks, dict of instance: (node, volume) for
3689 res_instances = set()
3692 nv_dict = _MapInstanceDisksToNodes([inst
3693 for inst in self.instances.values()
3694 if inst.admin_state == constants.ADMINST_UP])
3697 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3698 set(self.cfg.GetVmCapableNodeList()))
3700 node_lvs = self.rpc.call_lv_list(nodes, [])
3702 for (node, node_res) in node_lvs.items():
3703 if node_res.offline:
3706 msg = node_res.fail_msg
3708 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3709 res_nodes[node] = msg
3712 for lv_name, (_, _, lv_online) in node_res.payload.items():
3713 inst = nv_dict.pop((node, lv_name), None)
3714 if not (lv_online or inst is None):
3715 res_instances.add(inst)
3717 # any leftover items in nv_dict are missing LVs, let's arrange the data
3719 for key, inst in nv_dict.iteritems():
3720 res_missing.setdefault(inst, []).append(list(key))
3722 return (res_nodes, list(res_instances), res_missing)
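# Example of the returned tuple (all names and values are illustrative): a
# node whose LV listing failed, one instance needing "activate-disks" and one
# instance with a missing volume would be reported roughly as
#
#   ({"node2": "Error while listing LVs"},
#    ["instance1"],
#    {"instance2": [["node3", "xenvg/disk0_data"]]})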
3725 class LUClusterRepairDiskSizes(NoHooksLU):
3726 """Verifies the cluster disks sizes.
3731 def ExpandNames(self):
3732 if self.op.instances:
3733 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3734 self.needed_locks = {
3735 locking.LEVEL_NODE_RES: [],
3736 locking.LEVEL_INSTANCE: self.wanted_names,
3738 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3740 self.wanted_names = None
3741 self.needed_locks = {
3742 locking.LEVEL_NODE_RES: locking.ALL_SET,
3743 locking.LEVEL_INSTANCE: locking.ALL_SET,
3745 self.share_locks = {
3746 locking.LEVEL_NODE_RES: 1,
3747 locking.LEVEL_INSTANCE: 0,
3750 def DeclareLocks(self, level):
3751 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3752 self._LockInstancesNodes(primary_only=True, level=level)
3754 def CheckPrereq(self):
3755 """Check prerequisites.
3757 This only checks the optional instance list against the existing names.
3760 if self.wanted_names is None:
3761 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3763 self.wanted_instances = \
3764 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3766 def _EnsureChildSizes(self, disk):
3767 """Ensure children of the disk have the needed disk size.
3769 This is valid mainly for DRBD8 and fixes an issue where the
3770 children have a smaller disk size than the parent.
3772 @param disk: an L{ganeti.objects.Disk} object
3775 if disk.dev_type == constants.LD_DRBD8:
3776 assert disk.children, "Empty children for DRBD8?"
3777 fchild = disk.children[0]
3778 mismatch = fchild.size < disk.size
3780 self.LogInfo("Child disk has size %d, parent %d, fixing",
3781 fchild.size, disk.size)
3782 fchild.size = disk.size
3784 # and we recurse on this child only, not on the metadev
3785 return self._EnsureChildSizes(fchild) or mismatch
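# Illustrative behaviour (sizes are hypothetical): for a DRBD8 disk of size
# 10240 MiB whose data child records only 10112 MiB, the child's size is bumped
# to 10240 and True is returned, signalling the caller that the configuration
# needs to be written back with self.cfg.Update().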
3789 def Exec(self, feedback_fn):
3790 """Verify the size of cluster disks.
3793 # TODO: check child disks too
3794 # TODO: check differences in size between primary/secondary nodes
3796 for instance in self.wanted_instances:
3797 pnode = instance.primary_node
3798 if pnode not in per_node_disks:
3799 per_node_disks[pnode] = []
3800 for idx, disk in enumerate(instance.disks):
3801 per_node_disks[pnode].append((instance, idx, disk))
3803 assert not (frozenset(per_node_disks.keys()) -
3804 self.owned_locks(locking.LEVEL_NODE_RES)), \
3805 "Not owning correct locks"
3806 assert not self.owned_locks(locking.LEVEL_NODE)
3809 for node, dskl in per_node_disks.items():
3810 newl = [v[2].Copy() for v in dskl]
3811 for dsk in newl:
3812 self.cfg.SetDiskID(dsk, node)
3813 result = self.rpc.call_blockdev_getsize(node, newl)
3815 self.LogWarning("Failure in blockdev_getsize call to node"
3816 " %s, ignoring", node)
3818 if len(result.payload) != len(dskl):
3819 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3820 " result.payload=%s", node, len(dskl), result.payload)
3821 self.LogWarning("Invalid result from node %s, ignoring node results",
3824 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3826 self.LogWarning("Disk %d of instance %s did not return size"
3827 " information, ignoring", idx, instance.name)
3829 if not isinstance(size, (int, long)):
3830 self.LogWarning("Disk %d of instance %s did not return valid"
3831 " size information, ignoring", idx, instance.name)
3834 if size != disk.size:
3835 self.LogInfo("Disk %d of instance %s has mismatched size,"
3836 " correcting: recorded %d, actual %d", idx,
3837 instance.name, disk.size, size)
3839 self.cfg.Update(instance, feedback_fn)
3840 changed.append((instance.name, idx, size))
3841 if self._EnsureChildSizes(disk):
3842 self.cfg.Update(instance, feedback_fn)
3843 changed.append((instance.name, idx, disk.size))
3847 class LUClusterRename(LogicalUnit):
3848 """Rename the cluster.
3851 HPATH = "cluster-rename"
3852 HTYPE = constants.HTYPE_CLUSTER
3854 def BuildHooksEnv(self):
3859 "OP_TARGET": self.cfg.GetClusterName(),
3860 "NEW_NAME": self.op.name,
3863 def BuildHooksNodes(self):
3864 """Build hooks nodes.
3867 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3869 def CheckPrereq(self):
3870 """Verify that the passed name is a valid one.
3873 hostname = netutils.GetHostname(name=self.op.name,
3874 family=self.cfg.GetPrimaryIPFamily())
3876 new_name = hostname.name
3877 self.ip = new_ip = hostname.ip
3878 old_name = self.cfg.GetClusterName()
3879 old_ip = self.cfg.GetMasterIP()
3880 if new_name == old_name and new_ip == old_ip:
3881 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3882 " cluster has changed",
3884 if new_ip != old_ip:
3885 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3886 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3887 " reachable on the network" %
3888 new_ip, errors.ECODE_NOTUNIQUE)
3890 self.op.name = new_name
3892 def Exec(self, feedback_fn):
3893 """Rename the cluster.
3896 clustername = self.op.name
3899 # shutdown the master IP
3900 master_params = self.cfg.GetMasterNetworkParameters()
3901 ems = self.cfg.GetUseExternalMipScript()
3902 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3904 result.Raise("Could not disable the master role")
3907 cluster = self.cfg.GetClusterInfo()
3908 cluster.cluster_name = clustername
3909 cluster.master_ip = new_ip
3910 self.cfg.Update(cluster, feedback_fn)
3912 # update the known hosts file
3913 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3914 node_list = self.cfg.GetOnlineNodeList()
3916 node_list.remove(master_params.name)
3919 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3921 master_params.ip = new_ip
3922 result = self.rpc.call_node_activate_master_ip(master_params.name,
3924 msg = result.fail_msg
3926 self.LogWarning("Could not re-enable the master role on"
3927 " the master, please restart manually: %s", msg)
3932 def _ValidateNetmask(cfg, netmask):
3933 """Checks if a netmask is valid.
3935 @type cfg: L{config.ConfigWriter}
3936 @param cfg: The cluster configuration
3938 @param netmask: the netmask to be verified
3939 @raise errors.OpPrereqError: if the validation fails
3942 ip_family = cfg.GetPrimaryIPFamily()
3944 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3945 except errors.ProgrammerError:
3946 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3948 if not ipcls.ValidateNetmask(netmask):
3949 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3953 class LUClusterSetParams(LogicalUnit):
3954 """Change the parameters of the cluster.
3957 HPATH = "cluster-modify"
3958 HTYPE = constants.HTYPE_CLUSTER
3961 def CheckArguments(self):
3965 if self.op.uid_pool:
3966 uidpool.CheckUidPool(self.op.uid_pool)
3968 if self.op.add_uids:
3969 uidpool.CheckUidPool(self.op.add_uids)
3971 if self.op.remove_uids:
3972 uidpool.CheckUidPool(self.op.remove_uids)
3974 if self.op.master_netmask is not None:
3975 _ValidateNetmask(self.cfg, self.op.master_netmask)
3977 if self.op.diskparams:
3978 for dt_params in self.op.diskparams.values():
3979 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3981 utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
3982 except errors.OpPrereqError, err:
3983 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
3986 def ExpandNames(self):
3987 # FIXME: in the future maybe other cluster params won't require checking on
3988 # all nodes to be modified.
3989 self.needed_locks = {
3990 locking.LEVEL_NODE: locking.ALL_SET,
3991 locking.LEVEL_INSTANCE: locking.ALL_SET,
3992 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3994 self.share_locks = {
3995 locking.LEVEL_NODE: 1,
3996 locking.LEVEL_INSTANCE: 1,
3997 locking.LEVEL_NODEGROUP: 1,
4000 def BuildHooksEnv(self):
4005 "OP_TARGET": self.cfg.GetClusterName(),
4006 "NEW_VG_NAME": self.op.vg_name,
4009 def BuildHooksNodes(self):
4010 """Build hooks nodes.
4013 mn = self.cfg.GetMasterNode()
4016 def CheckPrereq(self):
4017 """Check prerequisites.
4019 This checks whether the given params don't conflict and
4020 if the given volume group is valid.
4023 if self.op.vg_name is not None and not self.op.vg_name:
4024 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
4025 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
4026 " instances exist", errors.ECODE_INVAL)
4028 if self.op.drbd_helper is not None and not self.op.drbd_helper:
4029 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
4030 raise errors.OpPrereqError("Cannot disable drbd helper while"
4031 " drbd-based instances exist",
4034 node_list = self.owned_locks(locking.LEVEL_NODE)
4036 # if vg_name is not None, check the given volume group on all nodes
4038 vglist = self.rpc.call_vg_list(node_list)
4039 for node in node_list:
4040 msg = vglist[node].fail_msg
4042 # ignoring down node
4043 self.LogWarning("Error while gathering data on node %s"
4044 " (ignoring node): %s", node, msg)
4046 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
4048 constants.MIN_VG_SIZE)
4050 raise errors.OpPrereqError("Error on node '%s': %s" %
4051 (node, vgstatus), errors.ECODE_ENVIRON)
4053 if self.op.drbd_helper:
4054 # check the given drbd helper on all nodes
4055 helpers = self.rpc.call_drbd_helper(node_list)
4056 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
4058 self.LogInfo("Not checking drbd helper on offline node %s", node)
4060 msg = helpers[node].fail_msg
4062 raise errors.OpPrereqError("Error checking drbd helper on node"
4063 " '%s': %s" % (node, msg),
4064 errors.ECODE_ENVIRON)
4065 node_helper = helpers[node].payload
4066 if node_helper != self.op.drbd_helper:
4067 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
4068 (node, node_helper), errors.ECODE_ENVIRON)
4070 self.cluster = cluster = self.cfg.GetClusterInfo()
4071 # validate params changes
4072 if self.op.beparams:
4073 objects.UpgradeBeParams(self.op.beparams)
4074 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4075 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
4077 if self.op.ndparams:
4078 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4079 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
4081 # TODO: we need a more general way to handle resetting
4082 # cluster-level parameters to default values
4083 if self.new_ndparams["oob_program"] == "":
4084 self.new_ndparams["oob_program"] = \
4085 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
4087 if self.op.hv_state:
4088 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
4089 self.cluster.hv_state_static)
4090 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
4091 for hv, values in new_hv_state.items())
4093 if self.op.disk_state:
4094 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
4095 self.cluster.disk_state_static)
4096 self.new_disk_state = \
4097 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
4098 for name, values in svalues.items()))
4099 for storage, svalues in new_disk_state.items())
4102 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
4105 all_instances = self.cfg.GetAllInstancesInfo().values()
4107 for group in self.cfg.GetAllNodeGroupsInfo().values():
4108 instances = frozenset([inst for inst in all_instances
4109 if compat.any(node in group.members
4110 for node in inst.all_nodes)])
4111 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
4112 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
4114 new_ipolicy, instances)
4116 violations.update(new)
4119 self.LogWarning("After the ipolicy change the following instances"
4120 " violate them: %s",
4121 utils.CommaJoin(utils.NiceSort(violations)))
4123 if self.op.nicparams:
4124 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
4125 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
4126 objects.NIC.CheckParameterSyntax(self.new_nicparams)
4129 # check all instances for consistency
4130 for instance in self.cfg.GetAllInstancesInfo().values():
4131 for nic_idx, nic in enumerate(instance.nics):
4132 params_copy = copy.deepcopy(nic.nicparams)
4133 params_filled = objects.FillDict(self.new_nicparams, params_copy)
4135 # check parameter syntax
4137 objects.NIC.CheckParameterSyntax(params_filled)
4138 except errors.ConfigurationError, err:
4139 nic_errors.append("Instance %s, nic/%d: %s" %
4140 (instance.name, nic_idx, err))
4142 # if we're moving instances to routed, check that they have an ip
4143 target_mode = params_filled[constants.NIC_MODE]
4144 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
4145 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
4146 " address" % (instance.name, nic_idx))
4148 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
4149 "\n".join(nic_errors))
4151 # hypervisor list/parameters
4152 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
4153 if self.op.hvparams:
4154 for hv_name, hv_dict in self.op.hvparams.items():
4155 if hv_name not in self.new_hvparams:
4156 self.new_hvparams[hv_name] = hv_dict
4158 self.new_hvparams[hv_name].update(hv_dict)
4160 # disk template parameters
4161 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
4162 if self.op.diskparams:
4163 for dt_name, dt_params in self.op.diskparams.items():
4164 if dt_name not in self.new_diskparams:
4165 self.new_diskparams[dt_name] = dt_params
4167 self.new_diskparams[dt_name].update(dt_params)
4169 # os hypervisor parameters
4170 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
4172 for os_name, hvs in self.op.os_hvp.items():
4173 if os_name not in self.new_os_hvp:
4174 self.new_os_hvp[os_name] = hvs
4176 for hv_name, hv_dict in hvs.items():
4177 if hv_name not in self.new_os_hvp[os_name]:
4178 self.new_os_hvp[os_name][hv_name] = hv_dict
4180 self.new_os_hvp[os_name][hv_name].update(hv_dict)
4183 self.new_osp = objects.FillDict(cluster.osparams, {})
4184 if self.op.osparams:
4185 for os_name, osp in self.op.osparams.items():
4186 if os_name not in self.new_osp:
4187 self.new_osp[os_name] = {}
4189 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4192 if not self.new_osp[os_name]:
4193 # we removed all parameters
4194 del self.new_osp[os_name]
4196 # check the parameter validity (remote check)
4197 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4198 os_name, self.new_osp[os_name])
4200 # changes to the hypervisor list
4201 if self.op.enabled_hypervisors is not None:
4202 self.hv_list = self.op.enabled_hypervisors
4203 for hv in self.hv_list:
4204 # if the hypervisor doesn't already exist in the cluster
4205 # hvparams, we initialize it to empty, and then (in both
4206 # cases) we make sure to fill the defaults, as we might not
4207 # have a complete defaults list if the hypervisor wasn't
4209 if hv not in new_hvp:
4211 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4212 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4214 self.hv_list = cluster.enabled_hypervisors
4216 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4217 # either the enabled list has changed, or the parameters have, validate
4218 for hv_name, hv_params in self.new_hvparams.items():
4219 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4220 (self.op.enabled_hypervisors and
4221 hv_name in self.op.enabled_hypervisors)):
4222 # either this is a new hypervisor, or its parameters have changed
4223 hv_class = hypervisor.GetHypervisor(hv_name)
4224 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4225 hv_class.CheckParameterSyntax(hv_params)
4226 _CheckHVParams(self, node_list, hv_name, hv_params)
4229 # no need to check any newly-enabled hypervisors, since the
4230 # defaults have already been checked in the above code-block
4231 for os_name, os_hvp in self.new_os_hvp.items():
4232 for hv_name, hv_params in os_hvp.items():
4233 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4234 # we need to fill in the new os_hvp on top of the actual hv_p
4235 cluster_defaults = self.new_hvparams.get(hv_name, {})
4236 new_osp = objects.FillDict(cluster_defaults, hv_params)
4237 hv_class = hypervisor.GetHypervisor(hv_name)
4238 hv_class.CheckParameterSyntax(new_osp)
4239 _CheckHVParams(self, node_list, hv_name, new_osp)
4241 if self.op.default_iallocator:
4242 alloc_script = utils.FindFile(self.op.default_iallocator,
4243 constants.IALLOCATOR_SEARCH_PATH,
4245 if alloc_script is None:
4246 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4247 " specified" % self.op.default_iallocator,
4250 def Exec(self, feedback_fn):
4251 """Change the parameters of the cluster.
4254 if self.op.vg_name is not None:
4255 new_volume = self.op.vg_name
4258 if new_volume != self.cfg.GetVGName():
4259 self.cfg.SetVGName(new_volume)
4261 feedback_fn("Cluster LVM configuration already in desired"
4262 " state, not changing")
4263 if self.op.drbd_helper is not None:
4264 new_helper = self.op.drbd_helper
4267 if new_helper != self.cfg.GetDRBDHelper():
4268 self.cfg.SetDRBDHelper(new_helper)
4270 feedback_fn("Cluster DRBD helper already in desired state,"
4272 if self.op.hvparams:
4273 self.cluster.hvparams = self.new_hvparams
4275 self.cluster.os_hvp = self.new_os_hvp
4276 if self.op.enabled_hypervisors is not None:
4277 self.cluster.hvparams = self.new_hvparams
4278 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4279 if self.op.beparams:
4280 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4281 if self.op.nicparams:
4282 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4284 self.cluster.ipolicy = self.new_ipolicy
4285 if self.op.osparams:
4286 self.cluster.osparams = self.new_osp
4287 if self.op.ndparams:
4288 self.cluster.ndparams = self.new_ndparams
4289 if self.op.diskparams:
4290 self.cluster.diskparams = self.new_diskparams
4291 if self.op.hv_state:
4292 self.cluster.hv_state_static = self.new_hv_state
4293 if self.op.disk_state:
4294 self.cluster.disk_state_static = self.new_disk_state
4296 if self.op.candidate_pool_size is not None:
4297 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4298 # we need to update the pool size here, otherwise the save will fail
4299 _AdjustCandidatePool(self, [])
4301 if self.op.maintain_node_health is not None:
4302 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4303 feedback_fn("Note: CONFD was disabled at build time, node health"
4304 " maintenance is not useful (still enabling it)")
4305 self.cluster.maintain_node_health = self.op.maintain_node_health
4307 if self.op.prealloc_wipe_disks is not None:
4308 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4310 if self.op.add_uids is not None:
4311 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4313 if self.op.remove_uids is not None:
4314 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4316 if self.op.uid_pool is not None:
4317 self.cluster.uid_pool = self.op.uid_pool
4319 if self.op.default_iallocator is not None:
4320 self.cluster.default_iallocator = self.op.default_iallocator
4322 if self.op.reserved_lvs is not None:
4323 self.cluster.reserved_lvs = self.op.reserved_lvs
4325 if self.op.use_external_mip_script is not None:
4326 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4328 def helper_os(aname, mods, desc):
4330 lst = getattr(self.cluster, aname)
4331 for key, val in mods:
4332 if key == constants.DDM_ADD:
4334 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4337 elif key == constants.DDM_REMOVE:
4341 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4343 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4345 if self.op.hidden_os:
4346 helper_os("hidden_os", self.op.hidden_os, "hidden")
4348 if self.op.blacklisted_os:
4349 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4351 if self.op.master_netdev:
4352 master_params = self.cfg.GetMasterNetworkParameters()
4353 ems = self.cfg.GetUseExternalMipScript()
4354 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4355 self.cluster.master_netdev)
4356 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4358 result.Raise("Could not disable the master ip")
4359 feedback_fn("Changing master_netdev from %s to %s" %
4360 (master_params.netdev, self.op.master_netdev))
4361 self.cluster.master_netdev = self.op.master_netdev
4363 if self.op.master_netmask:
4364 master_params = self.cfg.GetMasterNetworkParameters()
4365 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4366 result = self.rpc.call_node_change_master_netmask(master_params.name,
4367 master_params.netmask,
4368 self.op.master_netmask,
4370 master_params.netdev)
4372 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4375 self.cluster.master_netmask = self.op.master_netmask
4377 self.cfg.Update(self.cluster, feedback_fn)
4379 if self.op.master_netdev:
4380 master_params = self.cfg.GetMasterNetworkParameters()
4381 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4382 self.op.master_netdev)
4383 ems = self.cfg.GetUseExternalMipScript()
4384 result = self.rpc.call_node_activate_master_ip(master_params.name,
4387 self.LogWarning("Could not re-enable the master ip on"
4388 " the master, please restart manually: %s",
4392 def _UploadHelper(lu, nodes, fname):
4393 """Helper for uploading a file and showing warnings.
4396 if os.path.exists(fname):
4397 result = lu.rpc.call_upload_file(nodes, fname)
4398 for to_node, to_result in result.items():
4399 msg = to_result.fail_msg
4401 msg = ("Copy of file %s to node %s failed: %s" %
4402 (fname, to_node, msg))
4403 lu.proc.LogWarning(msg)
4406 def _ComputeAncillaryFiles(cluster, redist):
4407 """Compute files external to Ganeti which need to be consistent.
4409 @type redist: boolean
4410 @param redist: Whether to include files which need to be redistributed
4413 # Compute files for all nodes
4415 constants.SSH_KNOWN_HOSTS_FILE,
4416 constants.CONFD_HMAC_KEY,
4417 constants.CLUSTER_DOMAIN_SECRET_FILE,
4418 constants.SPICE_CERT_FILE,
4419 constants.SPICE_CACERT_FILE,
4420 constants.RAPI_USERS_FILE,
4424 files_all.update(constants.ALL_CERT_FILES)
4425 files_all.update(ssconf.SimpleStore().GetFileList())
4427 # we need to ship at least the RAPI certificate
4428 files_all.add(constants.RAPI_CERT_FILE)
4430 if cluster.modify_etc_hosts:
4431 files_all.add(constants.ETC_HOSTS)
4433 if cluster.use_external_mip_script:
4434 files_all.add(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
4436 # Files which are optional, these must:
4437 # - be present in one other category as well
4438 # - either exist or not exist on all nodes of that category (mc, vm all)
4440 constants.RAPI_USERS_FILE,
4443 # Files which should only be on master candidates
4447 files_mc.add(constants.CLUSTER_CONF_FILE)
4449 # Files which should only be on VM-capable nodes
4450 files_vm = set(filename
4451 for hv_name in cluster.enabled_hypervisors
4452 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4454 files_opt |= set(filename
4455 for hv_name in cluster.enabled_hypervisors
4456 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4458 # Filenames in each category must be unique
4459 all_files_set = files_all | files_mc | files_vm
4460 assert (len(all_files_set) ==
4461 sum(map(len, [files_all, files_mc, files_vm]))), \
4462 "Found file listed in more than one file list"
4464 # Optional files must be present in one other category
4465 assert all_files_set.issuperset(files_opt), \
4466 "Optional file not in a different required list"
4468 return (files_all, files_opt, files_mc, files_vm)
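# Example of consuming the result (as done in _RedistributeAncillaryFiles
# below): callers unpack the tuple and ignore the categories they do not need,
# e.g.
#
#   (files_all, _, files_mc, files_vm) = _ComputeAncillaryFiles(cluster, True)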
4471 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4472 """Distribute additional files which are part of the cluster configuration.
4474 ConfigWriter takes care of distributing the config and ssconf files, but
4475 there are more files which should be distributed to all nodes. This function
4476 makes sure those are copied.
4478 @param lu: calling logical unit
4479 @param additional_nodes: list of nodes not in the config to distribute to
4480 @type additional_vm: boolean
4481 @param additional_vm: whether the additional nodes are vm-capable or not
4484 # Gather target nodes
4485 cluster = lu.cfg.GetClusterInfo()
4486 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4488 online_nodes = lu.cfg.GetOnlineNodeList()
4489 online_set = frozenset(online_nodes)
4490 vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))
4492 if additional_nodes is not None:
4493 online_nodes.extend(additional_nodes)
4495 vm_nodes.extend(additional_nodes)
4497 # Never distribute to master node
4498 for nodelist in [online_nodes, vm_nodes]:
4499 if master_info.name in nodelist:
4500 nodelist.remove(master_info.name)
4503 (files_all, _, files_mc, files_vm) = \
4504 _ComputeAncillaryFiles(cluster, True)
4506 # Never re-distribute configuration file from here
4507 assert not (constants.CLUSTER_CONF_FILE in files_all or
4508 constants.CLUSTER_CONF_FILE in files_vm)
4509 assert not files_mc, "Master candidates not handled in this function"
4512 (online_nodes, files_all),
4513 (vm_nodes, files_vm),
4517 for (node_list, files) in filemap:
4519 _UploadHelper(lu, node_list, fname)
4522 class LUClusterRedistConf(NoHooksLU):
4523 """Force the redistribution of cluster configuration.
4525 This is a very simple LU.
4530 def ExpandNames(self):
4531 self.needed_locks = {
4532 locking.LEVEL_NODE: locking.ALL_SET,
4534 self.share_locks[locking.LEVEL_NODE] = 1
4536 def Exec(self, feedback_fn):
4537 """Redistribute the configuration.
4540 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4541 _RedistributeAncillaryFiles(self)
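# Usage sketch (assumes the standard command-line wrapper): this LU backs
# "gnt-cluster redist-conf"; it forces a configuration write, which pushes the
# config and ssconf files, and then redistributes the ancillary files.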
4544 class LUClusterActivateMasterIp(NoHooksLU):
4545 """Activate the master IP on the master node.
4548 def Exec(self, feedback_fn):
4549 """Activate the master IP.
4552 master_params = self.cfg.GetMasterNetworkParameters()
4553 ems = self.cfg.GetUseExternalMipScript()
4554 result = self.rpc.call_node_activate_master_ip(master_params.name,
4556 result.Raise("Could not activate the master IP")
4559 class LUClusterDeactivateMasterIp(NoHooksLU):
4560 """Deactivate the master IP on the master node.
4563 def Exec(self, feedback_fn):
4564 """Deactivate the master IP.
4567 master_params = self.cfg.GetMasterNetworkParameters()
4568 ems = self.cfg.GetUseExternalMipScript()
4569 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4571 result.Raise("Could not deactivate the master IP")
4574 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4575 """Sleep and poll for an instance's disk to sync.
4578 if not instance.disks or disks is not None and not disks:
4581 disks = _ExpandCheckDisks(instance, disks)
4584 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4586 node = instance.primary_node
4589 lu.cfg.SetDiskID(dev, node)
4591 # TODO: Convert to utils.Retry
4594 degr_retries = 10 # in seconds, as we sleep 1 second each time
4598 cumul_degraded = False
4599 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
4600 msg = rstats.fail_msg
4602 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4605 raise errors.RemoteError("Can't contact node %s for mirror data,"
4606 " aborting." % node)
4609 rstats = rstats.payload
4611 for i, mstat in enumerate(rstats):
4613 lu.LogWarning("Can't compute data for node %s/%s",
4614 node, disks[i].iv_name)
4617 cumul_degraded = (cumul_degraded or
4618 (mstat.is_degraded and mstat.sync_percent is None))
4619 if mstat.sync_percent is not None:
4621 if mstat.estimated_time is not None:
4622 rem_time = ("%s remaining (estimated)" %
4623 utils.FormatSeconds(mstat.estimated_time))
4624 max_time = mstat.estimated_time
4626 rem_time = "no time estimate"
4627 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4628 (disks[i].iv_name, mstat.sync_percent, rem_time))
4630 # if we're done but degraded, let's do a few small retries, to
4631 # make sure we see a stable and not transient situation; therefore
4632 # we force restart of the loop
4633 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4634 logging.info("Degraded disks found, %d retries left", degr_retries)
4642 time.sleep(min(60, max_time))
4645 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4646 return not cumul_degraded
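# Typical call pattern (a sketch; the exact error text is illustrative): LUs
# that create or activate mirrored disks wait for them like this:
#
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disks of instance %s are degraded" %
#                              instance.name)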
4649 def _BlockdevFind(lu, node, dev, instance):
4650 """Wrapper around call_blockdev_find to annotate diskparams.
4652 @param lu: A reference to the lu object
4653 @param node: The node to call out
4654 @param dev: The device to find
4655 @param instance: The instance object the device belongs to
4656 @returns The result of the rpc call
4659 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4660 return lu.rpc.call_blockdev_find(node, disk)
4663 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
4664 """Wrapper around L{_CheckDiskConsistencyInner}.
4667 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
4668 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
4672 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
4674 """Check that mirrors are not degraded.
4676 @attention: The device has to be annotated already.
4678 The ldisk parameter, if True, will change the test from the
4679 is_degraded attribute (which represents overall non-ok status for
4680 the device(s)) to the ldisk (representing the local storage status).
4683 lu.cfg.SetDiskID(dev, node)
4687 if on_primary or dev.AssembleOnSecondary():
4688 rstats = lu.rpc.call_blockdev_find(node, dev)
4689 msg = rstats.fail_msg
4691 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4693 elif not rstats.payload:
4694 lu.LogWarning("Can't find disk on node %s", node)
4698 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4700 result = result and not rstats.payload.is_degraded
4703 for child in dev.children:
4704 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
4705 on_primary)
4707 return result
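# Illustrative semantics of the ldisk flag: with ldisk=False a device is
# considered consistent when payload.is_degraded is False (a still-syncing
# DRBD mirror therefore fails the check); with ldisk=True the check instead
# requires payload.ldisk_status == constants.LDS_OKAY, i.e. only the local
# storage has to be healthy.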
4710 class LUOobCommand(NoHooksLU):
4711 """Logical unit for OOB handling.
4715 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4717 def ExpandNames(self):
4718 """Gather locks we need.
4721 if self.op.node_names:
4722 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4723 lock_names = self.op.node_names
4725 lock_names = locking.ALL_SET
4727 self.needed_locks = {
4728 locking.LEVEL_NODE: lock_names,
4731 def CheckPrereq(self):
4732 """Check prerequisites.
4735 - the node exists in the configuration
4738 Any errors are signaled by raising errors.OpPrereqError.
4742 self.master_node = self.cfg.GetMasterNode()
4744 assert self.op.power_delay >= 0.0
4746 if self.op.node_names:
4747 if (self.op.command in self._SKIP_MASTER and
4748 self.master_node in self.op.node_names):
4749 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4750 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4752 if master_oob_handler:
4753 additional_text = ("run '%s %s %s' if you want to operate on the"
4754 " master regardless") % (master_oob_handler,
4758 additional_text = "it does not support out-of-band operations"
4760 raise errors.OpPrereqError(("Operating on the master node %s is not"
4761 " allowed for %s; %s") %
4762 (self.master_node, self.op.command,
4763 additional_text), errors.ECODE_INVAL)
4765 self.op.node_names = self.cfg.GetNodeList()
4766 if self.op.command in self._SKIP_MASTER:
4767 self.op.node_names.remove(self.master_node)
4769 if self.op.command in self._SKIP_MASTER:
4770 assert self.master_node not in self.op.node_names
4772 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4774 raise errors.OpPrereqError("Node %s not found" % node_name,
4777 self.nodes.append(node)
4779 if (not self.op.ignore_status and
4780 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4781 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4782 " not marked offline") % node_name,
4785 def Exec(self, feedback_fn):
4786 """Execute OOB and return result if we expect any.
4789 master_node = self.master_node
4792 for idx, node in enumerate(utils.NiceSort(self.nodes,
4793 key=lambda node: node.name)):
4794 node_entry = [(constants.RS_NORMAL, node.name)]
4795 ret.append(node_entry)
4797 oob_program = _SupportsOob(self.cfg, node)
4800 node_entry.append((constants.RS_UNAVAIL, None))
4803 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4804 self.op.command, oob_program, node.name)
4805 result = self.rpc.call_run_oob(master_node, oob_program,
4806 self.op.command, node.name,
4810 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4811 node.name, result.fail_msg)
4812 node_entry.append((constants.RS_NODATA, None))
4815 self._CheckPayload(result)
4816 except errors.OpExecError, err:
4817 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4819 node_entry.append((constants.RS_NODATA, None))
4821 if self.op.command == constants.OOB_HEALTH:
4822 # For health we should log important events
4823 for item, status in result.payload:
4824 if status in [constants.OOB_STATUS_WARNING,
4825 constants.OOB_STATUS_CRITICAL]:
4826 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4827 item, node.name, status)
4829 if self.op.command == constants.OOB_POWER_ON:
4831 elif self.op.command == constants.OOB_POWER_OFF:
4832 node.powered = False
4833 elif self.op.command == constants.OOB_POWER_STATUS:
4834 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4835 if powered != node.powered:
4836 logging.warning(("Recorded power state (%s) of node '%s' does not"
4837 " match actual power state (%s)"), node.powered,
4840 # For configuration changing commands we should update the node
4841 if self.op.command in (constants.OOB_POWER_ON,
4842 constants.OOB_POWER_OFF):
4843 self.cfg.Update(node, feedback_fn)
4845 node_entry.append((constants.RS_NORMAL, result.payload))
4847 if (self.op.command == constants.OOB_POWER_ON and
4848 idx < len(self.nodes) - 1):
4849 time.sleep(self.op.power_delay)
4853 def _CheckPayload(self, result):
4854 """Checks if the payload is valid.
4856 @param result: RPC result
4857 @raises errors.OpExecError: If payload is not valid
4861 if self.op.command == constants.OOB_HEALTH:
4862 if not isinstance(result.payload, list):
4863 errs.append("command 'health' is expected to return a list but got %s" %
4864 type(result.payload))
4866 for item, status in result.payload:
4867 if status not in constants.OOB_STATUSES:
4868 errs.append("health item '%s' has invalid status '%s'" %
4871 if self.op.command == constants.OOB_POWER_STATUS:
4872 if not isinstance(result.payload, dict):
4873 errs.append("power-status is expected to return a dict but got %s" %
4874 type(result.payload))
4876 if self.op.command in [
4877 constants.OOB_POWER_ON,
4878 constants.OOB_POWER_OFF,
4879 constants.OOB_POWER_CYCLE,
4881 if result.payload is not None:
4882 errs.append("%s is expected to not return payload but got '%s'" %
4883 (self.op.command, result.payload))
4886 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4887 utils.CommaJoin(errs))
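# Example payloads that would pass the checks above (values are illustrative):
#
#   "health":       [("disk0", constants.OOB_STATUS_OK), ...]
#   "power-status": {constants.OOB_POWER_STATUS_POWERED: True}
#   "power-on", "power-off", "power-cycle": None (no payload expected)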
4890 class _OsQuery(_QueryBase):
4891 FIELDS = query.OS_FIELDS
4893 def ExpandNames(self, lu):
4894 # Lock all nodes in shared mode
4895 # Temporary removal of locks, should be reverted later
4896 # TODO: reintroduce locks when they are lighter-weight
4897 lu.needed_locks = {}
4898 #self.share_locks[locking.LEVEL_NODE] = 1
4899 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4901 # The following variables interact with _QueryBase._GetNames
4903 self.wanted = self.names
4905 self.wanted = locking.ALL_SET
4907 self.do_locking = self.use_locking
4909 def DeclareLocks(self, lu, level):
4913 def _DiagnoseByOS(rlist):
4914 """Remaps a per-node return list into an a per-os per-node dictionary
4916 @param rlist: a map with node names as keys and OS objects as values
4919 @return: a dictionary with osnames as keys and as value another
4920 map, with nodes as keys and tuples of (path, status, diagnose,
4921 variants, parameters, api_versions) as values, eg::
4923 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4924 (/srv/..., False, "invalid api")],
4925 "node2": [(/srv/..., True, "", [], [])]}
4930 # we build here the list of nodes that didn't fail the RPC (at RPC
4931 # level), so that nodes with a non-responding node daemon don't
4932 # make all OSes invalid
4933 good_nodes = [node_name for node_name in rlist
4934 if not rlist[node_name].fail_msg]
4935 for node_name, nr in rlist.items():
4936 if nr.fail_msg or not nr.payload:
4938 for (name, path, status, diagnose, variants,
4939 params, api_versions) in nr.payload:
4940 if name not in all_os:
4941 # build a list of nodes for this os containing empty lists
4942 # for each node in node_list
4944 for nname in good_nodes:
4945 all_os[name][nname] = []
4946 # convert params from [name, help] to (name, help)
4947 params = [tuple(v) for v in params]
4948 all_os[name][node_name].append((path, status, diagnose,
4949 variants, params, api_versions))
4952 def _GetQueryData(self, lu):
4953 """Computes the list of nodes and their attributes.
4956 # Locking is not used
4957 assert not (compat.any(lu.glm.is_owned(level)
4958 for level in locking.LEVELS
4959 if level != locking.LEVEL_CLUSTER) or
4960 self.do_locking or self.use_locking)
4962 valid_nodes = [node.name
4963 for node in lu.cfg.GetAllNodesInfo().values()
4964 if not node.offline and node.vm_capable]
4965 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4966 cluster = lu.cfg.GetClusterInfo()
4970 for (os_name, os_data) in pol.items():
4971 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4972 hidden=(os_name in cluster.hidden_os),
4973 blacklisted=(os_name in cluster.blacklisted_os))
4977 api_versions = set()
4979 for idx, osl in enumerate(os_data.values()):
4980 info.valid = bool(info.valid and osl and osl[0][1])
4984 (node_variants, node_params, node_api) = osl[0][3:6]
4987 variants.update(node_variants)
4988 parameters.update(node_params)
4989 api_versions.update(node_api)
4991 # Filter out inconsistent values
4992 variants.intersection_update(node_variants)
4993 parameters.intersection_update(node_params)
4994 api_versions.intersection_update(node_api)
4996 info.variants = list(variants)
4997 info.parameters = list(parameters)
4998 info.api_versions = list(api_versions)
5000 data[os_name] = info
5002 # Prepare data in requested order
5003 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5007 class LUOsDiagnose(NoHooksLU):
5008 """Logical unit for OS diagnose/query.
5014 def _BuildFilter(fields, names):
5015 """Builds a filter for querying OSes.
5018 name_filter = qlang.MakeSimpleFilter("name", names)
5020 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
5021 # respective field is not requested
5022 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
5023 for fname in ["hidden", "blacklisted"]
5024 if fname not in fields]
5025 if "valid" not in fields:
5026 status_filter.append([qlang.OP_TRUE, "valid"])
5029 status_filter.insert(0, qlang.OP_AND)
5031 status_filter = None
5033 if name_filter and status_filter:
5034 return [qlang.OP_AND, name_filter, status_filter]
5038 return status_filter
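# Sketch of a resulting filter (structure only, using the qlang operators
# referenced above): for fields=["name"] and a single requested name this
# returns
#
#   [OP_AND, <simple name filter>,
#            [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
#                     [OP_NOT, [OP_TRUE, "blacklisted"]],
#                     [OP_TRUE, "valid"]]]
#
# while requesting e.g. "hidden" or "valid" as output fields drops the
# corresponding term from the status filter.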
5040 def CheckArguments(self):
5041 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
5042 self.op.output_fields, False)
5044 def ExpandNames(self):
5045 self.oq.ExpandNames(self)
5047 def Exec(self, feedback_fn):
5048 return self.oq.OldStyleQuery(self)
5051 class _ExtStorageQuery(_QueryBase):
5052 FIELDS = query.EXTSTORAGE_FIELDS
5054 def ExpandNames(self, lu):
5055 # Lock all nodes in shared mode
5056 # Temporary removal of locks, should be reverted later
5057 # TODO: reintroduce locks when they are lighter-weight
5058 lu.needed_locks = {}
5059 #self.share_locks[locking.LEVEL_NODE] = 1
5060 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5062 # The following variables interact with _QueryBase._GetNames
5064 self.wanted = self.names
5066 self.wanted = locking.ALL_SET
5068 self.do_locking = self.use_locking
5070 def DeclareLocks(self, lu, level):
5074 def _DiagnoseByProvider(rlist):
5075 """Remaps a per-node return list into an a per-provider per-node dictionary
5077 @param rlist: a map with node names as keys and ExtStorage objects as values
5080 @return: a dictionary with extstorage providers as keys and as
5081 value another map, with nodes as keys and tuples of
5082 (path, status, diagnose, parameters) as values, eg::
5084 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
5085 "node2": [(/srv/..., False, "missing file")]
5086 "node3": [(/srv/..., True, "", [])]
5091 # we build here the list of nodes that didn't fail the RPC (at RPC
5092 # level), so that nodes with a non-responding node daemon don't
5093 # make all OSes invalid
5094 good_nodes = [node_name for node_name in rlist
5095 if not rlist[node_name].fail_msg]
5096 for node_name, nr in rlist.items():
5097 if nr.fail_msg or not nr.payload:
5099 for (name, path, status, diagnose, params) in nr.payload:
5100 if name not in all_es:
5101 # build a list of nodes for this os containing empty lists
5102 # for each node in node_list
5104 for nname in good_nodes:
5105 all_es[name][nname] = []
5106 # convert params from [name, help] to (name, help)
5107 params = [tuple(v) for v in params]
5108 all_es[name][node_name].append((path, status, diagnose, params))
5111 def _GetQueryData(self, lu):
5112 """Computes the list of nodes and their attributes.
5115 # Locking is not used
5116 assert not (compat.any(lu.glm.is_owned(level)
5117 for level in locking.LEVELS
5118 if level != locking.LEVEL_CLUSTER) or
5119 self.do_locking or self.use_locking)
5121 valid_nodes = [node.name
5122 for node in lu.cfg.GetAllNodesInfo().values()
5123 if not node.offline and node.vm_capable]
5124 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
5128 nodegroup_list = lu.cfg.GetNodeGroupList()
5130 for (es_name, es_data) in pol.items():
5131 # For every provider compute the nodegroup validity.
5132 # To do this we need to check the validity of each node in es_data
5133 # and then construct the corresponding nodegroup dict:
5134 # { nodegroup1: status
5135 # nodegroup2: status
5138 for nodegroup in nodegroup_list:
5139 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
5141 nodegroup_nodes = ndgrp.members
5142 nodegroup_name = ndgrp.name
5145 for node in nodegroup_nodes:
5146 if node in valid_nodes:
5147 if es_data[node] != []:
5148 node_status = es_data[node][0][1]
5149 node_statuses.append(node_status)
5151 node_statuses.append(False)
5153 if False in node_statuses:
5154 ndgrp_data[nodegroup_name] = False
5156 ndgrp_data[nodegroup_name] = True
5158 # Compute the provider's parameters
5160 for idx, esl in enumerate(es_data.values()):
5161 valid = bool(esl and esl[0][1])
5165 node_params = esl[0][3]
5168 parameters.update(node_params)
5170 # Filter out inconsistent values
5171 parameters.intersection_update(node_params)
5173 params = list(parameters)
5175 # Now fill all the info for this provider
5176 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
5177 nodegroup_status=ndgrp_data,
5180 data[es_name] = info
5182 # Prepare data in requested order
5183 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
5187 class LUExtStorageDiagnose(NoHooksLU):
5188 """Logical unit for ExtStorage diagnose/query.
5193 def CheckArguments(self):
5194 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
5195 self.op.output_fields, False)
5197 def ExpandNames(self):
5198 self.eq.ExpandNames(self)
5200 def Exec(self, feedback_fn):
5201 return self.eq.OldStyleQuery(self)
5204 class LUNodeRemove(LogicalUnit):
5205 """Logical unit for removing a node.
5208 HPATH = "node-remove"
5209 HTYPE = constants.HTYPE_NODE
5211 def BuildHooksEnv(self):
5216 "OP_TARGET": self.op.node_name,
5217 "NODE_NAME": self.op.node_name,
5220 def BuildHooksNodes(self):
5221 """Build hooks nodes.
5223 This doesn't run on the target node in the pre phase as a failed
5224 node would then be impossible to remove.
5227 all_nodes = self.cfg.GetNodeList()
5229 all_nodes.remove(self.op.node_name)
5232 return (all_nodes, all_nodes)
5234 def CheckPrereq(self):
5235 """Check prerequisites.
5238 - the node exists in the configuration
5239 - it does not have primary or secondary instances
5240 - it's not the master
5242 Any errors are signaled by raising errors.OpPrereqError.
5245 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5246 node = self.cfg.GetNodeInfo(self.op.node_name)
5247 assert node is not None
5249 masternode = self.cfg.GetMasterNode()
5250 if node.name == masternode:
5251 raise errors.OpPrereqError("Node is the master node, failover to another"
5252 " node is required", errors.ECODE_INVAL)
5254 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
5255 if node.name in instance.all_nodes:
5256 raise errors.OpPrereqError("Instance %s is still running on the node,"
5257 " please remove first" % instance_name,
5259 self.op.node_name = node.name
5262 def Exec(self, feedback_fn):
5263 """Removes the node from the cluster.
5267 logging.info("Stopping the node daemon and removing configs from node %s",
5270 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
5272 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5275 # Promote nodes to master candidate as needed
5276 _AdjustCandidatePool(self, exceptions=[node.name])
5277 self.context.RemoveNode(node.name)
5279 # Run post hooks on the node before it's removed
5280 _RunPostHook(self, node.name)
5282 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
5283 msg = result.fail_msg
5285 self.LogWarning("Errors encountered on the remote node while leaving"
5286 " the cluster: %s", msg)
5288 # Remove node from our /etc/hosts
5289 if self.cfg.GetClusterInfo().modify_etc_hosts:
5290 master_node = self.cfg.GetMasterNode()
5291 result = self.rpc.call_etc_hosts_modify(master_node,
5292 constants.ETC_HOSTS_REMOVE,
5294 result.Raise("Can't update hosts file with new host data")
5295 _RedistributeAncillaryFiles(self)
5298 class _NodeQuery(_QueryBase):
5299 FIELDS = query.NODE_FIELDS
5301 def ExpandNames(self, lu):
5302 lu.needed_locks = {}
5303 lu.share_locks = _ShareAll()
5306 self.wanted = _GetWantedNodes(lu, self.names)
5308 self.wanted = locking.ALL_SET
5310 self.do_locking = (self.use_locking and
5311 query.NQ_LIVE in self.requested_data)
5314 # If any non-static field is requested we need to lock the nodes
5315 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
5317 def DeclareLocks(self, lu, level):
5320 def _GetQueryData(self, lu):
5321 """Computes the list of nodes and their attributes.
5324 all_info = lu.cfg.GetAllNodesInfo()
5326 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
5328 # Gather data as requested
5329 if query.NQ_LIVE in self.requested_data:
5330 # filter out non-vm_capable nodes
5331 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
5333 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
5334 [lu.cfg.GetHypervisorType()])
5335 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
5336 for (name, nresult) in node_data.items()
5337 if not nresult.fail_msg and nresult.payload)
5341 if query.NQ_INST in self.requested_data:
5342 node_to_primary = dict([(name, set()) for name in nodenames])
5343 node_to_secondary = dict([(name, set()) for name in nodenames])
5345 inst_data = lu.cfg.GetAllInstancesInfo()
5347 for inst in inst_data.values():
5348 if inst.primary_node in node_to_primary:
5349 node_to_primary[inst.primary_node].add(inst.name)
5350 for secnode in inst.secondary_nodes:
5351 if secnode in node_to_secondary:
5352 node_to_secondary[secnode].add(inst.name)
5354 node_to_primary = None
5355 node_to_secondary = None
5357 if query.NQ_OOB in self.requested_data:
5358 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
5359 for name, node in all_info.iteritems())
5363 if query.NQ_GROUP in self.requested_data:
5364 groups = lu.cfg.GetAllNodeGroupsInfo()
5368 return query.NodeQueryData([all_info[name] for name in nodenames],
5369 live_data, lu.cfg.GetMasterNode(),
5370 node_to_primary, node_to_secondary, groups,
5371 oob_support, lu.cfg.GetClusterInfo())
5374 class LUNodeQuery(NoHooksLU):
5375 """Logical unit for querying nodes.
5378 # pylint: disable=W0142
5381 def CheckArguments(self):
5382 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5383 self.op.output_fields, self.op.use_locking)
5385 def ExpandNames(self):
5386 self.nq.ExpandNames(self)
5388 def DeclareLocks(self, level):
5389 self.nq.DeclareLocks(self, level)
5391 def Exec(self, feedback_fn):
5392 return self.nq.OldStyleQuery(self)
5395 class LUNodeQueryvols(NoHooksLU):
5396 """Logical unit for getting volumes on node(s).
5400 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5401 _FIELDS_STATIC = utils.FieldSet("node")
5403 def CheckArguments(self):
5404 _CheckOutputFields(static=self._FIELDS_STATIC,
5405 dynamic=self._FIELDS_DYNAMIC,
5406 selected=self.op.output_fields)
5408 def ExpandNames(self):
5409 self.share_locks = _ShareAll()
5410 self.needed_locks = {}
5412 if not self.op.nodes:
5413 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5415 self.needed_locks[locking.LEVEL_NODE] = \
5416 _GetWantedNodes(self, self.op.nodes)
5418 def Exec(self, feedback_fn):
5419 """Computes the list of nodes and their attributes.
5422 nodenames = self.owned_locks(locking.LEVEL_NODE)
5423 volumes = self.rpc.call_node_volumes(nodenames)
5425 ilist = self.cfg.GetAllInstancesInfo()
5426 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5429 for node in nodenames:
5430 nresult = volumes[node]
5433 msg = nresult.fail_msg
5435 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5438 node_vols = sorted(nresult.payload,
5439 key=operator.itemgetter("dev"))
5441 for vol in node_vols:
5443 for field in self.op.output_fields:
5446 elif field == "phys":
5450 elif field == "name":
5452 elif field == "size":
5453 val = int(float(vol["size"]))
5454 elif field == "instance":
5455 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5457 raise errors.ParameterError(field)
5458 node_output.append(str(val))
5460 output.append(node_output)
5465 class LUNodeQueryStorage(NoHooksLU):
5466 """Logical unit for getting information on storage units on node(s).
5469 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5472 def CheckArguments(self):
5473 _CheckOutputFields(static=self._FIELDS_STATIC,
5474 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5475 selected=self.op.output_fields)
5477 def ExpandNames(self):
5478 self.share_locks = _ShareAll()
5479 self.needed_locks = {}
5482 self.needed_locks[locking.LEVEL_NODE] = \
5483 _GetWantedNodes(self, self.op.nodes)
5485 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5487 def Exec(self, feedback_fn):
5488 """Computes the list of nodes and their attributes.
5491 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5493 # Always get name to sort by
5494 if constants.SF_NAME in self.op.output_fields:
5495 fields = self.op.output_fields[:]
5497 fields = [constants.SF_NAME] + self.op.output_fields
5499 # Never ask for node or type as it's only known to the LU
5500 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5501 while extra in fields:
5502 fields.remove(extra)
5504 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5505 name_idx = field_idx[constants.SF_NAME]
5507 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5508 data = self.rpc.call_storage_list(self.nodes,
5509 self.op.storage_type, st_args,
5510 self.op.name, fields)
5514 for node in utils.NiceSort(self.nodes):
5515 nresult = data[node]
5519 msg = nresult.fail_msg
5521 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5524 rows = dict([(row[name_idx], row) for row in nresult.payload])
5526 for name in utils.NiceSort(rows.keys()):
5531 for field in self.op.output_fields:
5532 if field == constants.SF_NODE:
5534 elif field == constants.SF_TYPE:
5535 val = self.op.storage_type
5536 elif field in field_idx:
5537 val = row[field_idx[field]]
5539 raise errors.ParameterError(field)
5548 class _InstanceQuery(_QueryBase):
5549 FIELDS = query.INSTANCE_FIELDS
5551 def ExpandNames(self, lu):
5552 lu.needed_locks = {}
5553 lu.share_locks = _ShareAll()
5556 self.wanted = _GetWantedInstances(lu, self.names)
5558 self.wanted = locking.ALL_SET
5560 self.do_locking = (self.use_locking and
5561 query.IQ_LIVE in self.requested_data)
5563 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5564 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5565 lu.needed_locks[locking.LEVEL_NODE] = []
5566 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5568 self.do_grouplocks = (self.do_locking and
5569 query.IQ_NODES in self.requested_data)
5571 def DeclareLocks(self, lu, level):
5573 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5574 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5576 # Lock all groups used by instances optimistically; this requires going
5577 # via the node before it's locked, requiring verification later on
5578 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5579 set(group_uuid
5580 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5581 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5582 elif level == locking.LEVEL_NODE:
5583 lu._LockInstancesNodes() # pylint: disable=W0212
5586 def _CheckGroupLocks(lu):
5587 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5588 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5590 # Check if node groups for locked instances are still correct
5591 for instance_name in owned_instances:
5592 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5594 def _GetQueryData(self, lu):
5595 """Computes the list of instances and their attributes.
5598 if self.do_grouplocks:
5599 self._CheckGroupLocks(lu)
5601 cluster = lu.cfg.GetClusterInfo()
5602 all_info = lu.cfg.GetAllInstancesInfo()
5604 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5606 instance_list = [all_info[name] for name in instance_names]
5607 nodes = frozenset(itertools.chain(*(inst.all_nodes
5608 for inst in instance_list)))
5609 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5612 wrongnode_inst = set()
5614 # Gather data as requested
5615 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5617 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5619 result = node_data[name]
5621 # offline nodes will be in both lists
5622 assert result.fail_msg
5623 offline_nodes.append(name)
5625 bad_nodes.append(name)
5626 elif result.payload:
5627 for inst in result.payload:
5628 if inst in all_info:
5629 if all_info[inst].primary_node == name:
5630 live_data.update(result.payload)
5632 wrongnode_inst.add(inst)
5634 # orphan instance; we don't list it here as we don't
5635 # handle this case yet in the output of instance listing
5636 logging.warning("Orphan instance '%s' found on node %s",
5638 # else no instance is alive
5642 if query.IQ_DISKUSAGE in self.requested_data:
5643 disk_usage = dict((inst.name,
5644 _ComputeDiskSize(inst.disk_template,
5645 [{constants.IDISK_SIZE: disk.size}
5646 for disk in inst.disks]))
5647 for inst in instance_list)
5651 if query.IQ_CONSOLE in self.requested_data:
5653 for inst in instance_list:
5654 if inst.name in live_data:
5655 # Instance is running
5656 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5658 consinfo[inst.name] = None
5659 assert set(consinfo.keys()) == set(instance_names)
5663 if query.IQ_NODES in self.requested_data:
5664 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5666 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5667 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5668 for uuid in set(map(operator.attrgetter("group"),
5674 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5675 disk_usage, offline_nodes, bad_nodes,
5676 live_data, wrongnode_inst, consinfo,
5680 class LUQuery(NoHooksLU):
5681 """Query for resources/items of a certain kind.
5684 # pylint: disable=W0142
5687 def CheckArguments(self):
5688 qcls = _GetQueryImplementation(self.op.what)
5690 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5692 def ExpandNames(self):
5693 self.impl.ExpandNames(self)
5695 def DeclareLocks(self, level):
5696 self.impl.DeclareLocks(self, level)
5698 def Exec(self, feedback_fn):
5699 return self.impl.NewStyleQuery(self)
5702 class LUQueryFields(NoHooksLU):
5703 """Query for resources/items of a certain kind.
5706 # pylint: disable=W0142
5709 def CheckArguments(self):
5710 self.qcls = _GetQueryImplementation(self.op.what)
5712 def ExpandNames(self):
5713 self.needed_locks = {}
5715 def Exec(self, feedback_fn):
5716 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5719 class LUNodeModifyStorage(NoHooksLU):
5720 """Logical unit for modifying a storage volume on a node.
5725 def CheckArguments(self):
5726 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5728 storage_type = self.op.storage_type
5731 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5733 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5734 " modified" % storage_type,
5737     diff = set(self.op.changes.keys()) - modifiable
5738     if diff:
5739 raise errors.OpPrereqError("The following fields can not be modified for"
5740 " storage units of type '%s': %r" %
5741 (storage_type, list(diff)),
5744 def ExpandNames(self):
5745 self.needed_locks = {
5746 locking.LEVEL_NODE: self.op.node_name,
5749 def Exec(self, feedback_fn):
5750     """Modifies a storage volume on the selected node.
5753 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5754 result = self.rpc.call_storage_modify(self.op.node_name,
5755 self.op.storage_type, st_args,
5756 self.op.name, self.op.changes)
5757 result.Raise("Failed to modify storage unit '%s' on %s" %
5758 (self.op.name, self.op.node_name))
5761 class LUNodeAdd(LogicalUnit):
5762 """Logical unit for adding node to the cluster.
5766 HTYPE = constants.HTYPE_NODE
5767 _NFLAGS = ["master_capable", "vm_capable"]
5769 def CheckArguments(self):
5770 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5771 # validate/normalize the node name
5772 self.hostname = netutils.GetHostname(name=self.op.node_name,
5773 family=self.primary_ip_family)
5774 self.op.node_name = self.hostname.name
5776 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5777 raise errors.OpPrereqError("Cannot readd the master node",
5780 if self.op.readd and self.op.group:
5781 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5782 " being readded", errors.ECODE_INVAL)
5784 def BuildHooksEnv(self):
5787 This will run on all nodes before, and on all nodes + the new node after.
5791 "OP_TARGET": self.op.node_name,
5792 "NODE_NAME": self.op.node_name,
5793 "NODE_PIP": self.op.primary_ip,
5794 "NODE_SIP": self.op.secondary_ip,
5795 "MASTER_CAPABLE": str(self.op.master_capable),
5796 "VM_CAPABLE": str(self.op.vm_capable),
5799 def BuildHooksNodes(self):
5800 """Build hooks nodes.
5803 # Exclude added node
5804 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5805 post_nodes = pre_nodes + [self.op.node_name, ]
5807 return (pre_nodes, post_nodes)
5809 def CheckPrereq(self):
5810 """Check prerequisites.
5813 - the new node is not already in the config
5815    - its parameters (single/dual homed) match the cluster
5817 Any errors are signaled by raising errors.OpPrereqError.
5820     cfg = self.cfg
5821     hostname = self.hostname
5822 node = hostname.name
5823 primary_ip = self.op.primary_ip = hostname.ip
5824 if self.op.secondary_ip is None:
5825 if self.primary_ip_family == netutils.IP6Address.family:
5826         raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5827                                    " IPv4 address must be given as secondary",
5828                                    errors.ECODE_INVAL)
5829       self.op.secondary_ip = primary_ip
5831 secondary_ip = self.op.secondary_ip
5832 if not netutils.IP4Address.IsValid(secondary_ip):
5833 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5834 " address" % secondary_ip, errors.ECODE_INVAL)
5836 node_list = cfg.GetNodeList()
5837 if not self.op.readd and node in node_list:
5838 raise errors.OpPrereqError("Node %s is already in the configuration" %
5839 node, errors.ECODE_EXISTS)
5840 elif self.op.readd and node not in node_list:
5841 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5844 self.changed_primary_ip = False
5846 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5847 if self.op.readd and node == existing_node_name:
5848 if existing_node.secondary_ip != secondary_ip:
5849 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5850 " address configuration as before",
5852 if existing_node.primary_ip != primary_ip:
5853           self.changed_primary_ip = True
5855         continue
5857 if (existing_node.primary_ip == primary_ip or
5858 existing_node.secondary_ip == primary_ip or
5859 existing_node.primary_ip == secondary_ip or
5860 existing_node.secondary_ip == secondary_ip):
5861 raise errors.OpPrereqError("New node ip address(es) conflict with"
5862 " existing node %s" % existing_node.name,
5863 errors.ECODE_NOTUNIQUE)
5865 # After this 'if' block, None is no longer a valid value for the
5866 # _capable op attributes
5867     if self.op.readd:
5868       old_node = self.cfg.GetNodeInfo(node)
5869 assert old_node is not None, "Can't retrieve locked node %s" % node
5870 for attr in self._NFLAGS:
5871 if getattr(self.op, attr) is None:
5872 setattr(self.op, attr, getattr(old_node, attr))
5873     else:
5874       for attr in self._NFLAGS:
5875 if getattr(self.op, attr) is None:
5876 setattr(self.op, attr, True)
5878 if self.op.readd and not self.op.vm_capable:
5879       pri, sec = cfg.GetNodeInstances(node)
5880       if pri or sec:
5881 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5882 " flag set to false, but it already holds"
5883 " instances" % node,
5886 # check that the type of the node (single versus dual homed) is the
5887 # same as for the master
5888 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5889 master_singlehomed = myself.secondary_ip == myself.primary_ip
5890 newbie_singlehomed = secondary_ip == primary_ip
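    # "single-homed" means the node uses one address as both primary and
    # secondary IP, i.e. there is no separate replication network; mixing
    # single- and dual-homed nodes in one cluster is rejected below.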
5891 if master_singlehomed != newbie_singlehomed:
5892 if master_singlehomed:
5893 raise errors.OpPrereqError("The master has no secondary ip but the"
5894                                    " new node has one",
5895                                    errors.ECODE_INVAL)
5896       else:
5897         raise errors.OpPrereqError("The master has a secondary ip but the"
5898 " new node doesn't have one",
5901 # checks reachability
5902 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5903 raise errors.OpPrereqError("Node not reachable by ping",
5904 errors.ECODE_ENVIRON)
5906 if not newbie_singlehomed:
5907 # check reachability from my secondary ip to newbie's secondary ip
5908 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5909 source=myself.secondary_ip):
5910 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5911 " based ping to node daemon port",
5912 errors.ECODE_ENVIRON)
5919 if self.op.master_capable:
5920       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5921     else:
5922       self.master_candidate = False
5924     if self.op.readd:
5925       self.new_node = old_node
5926     else:
5927       node_group = cfg.LookupNodeGroup(self.op.group)
5928 self.new_node = objects.Node(name=node,
5929 primary_ip=primary_ip,
5930 secondary_ip=secondary_ip,
5931 master_candidate=self.master_candidate,
5932                                    offline=False, drained=False,
5933                                    group=node_group, ndparams={})
5935 if self.op.ndparams:
5936 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5938 if self.op.hv_state:
5939 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5941 if self.op.disk_state:
5942 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5944 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
5945 # it a property on the base class.
5946 result = rpc.DnsOnlyRunner().call_version([node])[node]
5947 result.Raise("Can't get version information from node %s" % node)
5948 if constants.PROTOCOL_VERSION == result.payload:
5949 logging.info("Communication to node %s fine, sw version %s match",
5950                    node, result.payload)
5951     else:
5952 raise errors.OpPrereqError("Version mismatch master version %s,"
5953 " node version %s" %
5954 (constants.PROTOCOL_VERSION, result.payload),
5955 errors.ECODE_ENVIRON)
5957 def Exec(self, feedback_fn):
5958 """Adds the new node to the cluster.
5961 new_node = self.new_node
5962 node = new_node.name
5964 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5967     # We are adding a new node, so we assume it's powered
5968 new_node.powered = True
5970 # for re-adds, reset the offline/drained/master-candidate flags;
5971 # we need to reset here, otherwise offline would prevent RPC calls
5972 # later in the procedure; this also means that if the re-add
5973 # fails, we are left with a non-offlined, broken node
5974     if self.op.readd:
5975       new_node.drained = new_node.offline = False # pylint: disable=W0201
5976 self.LogInfo("Readding a node, the offline/drained flags were reset")
5977 # if we demote the node, we do cleanup later in the procedure
5978 new_node.master_candidate = self.master_candidate
5979 if self.changed_primary_ip:
5980 new_node.primary_ip = self.op.primary_ip
5982 # copy the master/vm_capable flags
5983 for attr in self._NFLAGS:
5984 setattr(new_node, attr, getattr(self.op, attr))
5986 # notify the user about any possible mc promotion
5987 if new_node.master_candidate:
5988 self.LogInfo("Node will be a master candidate")
5990 if self.op.ndparams:
5991 new_node.ndparams = self.op.ndparams
5993 new_node.ndparams = {}
5995 if self.op.hv_state:
5996 new_node.hv_state_static = self.new_hv_state
5998 if self.op.disk_state:
5999 new_node.disk_state_static = self.new_disk_state
6001 # Add node to our /etc/hosts, and add key to known_hosts
6002 if self.cfg.GetClusterInfo().modify_etc_hosts:
6003 master_node = self.cfg.GetMasterNode()
6004 result = self.rpc.call_etc_hosts_modify(master_node,
6005 constants.ETC_HOSTS_ADD,
6008 result.Raise("Can't update hosts file with new host data")
6010 if new_node.secondary_ip != new_node.primary_ip:
6011 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
6014 node_verify_list = [self.cfg.GetMasterNode()]
6015 node_verify_param = {
6016 constants.NV_NODELIST: ([node], {}),
6017 # TODO: do a node-net-test as well?
6020 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
6021 self.cfg.GetClusterName())
6022 for verifier in node_verify_list:
6023 result[verifier].Raise("Cannot communicate with node %s" % verifier)
6024 nl_payload = result[verifier].payload[constants.NV_NODELIST]
6026 for failed in nl_payload:
6027 feedback_fn("ssh/hostname verification failed"
6028 " (checking from %s): %s" %
6029 (verifier, nl_payload[failed]))
6030 raise errors.OpExecError("ssh/hostname verification failed")
6033 _RedistributeAncillaryFiles(self)
6034 self.context.ReaddNode(new_node)
6035 # make sure we redistribute the config
6036 self.cfg.Update(new_node, feedback_fn)
6037 # and make sure the new node will not have old files around
6038 if not new_node.master_candidate:
6039 result = self.rpc.call_node_demote_from_mc(new_node.name)
6040       msg = result.fail_msg
6041       if msg:
6042 self.LogWarning("Node failed to demote itself from master"
6043 " candidate status: %s" % msg)
6045 _RedistributeAncillaryFiles(self, additional_nodes=[node],
6046 additional_vm=self.op.vm_capable)
6047 self.context.AddNode(new_node, self.proc.GetECId())
6050 class LUNodeSetParams(LogicalUnit):
6051 """Modifies the parameters of a node.
6053 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
6054 to the node role (as _ROLE_*)
6055 @cvar _R2F: a dictionary from node role to tuples of flags
6056 @cvar _FLAGS: a list of attribute names corresponding to the flags
6059 HPATH = "node-modify"
6060 HTYPE = constants.HTYPE_NODE
6062 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
6064 (True, False, False): _ROLE_CANDIDATE,
6065 (False, True, False): _ROLE_DRAINED,
6066 (False, False, True): _ROLE_OFFLINE,
6067 (False, False, False): _ROLE_REGULAR,
6069 _R2F = dict((v, k) for k, v in _F2R.items())
6070 _FLAGS = ["master_candidate", "drained", "offline"]
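  # For example, a node with flags (master_candidate=True, drained=False,
  # offline=False) maps to _ROLE_CANDIDATE via _F2R; _R2F is used in Exec to
  # turn the newly computed role back into the three boolean flags.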
6072 def CheckArguments(self):
6073 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6074 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
6075 self.op.master_capable, self.op.vm_capable,
6076 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
6078 if all_mods.count(None) == len(all_mods):
6079 raise errors.OpPrereqError("Please pass at least one modification",
6081 if all_mods.count(True) > 1:
6082 raise errors.OpPrereqError("Can't set the node into more than one"
6083 " state at the same time",
6086 # Boolean value that tells us whether we might be demoting from MC
6087 self.might_demote = (self.op.master_candidate == False or
6088 self.op.offline == True or
6089 self.op.drained == True or
6090 self.op.master_capable == False)
6092 if self.op.secondary_ip:
6093 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
6094 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
6095 " address" % self.op.secondary_ip,
6098 self.lock_all = self.op.auto_promote and self.might_demote
6099 self.lock_instances = self.op.secondary_ip is not None
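    # lock_all: demoting this node from master candidate with auto_promote may
    # promote other nodes, so all node locks are needed; lock_instances:
    # changing the secondary IP requires looking at every mirrored instance
    # using this node (see _InstanceFilter below).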
6101 def _InstanceFilter(self, instance):
6102 """Filter for getting affected instances.
6105 return (instance.disk_template in constants.DTS_INT_MIRROR and
6106 self.op.node_name in instance.all_nodes)
6108 def ExpandNames(self):
6109     if self.lock_all:
6110       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
6111     else:
6112       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
6114 # Since modifying a node can have severe effects on currently running
6115 # operations the resource lock is at least acquired in shared mode
6116 self.needed_locks[locking.LEVEL_NODE_RES] = \
6117 self.needed_locks[locking.LEVEL_NODE]
6119 # Get node resource and instance locks in shared mode; they are not used
6120 # for anything but read-only access
6121 self.share_locks[locking.LEVEL_NODE_RES] = 1
6122 self.share_locks[locking.LEVEL_INSTANCE] = 1
6124 if self.lock_instances:
6125 self.needed_locks[locking.LEVEL_INSTANCE] = \
6126 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
6128 def BuildHooksEnv(self):
6131 This runs on the master node.
6135 "OP_TARGET": self.op.node_name,
6136 "MASTER_CANDIDATE": str(self.op.master_candidate),
6137 "OFFLINE": str(self.op.offline),
6138 "DRAINED": str(self.op.drained),
6139 "MASTER_CAPABLE": str(self.op.master_capable),
6140 "VM_CAPABLE": str(self.op.vm_capable),
6143 def BuildHooksNodes(self):
6144 """Build hooks nodes.
6147 nl = [self.cfg.GetMasterNode(), self.op.node_name]
6150 def CheckPrereq(self):
6151 """Check prerequisites.
6153 This only checks the instance list against the existing names.
6156 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
6158 if self.lock_instances:
6159 affected_instances = \
6160 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
6162 # Verify instance locks
6163 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
6164 wanted_instances = frozenset(affected_instances.keys())
6165 if wanted_instances - owned_instances:
6166 raise errors.OpPrereqError("Instances affected by changing node %s's"
6167 " secondary IP address have changed since"
6168 " locks were acquired, wanted '%s', have"
6169 " '%s'; retry the operation" %
6170                                    (self.op.node_name,
6171                                     utils.CommaJoin(wanted_instances),
6172 utils.CommaJoin(owned_instances)),
6173                                    errors.ECODE_STATE)
6174     else:
6175       affected_instances = None
6177 if (self.op.master_candidate is not None or
6178 self.op.drained is not None or
6179 self.op.offline is not None):
6180 # we can't change the master's node flags
6181 if self.op.node_name == self.cfg.GetMasterNode():
6182 raise errors.OpPrereqError("The master role can be changed"
6183 " only via master-failover",
6186 if self.op.master_candidate and not node.master_capable:
6187 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
6188 " it a master candidate" % node.name,
6191 if self.op.vm_capable == False:
6192       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
6193       if ipri or isec:
6194 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
6195 " the vm_capable flag" % node.name,
6198 if node.master_candidate and self.might_demote and not self.lock_all:
6199 assert not self.op.auto_promote, "auto_promote set but lock_all not"
6200 # check if after removing the current node, we're missing master
6202 (mc_remaining, mc_should, _) = \
6203 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
6204 if mc_remaining < mc_should:
6205 raise errors.OpPrereqError("Not enough master candidates, please"
6206 " pass auto promote option to allow"
6207 " promotion (--auto-promote or RAPI"
6208 " auto_promote=True)", errors.ECODE_STATE)
6210 self.old_flags = old_flags = (node.master_candidate,
6211 node.drained, node.offline)
6212 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
6213 self.old_role = old_role = self._F2R[old_flags]
6215 # Check for ineffective changes
6216 for attr in self._FLAGS:
6217 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
6218 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
6219 setattr(self.op, attr, None)
6221 # Past this point, any flag change to False means a transition
6222 # away from the respective state, as only real changes are kept
6224 # TODO: We might query the real power state if it supports OOB
6225 if _SupportsOob(self.cfg, node):
6226 if self.op.offline is False and not (node.powered or
6227 self.op.powered == True):
6228 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
6229 " offline status can be reset") %
6231 elif self.op.powered is not None:
6232 raise errors.OpPrereqError(("Unable to change powered state for node %s"
6233 " as it does not support out-of-band"
6234 " handling") % self.op.node_name)
6236 # If we're being deofflined/drained, we'll MC ourself if needed
6237 if (self.op.drained == False or self.op.offline == False or
6238 (self.op.master_capable and not node.master_capable)):
6239 if _DecideSelfPromotion(self):
6240 self.op.master_candidate = True
6241 self.LogInfo("Auto-promoting node to master candidate")
6243 # If we're no longer master capable, we'll demote ourselves from MC
6244 if self.op.master_capable == False and node.master_candidate:
6245 self.LogInfo("Demoting from master candidate")
6246 self.op.master_candidate = False
6249 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
6250 if self.op.master_candidate:
6251 new_role = self._ROLE_CANDIDATE
6252 elif self.op.drained:
6253 new_role = self._ROLE_DRAINED
6254 elif self.op.offline:
6255 new_role = self._ROLE_OFFLINE
6256 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
6257       # False is still in new flags, which means we're un-setting (the
6258       # offline/drained/master-candidate) flag
6259       new_role = self._ROLE_REGULAR
6260     else: # no new flags, nothing, keep old role
6261       new_role = old_role
6263 self.new_role = new_role
6265 if old_role == self._ROLE_OFFLINE and new_role != old_role:
6266 # Trying to transition out of offline status
6267 result = self.rpc.call_version([node.name])[node.name]
6268       if result.fail_msg:
6269         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
6270 " to report its version: %s" %
6271                                    (node.name, result.fail_msg),
6272                                    errors.ECODE_ENVIRON)
6273       else:
6274         self.LogWarning("Transitioning node from offline to online state"
6275                         " without using re-add. Please make sure the node"
6276                         " is healthy")
6278 if self.op.secondary_ip:
6279 # Ok even without locking, because this can't be changed by any LU
6280 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
6281 master_singlehomed = master.secondary_ip == master.primary_ip
6282 if master_singlehomed and self.op.secondary_ip:
6283 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
6284 " homed cluster", errors.ECODE_INVAL)
6286 assert not (frozenset(affected_instances) -
6287 self.owned_locks(locking.LEVEL_INSTANCE))
6290 if affected_instances:
6291 raise errors.OpPrereqError("Cannot change secondary IP address:"
6292 " offline node has instances (%s)"
6293 " configured to use it" %
6294 utils.CommaJoin(affected_instances.keys()))
6296 # On online nodes, check that no instances are running, and that
6297 # the node has the new ip and we can reach it.
6298 for instance in affected_instances.values():
6299 _CheckInstanceState(self, instance, INSTANCE_DOWN,
6300 msg="cannot change secondary ip")
6302 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
6303 if master.name != node.name:
6304 # check reachability from master secondary ip to new secondary ip
6305 if not netutils.TcpPing(self.op.secondary_ip,
6306 constants.DEFAULT_NODED_PORT,
6307 source=master.secondary_ip):
6308 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
6309 " based ping to node daemon port",
6310 errors.ECODE_ENVIRON)
6312 if self.op.ndparams:
6313 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
6314 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
6315 self.new_ndparams = new_ndparams
6317 if self.op.hv_state:
6318 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
6319 self.node.hv_state_static)
6321 if self.op.disk_state:
6322 self.new_disk_state = \
6323 _MergeAndVerifyDiskState(self.op.disk_state,
6324 self.node.disk_state_static)
6326 def Exec(self, feedback_fn):
6330     node = self.node
6331     old_role = self.old_role
6332 new_role = self.new_role
6334     result = []
6336     if self.op.ndparams:
6337 node.ndparams = self.new_ndparams
6339 if self.op.powered is not None:
6340 node.powered = self.op.powered
6342 if self.op.hv_state:
6343 node.hv_state_static = self.new_hv_state
6345 if self.op.disk_state:
6346 node.disk_state_static = self.new_disk_state
6348 for attr in ["master_capable", "vm_capable"]:
6349       val = getattr(self.op, attr)
6350       if val is not None:
6351         setattr(node, attr, val)
6352 result.append((attr, str(val)))
6354 if new_role != old_role:
6355 # Tell the node to demote itself, if no longer MC and not offline
6356 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
6357       msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
6358       if msg:
6359         self.LogWarning("Node failed to demote itself: %s", msg)
6361 new_flags = self._R2F[new_role]
6362     for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
6363       if of != nf:
6364 result.append((desc, str(nf)))
6365 (node.master_candidate, node.drained, node.offline) = new_flags
6367     # we locked all nodes, we adjust the CP before updating this node
6368     if self.lock_all:
6369       _AdjustCandidatePool(self, [node.name])
6371 if self.op.secondary_ip:
6372 node.secondary_ip = self.op.secondary_ip
6373 result.append(("secondary_ip", self.op.secondary_ip))
6375 # this will trigger configuration file update, if needed
6376 self.cfg.Update(node, feedback_fn)
6378     # this will trigger job queue propagation or cleanup if the mc
6379     # flag is changed
6380     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6381       self.context.ReaddNode(node)
6383     return result
6386 class LUNodePowercycle(NoHooksLU):
6387 """Powercycles a node.
6392 def CheckArguments(self):
6393 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6394 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6395 raise errors.OpPrereqError("The node is the master and the force"
6396 " parameter was not set",
6399 def ExpandNames(self):
6400 """Locking for PowercycleNode.
6402 This is a last-resort option and shouldn't block on other
6403 jobs. Therefore, we grab no locks.
6406 self.needed_locks = {}
6408 def Exec(self, feedback_fn):
6412 result = self.rpc.call_node_powercycle(self.op.node_name,
6413 self.cfg.GetHypervisorType())
6414 result.Raise("Failed to schedule the reboot")
6415 return result.payload
6418 class LUClusterQuery(NoHooksLU):
6419 """Query cluster configuration.
6424 def ExpandNames(self):
6425 self.needed_locks = {}
6427 def Exec(self, feedback_fn):
6428 """Return cluster config.
6431 cluster = self.cfg.GetClusterInfo()
6433     os_hvp = {}
6434     # Filter just for enabled hypervisors
6435 for os_name, hv_dict in cluster.os_hvp.items():
6436 os_hvp[os_name] = {}
6437 for hv_name, hv_params in hv_dict.items():
6438 if hv_name in cluster.enabled_hypervisors:
6439 os_hvp[os_name][hv_name] = hv_params
6441 # Convert ip_family to ip_version
6442 primary_ip_version = constants.IP4_VERSION
6443 if cluster.primary_ip_family == netutils.IP6Address.family:
6444 primary_ip_version = constants.IP6_VERSION
6446     result = {
6447       "software_version": constants.RELEASE_VERSION,
6448 "protocol_version": constants.PROTOCOL_VERSION,
6449 "config_version": constants.CONFIG_VERSION,
6450 "os_api_version": max(constants.OS_API_VERSIONS),
6451 "export_version": constants.EXPORT_VERSION,
6452 "architecture": runtime.GetArchInfo(),
6453 "name": cluster.cluster_name,
6454 "master": cluster.master_node,
6455 "default_hypervisor": cluster.primary_hypervisor,
6456 "enabled_hypervisors": cluster.enabled_hypervisors,
6457 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6458 for hypervisor_name in cluster.enabled_hypervisors]),
6459       "os_hvp": os_hvp,
6460       "beparams": cluster.beparams,
6461 "osparams": cluster.osparams,
6462 "ipolicy": cluster.ipolicy,
6463 "nicparams": cluster.nicparams,
6464 "ndparams": cluster.ndparams,
6465 "diskparams": cluster.diskparams,
6466 "candidate_pool_size": cluster.candidate_pool_size,
6467 "master_netdev": cluster.master_netdev,
6468 "master_netmask": cluster.master_netmask,
6469 "use_external_mip_script": cluster.use_external_mip_script,
6470 "volume_group_name": cluster.volume_group_name,
6471 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6472 "file_storage_dir": cluster.file_storage_dir,
6473 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6474 "maintain_node_health": cluster.maintain_node_health,
6475 "ctime": cluster.ctime,
6476 "mtime": cluster.mtime,
6477 "uuid": cluster.uuid,
6478 "tags": list(cluster.GetTags()),
6479 "uid_pool": cluster.uid_pool,
6480 "default_iallocator": cluster.default_iallocator,
6481 "reserved_lvs": cluster.reserved_lvs,
6482 "primary_ip_version": primary_ip_version,
6483 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6484 "hidden_os": cluster.hidden_os,
6485       "blacklisted_os": cluster.blacklisted_os,
6486       }
6488     return result
6491 class LUClusterConfigQuery(NoHooksLU):
6492 """Return configuration values.
6497 def CheckArguments(self):
6498 self.cq = _ClusterQuery(None, self.op.output_fields, False)
6500 def ExpandNames(self):
6501 self.cq.ExpandNames(self)
6503 def DeclareLocks(self, level):
6504 self.cq.DeclareLocks(self, level)
6506 def Exec(self, feedback_fn):
6507 result = self.cq.OldStyleQuery(self)
6509 assert len(result) == 1
6514 class _ClusterQuery(_QueryBase):
6515 FIELDS = query.CLUSTER_FIELDS
6517 #: Do not sort (there is only one item)
6520 def ExpandNames(self, lu):
6521 lu.needed_locks = {}
6523 # The following variables interact with _QueryBase._GetNames
6524 self.wanted = locking.ALL_SET
6525 self.do_locking = self.use_locking
6527     if self.do_locking:
6528       raise errors.OpPrereqError("Can not use locking for cluster queries",
6531   def DeclareLocks(self, lu, level):
6532     pass
6534 def _GetQueryData(self, lu):
6535     """Computes the cluster data.
6538 # Locking is not used
6539 assert not (compat.any(lu.glm.is_owned(level)
6540 for level in locking.LEVELS
6541 if level != locking.LEVEL_CLUSTER) or
6542 self.do_locking or self.use_locking)
6544 if query.CQ_CONFIG in self.requested_data:
6545       cluster = lu.cfg.GetClusterInfo()
6546     else:
6547       cluster = NotImplemented
6549 if query.CQ_QUEUE_DRAINED in self.requested_data:
6550       drain_flag = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6551     else:
6552       drain_flag = NotImplemented
6554 if query.CQ_WATCHER_PAUSE in self.requested_data:
6555       watcher_pause = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6556     else:
6557       watcher_pause = NotImplemented
6559 return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
6562 class LUInstanceActivateDisks(NoHooksLU):
6563 """Bring up an instance's disks.
6568 def ExpandNames(self):
6569 self._ExpandAndLockInstance()
6570 self.needed_locks[locking.LEVEL_NODE] = []
6571 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6573 def DeclareLocks(self, level):
6574 if level == locking.LEVEL_NODE:
6575 self._LockInstancesNodes()
6577 def CheckPrereq(self):
6578 """Check prerequisites.
6580 This checks that the instance is in the cluster.
6583 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6584 assert self.instance is not None, \
6585 "Cannot retrieve locked instance %s" % self.op.instance_name
6586 _CheckNodeOnline(self, self.instance.primary_node)
6588 def Exec(self, feedback_fn):
6589 """Activate the disks.
6592 disks_ok, disks_info = \
6593 _AssembleInstanceDisks(self, self.instance,
6594 ignore_size=self.op.ignore_size)
6595     if not disks_ok:
6596       raise errors.OpExecError("Cannot activate block devices")
6598     return disks_info
6601 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6602 ignore_size=False, check=True):
6603 """Prepare the block devices for an instance.
6605 This sets up the block devices on all nodes.
6607 @type lu: L{LogicalUnit}
6608 @param lu: the logical unit on whose behalf we execute
6609 @type instance: L{objects.Instance}
6610 @param instance: the instance for whose disks we assemble
6611 @type disks: list of L{objects.Disk} or None
6612 @param disks: which disks to assemble (or all, if None)
6613 @type ignore_secondaries: boolean
6614 @param ignore_secondaries: if true, errors on secondary nodes
6615 won't result in an error return from the function
6616 @type ignore_size: boolean
6617 @param ignore_size: if true, the current known size of the disk
6618 will not be used during the disk activation, useful for cases
6619 when the size is wrong
6620 @return: False if the operation failed, otherwise a list of
6621 (host, instance_visible_name, node_visible_name)
6622 with the mapping from node devices to instance devices
6624   """
6625   device_info = []
6626   disks_ok = True
6627   iname = instance.name
6629 disks = _ExpandCheckDisks(instance, disks)
6631 # With the two passes mechanism we try to reduce the window of
6632 # opportunity for the race condition of switching DRBD to primary
6633   # before handshaking occurred, but we do not eliminate it
6635 # The proper fix would be to wait (with some limits) until the
6636 # connection has been made and drbd transitions from WFConnection
6637 # into any other network-connected state (Connected, SyncTarget,
6640 # 1st pass, assemble on all nodes in secondary mode
6641 for idx, inst_disk in enumerate(disks):
6642 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6644 node_disk = node_disk.Copy()
6645 node_disk.UnsetSize()
6646 lu.cfg.SetDiskID(node_disk, node)
6647       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6648                                              False, idx)
6649       msg = result.fail_msg
6650       if msg:
6651         is_offline_secondary = (node in instance.secondary_nodes and
6652                                 result.offline)
6653 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6654 " (is_primary=False, pass=1): %s",
6655 inst_disk.iv_name, node, msg)
6656         if not (ignore_secondaries or is_offline_secondary):
6657           disks_ok = False
6659 # FIXME: race condition on drbd migration to primary
6661 # 2nd pass, do only the primary node
6662 for idx, inst_disk in enumerate(disks):
6665 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6666       if node != instance.primary_node:
6667         continue
6669 node_disk = node_disk.Copy()
6670 node_disk.UnsetSize()
6671 lu.cfg.SetDiskID(node_disk, node)
6672       result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
6673                                              True, idx)
6674       msg = result.fail_msg
6675       if msg:
6676 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6677 " (is_primary=True, pass=2): %s",
6678                            inst_disk.iv_name, node, msg)
6679         disks_ok = False
6680       else:
6681         dev_path = result.payload
6683 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6685 # leave the disks configured for the primary node
6686   # this is a workaround that would be better fixed by
6687   # improving the logical/physical id handling
6688   for disk in disks:
6689     lu.cfg.SetDiskID(disk, instance.primary_node)
6691 return disks_ok, device_info
6694 def _StartInstanceDisks(lu, instance, force):
6695 """Start the disks of an instance.
6698 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6699 ignore_secondaries=force)
6700   if not disks_ok:
6701     _ShutdownInstanceDisks(lu, instance)
6702 if force is not None and not force:
6703       lu.proc.LogWarning("", hint="If the message above refers to a"
6704                          " secondary node,"
6705                          " you can retry the operation using '--force'.")
6706 raise errors.OpExecError("Disk consistency error")
6709 class LUInstanceDeactivateDisks(NoHooksLU):
6710 """Shutdown an instance's disks.
6715 def ExpandNames(self):
6716 self._ExpandAndLockInstance()
6717 self.needed_locks[locking.LEVEL_NODE] = []
6718 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6720 def DeclareLocks(self, level):
6721 if level == locking.LEVEL_NODE:
6722 self._LockInstancesNodes()
6724 def CheckPrereq(self):
6725 """Check prerequisites.
6727 This checks that the instance is in the cluster.
6730 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6731 assert self.instance is not None, \
6732 "Cannot retrieve locked instance %s" % self.op.instance_name
6734 def Exec(self, feedback_fn):
6735 """Deactivate the disks
6738     instance = self.instance
6739     if self.op.force:
6740       _ShutdownInstanceDisks(self, instance)
6741     else:
6742       _SafeShutdownInstanceDisks(self, instance)
6745 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6746 """Shutdown block devices of an instance.
6748 This function checks if an instance is running, before calling
6749 _ShutdownInstanceDisks.
6752 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6753 _ShutdownInstanceDisks(lu, instance, disks=disks)
6756 def _ExpandCheckDisks(instance, disks):
6757 """Return the instance disks selected by the disks list
6759 @type disks: list of L{objects.Disk} or None
6760 @param disks: selected disks
6761 @rtype: list of L{objects.Disk}
6762 @return: selected instance disks to act on
6765   if disks is None:
6766     return instance.disks
6768 if not set(disks).issubset(instance.disks):
6769 raise errors.ProgrammerError("Can only act on disks belonging to the"
6774 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6775 """Shutdown block devices of an instance.
6777 This does the shutdown on all nodes of the instance.
6779   If the ignore_primary is false, errors on the primary node are
6780   ignored.
6784 disks = _ExpandCheckDisks(instance, disks)
6787 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6788 lu.cfg.SetDiskID(top_disk, node)
6789 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
6790 msg = result.fail_msg
6792 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6793 disk.iv_name, node, msg)
6794 if ((node == instance.primary_node and not ignore_primary) or
6795 (node != instance.primary_node and not result.offline)):
6800 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6801 """Checks if a node has enough free memory.
6803   This function checks if a given node has the needed amount of free
6804   memory. In case the node has less memory or we cannot get the
6805   information from the node, this function raises an OpPrereqError
6808 @type lu: C{LogicalUnit}
6809 @param lu: a logical unit from which we get configuration data
6811 @param node: the node to check
6812 @type reason: C{str}
6813 @param reason: string to use in the error message
6814 @type requested: C{int}
6815 @param requested: the amount of memory in MiB to check for
6816 @type hypervisor_name: C{str}
6817 @param hypervisor_name: the hypervisor to ask for memory stats
6819 @return: node current free memory
6820 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6821 we cannot check the node
6824 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6825 nodeinfo[node].Raise("Can't get data from node %s" % node,
6826 prereq=True, ecode=errors.ECODE_ENVIRON)
6827 (_, _, (hv_info, )) = nodeinfo[node].payload
6829 free_mem = hv_info.get("memory_free", None)
6830 if not isinstance(free_mem, int):
6831 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6832 " was '%s'" % (node, free_mem),
6833 errors.ECODE_ENVIRON)
6834 if requested > free_mem:
6835 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6836 " needed %s MiB, available %s MiB" %
6837                                (node, reason, requested, free_mem),
6838                                errors.ECODE_NORES)
6840   return free_mem
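# Typical use of _CheckNodeFreeMemory (see LUInstanceStartup.CheckPrereq
# below): check the instance's primary node before starting it, e.g.
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MINMEM], instance.hypervisor)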
6842 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6843 """Checks if nodes have enough free disk space in the all VGs.
6845   This function checks if all given nodes have the needed amount of
6846   free disk. In case any node has less disk or we cannot get the
6847   information from the node, this function raises an OpPrereqError
6850 @type lu: C{LogicalUnit}
6851 @param lu: a logical unit from which we get configuration data
6852 @type nodenames: C{list}
6853 @param nodenames: the list of node names to check
6854 @type req_sizes: C{dict}
6855 @param req_sizes: the hash of vg and corresponding amount of disk in
6857 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6858 or we cannot check the node
6861 for vg, req_size in req_sizes.items():
6862 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
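# (req_sizes above maps a volume group name to the space needed in MiB; for
# example {"xenvg": 10240} asks for 10 GiB of free space in a VG called
# "xenvg", the name being only an example.)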
6865 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6866 """Checks if nodes have enough free disk space in the specified VG.
6868   This function checks if all given nodes have the needed amount of
6869   free disk. In case any node has less disk or we cannot get the
6870   information from the node, this function raises an OpPrereqError
6873 @type lu: C{LogicalUnit}
6874 @param lu: a logical unit from which we get configuration data
6875 @type nodenames: C{list}
6876 @param nodenames: the list of node names to check
6878 @param vg: the volume group to check
6879 @type requested: C{int}
6880 @param requested: the amount of disk in MiB to check for
6881 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6882 or we cannot check the node
6885 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6886 for node in nodenames:
6887 info = nodeinfo[node]
6888 info.Raise("Cannot get current information from node %s" % node,
6889 prereq=True, ecode=errors.ECODE_ENVIRON)
6890 (_, (vg_info, ), _) = info.payload
6891 vg_free = vg_info.get("vg_free", None)
6892 if not isinstance(vg_free, int):
6893 raise errors.OpPrereqError("Can't compute free disk space on node"
6894 " %s for vg %s, result was '%s'" %
6895 (node, vg, vg_free), errors.ECODE_ENVIRON)
6896 if requested > vg_free:
6897 raise errors.OpPrereqError("Not enough disk space on target node %s"
6898 " vg %s: required %d MiB, available %d MiB" %
6899 (node, vg, requested, vg_free),
6903 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6904 """Checks if nodes have enough physical CPUs
6906 This function checks if all given nodes have the needed number of
6907   physical CPUs. In case any node has fewer CPUs or we cannot get the
6908 information from the node, this function raises an OpPrereqError
6911 @type lu: C{LogicalUnit}
6912 @param lu: a logical unit from which we get configuration data
6913 @type nodenames: C{list}
6914 @param nodenames: the list of node names to check
6915 @type requested: C{int}
6916 @param requested: the minimum acceptable number of physical CPUs
6917 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6918 or we cannot check the node
6921 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6922 for node in nodenames:
6923 info = nodeinfo[node]
6924 info.Raise("Cannot get current information from node %s" % node,
6925 prereq=True, ecode=errors.ECODE_ENVIRON)
6926 (_, _, (hv_info, )) = info.payload
6927 num_cpus = hv_info.get("cpu_total", None)
6928 if not isinstance(num_cpus, int):
6929 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6930 " on node %s, result was '%s'" %
6931 (node, num_cpus), errors.ECODE_ENVIRON)
6932 if requested > num_cpus:
6933 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6934 "required" % (node, num_cpus, requested),
6938 class LUInstanceStartup(LogicalUnit):
6939 """Starts an instance.
6942 HPATH = "instance-start"
6943 HTYPE = constants.HTYPE_INSTANCE
6946 def CheckArguments(self):
6948 if self.op.beparams:
6949 # fill the beparams dict
6950 objects.UpgradeBeParams(self.op.beparams)
6951 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6953 def ExpandNames(self):
6954 self._ExpandAndLockInstance()
6955 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6957 def DeclareLocks(self, level):
6958 if level == locking.LEVEL_NODE_RES:
6959 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6961 def BuildHooksEnv(self):
6964 This runs on master, primary and secondary nodes of the instance.
6968 "FORCE": self.op.force,
6971 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6975 def BuildHooksNodes(self):
6976 """Build hooks nodes.
6979 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6982 def CheckPrereq(self):
6983 """Check prerequisites.
6985 This checks that the instance is in the cluster.
6988 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6989 assert self.instance is not None, \
6990 "Cannot retrieve locked instance %s" % self.op.instance_name
6993 if self.op.hvparams:
6994 # check hypervisor parameter syntax (locally)
6995 cluster = self.cfg.GetClusterInfo()
6996 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6997 filled_hvp = cluster.FillHV(instance)
6998 filled_hvp.update(self.op.hvparams)
6999 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
7000 hv_type.CheckParameterSyntax(filled_hvp)
7001 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
7003 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7005 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
7007 if self.primary_offline and self.op.ignore_offline_nodes:
7008 self.proc.LogWarning("Ignoring offline primary node")
7010 if self.op.hvparams or self.op.beparams:
7011 self.proc.LogWarning("Overridden parameters are ignored")
7012     else:
7013       _CheckNodeOnline(self, instance.primary_node)
7015 bep = self.cfg.GetClusterInfo().FillBE(instance)
7016 bep.update(self.op.beparams)
7018 # check bridges existence
7019 _CheckInstanceBridgesExist(self, instance)
7021       remote_info = self.rpc.call_instance_info(instance.primary_node,
7022                                                 instance.name,
7023 instance.hypervisor)
7024 remote_info.Raise("Error checking node %s" % instance.primary_node,
7025 prereq=True, ecode=errors.ECODE_ENVIRON)
7026 if not remote_info.payload: # not running already
7027 _CheckNodeFreeMemory(self, instance.primary_node,
7028 "starting instance %s" % instance.name,
7029 bep[constants.BE_MINMEM], instance.hypervisor)
7031 def Exec(self, feedback_fn):
7032 """Start the instance.
7035 instance = self.instance
7036 force = self.op.force
7038 if not self.op.no_remember:
7039 self.cfg.MarkInstanceUp(instance.name)
7041 if self.primary_offline:
7042 assert self.op.ignore_offline_nodes
7043 self.proc.LogInfo("Primary node offline, marked instance as started")
7044     else:
7045       node_current = instance.primary_node
7047 _StartInstanceDisks(self, instance, force)
7049       result = \
7050         self.rpc.call_instance_start(node_current,
7051                                      (instance, self.op.hvparams,
7052                                       self.op.beparams),
7053                                      self.op.startup_paused)
7054       msg = result.fail_msg
7055       if msg:
7056         _ShutdownInstanceDisks(self, instance)
7057 raise errors.OpExecError("Could not start instance: %s" % msg)
7060 class LUInstanceReboot(LogicalUnit):
7061 """Reboot an instance.
7064 HPATH = "instance-reboot"
7065 HTYPE = constants.HTYPE_INSTANCE
7068 def ExpandNames(self):
7069 self._ExpandAndLockInstance()
7071 def BuildHooksEnv(self):
7074 This runs on master, primary and secondary nodes of the instance.
7078 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
7079 "REBOOT_TYPE": self.op.reboot_type,
7080 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7083 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7087 def BuildHooksNodes(self):
7088 """Build hooks nodes.
7091 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7094 def CheckPrereq(self):
7095 """Check prerequisites.
7097 This checks that the instance is in the cluster.
7100 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7101 assert self.instance is not None, \
7102 "Cannot retrieve locked instance %s" % self.op.instance_name
7103 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
7104 _CheckNodeOnline(self, instance.primary_node)
7106 # check bridges existence
7107 _CheckInstanceBridgesExist(self, instance)
7109 def Exec(self, feedback_fn):
7110 """Reboot the instance.
7113 instance = self.instance
7114 ignore_secondaries = self.op.ignore_secondaries
7115 reboot_type = self.op.reboot_type
7117     remote_info = self.rpc.call_instance_info(instance.primary_node,
7118                                               instance.name,
7119 instance.hypervisor)
7120 remote_info.Raise("Error checking node %s" % instance.primary_node)
7121 instance_running = bool(remote_info.payload)
7123 node_current = instance.primary_node
7125 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
7126 constants.INSTANCE_REBOOT_HARD]:
7127 for disk in instance.disks:
7128 self.cfg.SetDiskID(disk, node_current)
7129       result = self.rpc.call_instance_reboot(node_current, instance,
7130                                              reboot_type,
7131                                              self.op.shutdown_timeout)
7132 result.Raise("Could not reboot instance")
7133     else:
7134       if instance_running:
7135 result = self.rpc.call_instance_shutdown(node_current, instance,
7136 self.op.shutdown_timeout)
7137 result.Raise("Could not shutdown instance for full reboot")
7138 _ShutdownInstanceDisks(self, instance)
7139       else:
7140         self.LogInfo("Instance %s was already stopped, starting now",
7141                      instance.name)
7142 _StartInstanceDisks(self, instance, ignore_secondaries)
7143 result = self.rpc.call_instance_start(node_current,
7144 (instance, None, None), False)
7145       msg = result.fail_msg
7146       if msg:
7147 _ShutdownInstanceDisks(self, instance)
7148 raise errors.OpExecError("Could not start instance for"
7149 " full reboot: %s" % msg)
7151 self.cfg.MarkInstanceUp(instance.name)
7154 class LUInstanceShutdown(LogicalUnit):
7155 """Shutdown an instance.
7158 HPATH = "instance-stop"
7159 HTYPE = constants.HTYPE_INSTANCE
7162 def ExpandNames(self):
7163 self._ExpandAndLockInstance()
7165 def BuildHooksEnv(self):
7168 This runs on master, primary and secondary nodes of the instance.
7171 env = _BuildInstanceHookEnvByObject(self, self.instance)
7172 env["TIMEOUT"] = self.op.timeout
7175 def BuildHooksNodes(self):
7176 """Build hooks nodes.
7179 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7182 def CheckPrereq(self):
7183 """Check prerequisites.
7185 This checks that the instance is in the cluster.
7188 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7189 assert self.instance is not None, \
7190 "Cannot retrieve locked instance %s" % self.op.instance_name
7192 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
7194 self.primary_offline = \
7195 self.cfg.GetNodeInfo(self.instance.primary_node).offline
7197 if self.primary_offline and self.op.ignore_offline_nodes:
7198 self.proc.LogWarning("Ignoring offline primary node")
7200 _CheckNodeOnline(self, self.instance.primary_node)
7202 def Exec(self, feedback_fn):
7203 """Shutdown the instance.
7206 instance = self.instance
7207 node_current = instance.primary_node
7208 timeout = self.op.timeout
7210 if not self.op.no_remember:
7211 self.cfg.MarkInstanceDown(instance.name)
7213 if self.primary_offline:
7214 assert self.op.ignore_offline_nodes
7215 self.proc.LogInfo("Primary node offline, marked instance as stopped")
7216     else:
7217       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
7218 msg = result.fail_msg
7219       if msg:
7220         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
7222 _ShutdownInstanceDisks(self, instance)
7225 class LUInstanceReinstall(LogicalUnit):
7226 """Reinstall an instance.
7229 HPATH = "instance-reinstall"
7230 HTYPE = constants.HTYPE_INSTANCE
7233 def ExpandNames(self):
7234 self._ExpandAndLockInstance()
7236 def BuildHooksEnv(self):
7239 This runs on master, primary and secondary nodes of the instance.
7242 return _BuildInstanceHookEnvByObject(self, self.instance)
7244 def BuildHooksNodes(self):
7245 """Build hooks nodes.
7248 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7251 def CheckPrereq(self):
7252 """Check prerequisites.
7254 This checks that the instance is in the cluster and is not running.
7257 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7258 assert instance is not None, \
7259 "Cannot retrieve locked instance %s" % self.op.instance_name
7260 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
7261 " offline, cannot reinstall")
7263 if instance.disk_template == constants.DT_DISKLESS:
7264 raise errors.OpPrereqError("Instance '%s' has no disks" %
7265 self.op.instance_name,
7267 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
7269 if self.op.os_type is not None:
7271 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
7272 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
7273 instance_os = self.op.os_type
7274     else:
7275       instance_os = instance.os
7277 nodelist = list(instance.all_nodes)
7279 if self.op.osparams:
7280 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
7281 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
7282       self.os_inst = i_osdict # the new dict (without defaults)
7283     else:
7284       self.os_inst = {}
7286 self.instance = instance
7288 def Exec(self, feedback_fn):
7289 """Reinstall the instance.
7292 inst = self.instance
7294 if self.op.os_type is not None:
7295 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
7296 inst.os = self.op.os_type
7297 # Write to configuration
7298 self.cfg.Update(inst, feedback_fn)
7300 _StartInstanceDisks(self, inst, None)
7301     try:
7302       feedback_fn("Running the instance OS create scripts...")
7303 # FIXME: pass debug option from opcode to backend
7304 result = self.rpc.call_instance_os_add(inst.primary_node,
7305 (inst, self.os_inst), True,
7306 self.op.debug_level)
7307 result.Raise("Could not install OS for instance %s on node %s" %
7308 (inst.name, inst.primary_node))
7309     finally:
7310       _ShutdownInstanceDisks(self, inst)
7313 class LUInstanceRecreateDisks(LogicalUnit):
7314 """Recreate an instance's missing disks.
7317 HPATH = "instance-recreate-disks"
7318 HTYPE = constants.HTYPE_INSTANCE
7321 _MODIFYABLE = frozenset([
7322 constants.IDISK_SIZE,
7323 constants.IDISK_MODE,
7326 # New or changed disk parameters may have different semantics
7327 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7328 constants.IDISK_ADOPT,
7330 # TODO: Implement support changing VG while recreating
7332 constants.IDISK_METAVG,
7333 constants.IDISK_PROVIDER,
7336 def CheckArguments(self):
7337 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7338 # Normalize and convert deprecated list of disk indices
7339 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
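      # e.g. a deprecated "disks=[2, 0]" value becomes [(0, {}), (2, {})]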
7341     duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7342     if duplicates:
7343 raise errors.OpPrereqError("Some disks have been specified more than"
7344 " once: %s" % utils.CommaJoin(duplicates),
7347 for (idx, params) in self.op.disks:
7348 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7349       unsupported = frozenset(params.keys()) - self._MODIFYABLE
7350       if unsupported:
7351 raise errors.OpPrereqError("Parameters for disk %s try to change"
7352 " unmodifyable parameter(s): %s" %
7353 (idx, utils.CommaJoin(unsupported)),
7356 def ExpandNames(self):
7357 self._ExpandAndLockInstance()
7358 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7359     if self.op.nodes:
7360       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7361       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7362     else:
7363       self.needed_locks[locking.LEVEL_NODE] = []
7364 self.needed_locks[locking.LEVEL_NODE_RES] = []
7366 def DeclareLocks(self, level):
7367 if level == locking.LEVEL_NODE:
7368 # if we replace the nodes, we only need to lock the old primary,
7369 # otherwise we need to lock all nodes for disk re-creation
7370 primary_only = bool(self.op.nodes)
7371 self._LockInstancesNodes(primary_only=primary_only)
7372 elif level == locking.LEVEL_NODE_RES:
7374 self.needed_locks[locking.LEVEL_NODE_RES] = \
7375 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7377 def BuildHooksEnv(self):
7380 This runs on master, primary and secondary nodes of the instance.
7383 return _BuildInstanceHookEnvByObject(self, self.instance)
7385 def BuildHooksNodes(self):
7386 """Build hooks nodes.
7389 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7392 def CheckPrereq(self):
7393 """Check prerequisites.
7395 This checks that the instance is in the cluster and is not running.
7398 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7399 assert instance is not None, \
7400 "Cannot retrieve locked instance %s" % self.op.instance_name
7401     if self.op.nodes:
7402       if len(self.op.nodes) != len(instance.all_nodes):
7403 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7404 " %d replacement nodes were specified" %
7405 (instance.name, len(instance.all_nodes),
7406 len(self.op.nodes)),
7408 assert instance.disk_template != constants.DT_DRBD8 or \
7409 len(self.op.nodes) == 2
7410 assert instance.disk_template != constants.DT_PLAIN or \
7411 len(self.op.nodes) == 1
7412 primary_node = self.op.nodes[0]
7413     else:
7414       primary_node = instance.primary_node
7415 _CheckNodeOnline(self, primary_node)
7417 if instance.disk_template == constants.DT_DISKLESS:
7418 raise errors.OpPrereqError("Instance '%s' has no disks" %
7419 self.op.instance_name, errors.ECODE_INVAL)
7421 # if we replace nodes *and* the old primary is offline, we don't
7423 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7424 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7425 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7426 if not (self.op.nodes and old_pnode.offline):
7427 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7428 msg="cannot recreate disks")
7430     if self.op.disks:
7431       self.disks = dict(self.op.disks)
7432     else:
7433       self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7435 maxidx = max(self.disks.keys())
7436 if maxidx >= len(instance.disks):
7437 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7440 if (self.op.nodes and
7441 sorted(self.disks.keys()) != range(len(instance.disks))):
7442 raise errors.OpPrereqError("Can't recreate disks partially and"
7443 " change the nodes at the same time",
7446 self.instance = instance
7448 def Exec(self, feedback_fn):
7449 """Recreate the disks.
7452 instance = self.instance
7454 assert (self.owned_locks(locking.LEVEL_NODE) ==
7455 self.owned_locks(locking.LEVEL_NODE_RES))
7457     to_skip = []
7458     mods = [] # keeps track of needed changes
7460     for idx, disk in enumerate(instance.disks):
7461       try:
7462         changes = self.disks[idx]
7463       except KeyError:
7464         # Disk should not be recreated
7465         to_skip.append(idx)
7466         continue
7468 # update secondaries for disks, if needed
7469 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7470 # need to update the nodes and minors
7471 assert len(self.op.nodes) == 2
7472 assert len(disk.logical_id) == 6 # otherwise disk internals
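        # a DRBD8 logical_id is (nodeA, nodeB, port, minorA, minorB, secret);
        # only the nodes and minors change here, port and secret are reused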
7474 (_, _, old_port, _, _, old_secret) = disk.logical_id
7475 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7476 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7477 new_minors[0], new_minors[1], old_secret)
7478 assert len(disk.logical_id) == len(new_id)
7479       else:
7480         new_id = None
7482       mods.append((idx, new_id, changes))
7484 # now that we have passed all asserts above, we can apply the mods
7485 # in a single run (to avoid partial changes)
7486 for idx, new_id, changes in mods:
7487 disk = instance.disks[idx]
7488 if new_id is not None:
7489 assert disk.dev_type == constants.LD_DRBD8
7490 disk.logical_id = new_id
7491       if changes:
7492         disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7493                     mode=changes.get(constants.IDISK_MODE, None))
7495 # change primary node, if needed
7496     if self.op.nodes:
7497       instance.primary_node = self.op.nodes[0]
7498 self.LogWarning("Changing the instance's nodes, you will have to"
7499 " remove any disks left on the older nodes manually")
7502 self.cfg.Update(instance, feedback_fn)
7504 _CreateDisks(self, instance, to_skip=to_skip)
7507 class LUInstanceRename(LogicalUnit):
7508 """Rename an instance.
7511 HPATH = "instance-rename"
7512 HTYPE = constants.HTYPE_INSTANCE
7514 def CheckArguments(self):
7518 if self.op.ip_check and not self.op.name_check:
7519 # TODO: make the ip check more flexible and not depend on the name check
7520 raise errors.OpPrereqError("IP address check requires a name check",
7523 def BuildHooksEnv(self):
7526 This runs on master, primary and secondary nodes of the instance.
7529 env = _BuildInstanceHookEnvByObject(self, self.instance)
7530 env["INSTANCE_NEW_NAME"] = self.op.new_name
7533 def BuildHooksNodes(self):
7534 """Build hooks nodes.
7537 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7540 def CheckPrereq(self):
7541 """Check prerequisites.
7543 This checks that the instance is in the cluster and is not running.
7546 self.op.instance_name = _ExpandInstanceName(self.cfg,
7547 self.op.instance_name)
7548 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7549 assert instance is not None
7550 _CheckNodeOnline(self, instance.primary_node)
7551 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7552 msg="cannot rename")
7553 self.instance = instance
7555 new_name = self.op.new_name
7556 if self.op.name_check:
7557 hostname = _CheckHostnameSane(self, new_name)
7558 new_name = self.op.new_name = hostname.name
7559 if (self.op.ip_check and
7560 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7561 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7562 (hostname.ip, new_name),
7563 errors.ECODE_NOTUNIQUE)
7565 instance_list = self.cfg.GetInstanceList()
7566 if new_name in instance_list and new_name != instance.name:
7567 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7568 new_name, errors.ECODE_EXISTS)
7570 def Exec(self, feedback_fn):
7571 """Rename the instance.
7574 inst = self.instance
7575 old_name = inst.name
7577 rename_file_storage = False
7578 if (inst.disk_template in constants.DTS_FILEBASED and
7579 self.op.new_name != inst.name):
7580 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7581 rename_file_storage = True
7583 self.cfg.RenameInstance(inst.name, self.op.new_name)
7584 # Change the instance lock. This is definitely safe while we hold the BGL.
7585 # Otherwise the new lock would have to be added in acquired mode.
7587 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7588 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7590 # re-read the instance from the configuration after rename
7591 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7593 if rename_file_storage:
7594 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7595 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7596 old_file_storage_dir,
7597 new_file_storage_dir)
7598 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7599 " (but the instance has been renamed in Ganeti)" %
7600 (inst.primary_node, old_file_storage_dir,
7601 new_file_storage_dir))
7603 _StartInstanceDisks(self, inst, None)
7605 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7606 old_name, self.op.debug_level)
7607 msg = result.fail_msg
7609 msg = ("Could not run OS rename script for instance %s on node %s"
7610 " (but the instance has been renamed in Ganeti): %s" %
7611 (inst.name, inst.primary_node, msg))
7612 self.proc.LogWarning(msg)
7614 _ShutdownInstanceDisks(self, inst)
7619 class LUInstanceRemove(LogicalUnit):
7620 """Remove an instance.
7623 HPATH = "instance-remove"
7624 HTYPE = constants.HTYPE_INSTANCE
7627 def ExpandNames(self):
7628 self._ExpandAndLockInstance()
7629 self.needed_locks[locking.LEVEL_NODE] = []
7630 self.needed_locks[locking.LEVEL_NODE_RES] = []
7631 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7633 def DeclareLocks(self, level):
7634 if level == locking.LEVEL_NODE:
7635 self._LockInstancesNodes()
7636 elif level == locking.LEVEL_NODE_RES:
7638 self.needed_locks[locking.LEVEL_NODE_RES] = \
7639 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7641 def BuildHooksEnv(self):
7644 This runs on master, primary and secondary nodes of the instance.
7647 env = _BuildInstanceHookEnvByObject(self, self.instance)
7648 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7651 def BuildHooksNodes(self):
7652 """Build hooks nodes.
7655 nl = [self.cfg.GetMasterNode()]
7656 nl_post = list(self.instance.all_nodes) + nl
7657 return (nl, nl_post)
7659 def CheckPrereq(self):
7660 """Check prerequisites.
7662 This checks that the instance is in the cluster.
7665 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7666 assert self.instance is not None, \
7667 "Cannot retrieve locked instance %s" % self.op.instance_name
7669 def Exec(self, feedback_fn):
7670 """Remove the instance.
7673 instance = self.instance
7674 logging.info("Shutting down instance %s on node %s",
7675 instance.name, instance.primary_node)
7677 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7678 self.op.shutdown_timeout)
7679 msg = result.fail_msg
7681 if self.op.ignore_failures:
7682 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7684 raise errors.OpExecError("Could not shutdown instance %s on"
7686 (instance.name, instance.primary_node, msg))
7688 assert (self.owned_locks(locking.LEVEL_NODE) ==
7689 self.owned_locks(locking.LEVEL_NODE_RES))
7690 assert not (set(instance.all_nodes) -
7691 self.owned_locks(locking.LEVEL_NODE)), \
7692 "Not owning correct locks"
7694 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7697 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7698 """Utility function to remove an instance.
7701 logging.info("Removing block devices for instance %s", instance.name)
7703 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7704 if not ignore_failures:
7705 raise errors.OpExecError("Can't remove instance's disks")
7706 feedback_fn("Warning: can't remove instance's disks")
7708 logging.info("Removing instance %s out of cluster config", instance.name)
7710 lu.cfg.RemoveInstance(instance.name)
7712 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7713 "Instance lock removal conflict"
7715 # Remove lock for the instance
7716 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7719 class LUInstanceQuery(NoHooksLU):
7720 """Logical unit for querying instances.
7723 # pylint: disable=W0142
7726 def CheckArguments(self):
7727 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7728 self.op.output_fields, self.op.use_locking)
7730 def ExpandNames(self):
7731 self.iq.ExpandNames(self)
7733 def DeclareLocks(self, level):
7734 self.iq.DeclareLocks(self, level)
7736 def Exec(self, feedback_fn):
7737 return self.iq.OldStyleQuery(self)
7740 class LUInstanceFailover(LogicalUnit):
7741 """Failover an instance.
7744 HPATH = "instance-failover"
7745 HTYPE = constants.HTYPE_INSTANCE
7748 def CheckArguments(self):
7749 """Check the arguments.
7752 self.iallocator = getattr(self.op, "iallocator", None)
7753 self.target_node = getattr(self.op, "target_node", None)
7755 def ExpandNames(self):
7756 self._ExpandAndLockInstance()
7758 if self.op.target_node is not None:
7759 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7761 self.needed_locks[locking.LEVEL_NODE] = []
7762 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7764 self.needed_locks[locking.LEVEL_NODE_RES] = []
7765 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7767 ignore_consistency = self.op.ignore_consistency
7768 shutdown_timeout = self.op.shutdown_timeout
7769 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7772 ignore_consistency=ignore_consistency,
7773 shutdown_timeout=shutdown_timeout,
7774 ignore_ipolicy=self.op.ignore_ipolicy)
7775 self.tasklets = [self._migrater]
7777 def DeclareLocks(self, level):
7778 if level == locking.LEVEL_NODE:
7779 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7780 if instance.disk_template in constants.DTS_EXT_MIRROR:
7781 if self.op.target_node is None:
7782 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7784 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7785 self.op.target_node]
7786 del self.recalculate_locks[locking.LEVEL_NODE]
7788 self._LockInstancesNodes()
7789 elif level == locking.LEVEL_NODE_RES:
7791 self.needed_locks[locking.LEVEL_NODE_RES] = \
7792 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7794 def BuildHooksEnv(self):
7797 This runs on master, primary and secondary nodes of the instance.
7800 instance = self._migrater.instance
7801 source_node = instance.primary_node
7802 target_node = self.op.target_node
7804 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7805 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7806 "OLD_PRIMARY": source_node,
7807 "NEW_PRIMARY": target_node,
7810 if instance.disk_template in constants.DTS_INT_MIRROR:
7811 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7812 env["NEW_SECONDARY"] = source_node
7814 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7816 env.update(_BuildInstanceHookEnvByObject(self, instance))
7820 def BuildHooksNodes(self):
7821 """Build hooks nodes.
7824 instance = self._migrater.instance
7825 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7826 return (nl, nl + [instance.primary_node])
7829 class LUInstanceMigrate(LogicalUnit):
7830 """Migrate an instance.
7832 This is migration without shutting down, compared to the failover,
7833 which is done with shutdown.
7836 HPATH = "instance-migrate"
7837 HTYPE = constants.HTYPE_INSTANCE
7840 def ExpandNames(self):
7841 self._ExpandAndLockInstance()
7843 if self.op.target_node is not None:
7844 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7846 self.needed_locks[locking.LEVEL_NODE] = []
7847 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7849 self.needed_locks[locking.LEVEL_NODE] = []
7850 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7853 TLMigrateInstance(self, self.op.instance_name,
7854 cleanup=self.op.cleanup,
7856 fallback=self.op.allow_failover,
7857 allow_runtime_changes=self.op.allow_runtime_changes,
7858 ignore_ipolicy=self.op.ignore_ipolicy)
7859 self.tasklets = [self._migrater]
7861 def DeclareLocks(self, level):
7862 if level == locking.LEVEL_NODE:
7863 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7864 if instance.disk_template in constants.DTS_EXT_MIRROR:
7865 if self.op.target_node is None:
7866 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7868 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7869 self.op.target_node]
7870 del self.recalculate_locks[locking.LEVEL_NODE]
7872 self._LockInstancesNodes()
7873 elif level == locking.LEVEL_NODE_RES:
7875 self.needed_locks[locking.LEVEL_NODE_RES] = \
7876 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7878 def BuildHooksEnv(self):
7881 This runs on master, primary and secondary nodes of the instance.
7884 instance = self._migrater.instance
7885 source_node = instance.primary_node
7886 target_node = self.op.target_node
7887 env = _BuildInstanceHookEnvByObject(self, instance)
7889 "MIGRATE_LIVE": self._migrater.live,
7890 "MIGRATE_CLEANUP": self.op.cleanup,
7891 "OLD_PRIMARY": source_node,
7892 "NEW_PRIMARY": target_node,
7893 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7896 if instance.disk_template in constants.DTS_INT_MIRROR:
7897 env["OLD_SECONDARY"] = target_node
7898 env["NEW_SECONDARY"] = source_node
7900 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7904 def BuildHooksNodes(self):
7905 """Build hooks nodes.
7908 instance = self._migrater.instance
7909 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7910 return (nl, nl + [instance.primary_node])
7913 class LUInstanceMove(LogicalUnit):
7914 """Move an instance by data-copying.
7917 HPATH = "instance-move"
7918 HTYPE = constants.HTYPE_INSTANCE
7921 def ExpandNames(self):
7922 self._ExpandAndLockInstance()
7923 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7924 self.op.target_node = target_node
7925 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7926 self.needed_locks[locking.LEVEL_NODE_RES] = []
7927 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7929 def DeclareLocks(self, level):
7930 if level == locking.LEVEL_NODE:
7931 self._LockInstancesNodes(primary_only=True)
7932 elif level == locking.LEVEL_NODE_RES:
7934 self.needed_locks[locking.LEVEL_NODE_RES] = \
7935 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7937 def BuildHooksEnv(self):
7940 This runs on master, primary and secondary nodes of the instance.
7944 "TARGET_NODE": self.op.target_node,
7945 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7947 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7950 def BuildHooksNodes(self):
7951 """Build hooks nodes.
7955 self.cfg.GetMasterNode(),
7956 self.instance.primary_node,
7957 self.op.target_node,
7961 def CheckPrereq(self):
7962 """Check prerequisites.
7964 This checks that the instance is in the cluster.
7967 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7968 assert self.instance is not None, \
7969 "Cannot retrieve locked instance %s" % self.op.instance_name
7971 node = self.cfg.GetNodeInfo(self.op.target_node)
7972 assert node is not None, \
7973 "Cannot retrieve locked node %s" % self.op.target_node
7975 self.target_node = target_node = node.name
7977 if target_node == instance.primary_node:
7978 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7979 (instance.name, target_node),
7982 bep = self.cfg.GetClusterInfo().FillBE(instance)
7984 for idx, dsk in enumerate(instance.disks):
7985 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7986 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7987 " cannot copy" % idx, errors.ECODE_STATE)
7989 _CheckNodeOnline(self, target_node)
7990 _CheckNodeNotDrained(self, target_node)
7991 _CheckNodeVmCapable(self, target_node)
7992 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7993 self.cfg.GetNodeGroup(node.group))
7994 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7995 ignore=self.op.ignore_ipolicy)
7997 if instance.admin_state == constants.ADMINST_UP:
7998 # check memory requirements on the secondary node
7999 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
8000 instance.name, bep[constants.BE_MAXMEM],
8001 instance.hypervisor)
8003 self.LogInfo("Not checking memory on the secondary node as"
8004 " instance will not be started")
8006 # check bridge existence
8007 _CheckInstanceBridgesExist(self, instance, node=target_node)
8009 def Exec(self, feedback_fn):
8010 """Move an instance.
8012 The move is done by shutting it down on its present node, copying
8013 the data over (slow) and starting it on the new node.
8016 instance = self.instance
8018 source_node = instance.primary_node
8019 target_node = self.target_node
8021 self.LogInfo("Shutting down instance %s on source node %s",
8022 instance.name, source_node)
8024 assert (self.owned_locks(locking.LEVEL_NODE) ==
8025 self.owned_locks(locking.LEVEL_NODE_RES))
8027 result = self.rpc.call_instance_shutdown(source_node, instance,
8028 self.op.shutdown_timeout)
8029 msg = result.fail_msg
8031 if self.op.ignore_consistency:
8032 self.proc.LogWarning("Could not shutdown instance %s on node %s."
8033 " Proceeding anyway. Please make sure node"
8034 " %s is down. Error details: %s",
8035 instance.name, source_node, source_node, msg)
8037 raise errors.OpExecError("Could not shutdown instance %s on"
8039 (instance.name, source_node, msg))
8041 # create the target disks
8043 _CreateDisks(self, instance, target_node=target_node)
8044 except errors.OpExecError:
8045 self.LogWarning("Device creation failed, reverting...")
8047 _RemoveDisks(self, instance, target_node=target_node)
8049 self.cfg.ReleaseDRBDMinors(instance.name)
8052 cluster_name = self.cfg.GetClusterInfo().cluster_name
8055 # activate, get path, copy the data over
8056 for idx, disk in enumerate(instance.disks):
8057 self.LogInfo("Copying data for disk %d", idx)
8058 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
8059 instance.name, True, idx)
8061 self.LogWarning("Can't assemble newly created disk %d: %s",
8062 idx, result.fail_msg)
8063 errs.append(result.fail_msg)
8065 dev_path = result.payload
8066 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8067 target_node, dev_path,
8070 self.LogWarning("Can't copy data over for disk %d: %s",
8071 idx, result.fail_msg)
8072 errs.append(result.fail_msg)
8076 self.LogWarning("Some disks failed to copy, aborting")
8078 _RemoveDisks(self, instance, target_node=target_node)
8080 self.cfg.ReleaseDRBDMinors(instance.name)
8081 raise errors.OpExecError("Errors during disk copy: %s" %
8084 instance.primary_node = target_node
8085 self.cfg.Update(instance, feedback_fn)
8087 self.LogInfo("Removing the disks on the original node")
8088 _RemoveDisks(self, instance, target_node=source_node)
8090 # Only start the instance if it's marked as up
8091 if instance.admin_state == constants.ADMINST_UP:
8092 self.LogInfo("Starting instance %s on node %s",
8093 instance.name, target_node)
8095 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8096 ignore_secondaries=True)
8098 _ShutdownInstanceDisks(self, instance)
8099 raise errors.OpExecError("Can't activate the instance's disks")
8101 result = self.rpc.call_instance_start(target_node,
8102 (instance, None, None), False)
8103 msg = result.fail_msg
8105 _ShutdownInstanceDisks(self, instance)
8106 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8107 (instance.name, target_node, msg))
8110 class LUNodeMigrate(LogicalUnit):
8111 """Migrate all instances from a node.
8114 HPATH = "node-migrate"
8115 HTYPE = constants.HTYPE_NODE
8118 def CheckArguments(self):
8121 def ExpandNames(self):
8122 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8124 self.share_locks = _ShareAll()
8125 self.needed_locks = {
8126 locking.LEVEL_NODE: [self.op.node_name],
8129 def BuildHooksEnv(self):
8132 This runs on the master, the primary and all the secondaries.
8136 "NODE_NAME": self.op.node_name,
8137 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8140 def BuildHooksNodes(self):
8141 """Build hooks nodes.
8144 nl = [self.cfg.GetMasterNode()]
8147 def CheckPrereq(self):
8150 def Exec(self, feedback_fn):
8151 # Prepare jobs for migration instances
8152 allow_runtime_changes = self.op.allow_runtime_changes
8154 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8157 iallocator=self.op.iallocator,
8158 target_node=self.op.target_node,
8159 allow_runtime_changes=allow_runtime_changes,
8160 ignore_ipolicy=self.op.ignore_ipolicy)]
8161 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
8164 # TODO: Run iallocator in this opcode and pass correct placement options to
8165 # OpInstanceMigrate. Since other jobs can modify the cluster between
8166 # running the iallocator and the actual migration, a good consistency model
8167 # will have to be found.
8169 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8170 frozenset([self.op.node_name]))
8172 return ResultWithJobs(jobs)
8175 class TLMigrateInstance(Tasklet):
8176 """Tasklet class for instance migration.
8179 @ivar live: whether the migration will be done live or non-live;
8180 this variable is initialized only after CheckPrereq has run
8181 @type cleanup: boolean
8182 @ivar cleanup: Whether we clean up from a failed migration
8183 @type iallocator: string
8184 @ivar iallocator: The iallocator used to determine target_node
8185 @type target_node: string
8186 @ivar target_node: If given, the target_node to reallocate the instance to
8187 @type failover: boolean
8188 @ivar failover: Whether operation results in failover or migration
8189 @type fallback: boolean
8190 @ivar fallback: Whether fallback to failover is allowed if migration not
8192 @type ignore_consistency: boolean
8193 @ivar ignore_consistency: Whether we should ignore consistency between source
8195 @type shutdown_timeout: int
8196 @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
8197 @type ignore_ipolicy: bool
8198 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8203 _MIGRATION_POLL_INTERVAL = 1 # seconds
8204 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
8206 def __init__(self, lu, instance_name, cleanup=False,
8207 failover=False, fallback=False,
8208 ignore_consistency=False,
8209 allow_runtime_changes=True,
8210 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8211 ignore_ipolicy=False):
8212 """Initializes this class.
8215 Tasklet.__init__(self, lu)
8218 self.instance_name = instance_name
8219 self.cleanup = cleanup
8220 self.live = False # will be overridden later
8221 self.failover = failover
8222 self.fallback = fallback
8223 self.ignore_consistency = ignore_consistency
8224 self.shutdown_timeout = shutdown_timeout
8225 self.ignore_ipolicy = ignore_ipolicy
8226 self.allow_runtime_changes = allow_runtime_changes
8228 def CheckPrereq(self):
8229 """Check prerequisites.
8231 This checks that the instance is in the cluster.
8234 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8235 instance = self.cfg.GetInstanceInfo(instance_name)
8236 assert instance is not None
8237 self.instance = instance
8238 cluster = self.cfg.GetClusterInfo()
8240 if (not self.cleanup and
8241 not instance.admin_state == constants.ADMINST_UP and
8242 not self.failover and self.fallback):
8243 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8244 " switching to failover")
8245 self.failover = True
8247 if instance.disk_template not in constants.DTS_MIRRORED:
8252 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8253 " %s" % (instance.disk_template, text),
8256 if instance.disk_template in constants.DTS_EXT_MIRROR:
8257 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8259 if self.lu.op.iallocator:
8260 self._RunAllocator()
8262 # We set self.target_node as it is required by
8264 self.target_node = self.lu.op.target_node
8266 # Check that the target node is correct in terms of instance policy
8267 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8268 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8269 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8270 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8271 ignore=self.ignore_ipolicy)
8273 # self.target_node is already populated, either directly or by the
8275 target_node = self.target_node
8276 if self.target_node == instance.primary_node:
8277 raise errors.OpPrereqError("Cannot migrate instance %s"
8278 " to its primary (%s)" %
8279 (instance.name, instance.primary_node))
8281 if len(self.lu.tasklets) == 1:
8282 # It is safe to release locks only when we're the only tasklet
8284 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8285 keep=[instance.primary_node, self.target_node])
8288 secondary_nodes = instance.secondary_nodes
8289 if not secondary_nodes:
8290 raise errors.ConfigurationError("No secondary node but using"
8291 " %s disk template" %
8292 instance.disk_template)
8293 target_node = secondary_nodes[0]
8294 if self.lu.op.iallocator or (self.lu.op.target_node and
8295 self.lu.op.target_node != target_node):
8297 text = "failed over"
8300 raise errors.OpPrereqError("Instances with disk template %s cannot"
8301 " be %s to arbitrary nodes"
8302 " (neither an iallocator nor a target"
8303 " node can be passed)" %
8304 (instance.disk_template, text),
8306 nodeinfo = self.cfg.GetNodeInfo(target_node)
8307 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8308 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
8309 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8310 ignore=self.ignore_ipolicy)
8312 i_be = cluster.FillBE(instance)
8314 # check memory requirements on the secondary node
8315 if (not self.cleanup and
8316 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8317 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8318 "migrating instance %s" %
8320 i_be[constants.BE_MINMEM],
8321 instance.hypervisor)
8323 self.lu.LogInfo("Not checking memory on the secondary node as"
8324 " instance will not be started")
8326 # check if failover must be forced instead of migration
8327 if (not self.cleanup and not self.failover and
8328 i_be[constants.BE_ALWAYS_FAILOVER]):
8329 self.lu.LogInfo("Instance configured to always failover; fallback"
8331 self.failover = True
8333 # check bridge existence
8334 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8336 if not self.cleanup:
8337 _CheckNodeNotDrained(self.lu, target_node)
8338 if not self.failover:
8339 result = self.rpc.call_instance_migratable(instance.primary_node,
8341 if result.fail_msg and self.fallback:
8342 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8344 self.failover = True
8346 result.Raise("Can't migrate, please use failover",
8347 prereq=True, ecode=errors.ECODE_STATE)
8349 assert not (self.failover and self.cleanup)
8351 if not self.failover:
8352 if self.lu.op.live is not None and self.lu.op.mode is not None:
8353 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8354 " parameters are accepted",
8356 if self.lu.op.live is not None:
8358 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8360 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8361 # reset the 'live' parameter to None so that repeated
8362 # invocations of CheckPrereq do not raise an exception
8363 self.lu.op.live = None
8364 elif self.lu.op.mode is None:
8365 # read the default value from the hypervisor
8366 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8367 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8369 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8371 # Failover is never live
8374 if not (self.failover or self.cleanup):
8375 remote_info = self.rpc.call_instance_info(instance.primary_node,
8377 instance.hypervisor)
8378 remote_info.Raise("Error checking instance on node %s" %
8379 instance.primary_node)
8380 instance_running = bool(remote_info.payload)
8381 if instance_running:
8382 self.current_mem = int(remote_info.payload["memory"])
8384 def _RunAllocator(self):
8385 """Run the allocator based on input opcode.
8388 # FIXME: add a self.ignore_ipolicy option
8389 ial = IAllocator(self.cfg, self.rpc,
8390 mode=constants.IALLOCATOR_MODE_RELOC,
8391 name=self.instance_name,
8392 relocate_from=[self.instance.primary_node],
8395 ial.Run(self.lu.op.iallocator)
8398 raise errors.OpPrereqError("Can't compute nodes using"
8399 " iallocator '%s': %s" %
8400 (self.lu.op.iallocator, ial.info),
8402 if len(ial.result) != ial.required_nodes:
8403 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8404 " of nodes (%s), required %s" %
8405 (self.lu.op.iallocator, len(ial.result),
8406 ial.required_nodes), errors.ECODE_FAULT)
8407 self.target_node = ial.result[0]
8408 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8409 self.instance_name, self.lu.op.iallocator,
8410 utils.CommaJoin(ial.result))
8412 def _WaitUntilSync(self):
8413 """Poll with custom rpc for disk sync.
8415 This uses our own step-based rpc call.
8418 self.feedback_fn("* wait until resync is done")
8422 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8424 (self.instance.disks,
8427 for node, nres in result.items():
8428 nres.Raise("Cannot resync disks on node %s" % node)
8429 node_done, node_percent = nres.payload
8430 all_done = all_done and node_done
8431 if node_percent is not None:
8432 min_percent = min(min_percent, node_percent)
8434 if min_percent < 100:
8435 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8438 def _EnsureSecondary(self, node):
8439 """Demote a node to secondary.
8442 self.feedback_fn("* switching node %s to secondary mode" % node)
8444 for dev in self.instance.disks:
8445 self.cfg.SetDiskID(dev, node)
8447 result = self.rpc.call_blockdev_close(node, self.instance.name,
8448 self.instance.disks)
8449 result.Raise("Cannot change disk to secondary on node %s" % node)
8451 def _GoStandalone(self):
8452 """Disconnect from the network.
8455 self.feedback_fn("* changing into standalone mode")
8456 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8457 self.instance.disks)
8458 for node, nres in result.items():
8459 nres.Raise("Cannot disconnect disks node %s" % node)
8461 def _GoReconnect(self, multimaster):
8462 """Reconnect to the network.
8468 msg = "single-master"
8469 self.feedback_fn("* changing disks into %s mode" % msg)
8470 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8471 (self.instance.disks, self.instance),
8472 self.instance.name, multimaster)
8473 for node, nres in result.items():
8474 nres.Raise("Cannot change disks config on node %s" % node)
8476 def _ExecCleanup(self):
8477 """Try to cleanup after a failed migration.
8479 The cleanup is done by:
8480 - check that the instance is running only on one node
8481 (and update the config if needed)
8482 - change disks on its secondary node to secondary
8483 - wait until disks are fully synchronized
8484 - disconnect from the network
8485 - change disks into single-master mode
8486 - wait again until disks are fully synchronized
8489 instance = self.instance
8490 target_node = self.target_node
8491 source_node = self.source_node
8493 # check running on only one node
8494 self.feedback_fn("* checking where the instance actually runs"
8495 " (if this hangs, the hypervisor might be in"
8497 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8498 for node, result in ins_l.items():
8499 result.Raise("Can't contact node %s" % node)
8501 runningon_source = instance.name in ins_l[source_node].payload
8502 runningon_target = instance.name in ins_l[target_node].payload
8504 if runningon_source and runningon_target:
8505 raise errors.OpExecError("Instance seems to be running on two nodes,"
8506 " or the hypervisor is confused; you will have"
8507 " to ensure manually that it runs only on one"
8508 " and restart this operation")
8510 if not (runningon_source or runningon_target):
8511 raise errors.OpExecError("Instance does not seem to be running at all;"
8512 " in this case it's safer to repair by"
8513 " running 'gnt-instance stop' to ensure disk"
8514 " shutdown, and then restarting it")
8516 if runningon_target:
8517 # the migration has actually succeeded, we need to update the config
8518 self.feedback_fn("* instance running on secondary node (%s),"
8519 " updating config" % target_node)
8520 instance.primary_node = target_node
8521 self.cfg.Update(instance, self.feedback_fn)
8522 demoted_node = source_node
8524 self.feedback_fn("* instance confirmed to be running on its"
8525 " primary node (%s)" % source_node)
8526 demoted_node = target_node
8528 if instance.disk_template in constants.DTS_INT_MIRROR:
8529 self._EnsureSecondary(demoted_node)
8531 self._WaitUntilSync()
8532 except errors.OpExecError:
8533 # we ignore here errors, since if the device is standalone, it
8534 # won't be able to sync
8536 self._GoStandalone()
8537 self._GoReconnect(False)
8538 self._WaitUntilSync()
8540 self.feedback_fn("* done")
8542 def _RevertDiskStatus(self):
8543 """Try to revert the disk status after a failed migration.
8546 target_node = self.target_node
8547 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8551 self._EnsureSecondary(target_node)
8552 self._GoStandalone()
8553 self._GoReconnect(False)
8554 self._WaitUntilSync()
8555 except errors.OpExecError, err:
8556 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8557 " please try to recover the instance manually;"
8558 " error '%s'" % str(err))
8560 def _AbortMigration(self):
8561 """Call the hypervisor code to abort a started migration.
8564 instance = self.instance
8565 target_node = self.target_node
8566 source_node = self.source_node
8567 migration_info = self.migration_info
8569 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8573 abort_msg = abort_result.fail_msg
8575 logging.error("Aborting migration failed on target node %s: %s",
8576 target_node, abort_msg)
8577 # Don't raise an exception here, as we still have to try to revert the
8578 # disk status, even if this step failed.
8580 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8581 instance, False, self.live)
8582 abort_msg = abort_result.fail_msg
8584 logging.error("Aborting migration failed on source node %s: %s",
8585 source_node, abort_msg)
8587 def _ExecMigration(self):
8588 """Migrate an instance.
8590 The migrate is done by:
8591 - change the disks into dual-master mode
8592 - wait until disks are fully synchronized again
8593 - migrate the instance
8594 - change disks on the new secondary node (the old primary) to secondary
8595 - wait until disks are fully synchronized
8596 - change disks into single-master mode
8599 instance = self.instance
8600 target_node = self.target_node
8601 source_node = self.source_node
8603 # Check for hypervisor version mismatch and warn the user.
8604 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8605 None, [self.instance.hypervisor])
8606 for ninfo in nodeinfo.values():
8607 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8609 (_, _, (src_info, )) = nodeinfo[source_node].payload
8610 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8612 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8613 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8614 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8615 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8616 if src_version != dst_version:
8617 self.feedback_fn("* warning: hypervisor version mismatch between"
8618 " source (%s) and target (%s) node" %
8619 (src_version, dst_version))
8621 self.feedback_fn("* checking disk consistency between source and target")
8622 for (idx, dev) in enumerate(instance.disks):
8623 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
8624 raise errors.OpExecError("Disk %s is degraded or not fully"
8625 " synchronized on target node,"
8626 " aborting migration" % idx)
8628 if self.current_mem > self.tgt_free_mem:
8629 if not self.allow_runtime_changes:
8630 raise errors.OpExecError("Memory ballooning not allowed and not enough"
8631 " free memory to fit instance %s on target"
8632 " node %s (have %dMB, need %dMB)" %
8633 (instance.name, target_node,
8634 self.tgt_free_mem, self.current_mem))
8635 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8636 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8639 rpcres.Raise("Cannot modify instance runtime memory")
8641 # First get the migration information from the remote node
8642 result = self.rpc.call_migration_info(source_node, instance)
8643 msg = result.fail_msg
8645 log_err = ("Failed fetching source migration information from %s: %s" %
8647 logging.error(log_err)
8648 raise errors.OpExecError(log_err)
8650 self.migration_info = migration_info = result.payload
8652 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8653 # Then switch the disks to master/master mode
8654 self._EnsureSecondary(target_node)
8655 self._GoStandalone()
8656 self._GoReconnect(True)
8657 self._WaitUntilSync()
8659 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8660 result = self.rpc.call_accept_instance(target_node,
8663 self.nodes_ip[target_node])
8665 msg = result.fail_msg
8667 logging.error("Instance pre-migration failed, trying to revert"
8668 " disk status: %s", msg)
8669 self.feedback_fn("Pre-migration failed, aborting")
8670 self._AbortMigration()
8671 self._RevertDiskStatus()
8672 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8673 (instance.name, msg))
8675 self.feedback_fn("* migrating instance to %s" % target_node)
8676 result = self.rpc.call_instance_migrate(source_node, instance,
8677 self.nodes_ip[target_node],
8679 msg = result.fail_msg
8681 logging.error("Instance migration failed, trying to revert"
8682 " disk status: %s", msg)
8683 self.feedback_fn("Migration failed, aborting")
8684 self._AbortMigration()
8685 self._RevertDiskStatus()
8686 raise errors.OpExecError("Could not migrate instance %s: %s" %
8687 (instance.name, msg))
8689 self.feedback_fn("* starting memory transfer")
8690 last_feedback = time.time()
8692 result = self.rpc.call_instance_get_migration_status(source_node,
8694 msg = result.fail_msg
8695 ms = result.payload # MigrationStatus instance
8696 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8697 logging.error("Instance migration failed, trying to revert"
8698 " disk status: %s", msg)
8699 self.feedback_fn("Migration failed, aborting")
8700 self._AbortMigration()
8701 self._RevertDiskStatus()
8702 raise errors.OpExecError("Could not migrate instance %s: %s" %
8703 (instance.name, msg))
8705 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8706 self.feedback_fn("* memory transfer complete")
8709 if (utils.TimeoutExpired(last_feedback,
8710 self._MIGRATION_FEEDBACK_INTERVAL) and
8711 ms.transferred_ram is not None):
8712 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8713 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8714 last_feedback = time.time()
8716 time.sleep(self._MIGRATION_POLL_INTERVAL)
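# Illustrative example (hypothetical figures): with ms.transferred_ram == 1536
# and ms.total_ram == 4096 the feedback above reports
# 100 * 1536.0 / 4096.0 == 37.50 % memory transfer progress; both values come
# from the hypervisor-provided MigrationStatus payload.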
8718 result = self.rpc.call_instance_finalize_migration_src(source_node,
8722 msg = result.fail_msg
8724 logging.error("Instance migration succeeded, but finalization failed"
8725 " on the source node: %s", msg)
8726 raise errors.OpExecError("Could not finalize instance migration: %s" %
8729 instance.primary_node = target_node
8731 # distribute new instance config to the other nodes
8732 self.cfg.Update(instance, self.feedback_fn)
8734 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8738 msg = result.fail_msg
8740 logging.error("Instance migration succeeded, but finalization failed"
8741 " on the target node: %s", msg)
8742 raise errors.OpExecError("Could not finalize instance migration: %s" %
8745 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8746 self._EnsureSecondary(source_node)
8747 self._WaitUntilSync()
8748 self._GoStandalone()
8749 self._GoReconnect(False)
8750 self._WaitUntilSync()
8752 # If the instance's disk template is `rbd' or `ext' and there was a
8753 # successful migration, unmap the device from the source node.
8754 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
8755 disks = _ExpandCheckDisks(instance, instance.disks)
8756 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8758 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
8759 msg = result.fail_msg
8761 logging.error("Migration was successful, but couldn't unmap the"
8762 " block device %s on source node %s: %s",
8763 disk.iv_name, source_node, msg)
8764 logging.error("You need to unmap the device %s manually on %s",
8765 disk.iv_name, source_node)
8767 self.feedback_fn("* done")
8769 def _ExecFailover(self):
8770 """Failover an instance.
8772 The failover is done by shutting it down on its present node and
8773 starting it on the secondary.
8776 instance = self.instance
8777 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8779 source_node = instance.primary_node
8780 target_node = self.target_node
8782 if instance.admin_state == constants.ADMINST_UP:
8783 self.feedback_fn("* checking disk consistency between source and target")
8784 for (idx, dev) in enumerate(instance.disks):
8785 # for drbd, these are drbd over lvm
8786 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
8788 if primary_node.offline:
8789 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8791 (primary_node.name, idx, target_node))
8792 elif not self.ignore_consistency:
8793 raise errors.OpExecError("Disk %s is degraded on target node,"
8794 " aborting failover" % idx)
8796 self.feedback_fn("* not checking disk consistency as instance is not"
8799 self.feedback_fn("* shutting down instance on source node")
8800 logging.info("Shutting down instance %s on node %s",
8801 instance.name, source_node)
8803 result = self.rpc.call_instance_shutdown(source_node, instance,
8804 self.shutdown_timeout)
8805 msg = result.fail_msg
8807 if self.ignore_consistency or primary_node.offline:
8808 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8809 " proceeding anyway; please make sure node"
8810 " %s is down; error details: %s",
8811 instance.name, source_node, source_node, msg)
8813 raise errors.OpExecError("Could not shutdown instance %s on"
8815 (instance.name, source_node, msg))
8817 self.feedback_fn("* deactivating the instance's disks on source node")
8818 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8819 raise errors.OpExecError("Can't shut down the instance's disks")
8821 instance.primary_node = target_node
8822 # distribute new instance config to the other nodes
8823 self.cfg.Update(instance, self.feedback_fn)
8825 # Only start the instance if it's marked as up
8826 if instance.admin_state == constants.ADMINST_UP:
8827 self.feedback_fn("* activating the instance's disks on target node %s" %
8829 logging.info("Starting instance %s on node %s",
8830 instance.name, target_node)
8832 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8833 ignore_secondaries=True)
8835 _ShutdownInstanceDisks(self.lu, instance)
8836 raise errors.OpExecError("Can't activate the instance's disks")
8838 self.feedback_fn("* starting the instance on the target node %s" %
8840 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8842 msg = result.fail_msg
8844 _ShutdownInstanceDisks(self.lu, instance)
8845 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8846 (instance.name, target_node, msg))
8848 def Exec(self, feedback_fn):
8849 """Perform the migration.
8852 self.feedback_fn = feedback_fn
8853 self.source_node = self.instance.primary_node
8855 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8856 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8857 self.target_node = self.instance.secondary_nodes[0]
8858 # Otherwise self.target_node has been populated either
8859 # directly, or through an iallocator.
8861 self.all_nodes = [self.source_node, self.target_node]
8862 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8863 in self.cfg.GetMultiNodeInfo(self.all_nodes))
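# Illustrative note: self.nodes_ip maps node name -> secondary IP and is what
# the DRBD disconnect/attach RPCs defined above use for replication traffic,
# e.g. {"node1.example.com": "192.0.2.10", "node2.example.com": "192.0.2.11"};
# the names and addresses are hypothetical.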
8866 feedback_fn("Failover instance %s" % self.instance.name)
8867 self._ExecFailover()
8869 feedback_fn("Migrating instance %s" % self.instance.name)
8872 return self._ExecCleanup()
8874 return self._ExecMigration()
8877 def _CreateBlockDev(lu, node, instance, device, force_create, info,
8879 """Wrapper around L{_CreateBlockDevInner}.
8881 This method annotates the root device first.
8884 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
8885 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
8889 def _CreateBlockDevInner(lu, node, instance, device, force_create,
8891 """Create a tree of block devices on a given node.
8893 If this device type has to be created on secondaries, create it and
8896 If not, just recurse to children keeping the same 'force' value.
8898 @attention: The device has to be annotated already.
8900 @param lu: the lu on whose behalf we execute
8901 @param node: the node on which to create the device
8902 @type instance: L{objects.Instance}
8903 @param instance: the instance which owns the device
8904 @type device: L{objects.Disk}
8905 @param device: the device to create
8906 @type force_create: boolean
8907 @param force_create: whether to force creation of this device; this
8908 will be changed to True whenever we find a device for which
8909 CreateOnSecondary() returns True
8910 @param info: the extra 'metadata' we should attach to the device
8911 (this will be represented as a LVM tag)
8912 @type force_open: boolean
8913 @param force_open: this parameter will be passed to the
8914 L{backend.BlockdevCreate} function where it specifies
8915 whether we run on primary or not, and it affects both
8916 the child assembly and the device's own Open() execution
8919 if device.CreateOnSecondary():
8923 for child in device.children:
8924 _CreateBlockDevInner(lu, node, instance, child, force_create,
8927 if not force_create:
8930 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8933 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8934 """Create a single block device on a given node.
8936 This will not recurse over children of the device, so they must be
8939 @param lu: the lu on whose behalf we execute
8940 @param node: the node on which to create the device
8941 @type instance: L{objects.Instance}
8942 @param instance: the instance which owns the device
8943 @type device: L{objects.Disk}
8944 @param device: the device to create
8945 @param info: the extra 'metadata' we should attach to the device
8946 (this will be represented as a LVM tag)
8947 @type force_open: boolean
8948 @param force_open: this parameter will be passed to the
8949 L{backend.BlockdevCreate} function where it specifies
8950 whether we run on primary or not, and it affects both
8951 the child assembly and the device's own Open() execution
8954 lu.cfg.SetDiskID(device, node)
8955 result = lu.rpc.call_blockdev_create(node, device, device.size,
8956 instance.name, force_open, info)
8957 result.Raise("Can't create block device %s on"
8958 " node %s for instance %s" % (device, node, instance.name))
8959 if device.physical_id is None:
8960 device.physical_id = result.payload
8963 def _GenerateUniqueNames(lu, exts):
8964 """Generate a suitable LV name.
8966 This will generate a logical volume name for the given instance.
8971 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8972 results.append("%s%s" % (new_id, val))
8975 def _GetPCIInfo(lu, dev_type):
8977 if (hasattr(lu, 'op') and lu.op.hotplug):
8978 # case of InstanceCreate()
8979 if hasattr(lu, 'hotplug_info'):
8980 if lu.hotplug_info is not None:
8981 idx = getattr(lu.hotplug_info, dev_type)
8982 setattr(lu.hotplug_info, dev_type, idx+1)
8983 if dev_type == 'disks' and idx == 0:
8984 lu.LogInfo("Disk 0 cannot be hotpluggable.")
8986 pci = lu.hotplug_info.pci_pool.pop()
8987 lu.LogInfo("Choosing pci slot %d" % pci)
8989 # case of InstanceSetParams()
8990 elif lu.instance.hotplug_info is not None:
8991 idx, pci = lu.cfg.GetPCIInfo(lu.instance, dev_type)
8992 lu.LogInfo("Choosing pci slot %d" % pci)
8995 lu.LogWarning("Hotplug not supported for this instance.")
8999 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
9000 iv_name, p_minor, s_minor):
9001 """Generate a drbd8 device complete with its children.
9004 assert len(vgnames) == len(names) == 2
9005 port = lu.cfg.AllocatePort()
9006 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
9008 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
9009 logical_id=(vgnames[0], names[0]),
9011 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
9012 logical_id=(vgnames[1], names[1]),
9015 disk_idx, pci = _GetPCIInfo(lu, 'disks')
9016 drbd_dev = objects.Disk(idx=disk_idx, pci=pci,
9017 dev_type=constants.LD_DRBD8, size=size,
9018 logical_id=(primary, secondary, port,
9021 children=[dev_data, dev_meta],
9022 iv_name=iv_name, params={})
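# Illustrative sketch of the device tree built above (reading aid only):
#   DRBD8 device, size=size,
#     logical_id=(primary, secondary, port, p_minor, s_minor, shared_secret)
#     children:
#       - LV data volume: logical_id=(vgnames[0], names[0]), size=size
#       - LV meta volume: logical_id=(vgnames[1], names[1]), size=DRBD_META_SIZE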
9026 _DISK_TEMPLATE_NAME_PREFIX = {
9027 constants.DT_PLAIN: "",
9028 constants.DT_RBD: ".rbd",
9029 constants.DT_EXT: ".ext",
9033 _DISK_TEMPLATE_DEVICE_TYPE = {
9034 constants.DT_PLAIN: constants.LD_LV,
9035 constants.DT_FILE: constants.LD_FILE,
9036 constants.DT_SHARED_FILE: constants.LD_FILE,
9037 constants.DT_BLOCK: constants.LD_BLOCKDEV,
9038 constants.DT_RBD: constants.LD_RBD,
9039 constants.DT_EXT: constants.LD_EXT,
9043 def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
9044 secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
9045 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
9046 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
9047 """Generate the entire disk layout for a given template type.
9050 #TODO: compute space requirements
9052 vgname = lu.cfg.GetVGName()
9053 disk_count = len(disk_info)
9056 if template_name == constants.DT_DISKLESS:
9058 elif template_name == constants.DT_DRBD8:
9059 if len(secondary_nodes) != 1:
9060 raise errors.ProgrammerError("Wrong template configuration")
9061 remote_node = secondary_nodes[0]
9062 minors = lu.cfg.AllocateDRBDMinor(
9063 [primary_node, remote_node] * len(disk_info), instance_name)
9065 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
9067 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
9070 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
9071 for i in range(disk_count)]):
9072 names.append(lv_prefix + "_data")
9073 names.append(lv_prefix + "_meta")
9074 for idx, disk in enumerate(disk_info):
9075 disk_index = idx + base_index
9076 data_vg = disk.get(constants.IDISK_VG, vgname)
9077 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
9078 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
9079 disk[constants.IDISK_SIZE],
9081 names[idx * 2:idx * 2 + 2],
9082 "disk/%d" % disk_index,
9083 minors[idx * 2], minors[idx * 2 + 1])
9084 disk_dev.mode = disk[constants.IDISK_MODE]
9085 disks.append(disk_dev)
9088 raise errors.ProgrammerError("Wrong template configuration")
9090 if template_name == constants.DT_FILE:
9092 elif template_name == constants.DT_SHARED_FILE:
9093 _req_shr_file_storage()
9095 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
9096 if name_prefix is None:
9099 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
9100 (name_prefix, base_index + i)
9101 for i in range(disk_count)])
9103 if template_name == constants.DT_PLAIN:
9104 def logical_id_fn(idx, _, disk):
9105 vg = disk.get(constants.IDISK_VG, vgname)
9106 return (vg, names[idx])
9107 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
9109 lambda _, disk_index, disk: (file_driver,
9110 "%s/disk%d" % (file_storage_dir,
9112 elif template_name == constants.DT_BLOCK:
9114 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
9115 disk[constants.IDISK_ADOPT])
9116 elif template_name == constants.DT_RBD:
9117 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
9118 elif template_name == constants.DT_EXT:
9119 def logical_id_fn(idx, _, disk):
9120 provider = disk.get(constants.IDISK_PROVIDER, None)
9121 if provider is None:
9122 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
9123 " not found", constants.DT_EXT,
9124 constants.IDISK_PROVIDER)
9125 return (provider, names[idx])
9127 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
9129 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
9131 for idx, disk in enumerate(disk_info):
9133 # Only for the Ext template add disk_info to params
9134 if template_name == constants.DT_EXT:
9135 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
9137 if key not in constants.IDISK_PARAMS:
9138 params[key] = disk[key]
9139 disk_index = idx + base_index
9140 size = disk[constants.IDISK_SIZE]
9141 feedback_fn("* disk %s, size %s" %
9142 (disk_index, utils.FormatUnit(size, "h")))
9144 disk_idx, pci = _GetPCIInfo(lu, 'disks')
9146 disks.append(objects.Disk(dev_type=dev_type, size=size,
9147 logical_id=logical_id_fn(idx, disk_index, disk),
9148 iv_name="disk/%d" % disk_index,
9149 mode=disk[constants.IDISK_MODE],
9150 params=params, idx=disk_idx, pci=pci))
9155 def _GetInstanceInfoText(instance):
9156 """Compute that text that should be added to the disk's metadata.
9159 return "originstname+%s" % instance.name
9162 def _CalcEta(time_taken, written, total_size):
9163 """Calculates the ETA based on size written and total size.
9165 @param time_taken: The time taken so far
9166 @param written: amount written so far
9167 @param total_size: The total size of data to be written
9168 @return: The remaining time in seconds
9171 avg_time = time_taken / float(written)
9172 return (total_size - written) * avg_time
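# Worked example (illustrative only): with time_taken=60s, written=1024 MiB
# and total_size=4096 MiB, the average is 60/1024.0 seconds per MiB, so
# _CalcEta(60, 1024, 4096) == (4096 - 1024) * (60 / 1024.0) == 180.0 seconds.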
9175 def _WipeDisks(lu, instance):
9176 """Wipes instance disks.
9178 @type lu: L{LogicalUnit}
9179 @param lu: the logical unit on whose behalf we execute
9180 @type instance: L{objects.Instance}
9181 @param instance: the instance whose disks we should create
9182 @return: the success of the wipe
9185 node = instance.primary_node
9187 for device in instance.disks:
9188 lu.cfg.SetDiskID(device, node)
9190 logging.info("Pause sync of instance %s disks", instance.name)
9191 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9192 (instance.disks, instance),
9194 result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)
9196 for idx, success in enumerate(result.payload):
9198 logging.warn("pause-sync of instance %s for disks %d failed",
9202 for idx, device in enumerate(instance.disks):
9203 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
9204 # MAX_WIPE_CHUNK at max
9205 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
9206 constants.MIN_WIPE_CHUNK_PERCENT)
9207 # we _must_ make this an int, otherwise rounding errors will
9209 wipe_chunk_size = int(wipe_chunk_size)
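# Illustrative example (assuming, e.g., MIN_WIPE_CHUNK_PERCENT=10 and
# MAX_WIPE_CHUNK=1024 MiB; see constants.py for the real values): a 4096 MiB
# disk gets int(min(1024, 4096 / 100.0 * 10)) == 409 MiB chunks, while very
# large disks are capped at the 1024 MiB maximum chunk size.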
9211 lu.LogInfo("* Wiping disk %d", idx)
9212 logging.info("Wiping disk %d for instance %s, node %s using"
9213 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
9218 start_time = time.time()
9220 while offset < size:
9221 wipe_size = min(wipe_chunk_size, size - offset)
9222 logging.debug("Wiping disk %d, offset %s, chunk %s",
9223 idx, offset, wipe_size)
9224 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
9226 result.Raise("Could not wipe disk %d at offset %d for size %d" %
9227 (idx, offset, wipe_size))
9230 if now - last_output >= 60:
9231 eta = _CalcEta(now - start_time, offset, size)
9232 lu.LogInfo(" - done: %.1f%% ETA: %s" %
9233 (offset / float(size) * 100, utils.FormatSeconds(eta)))
9236 logging.info("Resume sync of instance %s disks", instance.name)
9238 result = lu.rpc.call_blockdev_pause_resume_sync(node,
9239 (instance.disks, instance),
9243 lu.LogWarning("RPC call to %s for resuming disk syncing failed,"
9244 " please have a look at the status and troubleshoot"
9245 " the issue: %s", node, result.fail_msg)
9247 for idx, success in enumerate(result.payload):
9249 lu.LogWarning("Resume sync of disk %d failed, please have a"
9250 " look at the status and troubleshoot the issue", idx)
9251 logging.warn("resume-sync of instance %s for disks %d failed",
9255 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
9256 """Create all disks for an instance.
9258 This abstracts away some work from AddInstance.
9260 @type lu: L{LogicalUnit}
9261 @param lu: the logical unit on whose behalf we execute
9262 @type instance: L{objects.Instance}
9263 @param instance: the instance whose disks we should create
9265 @param to_skip: list of indices to skip
9266 @type target_node: string
9267 @param target_node: if passed, overrides the target node for creation
9269 @return: the success of the creation
9272 info = _GetInstanceInfoText(instance)
9273 if target_node is None:
9274 pnode = instance.primary_node
9275 all_nodes = instance.all_nodes
9280 if instance.disk_template in constants.DTS_FILEBASED:
9281 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9282 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
9284 result.Raise("Failed to create directory '%s' on"
9285 " node %s" % (file_storage_dir, pnode))
9287 # Note: this needs to be kept in sync with adding of disks in
9288 # LUInstanceSetParams
9289 for idx, device in enumerate(instance.disks):
9290 if to_skip and idx in to_skip:
9292 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
9294 for node in all_nodes:
9295 f_create = node == pnode
9296 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
9299 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
9300 """Remove all disks for an instance.
9302 This abstracts away some work from `AddInstance()` and
9303 `RemoveInstance()`. Note that in case some of the devices couldn't
9304 be removed, the removal will continue with the other ones (compare
9305 with `_CreateDisks()`).
9307 @type lu: L{LogicalUnit}
9308 @param lu: the logical unit on whose behalf we execute
9309 @type instance: L{objects.Instance}
9310 @param instance: the instance whose disks we should remove
9311 @type target_node: string
9312 @param target_node: used to override the node on which to remove the disks
9314 @return: the success of the removal
9317 logging.info("Removing block devices for instance %s", instance.name)
9320 ports_to_release = set()
9321 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
9322 for (idx, device) in enumerate(anno_disks):
9324 edata = [(target_node, device)]
9326 edata = device.ComputeNodeTree(instance.primary_node)
9327 for node, disk in edata:
9328 lu.cfg.SetDiskID(disk, node)
9329 result = lu.rpc.call_blockdev_remove(node, disk)
9331 lu.LogWarning("Could not remove disk %s on node %s,"
9332 " continuing anyway: %s", idx, node, result.fail_msg)
9333 if not (result.offline and node != instance.primary_node):
9336 # if this is a DRBD disk, return its port to the pool
9337 if device.dev_type in constants.LDS_DRBD:
9338 ports_to_release.add(device.logical_id[2])
9340 if all_result or ignore_failures:
9341 for port in ports_to_release:
9342 lu.cfg.AddTcpUdpPort(port)
9344 if instance.disk_template in constants.DTS_FILEBASED:
9345 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
9349 tgt = instance.primary_node
9350 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
9352 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
9353 file_storage_dir, instance.primary_node, result.fail_msg)
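
# Illustrative sketch (not the LU code itself): DRBD TCP/UDP ports collected
# while removing the disks are returned to the cluster pool only when every
# removal succeeded, or when failures are explicitly ignored, so a partially
# removed instance does not get its ports recycled. The hypothetical helper
# below captures just that rule.
def _ExamplePortsToRelease(all_removals_ok, ignore_failures, collected_ports):
  """Return the ports that may be handed back to the pool (sketch).

  """
  if all_removals_ok or ignore_failures:
    return set(collected_ports)
  return set()
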
9359 def _ComputeDiskSizePerVG(disk_template, disks):
9360 """Compute disk size requirements in the volume group
9363 def _compute(disks, payload):
9364 """Universal algorithm.
9369 vgs[disk[constants.IDISK_VG]] = \
9370 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
9374 # Required free disk space as a function of disk and swap space
9376 constants.DT_DISKLESS: {},
9377 constants.DT_PLAIN: _compute(disks, 0),
9378 # 128 MB are added for drbd metadata for each disk
9379 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9380 constants.DT_FILE: {},
9381 constants.DT_SHARED_FILE: {},
9384 if disk_template not in req_size_dict:
9385 raise errors.ProgrammerError("Disk template '%s' size requirement"
9386 " is unknown" % disk_template)
9388 return req_size_dict[disk_template]
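
# Illustrative sketch of the per-VG accumulation above, with a worked example
# (hypothetical numbers): two 10240 MB disks in VG "xenvg" under DT_DRBD8
# need 2 * (10240 + 128) = 20736 MB in that VG, since DRBD_META_SIZE of
# metadata is added per disk.
def _ExamplePerVgRequirements(disks, per_disk_overhead):
  """Accumulate the required space per volume group (sketch).

  """
  vgs = {}
  for disk in disks:
    vg = disk[constants.IDISK_VG]
    vgs[vg] = vgs.get(vg, 0) + disk[constants.IDISK_SIZE] + per_disk_overhead
  return vgs
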
9391 def _ComputeDiskSize(disk_template, disks):
9392 """Compute disk size requirements according to disk template
9395 # Required free disk space as a function of disk and swap space
9397 constants.DT_DISKLESS: None,
9398 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9399 # 128 MB are added for drbd metadata for each disk
9401 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9402 constants.DT_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9403 constants.DT_SHARED_FILE: sum(d[constants.IDISK_SIZE] for d in disks),
9404 constants.DT_BLOCK: 0,
9405 constants.DT_RBD: sum(d[constants.IDISK_SIZE] for d in disks),
9406 constants.DT_EXT: sum(d[constants.IDISK_SIZE] for d in disks),
9409 if disk_template not in req_size_dict:
9410 raise errors.ProgrammerError("Disk template '%s' size requirement"
9411 " is unknown" % disk_template)
9413 return req_size_dict[disk_template]
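
# Worked example for the flat variant above (hypothetical numbers): for two
# 10240 MB disks the requirement is 20480 MB with DT_PLAIN, 20480 + 2 * 128 =
# 20736 MB with DT_DRBD8 (metadata added per disk), None for DT_DISKLESS and
# 0 for DT_BLOCK, since adopted block devices need no new space.
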
9416 def _FilterVmNodes(lu, nodenames):
9417 """Filters out non-vm_capable nodes from a list.
9419 @type lu: L{LogicalUnit}
9420 @param lu: the logical unit for which we check
9421 @type nodenames: list
9422 @param nodenames: the list of nodes on which we should check
9424 @return: the list of vm-capable nodes
9427 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9428 return [name for name in nodenames if name not in non_vm_nodes]
9431 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9432 """Hypervisor parameter validation.
9434 This function abstracts the hypervisor parameter validation to be
9435 used in both instance create and instance modify.
9437 @type lu: L{LogicalUnit}
9438 @param lu: the logical unit for which we check
9439 @type nodenames: list
9440 @param nodenames: the list of nodes on which we should check
9441 @type hvname: string
9442 @param hvname: the name of the hypervisor we should use
9443 @type hvparams: dict
9444 @param hvparams: the parameters which we need to check
9445 @raise errors.OpPrereqError: if the parameters are not valid
9448 nodenames = _FilterVmNodes(lu, nodenames)
9450 cluster = lu.cfg.GetClusterInfo()
9451 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9453 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9454 for node in nodenames:
9458 info.Raise("Hypervisor parameter validation failed on node %s" % node)
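
# Illustrative sketch (assumption about the merge semantics): the cluster-wide
# hvparams for the chosen hypervisor serve as a base dict and the per-operation
# hvparams override individual keys; the merged result is then validated on
# every vm-capable node. The hypothetical helper below shows such an overlay.
def _ExampleOverlayParams(cluster_defaults, overrides):
  """Overlay two parameter dicts, with overrides winning (sketch).

  """
  merged = dict(cluster_defaults)
  merged.update(overrides)
  return merged

# Hypothetical usage:
#   _ExampleOverlayParams({"kernel_path": "/boot/vmlinuz"},
#                         {"kernel_path": "/boot/vmlinuz-custom"})
#   -> {"kernel_path": "/boot/vmlinuz-custom"}
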
9461 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9462 """OS parameters validation.
9464 @type lu: L{LogicalUnit}
9465 @param lu: the logical unit for which we check
9466 @type required: boolean
9467 @param required: whether the validation should fail if the OS is not found
9469 @type nodenames: list
9470 @param nodenames: the list of nodes on which we should check
9471 @type osname: string
9472 @param osname: the name of the OS we should use
9473 @type osparams: dict
9474 @param osparams: the parameters which we need to check
9475 @raise errors.OpPrereqError: if the parameters are not valid
9478 nodenames = _FilterVmNodes(lu, nodenames)
9479 result = lu.rpc.call_os_validate(nodenames, required, osname,
9480 [constants.OS_VALIDATE_PARAMETERS],
9482 for node, nres in result.items():
9483 # we don't check for offline cases since this should be run only
9484 # against the master node and/or an instance's nodes
9485 nres.Raise("OS Parameters validation failed on node %s" % node)
9486 if not nres.payload:
9487 lu.LogInfo("OS %s not found on node %s, validation skipped",
9491 class LUInstanceCreate(LogicalUnit):
9492 """Create an instance.
9495 HPATH = "instance-add"
9496 HTYPE = constants.HTYPE_INSTANCE
9499 def CheckArguments(self):
9503 # do not require name_check to ease forward/backward compatibility
9505 if self.op.no_install and self.op.start:
9506 self.LogInfo("No-installation mode selected, disabling startup")
9507 self.op.start = False
9508 # validate/normalize the instance name
9509 self.op.instance_name = \
9510 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9512 if self.op.ip_check and not self.op.name_check:
9513 # TODO: make the ip check more flexible and not depend on the name check
9514 raise errors.OpPrereqError("Cannot do IP address check without a name"
9515 " check", errors.ECODE_INVAL)
9517 # check nics' parameter names
9518 for nic in self.op.nics:
9519 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9521 # check disks. parameter names and consistent adopt/no-adopt strategy
9522 has_adopt = has_no_adopt = False
9523 for disk in self.op.disks:
9524 if self.op.disk_template != constants.DT_EXT:
9525 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9526 if constants.IDISK_ADOPT in disk:
9530 if has_adopt and has_no_adopt:
9531 raise errors.OpPrereqError("Either all disks are adopted or none is",
9534 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9535 raise errors.OpPrereqError("Disk adoption is not supported for the"
9536 " '%s' disk template" %
9537 self.op.disk_template,
9539 if self.op.iallocator is not None:
9540 raise errors.OpPrereqError("Disk adoption not allowed with an"
9541 " iallocator script", errors.ECODE_INVAL)
9542 if self.op.mode == constants.INSTANCE_IMPORT:
9543 raise errors.OpPrereqError("Disk adoption not allowed for"
9544 " instance import", errors.ECODE_INVAL)
9546 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9547 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9548 " but no 'adopt' parameter given" %
9549 self.op.disk_template,
9552 self.adopt_disks = has_adopt
9554 # instance name verification
9555 if self.op.name_check:
9556 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
9557 self.op.instance_name = self.hostname1.name
9558 # used in CheckPrereq for ip ping check
9559 self.check_ip = self.hostname1.ip
9561 self.check_ip = None
9563 # file storage checks
9564 if (self.op.file_driver and
9565 not self.op.file_driver in constants.FILE_DRIVER):
9566 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9567 self.op.file_driver, errors.ECODE_INVAL)
9569 if self.op.disk_template == constants.DT_FILE:
9570 opcodes.RequireFileStorage()
9571 elif self.op.disk_template == constants.DT_SHARED_FILE:
9572 opcodes.RequireSharedFileStorage()
9574 ### Node/iallocator related checks
9575 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9577 if self.op.pnode is not None:
9578 if self.op.disk_template in constants.DTS_INT_MIRROR:
9579 if self.op.snode is None:
9580 raise errors.OpPrereqError("The networked disk templates need"
9581 " a mirror node", errors.ECODE_INVAL)
9583 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9585 self.op.snode = None
9587 self._cds = _GetClusterDomainSecret()
9589 if self.op.mode == constants.INSTANCE_IMPORT:
9590 # On import force_variant must be True, because if we forced it at
9591 # initial install, our only chance when importing it back is that it
9593 self.op.force_variant = True
9595 if self.op.no_install:
9596 self.LogInfo("No-installation mode has no effect during import")
9598 elif self.op.mode == constants.INSTANCE_CREATE:
9599 if self.op.os_type is None:
9600 raise errors.OpPrereqError("No guest OS specified",
9602 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9603 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9604 " installation" % self.op.os_type,
9606 if self.op.disk_template is None:
9607 raise errors.OpPrereqError("No disk template specified",
9610 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9611 # Check handshake to ensure both clusters have the same domain secret
9612 src_handshake = self.op.source_handshake
9613 if not src_handshake:
9614 raise errors.OpPrereqError("Missing source handshake",
9617 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9620 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9623 # Load and check source CA
9624 self.source_x509_ca_pem = self.op.source_x509_ca
9625 if not self.source_x509_ca_pem:
9626 raise errors.OpPrereqError("Missing source X509 CA",
9630 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9632 except OpenSSL.crypto.Error, err:
9633 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9634 (err, ), errors.ECODE_INVAL)
9636 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9637 if errcode is not None:
9638 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9641 self.source_x509_ca = cert
9643 src_instance_name = self.op.source_instance_name
9644 if not src_instance_name:
9645 raise errors.OpPrereqError("Missing source instance name",
9648 self.source_instance_name = \
9649 netutils.GetHostname(name=src_instance_name).name
9652 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9653 self.op.mode, errors.ECODE_INVAL)
9655 def ExpandNames(self):
9656 """ExpandNames for CreateInstance.
9658 Figure out the right locks for instance creation.
9661 self.needed_locks = {}
9663 instance_name = self.op.instance_name
9664 # this is just a preventive check, but someone might still add this
9665 # instance in the meantime, and creation will fail at lock-add time
9666 if instance_name in self.cfg.GetInstanceList():
9667 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9668 instance_name, errors.ECODE_EXISTS)
9670 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9672 if self.op.iallocator:
9673 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9674 # specifying a group on instance creation and then selecting nodes from
9676 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9677 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9679 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9680 nodelist = [self.op.pnode]
9681 if self.op.snode is not None:
9682 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9683 nodelist.append(self.op.snode)
9684 self.needed_locks[locking.LEVEL_NODE] = nodelist
9685 # Lock resources of instance's primary and secondary nodes (copy to
9686 # prevent accidental modification)
9687 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9689 # in case of import lock the source node too
9690 if self.op.mode == constants.INSTANCE_IMPORT:
9691 src_node = self.op.src_node
9692 src_path = self.op.src_path
9694 if src_path is None:
9695 self.op.src_path = src_path = self.op.instance_name
9697 if src_node is None:
9698 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9699 self.op.src_node = None
9700 if os.path.isabs(src_path):
9701 raise errors.OpPrereqError("Importing an instance from a path"
9702 " requires a source node option",
9705 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9706 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9707 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9708 if not os.path.isabs(src_path):
9709 self.op.src_path = src_path = \
9710 utils.PathJoin(constants.EXPORT_DIR, src_path)
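  # Illustrative note (sketch, values are hypothetical): after ExpandNames the
  # lock declaration typically looks like either
  #   {LEVEL_NODE: ALL_SET, LEVEL_NODE_RES: ALL_SET}
  # when an iallocator will pick the nodes, or
  #   {LEVEL_NODE: ["pnode", "snode"], LEVEL_NODE_RES: ["pnode", "snode"]}
  # when the nodes were named explicitly (plus the source node for imports).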
9712 def _RunAllocator(self):
9713 """Run the allocator based on input opcode.
9716 #TODO Export network to iallocator so that it chooses a pnode
9717 # in a nodegroup that has the desired network connected to
9718 nics = [n.ToDict() for n in self.nics]
9719 ial = IAllocator(self.cfg, self.rpc,
9720 mode=constants.IALLOCATOR_MODE_ALLOC,
9721 name=self.op.instance_name,
9722 disk_template=self.op.disk_template,
9725 vcpus=self.be_full[constants.BE_VCPUS],
9726 memory=self.be_full[constants.BE_MAXMEM],
9727 spindle_use=self.be_full[constants.BE_SPINDLE_USE],
9730 hypervisor=self.op.hypervisor,
9733 ial.Run(self.op.iallocator)
9736 raise errors.OpPrereqError("Can't compute nodes using"
9737 " iallocator '%s': %s" %
9738 (self.op.iallocator, ial.info),
9740 if len(ial.result) != ial.required_nodes:
9741 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9742 " of nodes (%s), required %s" %
9743 (self.op.iallocator, len(ial.result),
9744 ial.required_nodes), errors.ECODE_FAULT)
9745 self.op.pnode = ial.result[0]
9746 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9747 self.op.instance_name, self.op.iallocator,
9748 utils.CommaJoin(ial.result))
9749 if ial.required_nodes == 2:
9750 self.op.snode = ial.result[1]
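  # Illustrative note (sketch): for a non-mirrored template the allocator is
  # expected to return exactly one node name, e.g. ["node1"], which becomes
  # the primary; for internally mirrored templates required_nodes is 2 and a
  # result such as ["node1", "node2"] fills both pnode and snode, as above.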
9752 def BuildHooksEnv(self):
9755 This runs on master, primary and secondary nodes of the instance.
9759 "ADD_MODE": self.op.mode,
9761 if self.op.mode == constants.INSTANCE_IMPORT:
9762 env["SRC_NODE"] = self.op.src_node
9763 env["SRC_PATH"] = self.op.src_path
9764 env["SRC_IMAGES"] = self.src_images
9766 env.update(_BuildInstanceHookEnv(
9767 name=self.op.instance_name,
9768 primary_node=self.op.pnode,
9769 secondary_nodes=self.secondaries,
9770 status=self.op.start,
9771 os_type=self.op.os_type,
9772 minmem=self.be_full[constants.BE_MINMEM],
9773 maxmem=self.be_full[constants.BE_MAXMEM],
9774 vcpus=self.be_full[constants.BE_VCPUS],
9775 nics=_NICListToTuple(self, self.nics),
9776 disk_template=self.op.disk_template,
9777 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9778 for d in self.disks],
9781 hypervisor_name=self.op.hypervisor,
9788 def BuildHooksNodes(self):
9789 """Build hooks nodes.
9792 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9795 def _ReadExportInfo(self):
9796 """Reads the export information from disk.
9798 It will override the opcode source node and path with the actual
9799 information, if these two were not specified before.
9801 @return: the export information
9804 assert self.op.mode == constants.INSTANCE_IMPORT
9806 src_node = self.op.src_node
9807 src_path = self.op.src_path
9809 if src_node is None:
9810 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9811 exp_list = self.rpc.call_export_list(locked_nodes)
9813 for node in exp_list:
9814 if exp_list[node].fail_msg:
9816 if src_path in exp_list[node].payload:
9818 self.op.src_node = src_node = node
9819 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9823 raise errors.OpPrereqError("No export found for relative path %s" %
9824 src_path, errors.ECODE_INVAL)
9826 _CheckNodeOnline(self, src_node)
9827 result = self.rpc.call_export_info(src_node, src_path)
9828 result.Raise("No export or invalid export found in dir %s" % src_path)
9830 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9831 if not export_info.has_section(constants.INISECT_EXP):
9832 raise errors.ProgrammerError("Corrupted export config",
9833 errors.ECODE_ENVIRON)
9835 ei_version = export_info.get(constants.INISECT_EXP, "version")
9836 if (int(ei_version) != constants.EXPORT_VERSION):
9837 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9838 (ei_version, constants.EXPORT_VERSION),
9839 errors.ECODE_ENVIRON)
9842 def _ReadExportParams(self, einfo):
9843 """Use export parameters as defaults.
9845 In case the opcode doesn't specify (as in override) some instance
9846 parameters, then try to use them from the export information, if
9850 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9852 if self.op.disk_template is None:
9853 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9854 self.op.disk_template = einfo.get(constants.INISECT_INS,
9856 if self.op.disk_template not in constants.DISK_TEMPLATES:
9857 raise errors.OpPrereqError("Disk template specified in configuration"
9858 " file is not one of the allowed values:"
9859 " %s" % " ".join(constants.DISK_TEMPLATES))
9861 raise errors.OpPrereqError("No disk template specified and the export"
9862 " is missing the disk_template information",
9865 if not self.op.disks:
9867 # TODO: import the disk iv_name too
9868 for idx in range(constants.MAX_DISKS):
9869 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9870 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9871 disks.append({constants.IDISK_SIZE: disk_sz})
9872 self.op.disks = disks
9873 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9874 raise errors.OpPrereqError("No disk info specified and the export"
9875 " is missing the disk information",
9878 if not self.op.nics:
9880 for idx in range(constants.MAX_NICS):
9881 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9883 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9884 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9891 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9892 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9894 if (self.op.hypervisor is None and
9895 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9896 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9898 if einfo.has_section(constants.INISECT_HYP):
9899 # use the export parameters but do not override the ones
9900 # specified by the user
9901 for name, value in einfo.items(constants.INISECT_HYP):
9902 if name not in self.op.hvparams:
9903 self.op.hvparams[name] = value
9905 if einfo.has_section(constants.INISECT_BEP):
9906 # use the parameters, without overriding
9907 for name, value in einfo.items(constants.INISECT_BEP):
9908 if name not in self.op.beparams:
9909 self.op.beparams[name] = value
9910 # Compatibility for the old "memory" be param
9911 if name == constants.BE_MEMORY:
9912 if constants.BE_MAXMEM not in self.op.beparams:
9913 self.op.beparams[constants.BE_MAXMEM] = value
9914 if constants.BE_MINMEM not in self.op.beparams:
9915 self.op.beparams[constants.BE_MINMEM] = value
9917 # try to read the parameters old style, from the main section
9918 for name in constants.BES_PARAMETERS:
9919 if (name not in self.op.beparams and
9920 einfo.has_option(constants.INISECT_INS, name)):
9921 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9923 if einfo.has_section(constants.INISECT_OSP):
9924 # use the parameters, without overriding
9925 for name, value in einfo.items(constants.INISECT_OSP):
9926 if name not in self.op.osparams:
9927 self.op.osparams[name] = value
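  # Illustrative note (sketch): if the export's backend section only carries
  # the legacy BE_MEMORY value, the compatibility code above copies it into
  # BE_MAXMEM and BE_MINMEM as well, unless those were given explicitly;
  # export values never override parameters already present in the opcode.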
9929 def _RevertToDefaults(self, cluster):
9930 """Revert the instance parameters to the default values.
9934 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9935 for name in self.op.hvparams.keys():
9936 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9937 del self.op.hvparams[name]
9939 be_defs = cluster.SimpleFillBE({})
9940 for name in self.op.beparams.keys():
9941 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9942 del self.op.beparams[name]
9944 nic_defs = cluster.SimpleFillNIC({})
9945 for nic in self.op.nics:
9946 for name in constants.NICS_PARAMETERS:
9947 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9950 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9951 for name in self.op.osparams.keys():
9952 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9953 del self.op.osparams[name]
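  # Illustrative note (sketch): with identify_defaults, a requested parameter
  # whose value equals the cluster default (i.e. what SimpleFillHV/SimpleFillBE
  # would return anyway) is dropped from the opcode, so the new instance keeps
  # tracking the cluster-level default instead of pinning the value itself.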
9955 def _CalculateFileStorageDir(self):
9956 """Calculate final instance file storage dir.
9959 # file storage dir calculation/check
9960 self.instance_file_storage_dir = None
9961 if self.op.disk_template in constants.DTS_FILEBASED:
9962 # build the full file storage dir path
9965 if self.op.disk_template == constants.DT_SHARED_FILE:
9966 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9968 get_fsd_fn = self.cfg.GetFileStorageDir
9970 cfg_storagedir = get_fsd_fn()
9971 if not cfg_storagedir:
9972 raise errors.OpPrereqError("Cluster file storage dir not defined")
9973 joinargs.append(cfg_storagedir)
9975 if self.op.file_storage_dir is not None:
9976 joinargs.append(self.op.file_storage_dir)
9978 joinargs.append(self.op.instance_name)
9980 # pylint: disable=W0142
9981 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
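  # Illustrative example (hypothetical paths): with a cluster file storage
  # directory of "/srv/ganeti/file-storage", an opcode file_storage_dir of
  # "web" and instance name "inst1.example.com", the path computed above is
  # "/srv/ganeti/file-storage/web/inst1.example.com"; without the optional
  # opcode component the instance name is appended directly.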
9983 def CheckPrereq(self): # pylint: disable=R0914
9984 """Check prerequisites.
9987 self._CalculateFileStorageDir()
9989 if self.op.mode == constants.INSTANCE_IMPORT:
9990 export_info = self._ReadExportInfo()
9991 self._ReadExportParams(export_info)
9992 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
9994 self._old_instance_name = None
9996 if (not self.cfg.GetVGName() and
9997 self.op.disk_template not in constants.DTS_NOT_LVM):
9998 raise errors.OpPrereqError("Cluster does not support lvm-based"
9999 " instances", errors.ECODE_STATE)
10001 if (self.op.hypervisor is None or
10002 self.op.hypervisor == constants.VALUE_AUTO):
10003 self.op.hypervisor = self.cfg.GetHypervisorType()
10005 cluster = self.cfg.GetClusterInfo()
10006 enabled_hvs = cluster.enabled_hypervisors
10007 if self.op.hypervisor not in enabled_hvs:
10008 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
10009 " cluster (%s)" % (self.op.hypervisor,
10010 ",".join(enabled_hvs)),
10011 errors.ECODE_STATE)
10013 # Check tag validity
10014 for tag in self.op.tags:
10015 objects.TaggableObject.ValidateTag(tag)
10017 # check hypervisor parameter syntax (locally)
10018 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
10019 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
10021 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
10022 hv_type.CheckParameterSyntax(filled_hvp)
10023 self.hv_full = filled_hvp
10024 # check that we don't specify global parameters on an instance
10025 _CheckGlobalHvParams(self.op.hvparams)
10027 # fill and remember the beparams dict
10028 default_beparams = cluster.beparams[constants.PP_DEFAULT]
10029 for param, value in self.op.beparams.iteritems():
10030 if value == constants.VALUE_AUTO:
10031 self.op.beparams[param] = default_beparams[param]
10032 objects.UpgradeBeParams(self.op.beparams)
10033 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
10034 self.be_full = cluster.SimpleFillBE(self.op.beparams)
10036 # build os parameters
10037 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
10039 # now that hvp/bep are in final format, let's reset to defaults,
10041 if self.op.identify_defaults:
10042 self._RevertToDefaults(cluster)
10044 self.hotplug_info = None
10045 if self.op.hotplug:
10046 self.LogInfo("Enabling hotplug.")
10047 self.hotplug_info = objects.HotplugInfo(disks=0, nics=0,
10048 pci_pool=list(range(16,32)))
10051 for idx, nic in enumerate(self.op.nics):
10052 nic_mode_req = nic.get(constants.INIC_MODE, None)
10053 nic_mode = nic_mode_req
10054 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
10055 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
10057 net = nic.get(constants.INIC_NETWORK, None)
10058 link = nic.get(constants.NIC_LINK, None)
10059 ip = nic.get(constants.INIC_IP, None)
10061 if net is None or net.lower() == constants.VALUE_NONE:
10064 if nic_mode_req is not None or link is not None:
10065 raise errors.OpPrereqError("If network is given, no mode or link"
10066 " is allowed to be passed",
10067 errors.ECODE_INVAL)
10069 # ip validity checks
10070 if ip is None or ip.lower() == constants.VALUE_NONE:
10072 elif ip.lower() == constants.VALUE_AUTO:
10073 if not self.op.name_check:
10074 raise errors.OpPrereqError("IP address set to auto but name checks"
10075 " have been skipped",
10076 errors.ECODE_INVAL)
10077 nic_ip = self.hostname1.ip
10079 # We defer pool operations until later, so that the iallocator has
10080 # filled in the instance's node(s)
10081 if ip.lower() == constants.NIC_IP_POOL:
10083 raise errors.OpPrereqError("if ip=pool, parameter network"
10084 " must be passed too",
10085 errors.ECODE_INVAL)
10087 elif not netutils.IPAddress.IsValid(ip):
10088 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
10089 errors.ECODE_INVAL)
10093 # TODO: check the ip address for uniqueness
10094 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
10095 raise errors.OpPrereqError("Routed nic mode requires an ip address",
10096 errors.ECODE_INVAL)
10098 # MAC address verification
10099 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
10100 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10101 mac = utils.NormalizeAndValidateMac(mac)
10104 self.cfg.ReserveMAC(mac, self.proc.GetECId())
10105 except errors.ReservationError:
10106 raise errors.OpPrereqError("MAC address %s already in use"
10107 " in cluster" % mac,
10108 errors.ECODE_NOTUNIQUE)
10110 # Build nic parameters
10113 nicparams[constants.NIC_MODE] = nic_mode
10115 nicparams[constants.NIC_LINK] = link
10117 check_params = cluster.SimpleFillNIC(nicparams)
10118 objects.NIC.CheckParameterSyntax(check_params)
10119 nic_idx, pci = _GetPCIInfo(self, 'nics')
10120 self.nics.append(objects.NIC(idx=nic_idx, pci=pci,
10121 mac=mac, ip=nic_ip, network=net,
10122 nicparams=check_params))
10124 # disk checks/pre-build
10125 default_vg = self.cfg.GetVGName()
10127 for disk in self.op.disks:
10128 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
10129 if mode not in constants.DISK_ACCESS_SET:
10130 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
10131 mode, errors.ECODE_INVAL)
10132 size = disk.get(constants.IDISK_SIZE, None)
10134 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
10137 except (TypeError, ValueError):
10138 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
10139 errors.ECODE_INVAL)
10141 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
10142 if ext_provider and self.op.disk_template != constants.DT_EXT:
10143 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
10144 " disk template, not %s" %
10145 (constants.IDISK_PROVIDER, constants.DT_EXT,
10146 self.op.disk_template), errors.ECODE_INVAL)
10148 data_vg = disk.get(constants.IDISK_VG, default_vg)
10150 constants.IDISK_SIZE: size,
10151 constants.IDISK_MODE: mode,
10152 constants.IDISK_VG: data_vg,
10155 if constants.IDISK_METAVG in disk:
10156 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
10157 if constants.IDISK_ADOPT in disk:
10158 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
10160 # For extstorage, demand the `provider' option and add any
10161 # additional parameters (ext-params) to the dict
10162 if self.op.disk_template == constants.DT_EXT:
10164 new_disk[constants.IDISK_PROVIDER] = ext_provider
10166 if key not in constants.IDISK_PARAMS:
10167 new_disk[key] = disk[key]
10169 raise errors.OpPrereqError("Missing provider for template '%s'" %
10170 constants.DT_EXT, errors.ECODE_INVAL)
10172 self.disks.append(new_disk)
10174 if self.op.mode == constants.INSTANCE_IMPORT:
10176 for idx in range(len(self.disks)):
10177 option = "disk%d_dump" % idx
10178 if export_info.has_option(constants.INISECT_INS, option):
10179 # FIXME: are the old os-es, disk sizes, etc. useful?
10180 export_name = export_info.get(constants.INISECT_INS, option)
10181 image = utils.PathJoin(self.op.src_path, export_name)
10182 disk_images.append(image)
10184 disk_images.append(False)
10186 self.src_images = disk_images
10188 if self.op.instance_name == self._old_instance_name:
10189 for idx, nic in enumerate(self.nics):
10190 if nic.mac == constants.VALUE_AUTO:
10191 nic_mac_ini = "nic%d_mac" % idx
10192 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
10194 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
10196 # ip ping checks (we use the same ip that was resolved in ExpandNames)
10197 if self.op.ip_check:
10198 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
10199 raise errors.OpPrereqError("IP %s of instance %s already in use" %
10200 (self.check_ip, self.op.instance_name),
10201 errors.ECODE_NOTUNIQUE)
10203 #### mac address generation
10204 # By generating here the mac address both the allocator and the hooks get
10205 # the real final mac address rather than the 'auto' or 'generate' value.
10206 # There is a race condition between the generation and the instance object
10207 # creation, which means that we know the mac is valid now, but we're not
10208 # sure it will be when we actually add the instance. If things go bad
10209 # adding the instance will abort because of a duplicate mac, and the
10210 # creation job will fail.
10211 for nic in self.nics:
10212 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10213 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
10217 if self.op.iallocator is not None:
10218 self._RunAllocator()
10220 # Release all unneeded node locks
10221 _ReleaseLocks(self, locking.LEVEL_NODE,
10222 keep=filter(None, [self.op.pnode, self.op.snode,
10223 self.op.src_node]))
10224 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
10225 keep=filter(None, [self.op.pnode, self.op.snode,
10226 self.op.src_node]))
10228 #### node related checks
10230 # check primary node
10231 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
10232 assert self.pnode is not None, \
10233 "Cannot retrieve locked node %s" % self.op.pnode
10235 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
10236 pnode.name, errors.ECODE_STATE)
10238 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
10239 pnode.name, errors.ECODE_STATE)
10240 if not pnode.vm_capable:
10241 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
10242 " '%s'" % pnode.name, errors.ECODE_STATE)
10244 self.secondaries = []
10246 # Fill in any IPs from IP pools. This must happen here, because we need to
10247 # know the instance's primary node, as specified by the iallocator
10248 for idx, nic in enumerate(self.nics):
10250 if net is not None:
10251 netparams = self.cfg.GetGroupNetParams(net, self.pnode.name)
10252 if netparams is None:
10253 raise errors.OpPrereqError("No netparams found for network"
10254 " %s. Propably not connected to"
10255 " node's %s nodegroup" %
10256 (net, self.pnode.name),
10257 errors.ECODE_INVAL)
10258 self.LogInfo("NIC/%d inherits netparams %s" %
10259 (idx, netparams.values()))
10260 nic.nicparams = dict(netparams)
10261 if nic.ip is not None:
10262 filled_params = cluster.SimpleFillNIC(nic.nicparams)
10263 if nic.ip.lower() == constants.NIC_IP_POOL:
10265 nic.ip = self.cfg.GenerateIp(net, self.proc.GetECId())
10266 except errors.ReservationError:
10267 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
10268 " from the address pool" % idx,
10269 errors.ECODE_STATE)
10270 self.LogInfo("Chose IP %s from network %s", nic.ip, net)
10273 self.cfg.ReserveIp(net, nic.ip, self.proc.GetECId())
10274 except errors.ReservationError:
10275 raise errors.OpPrereqError("IP address %s already in use"
10276 " or does not belong to network %s" %
10278 errors.ECODE_NOTUNIQUE)
10280 # net is None, ip None or given
10281 if self.op.conflicts_check:
10282 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
10285 # mirror node verification
10286 if self.op.disk_template in constants.DTS_INT_MIRROR:
10287 if self.op.snode == pnode.name:
10288 raise errors.OpPrereqError("The secondary node cannot be the"
10289 " primary node", errors.ECODE_INVAL)
10290 _CheckNodeOnline(self, self.op.snode)
10291 _CheckNodeNotDrained(self, self.op.snode)
10292 _CheckNodeVmCapable(self, self.op.snode)
10293 self.secondaries.append(self.op.snode)
10295 snode = self.cfg.GetNodeInfo(self.op.snode)
10296 if pnode.group != snode.group:
10297 self.LogWarning("The primary and secondary nodes are in two"
10298 " different node groups; the disk parameters"
10299 " from the first disk's node group will be"
10302 nodenames = [pnode.name] + self.secondaries
10304 if not self.adopt_disks:
10305 if self.op.disk_template == constants.DT_RBD:
10306 # _CheckRADOSFreeSpace() is just a placeholder.
10307 # Any function that checks prerequisites can be placed here.
10308 # Check if there is enough space on the RADOS cluster.
10309 _CheckRADOSFreeSpace()
10310 elif self.op.disk_template == constants.DT_EXT:
10311 # FIXME: Function that checks prereqs if needed
10314 # Check lv size requirements, if not adopting
10315 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
10316 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
10318 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
10319 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
10320 disk[constants.IDISK_ADOPT])
10321 for disk in self.disks])
10322 if len(all_lvs) != len(self.disks):
10323 raise errors.OpPrereqError("Duplicate volume names given for adoption",
10324 errors.ECODE_INVAL)
10325 for lv_name in all_lvs:
10327 # FIXME: lv_name here is "vg/lv"; we need to ensure that other calls
10328 # to ReserveLV use the same syntax
10329 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
10330 except errors.ReservationError:
10331 raise errors.OpPrereqError("LV named %s used by another instance" %
10332 lv_name, errors.ECODE_NOTUNIQUE)
10334 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
10335 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
10337 node_lvs = self.rpc.call_lv_list([pnode.name],
10338 vg_names.payload.keys())[pnode.name]
10339 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
10340 node_lvs = node_lvs.payload
10342 delta = all_lvs.difference(node_lvs.keys())
10344 raise errors.OpPrereqError("Missing logical volume(s): %s" %
10345 utils.CommaJoin(delta),
10346 errors.ECODE_INVAL)
10347 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
10349 raise errors.OpPrereqError("Online logical volumes found, cannot"
10350 " adopt: %s" % utils.CommaJoin(online_lvs),
10351 errors.ECODE_STATE)
10352 # update the size of disk based on what is found
10353 for dsk in self.disks:
10354 dsk[constants.IDISK_SIZE] = \
10355 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
10356 dsk[constants.IDISK_ADOPT])][0]))
10358 elif self.op.disk_template == constants.DT_BLOCK:
10359 # Normalize and de-duplicate device paths
10360 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
10361 for disk in self.disks])
10362 if len(all_disks) != len(self.disks):
10363 raise errors.OpPrereqError("Duplicate disk names given for adoption",
10364 errors.ECODE_INVAL)
10365 baddisks = [d for d in all_disks
10366 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
10368 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
10369 " cannot be adopted" %
10370 (", ".join(baddisks),
10371 constants.ADOPTABLE_BLOCKDEV_ROOT),
10372 errors.ECODE_INVAL)
10374 node_disks = self.rpc.call_bdev_sizes([pnode.name],
10375 list(all_disks))[pnode.name]
10376 node_disks.Raise("Cannot get block device information from node %s" %
10378 node_disks = node_disks.payload
10379 delta = all_disks.difference(node_disks.keys())
10381 raise errors.OpPrereqError("Missing block device(s): %s" %
10382 utils.CommaJoin(delta),
10383 errors.ECODE_INVAL)
10384 for dsk in self.disks:
10385 dsk[constants.IDISK_SIZE] = \
10386 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
10388 # Verify instance specs
10389 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
10391 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
10392 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
10393 constants.ISPEC_DISK_COUNT: len(self.disks),
10394 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
10395 for disk in self.disks],
10396 constants.ISPEC_NIC_COUNT: len(self.nics),
10397 constants.ISPEC_SPINDLE_USE: spindle_use,
10400 group_info = self.cfg.GetNodeGroup(pnode.group)
10401 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
10402 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
10403 if not self.op.ignore_ipolicy and res:
10404 raise errors.OpPrereqError(("Instance allocation to group %s violates"
10405 " policy: %s") % (pnode.group,
10406 utils.CommaJoin(res)),
10407 errors.ECODE_INVAL)
10409 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
10411 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
10412 # check OS parameters (remotely)
10413 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
10415 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
10417 #TODO: _CheckExtParams (remotely)
10418 # Check parameters for extstorage
10420 # memory check on primary node
10421 #TODO(dynmem): use MINMEM for checking
10423 _CheckNodeFreeMemory(self, self.pnode.name,
10424 "creating instance %s" % self.op.instance_name,
10425 self.be_full[constants.BE_MAXMEM],
10426 self.op.hypervisor)
10428 self.dry_run_result = list(nodenames)
10430 def Exec(self, feedback_fn):
10431 """Create and add the instance to the cluster.
10434 instance = self.op.instance_name
10435 pnode_name = self.pnode.name
10437 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
10438 self.owned_locks(locking.LEVEL_NODE)), \
10439 "Node locks differ from node resource locks"
10441 ht_kind = self.op.hypervisor
10442 if ht_kind in constants.HTS_REQ_PORT:
10443 network_port = self.cfg.AllocatePort()
10445 network_port = None
10447 # This is ugly, but we have a chicken-and-egg problem here:
10448 # We can only take the group disk parameters, as the instance
10449 # has no disks yet (we are generating them right here).
10450 node = self.cfg.GetNodeInfo(pnode_name)
10451 nodegroup = self.cfg.GetNodeGroup(node.group)
10452 disks = _GenerateDiskTemplate(self,
10453 self.op.disk_template,
10454 instance, pnode_name,
10457 self.instance_file_storage_dir,
10458 self.op.file_driver,
10461 self.cfg.GetGroupDiskParams(nodegroup))
10463 iobj = objects.Instance(name=instance, os=self.op.os_type,
10464 primary_node=pnode_name,
10465 nics=self.nics, disks=disks,
10466 disk_template=self.op.disk_template,
10467 admin_state=constants.ADMINST_DOWN,
10468 network_port=network_port,
10469 beparams=self.op.beparams,
10470 hvparams=self.op.hvparams,
10471 hypervisor=self.op.hypervisor,
10472 osparams=self.op.osparams,
10473 hotplug_info=self.hotplug_info,
10477 for tag in self.op.tags:
10480 if self.adopt_disks:
10481 if self.op.disk_template == constants.DT_PLAIN:
10482 # rename LVs to the newly-generated names; we need to construct
10483 # 'fake' LV disks with the old data, plus the new unique_id
10484 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10486 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10487 rename_to.append(t_dsk.logical_id)
10488 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10489 self.cfg.SetDiskID(t_dsk, pnode_name)
10490 result = self.rpc.call_blockdev_rename(pnode_name,
10491 zip(tmp_disks, rename_to))
10492 result.Raise("Failed to rename adoped LVs")
10494 feedback_fn("* creating instance disks...")
10496 _CreateDisks(self, iobj)
10497 except errors.OpExecError:
10498 self.LogWarning("Device creation failed, reverting...")
10500 _RemoveDisks(self, iobj)
10502 self.cfg.ReleaseDRBDMinors(instance)
10505 feedback_fn("adding instance %s to cluster config" % instance)
10507 self.cfg.AddInstance(iobj, self.proc.GetECId())
10509 # Declare that we don't want to remove the instance lock anymore, as we've
10510 # added the instance to the config
10511 del self.remove_locks[locking.LEVEL_INSTANCE]
10513 if self.op.mode == constants.INSTANCE_IMPORT:
10514 # Release unused nodes
10515 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10517 # Release all nodes
10518 _ReleaseLocks(self, locking.LEVEL_NODE)
10521 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10522 feedback_fn("* wiping instance disks...")
10524 _WipeDisks(self, iobj)
10525 except errors.OpExecError, err:
10526 logging.exception("Wiping disks failed")
10527 self.LogWarning("Wiping instance disks failed (%s)", err)
10531 # Something is already wrong with the disks, don't do anything else
10533 elif self.op.wait_for_sync:
10534 disk_abort = not _WaitForSync(self, iobj)
10535 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10536 # make sure the disks are not degraded (still sync-ing is ok)
10537 feedback_fn("* checking mirrors status")
10538 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10543 _RemoveDisks(self, iobj)
10544 self.cfg.RemoveInstance(iobj.name)
10545 # Make sure the instance lock gets removed
10546 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10547 raise errors.OpExecError("There are some degraded disks for"
10550 # Release all node resource locks
10551 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10553 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10554 # we need to set the disks' IDs to the primary node, since the
10555 # preceding code might or might not have done it, depending on
10556 # disk template and other options
10557 for disk in iobj.disks:
10558 self.cfg.SetDiskID(disk, pnode_name)
10559 if self.op.mode == constants.INSTANCE_CREATE:
10560 if not self.op.no_install:
10561 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10562 not self.op.wait_for_sync)
10564 feedback_fn("* pausing disk sync to install instance OS")
10565 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10568 for idx, success in enumerate(result.payload):
10570 logging.warn("pause-sync of instance %s for disk %d failed",
10573 feedback_fn("* running the instance OS create scripts...")
10574 # FIXME: pass debug option from opcode to backend
10576 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10577 self.op.debug_level)
10579 feedback_fn("* resuming disk sync")
10580 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10583 for idx, success in enumerate(result.payload):
10585 logging.warn("resume-sync of instance %s for disk %d failed",
10588 os_add_result.Raise("Could not add os for instance %s"
10589 " on node %s" % (instance, pnode_name))
10592 if self.op.mode == constants.INSTANCE_IMPORT:
10593 feedback_fn("* running the instance OS import scripts...")
10597 for idx, image in enumerate(self.src_images):
10601 # FIXME: pass debug option from opcode to backend
10602 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10603 constants.IEIO_FILE, (image, ),
10604 constants.IEIO_SCRIPT,
10605 (iobj.disks[idx], idx),
10607 transfers.append(dt)
10610 masterd.instance.TransferInstanceData(self, feedback_fn,
10611 self.op.src_node, pnode_name,
10612 self.pnode.secondary_ip,
10614 if not compat.all(import_result):
10615 self.LogWarning("Some disks for instance %s on node %s were not"
10616 " imported successfully" % (instance, pnode_name))
10618 rename_from = self._old_instance_name
10620 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10621 feedback_fn("* preparing remote import...")
10622 # The source cluster will stop the instance before attempting to make
10623 # a connection. In some cases stopping an instance can take a long
10624 # time, hence the shutdown timeout is added to the connection timeout.
10626 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10627 self.op.source_shutdown_timeout)
10628 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10630 assert iobj.primary_node == self.pnode.name
10632 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10633 self.source_x509_ca,
10634 self._cds, timeouts)
10635 if not compat.all(disk_results):
10636 # TODO: Should the instance still be started, even if some disks
10637 # failed to import (valid for local imports, too)?
10638 self.LogWarning("Some disks for instance %s on node %s were not"
10639 " imported successfully" % (instance, pnode_name))
10641 rename_from = self.source_instance_name
10644 # also checked in the prereq part
10645 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10648 # Run rename script on newly imported instance
10649 assert iobj.name == instance
10650 feedback_fn("Running rename script for %s" % instance)
10651 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10653 self.op.debug_level)
10654 if result.fail_msg:
10655 self.LogWarning("Failed to run rename script for %s on node"
10656 " %s: %s" % (instance, pnode_name, result.fail_msg))
10658 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10661 iobj.admin_state = constants.ADMINST_UP
10662 self.cfg.Update(iobj, feedback_fn)
10663 logging.info("Starting instance %s on node %s", instance, pnode_name)
10664 feedback_fn("* starting instance...")
10665 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10667 result.Raise("Could not start instance")
10669 return list(iobj.all_nodes)
10672 def _CheckRADOSFreeSpace():
10673 """Compute disk size requirements inside the RADOS cluster.
10676 # For the RADOS cluster we assume there is always enough space.
10680 class LUInstanceConsole(NoHooksLU):
10681 """Connect to an instance's console.
10683 This is somewhat special in that it returns the command line that
10684 you need to run on the master node in order to connect to the console.
10690 def ExpandNames(self):
10691 self.share_locks = _ShareAll()
10692 self._ExpandAndLockInstance()
10694 def CheckPrereq(self):
10695 """Check prerequisites.
10697 This checks that the instance is in the cluster.
10700 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10701 assert self.instance is not None, \
10702 "Cannot retrieve locked instance %s" % self.op.instance_name
10703 _CheckNodeOnline(self, self.instance.primary_node)
10705 def Exec(self, feedback_fn):
10706 """Connect to the console of an instance
10709 instance = self.instance
10710 node = instance.primary_node
10712 node_insts = self.rpc.call_instance_list([node],
10713 [instance.hypervisor])[node]
10714 node_insts.Raise("Can't get node information from %s" % node)
10716 if instance.name not in node_insts.payload:
10717 if instance.admin_state == constants.ADMINST_UP:
10718 state = constants.INSTST_ERRORDOWN
10719 elif instance.admin_state == constants.ADMINST_DOWN:
10720 state = constants.INSTST_ADMINDOWN
10722 state = constants.INSTST_ADMINOFFLINE
10723 raise errors.OpExecError("Instance %s is not running (state %s)" %
10724 (instance.name, state))
10726 logging.debug("Connecting to console of %s on %s", instance.name, node)
10728 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10731 def _GetInstanceConsole(cluster, instance):
10732 """Returns console information for an instance.
10734 @type cluster: L{objects.Cluster}
10735 @type instance: L{objects.Instance}
10739 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10740 # beparams and hvparams are passed separately, to avoid editing the
10741 # instance and then saving the defaults in the instance itself.
10742 hvparams = cluster.FillHV(instance)
10743 beparams = cluster.FillBE(instance)
10744 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10746 assert console.instance == instance.name
10747 assert console.Validate()
10749 return console.ToDict()
10752 class LUInstanceReplaceDisks(LogicalUnit):
10753 """Replace the disks of an instance.
10756 HPATH = "mirrors-replace"
10757 HTYPE = constants.HTYPE_INSTANCE
10760 def CheckArguments(self):
10761 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10762 self.op.iallocator)
10764 def ExpandNames(self):
10765 self._ExpandAndLockInstance()
10767 assert locking.LEVEL_NODE not in self.needed_locks
10768 assert locking.LEVEL_NODE_RES not in self.needed_locks
10769 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10771 assert self.op.iallocator is None or self.op.remote_node is None, \
10772 "Conflicting options"
10774 if self.op.remote_node is not None:
10775 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10777 # Warning: do not remove the locking of the new secondary here
10778 # unless DRBD8.AddChildren is changed to work in parallel;
10779 # currently it doesn't since parallel invocations of
10780 # FindUnusedMinor will conflict
10781 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10782 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10784 self.needed_locks[locking.LEVEL_NODE] = []
10785 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10787 if self.op.iallocator is not None:
10788 # iallocator will select a new node in the same group
10789 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10791 self.needed_locks[locking.LEVEL_NODE_RES] = []
10793 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10794 self.op.iallocator, self.op.remote_node,
10795 self.op.disks, False, self.op.early_release,
10796 self.op.ignore_ipolicy)
10798 self.tasklets = [self.replacer]
10800 def DeclareLocks(self, level):
10801 if level == locking.LEVEL_NODEGROUP:
10802 assert self.op.remote_node is None
10803 assert self.op.iallocator is not None
10804 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10806 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10807 # Lock all groups used by instance optimistically; this requires going
10808 # via the node before it's locked, requiring verification later on
10809 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10810 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10812 elif level == locking.LEVEL_NODE:
10813 if self.op.iallocator is not None:
10814 assert self.op.remote_node is None
10815 assert not self.needed_locks[locking.LEVEL_NODE]
10817 # Lock member nodes of all locked groups
10818 self.needed_locks[locking.LEVEL_NODE] = [node_name
10819 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10820 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10822 self._LockInstancesNodes()
10823 elif level == locking.LEVEL_NODE_RES:
10825 self.needed_locks[locking.LEVEL_NODE_RES] = \
10826 self.needed_locks[locking.LEVEL_NODE]
10828 def BuildHooksEnv(self):
10829 """Build hooks env.
10831 This runs on the master, the primary and all the secondaries.
10834 instance = self.replacer.instance
10836 "MODE": self.op.mode,
10837 "NEW_SECONDARY": self.op.remote_node,
10838 "OLD_SECONDARY": instance.secondary_nodes[0],
10840 env.update(_BuildInstanceHookEnvByObject(self, instance))
10843 def BuildHooksNodes(self):
10844 """Build hooks nodes.
10847 instance = self.replacer.instance
10849 self.cfg.GetMasterNode(),
10850 instance.primary_node,
10852 if self.op.remote_node is not None:
10853 nl.append(self.op.remote_node)
10856 def CheckPrereq(self):
10857 """Check prerequisites.
10860 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10861 self.op.iallocator is None)
10863 # Verify if node group locks are still correct
10864 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10866 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10868 return LogicalUnit.CheckPrereq(self)
10871 class TLReplaceDisks(Tasklet):
10872 """Replaces disks for an instance.
10874 Note: Locking is not within the scope of this class.
10877 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10878 disks, delay_iallocator, early_release, ignore_ipolicy):
10879 """Initializes this class.
10882 Tasklet.__init__(self, lu)
10885 self.instance_name = instance_name
10887 self.iallocator_name = iallocator_name
10888 self.remote_node = remote_node
10890 self.delay_iallocator = delay_iallocator
10891 self.early_release = early_release
10892 self.ignore_ipolicy = ignore_ipolicy
10895 self.instance = None
10896 self.new_node = None
10897 self.target_node = None
10898 self.other_node = None
10899 self.remote_node_info = None
10900 self.node_secondary_ip = None
10903 def CheckArguments(mode, remote_node, iallocator):
10904 """Helper function for users of this class.
10907 # check for valid parameter combination
10908 if mode == constants.REPLACE_DISK_CHG:
10909 if remote_node is None and iallocator is None:
10910 raise errors.OpPrereqError("When changing the secondary either an"
10911 " iallocator script must be used or the"
10912 " new node given", errors.ECODE_INVAL)
10914 if remote_node is not None and iallocator is not None:
10915 raise errors.OpPrereqError("Give either the iallocator or the new"
10916 " secondary, not both", errors.ECODE_INVAL)
10918 elif remote_node is not None or iallocator is not None:
10919 # Not replacing the secondary
10920 raise errors.OpPrereqError("The iallocator and new node options can"
10921 " only be used when changing the"
10922 " secondary node", errors.ECODE_INVAL)
10925 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10926 """Compute a new secondary node using an IAllocator.
10929 ial = IAllocator(lu.cfg, lu.rpc,
10930 mode=constants.IALLOCATOR_MODE_RELOC,
10931 name=instance_name,
10932 relocate_from=list(relocate_from))
10934 ial.Run(iallocator_name)
10936 if not ial.success:
10937 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10938 " %s" % (iallocator_name, ial.info),
10939 errors.ECODE_NORES)
10941 if len(ial.result) != ial.required_nodes:
10942 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10943 " of nodes (%s), required %s" %
10945 len(ial.result), ial.required_nodes),
10946 errors.ECODE_FAULT)
10948 remote_node_name = ial.result[0]
10950 lu.LogInfo("Selected new secondary for instance '%s': %s",
10951 instance_name, remote_node_name)
10953 return remote_node_name
10955 def _FindFaultyDisks(self, node_name):
10956 """Wrapper for L{_FindFaultyInstanceDisks}.
10959 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10962 def _CheckDisksActivated(self, instance):
10963 """Checks if the instance disks are activated.
10965 @param instance: The instance to check disks
10966 @return: True if they are activated, False otherwise
10969 nodes = instance.all_nodes
10971 for idx, dev in enumerate(instance.disks):
10973 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10974 self.cfg.SetDiskID(dev, node)
10976 result = _BlockdevFind(self, node, dev, instance)
10980 elif result.fail_msg or not result.payload:
10985 def CheckPrereq(self):
10986 """Check prerequisites.
10988 This checks that the instance is in the cluster.
10991 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10992 assert instance is not None, \
10993 "Cannot retrieve locked instance %s" % self.instance_name
10995 if instance.disk_template != constants.DT_DRBD8:
10996 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10997 " instances", errors.ECODE_INVAL)
10999 if len(instance.secondary_nodes) != 1:
11000 raise errors.OpPrereqError("The instance has a strange layout,"
11001 " expected one secondary but found %d" %
11002 len(instance.secondary_nodes),
11003 errors.ECODE_FAULT)
11005 if not self.delay_iallocator:
11006 self._CheckPrereq2()
11008 def _CheckPrereq2(self):
11009 """Check prerequisites, second part.
11011 This function should always be part of CheckPrereq. It was separated and is
11012 now called from Exec because during node evacuation iallocator was only
11013 called with an unmodified cluster model, not taking planned changes into account.
11017 instance = self.instance
11018 secondary_node = instance.secondary_nodes[0]
11020 if self.iallocator_name is None:
11021 remote_node = self.remote_node
11023 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
11024 instance.name, instance.secondary_nodes)
11026 if remote_node is None:
11027 self.remote_node_info = None
11029 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
11030 "Remote node '%s' is not locked" % remote_node
11032 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
11033 assert self.remote_node_info is not None, \
11034 "Cannot retrieve locked node %s" % remote_node
11036 if remote_node == self.instance.primary_node:
11037 raise errors.OpPrereqError("The specified node is the primary node of"
11038 " the instance", errors.ECODE_INVAL)
11040 if remote_node == secondary_node:
11041 raise errors.OpPrereqError("The specified node is already the"
11042 " secondary node of the instance",
11043 errors.ECODE_INVAL)
11045 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
11046 constants.REPLACE_DISK_CHG):
11047 raise errors.OpPrereqError("Cannot specify disks to be replaced",
11048 errors.ECODE_INVAL)
11050 if self.mode == constants.REPLACE_DISK_AUTO:
11051 if not self._CheckDisksActivated(instance):
11052 raise errors.OpPrereqError("Please run activate-disks on instance %s"
11053 " first" % self.instance_name,
11054 errors.ECODE_STATE)
11055 faulty_primary = self._FindFaultyDisks(instance.primary_node)
11056 faulty_secondary = self._FindFaultyDisks(secondary_node)
11058 if faulty_primary and faulty_secondary:
11059 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
11060 " one node and can not be repaired"
11061 " automatically" % self.instance_name,
11062 errors.ECODE_STATE)
11064 if faulty_primary:
11065 self.disks = faulty_primary
11066 self.target_node = instance.primary_node
11067 self.other_node = secondary_node
11068 check_nodes = [self.target_node, self.other_node]
11069 elif faulty_secondary:
11070 self.disks = faulty_secondary
11071 self.target_node = secondary_node
11072 self.other_node = instance.primary_node
11073 check_nodes = [self.target_node, self.other_node]
11074 else:
11075 self.disks = []
11076 check_nodes = []
11078 else:
11079 # Non-automatic modes
11080 if self.mode == constants.REPLACE_DISK_PRI:
11081 self.target_node = instance.primary_node
11082 self.other_node = secondary_node
11083 check_nodes = [self.target_node, self.other_node]
11085 elif self.mode == constants.REPLACE_DISK_SEC:
11086 self.target_node = secondary_node
11087 self.other_node = instance.primary_node
11088 check_nodes = [self.target_node, self.other_node]
11090 elif self.mode == constants.REPLACE_DISK_CHG:
11091 self.new_node = remote_node
11092 self.other_node = instance.primary_node
11093 self.target_node = secondary_node
11094 check_nodes = [self.new_node, self.other_node]
11096 _CheckNodeNotDrained(self.lu, remote_node)
11097 _CheckNodeVmCapable(self.lu, remote_node)
11099 old_node_info = self.cfg.GetNodeInfo(secondary_node)
11100 assert old_node_info is not None
11101 if old_node_info.offline and not self.early_release:
11102 # doesn't make sense to delay the release
11103 self.early_release = True
11104 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
11105 " early-release mode", secondary_node)
11107 else:
11108 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
11109 self.mode)
11111 # If not specified all disks should be replaced
11112 if not self.disks:
11113 self.disks = range(len(self.instance.disks))
11115 # TODO: This is ugly, but right now we can't distinguish between internal
11116 # submitted opcode and external one. We should fix that.
11117 if self.remote_node_info:
11118 # We change the node, let's verify it still meets instance policy
11119 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
11120 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
11121 new_group_info)
11122 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
11123 ignore=self.ignore_ipolicy)
11125 for node in check_nodes:
11126 _CheckNodeOnline(self.lu, node)
11128 touched_nodes = frozenset(node_name for node_name in [self.new_node,
11129 self.other_node,
11130 self.target_node]
11131 if node_name is not None)
11133 # Release unneeded node and node resource locks
11134 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
11135 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
11137 # Release any owned node group
11138 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
11139 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
11141 # Check whether disks are valid
11142 for disk_idx in self.disks:
11143 instance.FindDisk(disk_idx)
11145 # Get secondary node IP addresses
11146 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
11147 in self.cfg.GetMultiNodeInfo(touched_nodes))
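# Editorial summary (added comment, not in the original source): after
# _CheckPrereq2 the node roles used by Exec are, informally:
#
#   REPLACE_DISK_PRI  -> target_node = primary,   other_node = secondary
#   REPLACE_DISK_SEC  -> target_node = secondary, other_node = primary
#   REPLACE_DISK_CHG  -> target_node = secondary, other_node = primary,
#                        new_node    = node chosen above (iallocator/remote)
#   REPLACE_DISK_AUTO -> target_node/other_node derived from whichever side
#                        reported faulty disks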
11149 def Exec(self, feedback_fn):
11150 """Execute disk replacement.
11152 This dispatches the disk replacement to the appropriate handler.
11155 if self.delay_iallocator:
11156 self._CheckPrereq2()
11159 # Verify owned locks before starting operation
11160 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11161 assert set(owned_nodes) == set(self.node_secondary_ip), \
11162 ("Incorrect node locks, owning %s, expected %s" %
11163 (owned_nodes, self.node_secondary_ip.keys()))
11164 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11165 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11167 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11168 assert list(owned_instances) == [self.instance_name], \
11169 "Instance '%s' not locked" % self.instance_name
11171 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11172 "Should not own any node group lock at this point"
11175 feedback_fn("No disks need replacement for instance '%s'" %
11176 self.instance.name)
11179 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11180 (utils.CommaJoin(self.disks), self.instance.name))
11181 feedback_fn("Current primary node: %s", self.instance.primary_node)
11182 feedback_fn("Current seconary node: %s",
11183 utils.CommaJoin(self.instance.secondary_nodes))
11185 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11187 # Activate the instance disks if we're replacing them on a down instance
11188 if activate_disks:
11189 _StartInstanceDisks(self.lu, self.instance, True)
11191 try:
11192 # Should we replace the secondary node?
11193 if self.new_node is not None:
11194 fn = self._ExecDrbd8Secondary
11195 else:
11196 fn = self._ExecDrbd8DiskOnly
11198 result = fn(feedback_fn)
11199 finally:
11200 # Deactivate the instance disks if we're replacing them on a
11201 # down instance
11202 if activate_disks:
11203 _SafeShutdownInstanceDisks(self.lu, self.instance)
11205 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11208 # Verify owned locks
11209 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11210 nodes = frozenset(self.node_secondary_ip)
11211 assert ((self.early_release and not owned_nodes) or
11212 (not self.early_release and not (set(owned_nodes) - nodes))), \
11213 ("Not owning the correct locks, early_release=%s, owned=%r,"
11214 " nodes=%r" % (self.early_release, owned_nodes, nodes))
11218 def _CheckVolumeGroup(self, nodes):
11219 self.lu.LogInfo("Checking volume groups")
11221 vgname = self.cfg.GetVGName()
11223 # Make sure volume group exists on all involved nodes
11224 results = self.rpc.call_vg_list(nodes)
11226 raise errors.OpExecError("Can't list volume groups on the nodes")
11229 res = results[node]
11230 res.Raise("Error checking node %s" % node)
11231 if vgname not in res.payload:
11232 raise errors.OpExecError("Volume group '%s' not found on node %s" %
11235 def _CheckDisksExistence(self, nodes):
11236 # Check disk existence
11237 for idx, dev in enumerate(self.instance.disks):
11238 if idx not in self.disks:
11239 continue
11241 for node in nodes:
11242 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
11243 self.cfg.SetDiskID(dev, node)
11245 result = _BlockdevFind(self, node, dev, self.instance)
11247 msg = result.fail_msg
11248 if msg or not result.payload:
11249 if not msg:
11250 msg = "disk not found"
11251 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
11252 (idx, node, msg))
11254 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
11255 for idx, dev in enumerate(self.instance.disks):
11256 if idx not in self.disks:
11257 continue
11259 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
11260 (idx, node_name))
11262 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
11263 on_primary, ldisk=ldisk):
11264 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
11265 " replace disks for instance %s" %
11266 (node_name, self.instance.name))
11268 def _CreateNewStorage(self, node_name):
11269 """Create new storage on the primary or secondary node.
11271 This is only used for same-node replaces, not for changing the
11272 secondary node, hence we don't want to modify the existing disk.
11274 """
11275 iv_names = {}
11277 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11278 for idx, dev in enumerate(disks):
11279 if idx not in self.disks:
11280 continue
11282 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
11284 self.cfg.SetDiskID(dev, node_name)
11286 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
11287 names = _GenerateUniqueNames(self.lu, lv_names)
11289 (data_disk, meta_disk) = dev.children
11290 vg_data = data_disk.logical_id[0]
11291 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
11292 logical_id=(vg_data, names[0]),
11293 params=data_disk.params)
11294 vg_meta = meta_disk.logical_id[0]
11295 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
11296 logical_id=(vg_meta, names[1]),
11297 params=meta_disk.params)
11299 new_lvs = [lv_data, lv_meta]
11300 old_lvs = [child.Copy() for child in dev.children]
11301 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
11303 # we pass force_create=True to force the LVM creation
11304 for new_lv in new_lvs:
11305 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
11306 _GetInstanceInfoText(self.instance), False)
11308 return iv_names
11310 def _CheckDevices(self, node_name, iv_names):
11311 for name, (dev, _, _) in iv_names.iteritems():
11312 self.cfg.SetDiskID(dev, node_name)
11314 result = _BlockdevFind(self, node_name, dev, self.instance)
11316 msg = result.fail_msg
11317 if msg or not result.payload:
11318 if not msg:
11319 msg = "disk not found"
11320 raise errors.OpExecError("Can't find DRBD device %s: %s" %
11321 (name, msg))
11323 if result.payload.is_degraded:
11324 raise errors.OpExecError("DRBD device %s is degraded!" % name)
11326 def _RemoveOldStorage(self, node_name, iv_names):
11327 for name, (_, old_lvs, _) in iv_names.iteritems():
11328 self.lu.LogInfo("Remove logical volumes for %s" % name)
11330 for lv in old_lvs:
11331 self.cfg.SetDiskID(lv, node_name)
11333 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
11334 if msg:
11335 self.lu.LogWarning("Can't remove old LV: %s" % msg,
11336 hint="remove unused LVs manually")
11338 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
11339 """Replace a disk on the primary or secondary for DRBD 8.
11341 The algorithm for replace is quite complicated:
11343 1. for each disk to be replaced:
11345 1. create new LVs on the target node with unique names
11346 1. detach old LVs from the drbd device
11347 1. rename old LVs to name_replaced.<time_t>
11348 1. rename new LVs to old LVs
11349 1. attach the new LVs (with the old names now) to the drbd device
11351 1. wait for sync across all devices
11353 1. for each modified disk:
11355 1. remove old LVs (which have the name name_replaced.<time_t>)
11357 Failures are not very well handled.
11359 """
11360 steps_total = 6
11362 # Step: check device activation
11363 self.lu.LogStep(1, steps_total, "Check device existence")
11364 self._CheckDisksExistence([self.other_node, self.target_node])
11365 self._CheckVolumeGroup([self.target_node, self.other_node])
11367 # Step: check other node consistency
11368 self.lu.LogStep(2, steps_total, "Check peer consistency")
11369 self._CheckDisksConsistency(self.other_node,
11370 self.other_node == self.instance.primary_node,
11371 False)
11373 # Step: create new storage
11374 self.lu.LogStep(3, steps_total, "Allocate new storage")
11375 iv_names = self._CreateNewStorage(self.target_node)
11377 # Step: for each lv, detach+rename*2+attach
11378 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11379 for dev, old_lvs, new_lvs in iv_names.itervalues():
11380 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
11382 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
11383 old_lvs)
11384 result.Raise("Can't detach drbd from local storage on node"
11385 " %s for device %s" % (self.target_node, dev.iv_name))
11387 #cfg.Update(instance)
11389 # ok, we created the new LVs, so now we know we have the needed
11390 # storage; as such, we proceed on the target node to rename
11391 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
11392 # using the assumption that logical_id == physical_id (which in
11393 # turn is the unique_id on that node)
11395 # FIXME(iustin): use a better name for the replaced LVs
11396 temp_suffix = int(time.time())
11397 ren_fn = lambda d, suff: (d.physical_id[0],
11398 d.physical_id[1] + "_replaced-%s" % suff)
11400 # Build the rename list based on what LVs exist on the node
11401 rename_old_to_new = []
11402 for to_ren in old_lvs:
11403 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
11404 if not result.fail_msg and result.payload:
11406 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
11408 self.lu.LogInfo("Renaming the old LVs on the target node")
11409 result = self.rpc.call_blockdev_rename(self.target_node,
11410 rename_old_to_new)
11411 result.Raise("Can't rename old LVs on node %s" % self.target_node)
11413 # Now we rename the new LVs to the old LVs
11414 self.lu.LogInfo("Renaming the new LVs on the target node")
11415 rename_new_to_old = [(new, old.physical_id)
11416 for old, new in zip(old_lvs, new_lvs)]
11417 result = self.rpc.call_blockdev_rename(self.target_node,
11418 rename_new_to_old)
11419 result.Raise("Can't rename new LVs on node %s" % self.target_node)
11421 # Intermediate steps of in memory modifications
11422 for old, new in zip(old_lvs, new_lvs):
11423 new.logical_id = old.logical_id
11424 self.cfg.SetDiskID(new, self.target_node)
11426 # We need to modify old_lvs so that removal later removes the
11427 # right LVs, not the newly added ones; note that old_lvs is a
11428 # copy here
11429 for disk in old_lvs:
11430 disk.logical_id = ren_fn(disk, temp_suffix)
11431 self.cfg.SetDiskID(disk, self.target_node)
11433 # Now that the new lvs have the old name, we can add them to the device
11434 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
11435 result = self.rpc.call_blockdev_addchildren(self.target_node,
11436 (dev, self.instance), new_lvs)
11437 msg = result.fail_msg
11438 if msg:
11439 for new_lv in new_lvs:
11440 msg2 = self.rpc.call_blockdev_remove(self.target_node,
11441 new_lv).fail_msg
11442 if msg2:
11443 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
11444 hint=("cleanup manually the unused logical"
11445 " volumes"))
11446 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
11448 cstep = itertools.count(5)
11450 if self.early_release:
11451 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11452 self._RemoveOldStorage(self.target_node, iv_names)
11453 # TODO: Check if releasing locks early still makes sense
11454 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11455 else:
11456 # Release all resource locks except those used by the instance
11457 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11458 keep=self.node_secondary_ip.keys())
11460 # Release all node locks while waiting for sync
11461 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11463 # TODO: Can the instance lock be downgraded here? Take the optional disk
11464 # shutdown in the caller into consideration.
11467 # This can fail as the old devices are degraded and _WaitForSync
11468 # does a combined result over all disks, so we don't check its return value
11469 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11470 _WaitForSync(self.lu, self.instance)
11472 # Check all devices manually
11473 self._CheckDevices(self.instance.primary_node, iv_names)
11475 # Step: remove old storage
11476 if not self.early_release:
11477 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11478 self._RemoveOldStorage(self.target_node, iv_names)
11480 def _ExecDrbd8Secondary(self, feedback_fn):
11481 """Replace the secondary node for DRBD 8.
11483 The algorithm for replace is quite complicated:
11484 - for all disks of the instance:
11485 - create new LVs on the new node with same names
11486 - shutdown the drbd device on the old secondary
11487 - disconnect the drbd network on the primary
11488 - create the drbd device on the new secondary
11489 - network attach the drbd on the primary, using an artifice:
11490 the drbd code for Attach() will connect to the network if it
11491 finds a device which is connected to the good local disks but
11492 not network enabled
11493 - wait for sync across all devices
11494 - remove all disks from the old secondary
11496 Failures are not very well handled.
11498 """
11499 steps_total = 6
11501 pnode = self.instance.primary_node
11503 # Step: check device activation
11504 self.lu.LogStep(1, steps_total, "Check device existence")
11505 self._CheckDisksExistence([self.instance.primary_node])
11506 self._CheckVolumeGroup([self.instance.primary_node])
11508 # Step: check other node consistency
11509 self.lu.LogStep(2, steps_total, "Check peer consistency")
11510 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11512 # Step: create new storage
11513 self.lu.LogStep(3, steps_total, "Allocate new storage")
11514 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
11515 for idx, dev in enumerate(disks):
11516 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11517 (self.new_node, idx))
11518 # we pass force_create=True to force LVM creation
11519 for new_lv in dev.children:
11520 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
11521 True, _GetInstanceInfoText(self.instance), False)
11523 # Step 4: drbd minors and drbd setups changes
11524 # after this, we must manually remove the drbd minors on both the
11525 # error and the success paths
11526 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11527 minors = self.cfg.AllocateDRBDMinor([self.new_node
11528 for dev in self.instance.disks],
11529 self.instance.name)
11530 logging.debug("Allocated minors %r", minors)
11532 iv_names = {}
11533 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11534 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11535 (self.new_node, idx))
11536 # create new devices on new_node; note that we create two IDs:
11537 # one without port, so the drbd will be activated without
11538 # networking information on the new node at this stage, and one
11539 # with network, for the latter activation in step 4
11540 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11541 if self.instance.primary_node == o_node1:
11542 p_minor = o_minor1
11543 else:
11544 assert self.instance.primary_node == o_node2, "Three-node instance?"
11545 p_minor = o_minor2
11547 new_alone_id = (self.instance.primary_node, self.new_node, None,
11548 p_minor, new_minor, o_secret)
11549 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11550 p_minor, new_minor, o_secret)
11552 iv_names[idx] = (dev, dev.children, new_net_id)
11553 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11555 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11556 logical_id=new_alone_id,
11557 children=dev.children,
11560 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
11563 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
11565 _GetInstanceInfoText(self.instance), False)
11566 except errors.GenericError:
11567 self.cfg.ReleaseDRBDMinors(self.instance.name)
11570 # We have new devices, shutdown the drbd on the old secondary
11571 for idx, dev in enumerate(self.instance.disks):
11572 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11573 self.cfg.SetDiskID(dev, self.target_node)
11574 msg = self.rpc.call_blockdev_shutdown(self.target_node,
11575 (dev, self.instance)).fail_msg
11577 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11578 "node: %s" % (idx, msg),
11579 hint=("Please cleanup this device manually as"
11580 " soon as possible"))
11582 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11583 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11584 self.instance.disks)[pnode]
11586 msg = result.fail_msg
11587 if msg:
11588 # detaches didn't succeed (unlikely)
11589 self.cfg.ReleaseDRBDMinors(self.instance.name)
11590 raise errors.OpExecError("Can't detach the disks from the network on"
11591 " old node: %s" % (msg,))
11593 # if we managed to detach at least one, we update all the disks of
11594 # the instance to point to the new secondary
11595 self.lu.LogInfo("Updating instance configuration")
11596 for dev, _, new_logical_id in iv_names.itervalues():
11597 dev.logical_id = new_logical_id
11598 self.cfg.SetDiskID(dev, self.instance.primary_node)
11600 self.cfg.Update(self.instance, feedback_fn)
11602 # Release all node locks (the configuration has been updated)
11603 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11605 # and now perform the drbd attach
11606 self.lu.LogInfo("Attaching primary drbds to new secondary"
11607 " (standalone => connected)")
11608 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11609 self.new_node],
11610 self.node_secondary_ip,
11611 (self.instance.disks, self.instance),
11612 self.instance.name,
11613 False)
11614 for to_node, to_result in result.items():
11615 msg = to_result.fail_msg
11616 if msg:
11617 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11618 to_node, msg,
11619 hint=("please do a gnt-instance info to see the"
11620 " status of disks"))
11622 cstep = itertools.count(5)
11624 if self.early_release:
11625 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11626 self._RemoveOldStorage(self.target_node, iv_names)
11627 # TODO: Check if releasing locks early still makes sense
11628 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11629 else:
11630 # Release all resource locks except those used by the instance
11631 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11632 keep=self.node_secondary_ip.keys())
11634 # TODO: Can the instance lock be downgraded here? Take the optional disk
11635 # shutdown in the caller into consideration.
11638 # This can fail as the old devices are degraded and _WaitForSync
11639 # does a combined result over all disks, so we don't check its return value
11640 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11641 _WaitForSync(self.lu, self.instance)
11643 # Check all devices manually
11644 self._CheckDevices(self.instance.primary_node, iv_names)
11646 # Step: remove old storage
11647 if not self.early_release:
11648 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11649 self._RemoveOldStorage(self.target_node, iv_names)
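# Editorial note (added comment): the DRBD8 logical_id unpacked in
# _ExecDrbd8Secondary above is the 6-tuple
#
#   (node_A, node_B, port, minor_A, minor_B, shared_secret)
#
# With a hypothetical primary "node1", old secondary "node2" and new
# secondary "node3", new_alone_id drops the port so the device comes up
# without networking, while new_net_id keeps it for the later attach:
#
#   new_alone_id = ("node1", "node3", None, p_minor, new_minor, secret)
#   new_net_id   = ("node1", "node3", port, p_minor, new_minor, secret)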
11652 class LURepairNodeStorage(NoHooksLU):
11653 """Repairs the volume group on a node.
11658 def CheckArguments(self):
11659 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11661 storage_type = self.op.storage_type
11663 if (constants.SO_FIX_CONSISTENCY not in
11664 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11665 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11666 " repaired" % storage_type,
11667 errors.ECODE_INVAL)
11669 def ExpandNames(self):
11670 self.needed_locks = {
11671 locking.LEVEL_NODE: [self.op.node_name],
11672 }
11674 def _CheckFaultyDisks(self, instance, node_name):
11675 """Ensure faulty disks abort the opcode or at least warn."""
11676 try:
11677 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11678 node_name, True):
11679 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11680 " node '%s'" % (instance.name, node_name),
11681 errors.ECODE_STATE)
11682 except errors.OpPrereqError, err:
11683 if self.op.ignore_consistency:
11684 self.proc.LogWarning(str(err.args[0]))
11685 else:
11686 raise
11688 def CheckPrereq(self):
11689 """Check prerequisites.
11692 # Check whether any instance on this node has faulty disks
11693 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11694 if inst.admin_state != constants.ADMINST_UP:
11695 continue
11696 check_nodes = set(inst.all_nodes)
11697 check_nodes.discard(self.op.node_name)
11698 for inst_node_name in check_nodes:
11699 self._CheckFaultyDisks(inst, inst_node_name)
11701 def Exec(self, feedback_fn):
11702 feedback_fn("Repairing storage unit '%s' on %s ..." %
11703 (self.op.name, self.op.node_name))
11705 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11706 result = self.rpc.call_storage_execute(self.op.node_name,
11707 self.op.storage_type, st_args,
11708 self.op.name,
11709 constants.SO_FIX_CONSISTENCY)
11710 result.Raise("Failed to repair storage unit '%s' on %s" %
11711 (self.op.name, self.op.node_name))
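# Editorial example (not part of the original module): assuming the opcode
# exposes the fields used by LURepairNodeStorage above, a repair request for
# an LVM volume group would be submitted roughly as:
#
#   op = opcodes.OpRepairNodeStorage(node_name="node1.example.com",
#                                    storage_type=constants.ST_LVM_VG,
#                                    name="xenvg",
#                                    ignore_consistency=False)
#
# The node name and volume group name are hypothetical placeholders.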
11714 class LUNodeEvacuate(NoHooksLU):
11715 """Evacuates instances off a list of nodes.
11720 _MODE2IALLOCATOR = {
11721 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11722 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11723 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11724 }
11725 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11726 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11727 constants.IALLOCATOR_NEVAC_MODES)
11729 def CheckArguments(self):
11730 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11732 def ExpandNames(self):
11733 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11735 if self.op.remote_node is not None:
11736 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11737 assert self.op.remote_node
11739 if self.op.remote_node == self.op.node_name:
11740 raise errors.OpPrereqError("Can not use evacuated node as a new"
11741 " secondary node", errors.ECODE_INVAL)
11743 if self.op.mode != constants.NODE_EVAC_SEC:
11744 raise errors.OpPrereqError("Without the use of an iallocator only"
11745 " secondary instances can be evacuated",
11746 errors.ECODE_INVAL)
11749 self.share_locks = _ShareAll()
11750 self.needed_locks = {
11751 locking.LEVEL_INSTANCE: [],
11752 locking.LEVEL_NODEGROUP: [],
11753 locking.LEVEL_NODE: [],
11754 }
11756 # Determine nodes (via group) optimistically, needs verification once locks
11757 # have been acquired
11758 self.lock_nodes = self._DetermineNodes()
11760 def _DetermineNodes(self):
11761 """Gets the list of nodes to operate on.
11764 if self.op.remote_node is None:
11765 # Iallocator will choose any node(s) in the same group
11766 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11767 else:
11768 group_nodes = frozenset([self.op.remote_node])
11770 # Determine nodes to be locked
11771 return set([self.op.node_name]) | group_nodes
11773 def _DetermineInstances(self):
11774 """Builds list of instances to operate on.
11777 assert self.op.mode in constants.NODE_EVAC_MODES
11779 if self.op.mode == constants.NODE_EVAC_PRI:
11780 # Primary instances only
11781 inst_fn = _GetNodePrimaryInstances
11782 assert self.op.remote_node is None, \
11783 "Evacuating primary instances requires iallocator"
11784 elif self.op.mode == constants.NODE_EVAC_SEC:
11785 # Secondary instances only
11786 inst_fn = _GetNodeSecondaryInstances
11788 else:
11789 assert self.op.mode == constants.NODE_EVAC_ALL
11790 inst_fn = _GetNodeInstances
11791 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11793 raise errors.OpPrereqError("Due to an issue with the iallocator"
11794 " interface it is not possible to evacuate"
11795 " all instances at once; specify explicitly"
11796 " whether to evacuate primary or secondary"
11797 " instances",
11798 errors.ECODE_INVAL)
11800 return inst_fn(self.cfg, self.op.node_name)
11802 def DeclareLocks(self, level):
11803 if level == locking.LEVEL_INSTANCE:
11804 # Lock instances optimistically, needs verification once node and group
11805 # locks have been acquired
11806 self.needed_locks[locking.LEVEL_INSTANCE] = \
11807 set(i.name for i in self._DetermineInstances())
11809 elif level == locking.LEVEL_NODEGROUP:
11810 # Lock node groups for all potential target nodes optimistically, needs
11811 # verification once nodes have been acquired
11812 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11813 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11815 elif level == locking.LEVEL_NODE:
11816 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11818 def CheckPrereq(self):
11820 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11821 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11822 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11824 need_nodes = self._DetermineNodes()
11826 if not owned_nodes.issuperset(need_nodes):
11827 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11828 " locks were acquired, current nodes are"
11829 " are '%s', used to be '%s'; retry the"
11831 (self.op.node_name,
11832 utils.CommaJoin(need_nodes),
11833 utils.CommaJoin(owned_nodes)),
11834 errors.ECODE_STATE)
11836 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11837 if owned_groups != wanted_groups:
11838 raise errors.OpExecError("Node groups changed since locks were acquired,"
11839 " current groups are '%s', used to be '%s';"
11840 " retry the operation" %
11841 (utils.CommaJoin(wanted_groups),
11842 utils.CommaJoin(owned_groups)))
11844 # Determine affected instances
11845 self.instances = self._DetermineInstances()
11846 self.instance_names = [i.name for i in self.instances]
11848 if set(self.instance_names) != owned_instances:
11849 raise errors.OpExecError("Instances on node '%s' changed since locks"
11850 " were acquired, current instances are '%s',"
11851 " used to be '%s'; retry the operation" %
11852 (self.op.node_name,
11853 utils.CommaJoin(self.instance_names),
11854 utils.CommaJoin(owned_instances)))
11856 if self.instance_names:
11857 self.LogInfo("Evacuating instances from node '%s': %s",
11858 self.op.node_name,
11859 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11860 else:
11861 self.LogInfo("No instances to evacuate from node '%s'",
11862 self.op.node_name)
11864 if self.op.remote_node is not None:
11865 for i in self.instances:
11866 if i.primary_node == self.op.remote_node:
11867 raise errors.OpPrereqError("Node %s is the primary node of"
11868 " instance %s, cannot use it as"
11869 " secondary" %
11870 (self.op.remote_node, i.name),
11871 errors.ECODE_INVAL)
11873 def Exec(self, feedback_fn):
11874 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11876 if not self.instance_names:
11877 # No instances to evacuate
11878 jobs = []
11880 elif self.op.iallocator is not None:
11881 # TODO: Implement relocation to other group
11882 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11883 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11884 instances=list(self.instance_names))
11886 ial.Run(self.op.iallocator)
11888 if not ial.success:
11889 raise errors.OpPrereqError("Can't compute node evacuation using"
11890 " iallocator '%s': %s" %
11891 (self.op.iallocator, ial.info),
11892 errors.ECODE_NORES)
11894 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11896 elif self.op.remote_node is not None:
11897 assert self.op.mode == constants.NODE_EVAC_SEC
11898 jobs = [
11899 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11900 remote_node=self.op.remote_node,
11901 disks=[],
11902 mode=constants.REPLACE_DISK_CHG,
11903 early_release=self.op.early_release)]
11904 for instance_name in self.instance_names
11908 raise errors.ProgrammerError("No iallocator or remote node")
11910 return ResultWithJobs(jobs)
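# Editorial example (added comment): evacuating all DRBD secondaries off a
# node onto iallocator-chosen replacements would be requested roughly as
# (field names taken from the LU above, node name hypothetical):
#
#   op = opcodes.OpNodeEvacuate(node_name="node3.example.com",
#                               mode=constants.NODE_EVAC_SEC,
#                               iallocator="hail",
#                               early_release=False)
#
# Exec then returns a ResultWithJobs wrapping the per-instance jobs built
# above.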
11913 def _SetOpEarlyRelease(early_release, op):
11914 """Sets C{early_release} flag on opcodes if available.
11916 """
11917 try:
11918 op.early_release = early_release
11919 except AttributeError:
11920 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11922 return op
11925 def _NodeEvacDest(use_nodes, group, nodes):
11926 """Returns group or nodes depending on caller's choice.
11928 """
11929 if use_nodes:
11930 return utils.CommaJoin(nodes)
11931 else:
11932 return group
11935 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11936 """Unpacks the result of change-group and node-evacuate iallocator requests.
11938 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11939 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11941 @type lu: L{LogicalUnit}
11942 @param lu: Logical unit instance
11943 @type alloc_result: tuple/list
11944 @param alloc_result: Result from iallocator
11945 @type early_release: bool
11946 @param early_release: Whether to release locks early if possible
11947 @type use_nodes: bool
11948 @param use_nodes: Whether to display node names instead of groups
11951 (moved, failed, jobs) = alloc_result
11953 if failed:
11954 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11955 for (name, reason) in failed)
11956 lu.LogWarning("Unable to evacuate instances %s", failreason)
11957 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11960 lu.LogInfo("Instances to be moved: %s",
11961 utils.CommaJoin("%s (to %s)" %
11962 (name, _NodeEvacDest(use_nodes, group, nodes))
11963 for (name, group, nodes) in moved))
11965 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11966 map(opcodes.OpCode.LoadOpCode, ops))
11967 for ops in jobs]
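# Editorial note (added comment): the iallocator result unpacked by
# _LoadNodeEvacResult is assumed to have the shape
#
#   (moved, failed, jobs) =
#     ([("inst1", "group1", ["node2"])],            # instances to move
#      [("inst2", "some reason")],                  # (name, reason) pairs
#      [[<serialized opcode>, ...], ...])           # one list per job
#
# Each inner list of serialized opcodes is loaded with OpCode.LoadOpCode,
# gets the early_release flag applied and becomes one submitted job; the
# instance, group and node names above are hypothetical.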
11970 class LUInstanceGrowDisk(LogicalUnit):
11971 """Grow a disk of an instance.
11974 HPATH = "disk-grow"
11975 HTYPE = constants.HTYPE_INSTANCE
11978 def ExpandNames(self):
11979 self._ExpandAndLockInstance()
11980 self.needed_locks[locking.LEVEL_NODE] = []
11981 self.needed_locks[locking.LEVEL_NODE_RES] = []
11982 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11983 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11985 def DeclareLocks(self, level):
11986 if level == locking.LEVEL_NODE:
11987 self._LockInstancesNodes()
11988 elif level == locking.LEVEL_NODE_RES:
11990 self.needed_locks[locking.LEVEL_NODE_RES] = \
11991 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
11993 def BuildHooksEnv(self):
11994 """Build hooks env.
11996 This runs on the master, the primary and all the secondaries.
12000 "DISK": self.op.disk,
12001 "AMOUNT": self.op.amount,
12002 "ABSOLUTE": self.op.absolute,
12004 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12007 def BuildHooksNodes(self):
12008 """Build hooks nodes.
12011 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12012 return (nl, nl)
12014 def CheckPrereq(self):
12015 """Check prerequisites.
12017 This checks that the instance is in the cluster.
12020 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12021 assert instance is not None, \
12022 "Cannot retrieve locked instance %s" % self.op.instance_name
12023 nodenames = list(instance.all_nodes)
12024 for node in nodenames:
12025 _CheckNodeOnline(self, node)
12027 self.instance = instance
12029 if instance.disk_template not in constants.DTS_GROWABLE:
12030 raise errors.OpPrereqError("Instance's disk layout does not support"
12031 " growing", errors.ECODE_INVAL)
12033 self.disk = instance.FindDisk(self.op.disk)
12035 if self.op.absolute:
12036 self.target = self.op.amount
12037 self.delta = self.target - self.disk.size
12039 raise errors.OpPrereqError("Requested size (%s) is smaller than "
12040 "current disk size (%s)" %
12041 (utils.FormatUnit(self.target, "h"),
12042 utils.FormatUnit(self.disk.size, "h")),
12043 errors.ECODE_STATE)
12044 else:
12045 self.delta = self.op.amount
12046 self.target = self.disk.size + self.delta
12047 if self.delta < 0:
12048 raise errors.OpPrereqError("Requested increment (%s) is negative" %
12049 utils.FormatUnit(self.delta, "h"),
12050 errors.ECODE_INVAL)
12052 if instance.disk_template not in (constants.DT_FILE,
12053 constants.DT_SHARED_FILE,
12054 constants.DT_EXT):
12056 # TODO: check the free disk space for file, when that feature will be
12057 # supported
12058 _CheckNodesFreeDiskPerVG(self, nodenames,
12059 self.disk.ComputeGrowth(self.delta))
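# Editorial worked example (added comment): for a 10 GiB disk
# (disk.size == 10240 MiB), a relative request with amount=2048 yields
# delta=2048 and target=12288, while an absolute request with amount=20480
# yields target=20480 and delta=10240; a negative delta is rejected above in
# both cases.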
12061 def Exec(self, feedback_fn):
12062 """Execute disk grow.
12065 instance = self.instance
12066 disk = self.disk
12068 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12069 assert (self.owned_locks(locking.LEVEL_NODE) ==
12070 self.owned_locks(locking.LEVEL_NODE_RES))
12072 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
12074 raise errors.OpExecError("Cannot activate block device to grow")
12076 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
12077 (self.op.disk, instance.name,
12078 utils.FormatUnit(self.delta, "h"),
12079 utils.FormatUnit(self.target, "h")))
12081 # First run all grow ops in dry-run mode
12082 for node in instance.all_nodes:
12083 self.cfg.SetDiskID(disk, node)
12084 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12085 True)
12086 result.Raise("Grow request failed to node %s" % node)
12088 # We know that (as far as we can test) operations across different
12089 # nodes will succeed, time to run it for real
12090 for node in instance.all_nodes:
12091 self.cfg.SetDiskID(disk, node)
12092 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
12093 False)
12094 result.Raise("Grow request failed to node %s" % node)
12096 # TODO: Rewrite code to work properly
12097 # DRBD goes into sync mode for a short amount of time after executing the
12098 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
12099 # calling "resize" in sync mode fails. Sleeping for a short amount of
12100 # time is a work-around.
12101 time.sleep(5)
12103 disk.RecordGrow(self.delta)
12104 self.cfg.Update(instance, feedback_fn)
12106 # Changes have been recorded, release node lock
12107 _ReleaseLocks(self, locking.LEVEL_NODE)
12109 # Downgrade lock while waiting for sync
12110 self.glm.downgrade(locking.LEVEL_INSTANCE)
12112 if self.op.wait_for_sync:
12113 disk_abort = not _WaitForSync(self, instance, disks=[disk])
12115 self.proc.LogWarning("Disk sync-ing has not returned a good"
12116 " status; please check the instance")
12117 if instance.admin_state != constants.ADMINST_UP:
12118 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
12119 elif instance.admin_state != constants.ADMINST_UP:
12120 self.proc.LogWarning("Not shutting down the disk even if the instance is"
12121 " not supposed to be running because no wait for"
12122 " sync mode was requested")
12124 assert self.owned_locks(locking.LEVEL_NODE_RES)
12125 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12128 class LUInstanceQueryData(NoHooksLU):
12129 """Query runtime instance data.
12134 def ExpandNames(self):
12135 self.needed_locks = {}
12137 # Use locking if requested or when non-static information is wanted
12138 if not (self.op.static or self.op.use_locking):
12139 self.LogWarning("Non-static data requested, locks need to be acquired")
12140 self.op.use_locking = True
12142 if self.op.instances or not self.op.use_locking:
12143 # Expand instance names right here
12144 self.wanted_names = _GetWantedInstances(self, self.op.instances)
12145 else:
12146 # Will use acquired locks
12147 self.wanted_names = None
12149 if self.op.use_locking:
12150 self.share_locks = _ShareAll()
12152 if self.wanted_names is None:
12153 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
12154 else:
12155 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
12157 self.needed_locks[locking.LEVEL_NODEGROUP] = []
12158 self.needed_locks[locking.LEVEL_NODE] = []
12159 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12161 def DeclareLocks(self, level):
12162 if self.op.use_locking:
12163 if level == locking.LEVEL_NODEGROUP:
12164 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
12166 # Lock all groups used by instances optimistically; this requires going
12167 # via the node before it's locked, requiring verification later on
12168 self.needed_locks[locking.LEVEL_NODEGROUP] = \
12169 frozenset(group_uuid
12170 for instance_name in owned_instances
12171 for group_uuid in
12172 self.cfg.GetInstanceNodeGroups(instance_name))
12174 elif level == locking.LEVEL_NODE:
12175 self._LockInstancesNodes()
12177 def CheckPrereq(self):
12178 """Check prerequisites.
12180 This only checks the optional instance list against the existing names.
12183 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12184 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12185 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12187 if self.wanted_names is None:
12188 assert self.op.use_locking, "Locking was not used"
12189 self.wanted_names = owned_instances
12191 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
12193 if self.op.use_locking:
12194 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
12195 None)
12196 else:
12197 assert not (owned_instances or owned_groups or owned_nodes)
12199 self.wanted_instances = instances.values()
12201 def _ComputeBlockdevStatus(self, node, instance, dev):
12202 """Returns the status of a block device
12205 if self.op.static or not node:
12206 return None
12208 self.cfg.SetDiskID(dev, node)
12210 result = self.rpc.call_blockdev_find(node, dev)
12214 result.Raise("Can't compute disk status for %s" % instance.name)
12216 status = result.payload
12220 return (status.dev_path, status.major, status.minor,
12221 status.sync_percent, status.estimated_time,
12222 status.is_degraded, status.ldisk_status)
12224 def _ComputeDiskStatus(self, instance, snode, dev):
12225 """Compute block device status.
12228 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
12230 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
12232 def _ComputeDiskStatusInner(self, instance, snode, dev):
12233 """Compute block device status.
12235 @attention: The device has to be annotated already.
12238 if dev.dev_type in constants.LDS_DRBD:
12239 # we change the snode then (otherwise we use the one passed in)
12240 if dev.logical_id[0] == instance.primary_node:
12241 snode = dev.logical_id[1]
12242 else:
12243 snode = dev.logical_id[0]
12245 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
12246 instance, dev)
12247 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
12249 if dev.children:
12250 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
12251 instance, snode),
12252 dev.children)
12253 else:
12254 dev_children = []
12256 return {
12257 "iv_name": dev.iv_name,
12258 "dev_type": dev.dev_type,
12259 "logical_id": dev.logical_id,
12260 "physical_id": dev.physical_id,
12261 "pstatus": dev_pstatus,
12262 "sstatus": dev_sstatus,
12263 "children": dev_children,
12268 def Exec(self, feedback_fn):
12269 """Gather and return data"""
12272 cluster = self.cfg.GetClusterInfo()
12274 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12275 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12277 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12278 for node in nodes.values()))
12280 group2name_fn = lambda uuid: groups[uuid].name
12282 for instance in self.wanted_instances:
12283 pnode = nodes[instance.primary_node]
12285 if self.op.static or pnode.offline:
12286 remote_state = None
12288 self.LogWarning("Primary node %s is marked offline, returning static"
12289 " information only for instance %s" %
12290 (pnode.name, instance.name))
12291 else:
12292 remote_info = self.rpc.call_instance_info(instance.primary_node,
12293 instance.name,
12294 instance.hypervisor)
12295 remote_info.Raise("Error checking node %s" % instance.primary_node)
12296 remote_info = remote_info.payload
12297 if remote_info and "state" in remote_info:
12298 remote_state = "up"
12299 else:
12300 if instance.admin_state == constants.ADMINST_UP:
12301 remote_state = "down"
12302 else:
12303 remote_state = instance.admin_state
12305 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12306 instance.disks)
12308 snodes_group_uuids = [nodes[snode_name].group
12309 for snode_name in instance.secondary_nodes]
12311 result[instance.name] = {
12312 "name": instance.name,
12313 "config_state": instance.admin_state,
12314 "run_state": remote_state,
12315 "pnode": instance.primary_node,
12316 "pnode_group_uuid": pnode.group,
12317 "pnode_group_name": group2name_fn(pnode.group),
12318 "snodes": instance.secondary_nodes,
12319 "snodes_group_uuids": snodes_group_uuids,
12320 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12322 # this happens to be the same format used for hooks
12323 "nics": _NICListToTuple(self, instance.nics),
12324 "disk_template": instance.disk_template,
12326 "hypervisor": instance.hypervisor,
12327 "network_port": instance.network_port,
12328 "hv_instance": instance.hvparams,
12329 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12330 "be_instance": instance.beparams,
12331 "be_actual": cluster.FillBE(instance),
12332 "os_instance": instance.osparams,
12333 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12334 "serial_no": instance.serial_no,
12335 "mtime": instance.mtime,
12336 "ctime": instance.ctime,
12337 "uuid": instance.uuid,
12343 def PrepareContainerMods(mods, private_fn):
12344 """Prepares a list of container modifications by adding a private data field.
12346 @type mods: list of tuples; (operation, index, parameters)
12347 @param mods: List of modifications
12348 @type private_fn: callable or None
12349 @param private_fn: Callable for constructing a private data field for a
12354 if private_fn is None:
12355 fn = lambda: None
12356 else:
12357 fn = private_fn
12359 return [(op, idx, params, fn()) for (op, idx, params) in mods]
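# Editorial example (not part of the original module): PrepareContainerMods
# only tags each modification with a per-item private object, e.g.
#
#   mods = [(constants.DDM_ADD, -1, {"size": 1024}),
#           (constants.DDM_REMOVE, 0, {})]
#   PrepareContainerMods(mods, None)
#   --> [(constants.DDM_ADD, -1, {"size": 1024}, None),
#        (constants.DDM_REMOVE, 0, {}, None)]
#
# With a private_fn such as _InstNicModPrivate, the fourth element is a fresh
# instance of that class for every modification.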
12362 #: Type description for changes as returned by L{ApplyContainerMods}'s
12363 #: callbacks
12364 _TApplyContModsCbChanges = \
12365 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12366 ht.TNonEmptyString,
12367 ht.TAny,
12368 ])))
12371 def ApplyContainerMods(kind, container, chgdesc, mods,
12372 create_fn, modify_fn, remove_fn):
12373 """Applies descriptions in C{mods} to C{container}.
12376 @param kind: One-word item description
12377 @type container: list
12378 @param container: Container to modify
12379 @type chgdesc: None or list
12380 @param chgdesc: List of applied changes
12382 @param mods: Modifications as returned by L{PrepareContainerMods}
12383 @type create_fn: callable
12384 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12385 receives absolute item index, parameters and private data object as added
12386 by L{PrepareContainerMods}, returns tuple containing new item and changes
12388 @type modify_fn: callable
12389 @param modify_fn: Callback for modifying an existing item
12390 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12391 and private data object as added by L{PrepareContainerMods}, returns
12393 @type remove_fn: callable
12394 @param remove_fn: Callback on removing item; receives absolute item index,
12395 item and private data object as added by L{PrepareContainerMods}
12398 for (op, idx, params, private) in mods:
12399 if idx == -1:
12400 # Append
12401 absidx = len(container) - 1
12402 elif idx < 0:
12403 raise IndexError("Not accepting negative indices other than -1")
12404 elif idx > len(container):
12405 raise IndexError("Got %s index %s, but there are only %s" %
12406 (kind, idx, len(container)))
12407 else:
12408 absidx = idx
12410 changes = None
12412 if op == constants.DDM_ADD:
12413 # Calculate where item will be added
12414 if idx == -1:
12415 addidx = len(container)
12416 else:
12417 addidx = idx
12419 if create_fn is None:
12420 item = params
12421 else:
12422 (item, changes) = create_fn(addidx, params, private)
12424 if idx == -1:
12425 container.append(item)
12426 else:
12428 assert idx <= len(container)
12429 # list.insert does so before the specified index
12430 container.insert(idx, item)
12431 else:
12432 # Retrieve existing item
12433 try:
12434 item = container[absidx]
12435 except IndexError:
12436 raise IndexError("Invalid %s index %s" % (kind, idx))
12438 if op == constants.DDM_REMOVE:
12439 assert not params
12441 if remove_fn is not None:
12442 remove_fn(absidx, item, private)
12444 #TODO: include a hotplugged msg in changes
12445 changes = [("%s/%s" % (kind, absidx), "remove")]
12447 assert container[absidx] == item
12448 del container[absidx]
12449 elif op == constants.DDM_MODIFY:
12450 if modify_fn is not None:
12451 #TODO: include a hotplugged msg in changes
12452 changes = modify_fn(absidx, item, params, private)
12454 else:
12455 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12457 assert _TApplyContModsCbChanges(changes)
12459 if not (chgdesc is None or changes is None):
12460 chgdesc.extend(changes)
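# Editorial example (added comment, simplified callbacks): applied to a plain
# list, prepared modifications drive the container roughly like this:
#
#   container = ["a", "b"]
#   chgdesc = []
#   mods = PrepareContainerMods([(constants.DDM_ADD, -1, "c"),
#                                (constants.DDM_REMOVE, 0, {})], None)
#   ApplyContainerMods("item", container, chgdesc, mods,
#                      lambda idx, params, private: (params, []),
#                      None, None)
#   # container == ["b", "c"], chgdesc records the removal
#
# Real callers (e.g. LUInstanceSetParams) pass create/modify/remove callbacks
# that build objects.Disk or objects.NIC items instead of plain strings.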
12463 def _UpdateIvNames(base_index, disks):
12464 """Updates the C{iv_name} attribute of disks.
12466 @type disks: list of L{objects.Disk}
12469 for (idx, disk) in enumerate(disks):
12470 disk.iv_name = "disk/%s" % (base_index + idx, )
12473 class _InstNicModPrivate:
12474 """Data structure for network interface modifications.
12476 Used by L{LUInstanceSetParams}.
12479 def __init__(self):
12480 self.params = None
12481 self.filled = None
12484 class LUInstanceSetParams(LogicalUnit):
12485 """Modifies an instances's parameters.
12488 HPATH = "instance-modify"
12489 HTYPE = constants.HTYPE_INSTANCE
12492 @staticmethod
12493 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12494 assert ht.TList(mods)
12495 assert not mods or len(mods[0]) in (2, 3)
12497 if mods and len(mods[0]) == 2:
12498 result = []
12499 addremove = 0
12501 for op, params in mods:
12502 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
12503 result.append((op, -1, params))
12507 raise errors.OpPrereqError("Only one %s add or remove operation is"
12508 " supported at a time" % kind,
12509 errors.ECODE_INVAL)
12510 else:
12511 result.append((constants.DDM_MODIFY, op, params))
12513 assert verify_fn(result)
12514 else:
12515 result = mods
12517 return result
12519 @staticmethod
12520 def _CheckMods(kind, mods, key_types, item_fn):
12521 """Ensures requested disk/NIC modifications are valid.
12524 for (op, _, params) in mods:
12525 assert ht.TDict(params)
12527 # If key_types is an empty dict, we assume we have an 'ext' template
12528 # and thus do not ForceDictType
12529 if key_types:
12530 utils.ForceDictType(params, key_types)
12532 if op == constants.DDM_REMOVE:
12534 raise errors.OpPrereqError("No settings should be passed when"
12535 " removing a %s" % kind,
12536 errors.ECODE_INVAL)
12537 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12538 item_fn(op, params)
12540 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
12542 @staticmethod
12543 def _VerifyDiskModification(op, params):
12544 """Verifies a disk modification.
12547 if op == constants.DDM_ADD:
12548 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12549 if mode not in constants.DISK_ACCESS_SET:
12550 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12551 errors.ECODE_INVAL)
12553 size = params.get(constants.IDISK_SIZE, None)
12555 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12556 constants.IDISK_SIZE, errors.ECODE_INVAL)
12558 try:
12559 size = int(size)
12560 except (TypeError, ValueError), err:
12561 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12562 errors.ECODE_INVAL)
12564 params[constants.IDISK_SIZE] = size
12566 elif op == constants.DDM_MODIFY:
12567 if constants.IDISK_SIZE in params:
12568 raise errors.OpPrereqError("Disk size change not possible, use"
12569 " grow-disk", errors.ECODE_INVAL)
12570 if constants.IDISK_MODE not in params:
12571 raise errors.OpPrereqError("Disk 'mode' is the only kind of"
12572 " modification supported, but missing",
12573 errors.ECODE_NOENT)
12574 if len(params) > 1:
12575 raise errors.OpPrereqError("Disk modification doesn't support"
12576 " additional arbitrary parameters",
12577 errors.ECODE_INVAL)
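# Editorial example (added comment): a disk modification list in the internal
# (op, index, params) form produced by _UpgradeDiskNicMods could look like:
#
#   disks = [(constants.DDM_ADD, -1,
#             {constants.IDISK_SIZE: 2048,
#              constants.IDISK_MODE: constants.DISK_RDWR}),
#            (constants.DDM_MODIFY, 0,
#             {constants.IDISK_MODE: constants.DISK_RDONLY})]
#
# _VerifyDiskModification above accepts both entries, while e.g. passing
# IDISK_SIZE to a modify operation is rejected with "use grow-disk".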
12579 @staticmethod
12580 def _VerifyNicModification(op, params):
12581 """Verifies a network interface modification.
12584 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12585 ip = params.get(constants.INIC_IP, None)
12586 req_net = params.get(constants.INIC_NETWORK, None)
12587 link = params.get(constants.NIC_LINK, None)
12588 mode = params.get(constants.NIC_MODE, None)
12589 if req_net is not None:
12590 if req_net.lower() == constants.VALUE_NONE:
12591 params[constants.INIC_NETWORK] = None
12593 elif link is not None or mode is not None:
12594 raise errors.OpPrereqError("If network is given"
12595 " mode or link should not",
12596 errors.ECODE_INVAL)
12598 if op == constants.DDM_ADD:
12599 macaddr = params.get(constants.INIC_MAC, None)
12600 if macaddr is None:
12601 params[constants.INIC_MAC] = constants.VALUE_AUTO
12603 if ip is not None:
12604 if ip.lower() == constants.VALUE_NONE:
12605 params[constants.INIC_IP] = None
12606 else:
12607 if ip.lower() == constants.NIC_IP_POOL:
12608 if op == constants.DDM_ADD and req_net is None:
12609 raise errors.OpPrereqError("If ip=pool, parameter network"
12611 errors.ECODE_INVAL)
12612 else:
12613 if not netutils.IPAddress.IsValid(ip):
12614 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12615 errors.ECODE_INVAL)
12617 if constants.INIC_MAC in params:
12618 macaddr = params[constants.INIC_MAC]
12619 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12620 macaddr = utils.NormalizeAndValidateMac(macaddr)
12622 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12623 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12624 " modifying an existing NIC",
12625 errors.ECODE_INVAL)
12627 def CheckArguments(self):
12628 if not (self.op.nics or self.op.disks or self.op.disk_template or
12629 self.op.hvparams or self.op.beparams or self.op.os_name or
12630 self.op.offline is not None or self.op.runtime_mem):
12631 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12633 if self.op.hvparams:
12634 _CheckGlobalHvParams(self.op.hvparams)
12636 if self.op.allow_arbit_params:
12638 self._UpgradeDiskNicMods("disk", self.op.disks,
12639 opcodes.OpInstanceSetParams.TestExtDiskModifications)
12642 self._UpgradeDiskNicMods("disk", self.op.disks,
12643 opcodes.OpInstanceSetParams.TestDiskModifications)
12646 self._UpgradeDiskNicMods("NIC", self.op.nics,
12647 opcodes.OpInstanceSetParams.TestNicModifications)
12649 # Check disk modifications
12650 if self.op.allow_arbit_params:
12651 self._CheckMods("disk", self.op.disks, {},
12652 self._VerifyDiskModification)
12654 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12655 self._VerifyDiskModification)
12657 if self.op.disks and self.op.disk_template is not None:
12658 raise errors.OpPrereqError("Disk template conversion and other disk"
12659 " changes not supported at the same time",
12660 errors.ECODE_INVAL)
12662 if (self.op.disk_template and
12663 self.op.disk_template in constants.DTS_INT_MIRROR and
12664 self.op.remote_node is None):
12665 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12666 " one requires specifying a secondary node",
12667 errors.ECODE_INVAL)
12669 # Check NIC modifications
12670 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12671 self._VerifyNicModification)
12673 def ExpandNames(self):
12674 self._ExpandAndLockInstance()
12675 # Can't even acquire node locks in shared mode as upcoming changes in
12676 # Ganeti 2.6 will start to modify the node object on disk conversion
12677 self.needed_locks[locking.LEVEL_NODE] = []
12678 self.needed_locks[locking.LEVEL_NODE_RES] = []
12679 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12681 def DeclareLocks(self, level):
12682 # TODO: Acquire group lock in shared mode (disk parameters)
12683 if level == locking.LEVEL_NODE:
12684 self._LockInstancesNodes()
12685 if self.op.disk_template and self.op.remote_node:
12686 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12687 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12688 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12690 self.needed_locks[locking.LEVEL_NODE_RES] = \
12691 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
12693 def BuildHooksEnv(self):
12694 """Build hooks env.
12696 This runs on the master, primary and secondaries.
12698 """
12699 args = {}
12700 if constants.BE_MINMEM in self.be_new:
12701 args["minmem"] = self.be_new[constants.BE_MINMEM]
12702 if constants.BE_MAXMEM in self.be_new:
12703 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12704 if constants.BE_VCPUS in self.be_new:
12705 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12706 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12707 # information at all.
12709 if self._new_nics is not None:
12710 nics = []
12712 for nic in self._new_nics:
12713 n = copy.deepcopy(nic)
12714 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
12715 n.nicparams = nicparams
12716 nics.append(_NICToTuple(self, n))
12718 args["nics"] = nics
12720 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12721 if self.op.disk_template:
12722 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12723 if self.op.runtime_mem:
12724 env["RUNTIME_MEMORY"] = self.op.runtime_mem
12728 def BuildHooksNodes(self):
12729 """Build hooks nodes.
12732 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12733 return (nl, nl)
12735 def _PrepareNicModification(self, params, private, old_ip, old_net,
12736 old_params, cluster, pnode):
12738 update_params_dict = dict([(key, params[key])
12739 for key in constants.NICS_PARAMETERS
12740 if key in params])
12742 req_link = update_params_dict.get(constants.NIC_LINK, None)
12743 req_mode = update_params_dict.get(constants.NIC_MODE, None)
12745 new_net = params.get(constants.INIC_NETWORK, old_net)
12746 if new_net is not None:
12747 netparams = self.cfg.GetGroupNetParams(new_net, pnode)
12748 if netparams is None:
12749 raise errors.OpPrereqError("No netparams found for the network"
12750 " %s, propably not connected." % new_net,
12751 errors.ECODE_INVAL)
12752 new_params = dict(netparams)
12753 else:
12754 new_params = _GetUpdatedParams(old_params, update_params_dict)
12756 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12758 new_filled_params = cluster.SimpleFillNIC(new_params)
12759 objects.NIC.CheckParameterSyntax(new_filled_params)
12761 new_mode = new_filled_params[constants.NIC_MODE]
12762 if new_mode == constants.NIC_MODE_BRIDGED:
12763 bridge = new_filled_params[constants.NIC_LINK]
12764 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12766 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12768 self.warn.append(msg)
12770 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12772 elif new_mode == constants.NIC_MODE_ROUTED:
12773 ip = params.get(constants.INIC_IP, old_ip)
12775 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12776 " on a routed NIC", errors.ECODE_INVAL)
12778 if constants.INIC_MAC in params:
12779 mac = params[constants.INIC_MAC]
12781 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12782 errors.ECODE_INVAL)
12783 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12784 # otherwise generate the MAC address
12785 params[constants.INIC_MAC] = \
12786 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12787 else:
12788 # or validate/reserve the current one
12789 try:
12790 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12791 except errors.ReservationError:
12792 raise errors.OpPrereqError("MAC address '%s' already in use"
12793 " in cluster" % mac,
12794 errors.ECODE_NOTUNIQUE)
12795 elif new_net != old_net:
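# The NIC moves to a different network without an explicit MAC request: a new
# MAC is generated only if the two networks use different MAC prefixes,
# otherwise the existing address remains valid.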
12796 def get_net_prefix(net):
12798 uuid = self.cfg.LookupNetwork(net)
12800 nobj = self.cfg.GetNetwork(uuid)
12801 return nobj.mac_prefix
12803 new_prefix = get_net_prefix(new_net)
12804 old_prefix = get_net_prefix(old_net)
12805 if old_prefix != new_prefix:
12806 params[constants.INIC_MAC] = \
12807 self.cfg.GenerateMAC(new_net, self.proc.GetECId())
12809 # if there is a change in the NIC's network configuration
12810 new_ip = params.get(constants.INIC_IP, old_ip)
12811 if (new_ip, new_net) != (old_ip, old_net):
12814 if new_ip.lower() == constants.NIC_IP_POOL:
12816 new_ip = self.cfg.GenerateIp(new_net, self.proc.GetECId())
12817 except errors.ReservationError:
12818 raise errors.OpPrereqError("Unable to get a free IP"
12819 " from the address pool",
12820 errors.ECODE_STATE)
12821 self.LogInfo("Chose IP %s from pool %s", new_ip, new_net)
12822 params[constants.INIC_IP] = new_ip
12823 elif new_ip != old_ip or new_net != old_net:
12825 self.LogInfo("Reserving IP %s in pool %s", new_ip, new_net)
12826 self.cfg.ReserveIp(new_net, new_ip, self.proc.GetECId())
12827 except errors.ReservationError:
12828 raise errors.OpPrereqError("IP %s not available in network %s" %
12830 errors.ECODE_NOTUNIQUE)
12831 elif new_ip.lower() == constants.NIC_IP_POOL:
12832 raise errors.OpPrereqError("ip=pool, but no network found",
12836 if self.op.conflicts_check:
12837 _CheckForConflictingIp(self, new_ip, pnode)
12842 self.cfg.ReleaseIp(old_net, old_ip, self.proc.GetECId())
12843 except errors.AddressPoolError:
12844 logging.warning("Could not release IP %s: not contained in network %s",
12847 # there are no changes in (net, ip) tuple
12848 elif (old_net is not None and
12849 (req_link is not None or req_mode is not None)):
12850 raise errors.OpPrereqError("Not allowed to change link or mode of"
12851 " a NIC that is connected to a network.",
12852 errors.ECODE_INVAL)
12854 logging.info("new_params %s", new_params)
12855 logging.info("new_filled_params %s", new_filled_params)
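# Stash the bare and the cluster-filled parameter dicts on the private
# container; the apply-phase helpers (_CreateNewNic, _ApplyNicMods) use
# private.filled later when the modifications are actually committed.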
12856 private.params = new_params
12857 private.filled = new_filled_params
12859 def CheckPrereq(self):
12860 """Check prerequisites.
12862 This checks the requested modifications against the current instance and cluster configuration.
12865 # checking the new params on the primary/secondary nodes
12867 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12868 cluster = self.cluster = self.cfg.GetClusterInfo()
12869 assert self.instance is not None, \
12870 "Cannot retrieve locked instance %s" % self.op.instance_name
12871 pnode = instance.primary_node
12872 nodelist = list(instance.all_nodes)
12873 pnode_info = self.cfg.GetNodeInfo(pnode)
12874 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12876 # Prepare disk/NIC modifications
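# Each prepared modification is an (operation, index, parameters, private)
# tuple as built by PrepareContainerMods; disk modifications carry no private
# data (None), NIC modifications get an _InstNicModPrivate container.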
12877 self.diskmod = PrepareContainerMods(self.op.disks, None)
12878 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12879 logging.info("nicmod %s", self.nicmod)
12881 # Check the validity of the `provider' parameter
12882 if instance.disk_template == constants.DT_EXT:
12883 for mod in self.diskmod:
12884 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
12885 if mod[0] == constants.DDM_ADD:
12886 if ext_provider is None:
12887 raise errors.OpPrereqError("Instance template is '%s' and parameter"
12888 " '%s' missing, during disk add" %
12890 constants.IDISK_PROVIDER),
12891 errors.ECODE_NOENT)
12892 elif mod[0] == constants.DDM_MODIFY:
12894 raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
12895 " modification" % constants.IDISK_PROVIDER,
12896 errors.ECODE_INVAL)
12898 for mod in self.diskmod:
12899 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
12900 if ext_provider is not None:
12901 raise errors.OpPrereqError("Parameter '%s' is only valid for instances"
12902 " of type '%s'" % (constants.IDISK_PROVIDER,
12903 constants.DT_EXT), errors.ECODE_INVAL)
12906 if self.op.os_name and not self.op.force:
12907 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12908 self.op.force_variant)
12909 instance_os = self.op.os_name
12911 instance_os = instance.os
12913 assert not (self.op.disk_template and self.op.disks), \
12914 "Can't modify disk template and apply disk changes at the same time"
12916 if self.op.disk_template:
12917 if instance.disk_template == self.op.disk_template:
12918 raise errors.OpPrereqError("Instance already has disk template %s" %
12919 instance.disk_template, errors.ECODE_INVAL)
12921 if (instance.disk_template,
12922 self.op.disk_template) not in self._DISK_CONVERSIONS:
12923 raise errors.OpPrereqError("Unsupported disk template conversion from"
12924 " %s to %s" % (instance.disk_template,
12925 self.op.disk_template),
12926 errors.ECODE_INVAL)
12927 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12928 msg="cannot change disk template")
12929 if self.op.disk_template in constants.DTS_INT_MIRROR:
12930 if self.op.remote_node == pnode:
12931 raise errors.OpPrereqError("Given new secondary node %s is the same"
12932 " as the primary node of the instance" %
12933 self.op.remote_node, errors.ECODE_STATE)
12934 _CheckNodeOnline(self, self.op.remote_node)
12935 _CheckNodeNotDrained(self, self.op.remote_node)
12936 # FIXME: here we assume that the old instance type is DT_PLAIN
12937 assert instance.disk_template == constants.DT_PLAIN
12938 disks = [{constants.IDISK_SIZE: d.size,
12939 constants.IDISK_VG: d.logical_id[0]}
12940 for d in instance.disks]
12941 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12942 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12944 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12945 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12946 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12947 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12948 ignore=self.op.ignore_ipolicy)
12949 if pnode_info.group != snode_info.group:
12950 self.LogWarning("The primary and secondary nodes are in two"
12951 " different node groups; the disk parameters"
12952 " from the first disk's node group will be"
12955 # hvparams processing
12956 if self.op.hvparams:
12957 hv_type = instance.hypervisor
12958 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12959 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12960 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12963 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12964 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12965 self.hv_proposed = self.hv_new = hv_new # the new actual values
12966 self.hv_inst = i_hvdict # the new dict (without defaults)
12968 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12970 self.hv_new = self.hv_inst = {}
12972 # beparams processing
12973 if self.op.beparams:
12974 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12976 objects.UpgradeBeParams(i_bedict)
12977 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12978 be_new = cluster.SimpleFillBE(i_bedict)
12979 self.be_proposed = self.be_new = be_new # the new actual values
12980 self.be_inst = i_bedict # the new dict (without defaults)
12982 self.be_new = self.be_inst = {}
12983 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12984 be_old = cluster.FillBE(instance)
12986 # CPU param validation -- checking every time a parameter is
12987 # changed to cover all cases where either CPU mask or vcpus have
12989 if (constants.BE_VCPUS in self.be_proposed and
12990 constants.HV_CPU_MASK in self.hv_proposed):
12992 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12993 # Verify mask is consistent with number of vCPUs. Can skip this
12994 # test if only 1 entry in the CPU mask, which means same mask
12995 # is applied to all vCPUs.
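# Illustrative example (assuming the colon-separated per-vCPU mask syntax):
# a mask like "0-1:2-3:4" defines three entries and therefore only matches an
# instance configured with exactly three vCPUs.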
12996 if (len(cpu_list) > 1 and
12997 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12998 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
13000 (self.be_proposed[constants.BE_VCPUS],
13001 self.hv_proposed[constants.HV_CPU_MASK]),
13002 errors.ECODE_INVAL)
13004 # Only perform this test if a new CPU mask is given
13005 if constants.HV_CPU_MASK in self.hv_new:
13006 # Calculate the largest CPU number requested
13007 max_requested_cpu = max(map(max, cpu_list))
13008 # Check that all of the instance's nodes have enough physical CPUs to
13009 # satisfy the requested CPU mask
13010 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
13011 max_requested_cpu + 1, instance.hypervisor)
13013 # osparams processing
13014 if self.op.osparams:
13015 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
13016 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
13017 self.os_inst = i_osdict # the new dict (without defaults)
13023 #TODO(dynmem): do the appropriate check involving MINMEM
13024 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
13025 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
13026 mem_check_list = [pnode]
13027 if be_new[constants.BE_AUTO_BALANCE]:
13028 # either we changed auto_balance to yes or it was from before
13029 mem_check_list.extend(instance.secondary_nodes)
13030 instance_info = self.rpc.call_instance_info(pnode, instance.name,
13031 instance.hypervisor)
13032 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
13033 [instance.hypervisor])
13034 pninfo = nodeinfo[pnode]
13035 msg = pninfo.fail_msg
13037 # Assume the primary node is unreachable and go ahead
13038 self.warn.append("Can't get info from primary node %s: %s" %
13041 (_, _, (pnhvinfo, )) = pninfo.payload
13042 if not isinstance(pnhvinfo.get("memory_free", None), int):
13043 self.warn.append("Node data from primary node %s doesn't contain"
13044 " free memory information" % pnode)
13045 elif instance_info.fail_msg:
13046 self.warn.append("Can't get instance runtime information: %s" %
13047 instance_info.fail_msg)
13049 if instance_info.payload:
13050 current_mem = int(instance_info.payload["memory"])
13052 # Assume instance not running
13053 # (there is a slight race condition here, but it's not very
13054 # probable, and we have no other way to check)
13055 # TODO: Describe race condition
13057 #TODO(dynmem): do the appropriate check involving MINMEM
13058 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
13059 pnhvinfo["memory_free"])
13061 raise errors.OpPrereqError("This change will prevent the instance"
13062 " from starting, due to %d MB of memory"
13063 " missing on its primary node" %
13065 errors.ECODE_NORES)
13067 if be_new[constants.BE_AUTO_BALANCE]:
13068 for node, nres in nodeinfo.items():
13069 if node not in instance.secondary_nodes:
13071 nres.Raise("Can't get info from secondary node %s" % node,
13072 prereq=True, ecode=errors.ECODE_STATE)
13073 (_, _, (nhvinfo, )) = nres.payload
13074 if not isinstance(nhvinfo.get("memory_free", None), int):
13075 raise errors.OpPrereqError("Secondary node %s didn't return free"
13076 " memory information" % node,
13077 errors.ECODE_STATE)
13078 #TODO(dynmem): do the appropriate check involving MINMEM
13079 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
13080 raise errors.OpPrereqError("This change will prevent the instance"
13081 " from failover to its secondary node"
13082 " %s, due to not enough memory" % node,
13083 errors.ECODE_STATE)
13085 if self.op.runtime_mem:
13086 remote_info = self.rpc.call_instance_info(instance.primary_node,
13088 instance.hypervisor)
13089 remote_info.Raise("Error checking node %s" % instance.primary_node)
13090 if not remote_info.payload: # not running already
13091 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
13092 errors.ECODE_STATE)
13094 current_memory = remote_info.payload["memory"]
13095 if (not self.op.force and
13096 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
13097 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
13098 raise errors.OpPrereqError("Instance %s must have memory between %d"
13099 " and %d MB of memory unless --force is"
13100 " given" % (instance.name,
13101 self.be_proposed[constants.BE_MINMEM],
13102 self.be_proposed[constants.BE_MAXMEM]),
13103 errors.ECODE_INVAL)
13105 delta = self.op.runtime_mem - current_memory
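# Only a positive delta (growing the instance) needs free memory on the
# primary node; ballooning the memory down can always be satisfied.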
13107 _CheckNodeFreeMemory(self, instance.primary_node,
13108 "ballooning memory for instance %s" %
13109 instance.name, delta, instance.hypervisor)
13111 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13112 raise errors.OpPrereqError("Disk operations not supported for"
13113 " diskless instances",
13114 errors.ECODE_INVAL)
13116 def _PrepareNicCreate(_, params, private):
13117 self._PrepareNicModification(params, private, None, None,
13118 {}, cluster, pnode)
13119 return (None, None)
13121 def _PrepareNicMod(_, nic, params, private):
13122 self._PrepareNicModification(params, private, nic.ip, nic.network,
13123 nic.nicparams, cluster, pnode)
13126 def _PrepareNicRemove(_, params, private):
13128 net = params.network
13129 if net is not None and ip is not None:
13130 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
13132 # Verify NIC changes (operating on copy)
13133 nics = instance.nics[:]
13134 ApplyContainerMods("NIC", nics, None, self.nicmod,
13135 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
13136 if len(nics) > constants.MAX_NICS:
13137 raise errors.OpPrereqError("Instance has too many network interfaces"
13138 " (the maximum is %d)" % constants.MAX_NICS,
13139 errors.ECODE_STATE)
13142 # Verify disk changes (operating on a copy)
13143 disks = instance.disks[:]
13144 ApplyContainerMods("disk", disks, None, self.diskmod,
13146 if len(disks) > constants.MAX_DISKS:
13147 raise errors.OpPrereqError("Instance has too many disks"
13148 " (the maximum is %d)" % constants.MAX_DISKS,
13149 errors.ECODE_STATE)
13151 if self.op.offline is not None:
13152 if self.op.offline:
13153 msg = "can't change to offline"
13155 msg = "can't change to online"
13156 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
13158 # Pre-compute NIC changes (necessary to use result in hooks)
13159 self._nic_chgdesc = []
13161 # Operate on copies as this is still in prereq
13162 nics = [nic.Copy() for nic in instance.nics]
13163 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13164 self._CreateNewNic, self._ApplyNicMods,
13166 self._new_nics = nics
13168 self._new_nics = None
13171 def _ConvertPlainToDrbd(self, feedback_fn):
13172 """Converts an instance from plain to drbd.
13175 feedback_fn("Converting template to drbd")
13176 instance = self.instance
13177 pnode = instance.primary_node
13178 snode = self.op.remote_node
13180 assert instance.disk_template == constants.DT_PLAIN
13182 # create a fake disk info for _GenerateDiskTemplate
13183 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13184 constants.IDISK_VG: d.logical_id[0]}
13185 for d in instance.disks]
13186 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13187 instance.name, pnode, [snode],
13188 disk_info, None, None, 0, feedback_fn,
13190 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13192 info = _GetInstanceInfoText(instance)
13193 feedback_fn("Creating additional volumes...")
13194 # first, create the missing data and meta devices
13195 for disk in anno_disks:
13196 # unfortunately this is... not too nice
13197 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
13199 for child in disk.children:
13200 _CreateSingleBlockDev(self, snode, instance, child, info, True)
13201 # at this stage, all new LVs have been created, we can rename the old ones
13203 feedback_fn("Renaming original volumes...")
13204 rename_list = [(o, n.children[0].logical_id)
13205 for (o, n) in zip(instance.disks, new_disks)]
13206 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13207 result.Raise("Failed to rename original LVs")
13209 feedback_fn("Initializing DRBD devices...")
13210 # all child devices are in place, we can now create the DRBD devices
13211 for disk in anno_disks:
13212 for node in [pnode, snode]:
13213 f_create = node == pnode
13214 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13216 # at this point, the instance has been modified
13217 instance.disk_template = constants.DT_DRBD8
13218 instance.disks = new_disks
13219 self.cfg.Update(instance, feedback_fn)
13221 # Release node locks while waiting for sync
13222 _ReleaseLocks(self, locking.LEVEL_NODE)
13224 # disks are created, waiting for sync
13225 disk_abort = not _WaitForSync(self, instance,
13226 oneshot=not self.op.wait_for_sync)
13228 raise errors.OpExecError("There are some degraded disks for"
13229 " this instance, please cleanup manually")
13231 # Node resource locks will be released by caller
13233 def _ConvertDrbdToPlain(self, feedback_fn):
13234 """Converts an instance from drbd to plain.
13237 instance = self.instance
13239 assert len(instance.secondary_nodes) == 1
13240 assert instance.disk_template == constants.DT_DRBD8
13242 pnode = instance.primary_node
13243 snode = instance.secondary_nodes[0]
13244 feedback_fn("Converting template to plain")
13246 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
13247 new_disks = [d.children[0] for d in instance.disks]
13249 # copy over size and mode
13250 for parent, child in zip(old_disks, new_disks):
13251 child.size = parent.size
13252 child.mode = parent.mode
13254 # this is a DRBD disk, return its port to the pool
13255 # NOTE: this must be done right before the call to cfg.Update!
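# For DRBD8 disks the logical_id is (nodeA, nodeB, port, minorA, minorB,
# secret); element 2 is the TCP port that is returned to the pool here.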
13256 for disk in old_disks:
13257 tcp_port = disk.logical_id[2]
13258 self.cfg.AddTcpUdpPort(tcp_port)
13260 # update instance structure
13261 instance.disks = new_disks
13262 instance.disk_template = constants.DT_PLAIN
13263 self.cfg.Update(instance, feedback_fn)
13265 # Release locks in case removing disks takes a while
13266 _ReleaseLocks(self, locking.LEVEL_NODE)
13268 feedback_fn("Removing volumes on the secondary node...")
13269 for disk in old_disks:
13270 self.cfg.SetDiskID(disk, snode)
13271 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
13273 self.LogWarning("Could not remove block device %s on node %s,"
13274 " continuing anyway: %s", disk.iv_name, snode, msg)
13276 feedback_fn("Removing unneeded volumes on the primary node...")
13277 for idx, disk in enumerate(old_disks):
13278 meta = disk.children[1]
13279 self.cfg.SetDiskID(meta, pnode)
13280 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13282 self.LogWarning("Could not remove metadata for disk %d on node %s,"
13283 " continuing anyway: %s", idx, pnode, msg)
13285 def _CreateNewDisk(self, idx, params, _):
13286 """Creates a new disk.
13289 instance = self.instance
13292 if instance.disk_template in constants.DTS_FILEBASED:
13293 (file_driver, file_path) = instance.disks[0].logical_id
13294 file_path = os.path.dirname(file_path)
13296 file_driver = file_path = None
13299 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13300 instance.primary_node, instance.secondary_nodes,
13301 [params], file_path, file_driver, idx,
13302 self.Log, self.diskparams)[0]
13304 info = _GetInstanceInfoText(instance)
13306 logging.info("Creating volume %s for instance %s",
13307 disk.iv_name, instance.name)
13308 # Note: this needs to be kept in sync with _CreateDisks
13310 for node in instance.all_nodes:
13311 f_create = (node == instance.primary_node)
13313 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13314 except errors.OpExecError, err:
13315 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13316 disk.iv_name, disk, node, err)
13318 if self.op.hotplug and disk.pci and _InstanceRunning(self, self.instance):
13319 self.LogInfo("Trying to hotplug device.")
13320 _, device_info = _AssembleInstanceDisks(self, self.instance,
13321 [disk], check=False)
13322 _, _, dev_path = device_info[0]
13323 #TODO: handle result
13324 self.rpc.call_hot_add_disk(self.instance.primary_node,
13325 self.instance, disk, dev_path, idx)
13327 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
13331 def _ModifyDisk(idx, disk, params, _):
13332 """Modifies a disk.
13335 disk.mode = params[constants.IDISK_MODE]
13338 ("disk.mode/%d" % idx, disk.mode),
13341 def _RemoveDisk(self, idx, root, _):
13345 #TODO: log warning in case hotplug is not possible
13347 if root.pci and not self.op.hotplug:
13348 raise errors.OpPrereqError("Cannot remove a disk that has been hotplugged"
13350 " without removing it with hotplug",
13351 errors.ECODE_INVAL)
13352 if self.op.hotplug and root.pci:
13353 if _InstanceRunning(self, self.instance):
13354 self.LogInfo("Trying to hotplug device.")
13355 self.rpc.call_hot_del_disk(self.instance.primary_node,
13356 self.instance, root, idx)
13357 _ShutdownInstanceDisks(self, self.instance, [root])
13358 self.cfg.UpdatePCIInfo(self.instance, root.pci)
13360 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13361 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13362 self.cfg.SetDiskID(disk, node)
13363 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13365 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13366 " continuing anyway", idx, node, msg)
13368 # if this is a DRBD disk, return its port to the pool
13369 if root.dev_type in constants.LDS_DRBD:
13370 self.cfg.AddTcpUdpPort(root.logical_id[2])
13372 def _CreateNewNic(self, idx, params, private):
13373 """Creates data structure for a new network interface.
13376 mac = params[constants.INIC_MAC]
13377 ip = params.get(constants.INIC_IP, None)
13378 network = params.get(constants.INIC_NETWORK, None)
13379 #TODO: not private.filled?? can a nic have no nicparams??
13380 nicparams = private.filled
13382 nic = objects.NIC(mac=mac, ip=ip, network=network, nicparams=nicparams)
13384 #TODO: log warning in case hotplug is not possible
13387 if self.op.hotplug:
13388 nic.idx, nic.pci = _GetPCIInfo(self, 'nics')
13389 if nic.pci is not None and _InstanceRunning(self, self.instance):
13390 self.rpc.call_hot_add_nic(self.instance.primary_node,
13391 self.instance, nic, idx)
13394 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
13395 (mac, ip, private.filled[constants.NIC_MODE],
13396 private.filled[constants.NIC_LINK],
13401 def _ApplyNicMods(self, idx, nic, params, private):
13402 """Modifies a network interface.
13407 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NETWORK]:
13409 changes.append(("nic.%s/%d" % (key, idx), params[key]))
13410 setattr(nic, key, params[key])
13413 nic.nicparams = private.filled
13415 for (key, val) in nic.nicparams.items():
13416 changes.append(("nic.%s/%d" % (key, idx), val))
13418 #TODO: log warning in case hotplug is not possible
13420 if self.op.hotplug and nic.pci and _InstanceRunning(self, self.instance):
13421 self.LogInfo("Trying to hotplug device.")
13422 self.rpc.call_hot_del_nic(self.instance.primary_node,
13423 self.instance, nic, idx)
13424 self.rpc.call_hot_add_nic(self.instance.primary_node,
13425 self.instance, nic, idx)
13428 def _RemoveNic(self, idx, nic, _):
13429 if nic.pci and not self.op.hotplug:
13430 raise errors.OpPrereqError("Cannot remove a nic that has been hotplugged"
13431 " without removing it with hotplug",
13432 errors.ECODE_INVAL)
13433 #TODO: log warning in case hotplug is not possible
13435 if self.op.hotplug and nic.pci:
13436 if _InstanceRunning(self, self.instance):
13437 self.LogInfo("Trying to hotplug device.")
13438 self.rpc.call_hot_del_nic(self.instance.primary_node,
13439 self.instance, nic, idx)
13440 self.cfg.UpdatePCIInfo(self.instance, nic.pci)
13443 def Exec(self, feedback_fn):
13444 """Modifies an instance.
13446 Most parameters take effect only at the next restart of the instance.
13449 # Process here the warnings from CheckPrereq, as we don't have a
13450 # feedback_fn there.
13451 # TODO: Replace with self.LogWarning
13452 for warn in self.warn:
13453 feedback_fn("WARNING: %s" % warn)
13455 assert ((self.op.disk_template is None) ^
13456 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13457 "Not owning any node resource locks"
13460 instance = self.instance
13463 if self.op.runtime_mem:
13464 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13466 self.op.runtime_mem)
13467 rpcres.Raise("Cannot modify instance runtime memory")
13468 result.append(("runtime_memory", self.op.runtime_mem))
13470 # Apply disk changes
13471 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13472 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13473 _UpdateIvNames(0, instance.disks)
13475 if self.op.disk_template:
13477 check_nodes = set(instance.all_nodes)
13478 if self.op.remote_node:
13479 check_nodes.add(self.op.remote_node)
13480 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13481 owned = self.owned_locks(level)
13482 assert not (check_nodes - owned), \
13483 ("Not owning the correct locks, owning %r, expected at least %r" %
13484 (owned, check_nodes))
13486 r_shut = _ShutdownInstanceDisks(self, instance)
13488 raise errors.OpExecError("Cannot shut down instance disks, unable to"
13489 " proceed with disk template conversion")
13490 mode = (instance.disk_template, self.op.disk_template)
13492 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13494 self.cfg.ReleaseDRBDMinors(instance.name)
13496 result.append(("disk_template", self.op.disk_template))
13498 assert instance.disk_template == self.op.disk_template, \
13499 ("Expected disk template '%s', found '%s'" %
13500 (self.op.disk_template, instance.disk_template))
13502 # Release node and resource locks if there are any (they might already have
13503 # been released during disk conversion)
13504 _ReleaseLocks(self, locking.LEVEL_NODE)
13505 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13507 # Apply NIC changes
13508 if self._new_nics is not None:
13509 instance.nics = self._new_nics
13510 result.extend(self._nic_chgdesc)
13513 if self.op.hvparams:
13514 instance.hvparams = self.hv_inst
13515 for key, val in self.op.hvparams.iteritems():
13516 result.append(("hv/%s" % key, val))
13519 if self.op.beparams:
13520 instance.beparams = self.be_inst
13521 for key, val in self.op.beparams.iteritems():
13522 result.append(("be/%s" % key, val))
13525 if self.op.os_name:
13526 instance.os = self.op.os_name
13529 if self.op.osparams:
13530 instance.osparams = self.os_inst
13531 for key, val in self.op.osparams.iteritems():
13532 result.append(("os/%s" % key, val))
13534 if self.op.offline is None:
13537 elif self.op.offline:
13538 # Mark instance as offline
13539 self.cfg.MarkInstanceOffline(instance.name)
13540 result.append(("admin_state", constants.ADMINST_OFFLINE))
13542 # Mark instance as online, but stopped
13543 self.cfg.MarkInstanceDown(instance.name)
13544 result.append(("admin_state", constants.ADMINST_DOWN))
13546 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
13548 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13549 self.owned_locks(locking.LEVEL_NODE)), \
13550 "All node locks should have been released by now"
13554 _DISK_CONVERSIONS = {
13555 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13556 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
13560 class LUInstanceChangeGroup(LogicalUnit):
13561 HPATH = "instance-change-group"
13562 HTYPE = constants.HTYPE_INSTANCE
13565 def ExpandNames(self):
13566 self.share_locks = _ShareAll()
13567 self.needed_locks = {
13568 locking.LEVEL_NODEGROUP: [],
13569 locking.LEVEL_NODE: [],
13572 self._ExpandAndLockInstance()
13574 if self.op.target_groups:
13575 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13576 self.op.target_groups)
13578 self.req_target_uuids = None
13580 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13582 def DeclareLocks(self, level):
13583 if level == locking.LEVEL_NODEGROUP:
13584 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13586 if self.req_target_uuids:
13587 lock_groups = set(self.req_target_uuids)
13589 # Lock all groups used by instance optimistically; this requires going
13590 # via the node before it's locked, requiring verification later on
13591 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13592 lock_groups.update(instance_groups)
13594 # No target groups, need to lock all of them
13595 lock_groups = locking.ALL_SET
13597 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13599 elif level == locking.LEVEL_NODE:
13600 if self.req_target_uuids:
13601 # Lock all nodes used by instances
13602 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13603 self._LockInstancesNodes()
13605 # Lock all nodes in all potential target groups
13606 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13607 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13608 member_nodes = [node_name
13609 for group in lock_groups
13610 for node_name in self.cfg.GetNodeGroup(group).members]
13611 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13613 # Lock all nodes as all groups are potential targets
13614 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13616 def CheckPrereq(self):
13617 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13618 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13619 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13621 assert (self.req_target_uuids is None or
13622 owned_groups.issuperset(self.req_target_uuids))
13623 assert owned_instances == set([self.op.instance_name])
13625 # Get instance information
13626 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13628 # Check if node groups for locked instance are still correct
13629 assert owned_nodes.issuperset(self.instance.all_nodes), \
13630 ("Instance %s's nodes changed while we kept the lock" %
13631 self.op.instance_name)
13633 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13636 if self.req_target_uuids:
13637 # User requested specific target groups
13638 self.target_uuids = frozenset(self.req_target_uuids)
13640 # All groups except those used by the instance are potential targets
13641 self.target_uuids = owned_groups - inst_groups
13643 conflicting_groups = self.target_uuids & inst_groups
13644 if conflicting_groups:
13645 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13646 " used by the instance '%s'" %
13647 (utils.CommaJoin(conflicting_groups),
13648 self.op.instance_name),
13649 errors.ECODE_INVAL)
13651 if not self.target_uuids:
13652 raise errors.OpPrereqError("There are no possible target groups",
13653 errors.ECODE_INVAL)
13655 def BuildHooksEnv(self):
13656 """Build hooks env.
13659 assert self.target_uuids
13662 "TARGET_GROUPS": " ".join(self.target_uuids),
13665 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13669 def BuildHooksNodes(self):
13670 """Build hooks nodes.
13673 mn = self.cfg.GetMasterNode()
13674 return ([mn], [mn])
13676 def Exec(self, feedback_fn):
13677 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13679 assert instances == [self.op.instance_name], "Instance not locked"
13681 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13682 instances=instances, target_groups=list(self.target_uuids))
13684 ial.Run(self.op.iallocator)
13686 if not ial.success:
13687 raise errors.OpPrereqError("Can't compute solution for changing group of"
13688 " instance '%s' using iallocator '%s': %s" %
13689 (self.op.instance_name, self.op.iallocator,
13691 errors.ECODE_NORES)
13693 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13695 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13696 " instance '%s'", len(jobs), self.op.instance_name)
13698 return ResultWithJobs(jobs)
13701 class LUBackupQuery(NoHooksLU):
13702 """Query the exports list
13707 def CheckArguments(self):
13708 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13709 ["node", "export"], self.op.use_locking)
13711 def ExpandNames(self):
13712 self.expq.ExpandNames(self)
13714 def DeclareLocks(self, level):
13715 self.expq.DeclareLocks(self, level)
13717 def Exec(self, feedback_fn):
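# Old-style result: a dict mapping each node name either to False (the node
# could not be queried) or to the list of export names found on it.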
13720 for (node, expname) in self.expq.OldStyleQuery(self):
13721 if expname is None:
13722 result[node] = False
13724 result.setdefault(node, []).append(expname)
13729 class _ExportQuery(_QueryBase):
13730 FIELDS = query.EXPORT_FIELDS
13732 #: The node name is not a unique key for this query
13733 SORT_FIELD = "node"
13735 def ExpandNames(self, lu):
13736 lu.needed_locks = {}
13738 # The following variables interact with _QueryBase._GetNames
13740 self.wanted = _GetWantedNodes(lu, self.names)
13742 self.wanted = locking.ALL_SET
13744 self.do_locking = self.use_locking
13746 if self.do_locking:
13747 lu.share_locks = _ShareAll()
13748 lu.needed_locks = {
13749 locking.LEVEL_NODE: self.wanted,
13752 def DeclareLocks(self, lu, level):
13755 def _GetQueryData(self, lu):
13756 """Computes the list of nodes and their attributes.
13759 # Locking is not used
13761 assert not (compat.any(lu.glm.is_owned(level)
13762 for level in locking.LEVELS
13763 if level != locking.LEVEL_CLUSTER) or
13764 self.do_locking or self.use_locking)
13766 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
13770 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13772 result.append((node, None))
13774 result.extend((node, expname) for expname in nres.payload)
13779 class LUBackupPrepare(NoHooksLU):
13780 """Prepares an instance for an export and returns useful information.
13785 def ExpandNames(self):
13786 self._ExpandAndLockInstance()
13788 def CheckPrereq(self):
13789 """Check prerequisites.
13792 instance_name = self.op.instance_name
13794 self.instance = self.cfg.GetInstanceInfo(instance_name)
13795 assert self.instance is not None, \
13796 "Cannot retrieve locked instance %s" % self.op.instance_name
13797 _CheckNodeOnline(self, self.instance.primary_node)
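# The cluster domain secret is used in Exec below to sign the remote-export
# handshake and the X509 key/CA information returned to the caller.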
13799 self._cds = _GetClusterDomainSecret()
13801 def Exec(self, feedback_fn):
13802 """Prepares an instance for an export.
13805 instance = self.instance
13807 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13808 salt = utils.GenerateSecret(8)
13810 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13811 result = self.rpc.call_x509_cert_create(instance.primary_node,
13812 constants.RIE_CERT_VALIDITY)
13813 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13815 (name, cert_pem) = result.payload
13817 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13821 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13822 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13824 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13830 class LUBackupExport(LogicalUnit):
13831 """Export an instance to an image in the cluster.
13834 HPATH = "instance-export"
13835 HTYPE = constants.HTYPE_INSTANCE
13838 def CheckArguments(self):
13839 """Check the arguments.
13842 self.x509_key_name = self.op.x509_key_name
13843 self.dest_x509_ca_pem = self.op.destination_x509_ca
13845 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13846 if not self.x509_key_name:
13847 raise errors.OpPrereqError("Missing X509 key name for encryption",
13848 errors.ECODE_INVAL)
13850 if not self.dest_x509_ca_pem:
13851 raise errors.OpPrereqError("Missing destination X509 CA",
13852 errors.ECODE_INVAL)
13854 def ExpandNames(self):
13855 self._ExpandAndLockInstance()
13857 # Lock all nodes for local exports
13858 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13859 # FIXME: lock only instance primary and destination node
13861 # Sad but true, for now we have to lock all nodes, as we don't know where
13862 # the previous export might be, and in this LU we search for it and
13863 # remove it from its current node. In the future we could fix this by:
13864 # - making a tasklet to search (share-lock all), then create the
13865 # new one, then one to remove, after
13866 # - removing the removal operation altogether
13867 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13869 def DeclareLocks(self, level):
13870 """Last minute lock declaration."""
13871 # All nodes are locked anyway, so nothing to do here.
13873 def BuildHooksEnv(self):
13874 """Build hooks env.
13876 This will run on the master, primary node and target node.
13880 "EXPORT_MODE": self.op.mode,
13881 "EXPORT_NODE": self.op.target_node,
13882 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13883 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13884 # TODO: Generic function for boolean env variables
13885 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13888 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13892 def BuildHooksNodes(self):
13893 """Build hooks nodes.
13896 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13898 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13899 nl.append(self.op.target_node)
13903 def CheckPrereq(self):
13904 """Check prerequisites.
13906 This checks that the instance and node names are valid.
13909 instance_name = self.op.instance_name
13911 self.instance = self.cfg.GetInstanceInfo(instance_name)
13912 assert self.instance is not None, \
13913 "Cannot retrieve locked instance %s" % self.op.instance_name
13914 _CheckNodeOnline(self, self.instance.primary_node)
13916 if (self.op.remove_instance and
13917 self.instance.admin_state == constants.ADMINST_UP and
13918 not self.op.shutdown):
13919 raise errors.OpPrereqError("Can not remove instance without shutting it"
13922 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13923 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13924 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13925 assert self.dst_node is not None
13927 _CheckNodeOnline(self, self.dst_node.name)
13928 _CheckNodeNotDrained(self, self.dst_node.name)
13931 self.dest_disk_info = None
13932 self.dest_x509_ca = None
13934 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13935 self.dst_node = None
13937 if len(self.op.target_node) != len(self.instance.disks):
13938 raise errors.OpPrereqError(("Received destination information for %s"
13939 " disks, but instance %s has %s disks") %
13940 (len(self.op.target_node), instance_name,
13941 len(self.instance.disks)),
13942 errors.ECODE_INVAL)
13944 cds = _GetClusterDomainSecret()
13946 # Check X509 key name
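# x509_key_name is the (name, hmac, salt) tuple handed out by LUBackupPrepare;
# verifying the HMAC against the cluster domain secret proves the name was
# issued by this cluster and has not been tampered with.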
13948 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13949 except (TypeError, ValueError), err:
13950 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13952 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13953 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13954 errors.ECODE_INVAL)
13956 # Load and verify CA
13958 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13959 except OpenSSL.crypto.Error, err:
13960 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13961 (err, ), errors.ECODE_INVAL)
13963 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13964 if errcode is not None:
13965 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13966 (msg, ), errors.ECODE_INVAL)
13968 self.dest_x509_ca = cert
13970 # Verify target information
13972 for idx, disk_data in enumerate(self.op.target_node):
13974 (host, port, magic) = \
13975 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13976 except errors.GenericError, err:
13977 raise errors.OpPrereqError("Target info for disk %s: %s" %
13978 (idx, err), errors.ECODE_INVAL)
13980 disk_info.append((host, port, magic))
13982 assert len(disk_info) == len(self.op.target_node)
13983 self.dest_disk_info = disk_info
13986 raise errors.ProgrammerError("Unhandled export mode %r" %
13989 # instance disk type verification
13990 # TODO: Implement export support for file-based disks
13991 for disk in self.instance.disks:
13992 if disk.dev_type == constants.LD_FILE:
13993 raise errors.OpPrereqError("Export not supported for instances with"
13994 " file-based disks", errors.ECODE_INVAL)
13996 def _CleanupExports(self, feedback_fn):
13997 """Removes exports of current instance from all other nodes.
13999 If an instance in a cluster with nodes A..D was exported to node C, its
14000 exports will be removed from the nodes A, B and D.
14003 assert self.op.mode != constants.EXPORT_MODE_REMOTE
14005 nodelist = self.cfg.GetNodeList()
14006 nodelist.remove(self.dst_node.name)
14008 # on one-node clusters nodelist will be empty after the removal
14009 # if we proceed the backup would be removed because OpBackupQuery
14010 # substitutes an empty list with the full cluster node list.
14011 iname = self.instance.name
14013 feedback_fn("Removing old exports for instance %s" % iname)
14014 exportlist = self.rpc.call_export_list(nodelist)
14015 for node in exportlist:
14016 if exportlist[node].fail_msg:
14018 if iname in exportlist[node].payload:
14019 msg = self.rpc.call_export_remove(node, iname).fail_msg
14021 self.LogWarning("Could not remove older export for instance %s"
14022 " on node %s: %s", iname, node, msg)
14024 def Exec(self, feedback_fn):
14025 """Export an instance to an image in the cluster.
14028 assert self.op.mode in constants.EXPORT_MODES
14030 instance = self.instance
14031 src_node = instance.primary_node
14033 if self.op.shutdown:
14034 # shut down the instance, but not the disks
14035 feedback_fn("Shutting down instance %s" % instance.name)
14036 result = self.rpc.call_instance_shutdown(src_node, instance,
14037 self.op.shutdown_timeout)
14038 # TODO: Maybe ignore failures if ignore_remove_failures is set
14039 result.Raise("Could not shutdown instance %s on"
14040 " node %s" % (instance.name, src_node))
14042 # set the disk IDs correctly since call_instance_start needs the
14043 # correct drbd minor to create the symlinks
14044 for disk in instance.disks:
14045 self.cfg.SetDiskID(disk, src_node)
14047 activate_disks = (instance.admin_state != constants.ADMINST_UP)
14050 # Activate the instance disks if we're exporting a stopped instance
14051 feedback_fn("Activating disks for %s" % instance.name)
14052 _StartInstanceDisks(self, instance, None)
14055 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
14058 helper.CreateSnapshots()
14060 if (self.op.shutdown and
14061 instance.admin_state == constants.ADMINST_UP and
14062 not self.op.remove_instance):
14063 assert not activate_disks
14064 feedback_fn("Starting instance %s" % instance.name)
14065 result = self.rpc.call_instance_start(src_node,
14066 (instance, None, None), False)
14067 msg = result.fail_msg
14069 feedback_fn("Failed to start instance: %s" % msg)
14070 _ShutdownInstanceDisks(self, instance)
14071 raise errors.OpExecError("Could not start instance: %s" % msg)
14073 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14074 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
14075 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
14076 connect_timeout = constants.RIE_CONNECT_TIMEOUT
14077 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
14079 (key_name, _, _) = self.x509_key_name
14082 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
14085 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
14086 key_name, dest_ca_pem,
14091 # Check for backwards compatibility
14092 assert len(dresults) == len(instance.disks)
14093 assert compat.all(isinstance(i, bool) for i in dresults), \
14094 "Not all results are boolean: %r" % dresults
14098 feedback_fn("Deactivating disks for %s" % instance.name)
14099 _ShutdownInstanceDisks(self, instance)
14101 if not (compat.all(dresults) and fin_resu):
14104 failures.append("export finalization")
14105 if not compat.all(dresults):
14106 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
14108 failures.append("disk export: disk(s) %s" % fdsk)
14110 raise errors.OpExecError("Export failed, errors in %s" %
14111 utils.CommaJoin(failures))
14113 # At this point, the export was successful, we can cleanup/finish
14115 # Remove instance if requested
14116 if self.op.remove_instance:
14117 feedback_fn("Removing instance %s" % instance.name)
14118 _RemoveInstance(self, feedback_fn, instance,
14119 self.op.ignore_remove_failures)
14121 if self.op.mode == constants.EXPORT_MODE_LOCAL:
14122 self._CleanupExports(feedback_fn)
14124 return fin_resu, dresults
14127 class LUBackupRemove(NoHooksLU):
14128 """Remove exports related to the named instance.
14133 def ExpandNames(self):
14134 self.needed_locks = {}
14135 # We need all nodes to be locked in order for RemoveExport to work, but we
14136 # don't need to lock the instance itself, as nothing will happen to it (and
14137 # we can remove exports also for a removed instance)
14138 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
14140 def Exec(self, feedback_fn):
14141 """Remove any export.
14144 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
14145 # If the instance was not found we'll try with the name that was passed in.
14146 # This will only work if it was an FQDN, though.
14148 if not instance_name:
14150 instance_name = self.op.instance_name
14152 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
14153 exportlist = self.rpc.call_export_list(locked_nodes)
14155 for node in exportlist:
14156 msg = exportlist[node].fail_msg
14158 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
14160 if instance_name in exportlist[node].payload:
14162 result = self.rpc.call_export_remove(node, instance_name)
14163 msg = result.fail_msg
14165 logging.error("Could not remove export for instance %s"
14166 " on node %s: %s", instance_name, node, msg)
14168 if fqdn_warn and not found:
14169 feedback_fn("Export not found. If trying to remove an export belonging"
14170 " to a deleted instance please use its Fully Qualified"
14174 class LUGroupAdd(LogicalUnit):
14175 """Logical unit for creating node groups.
14178 HPATH = "group-add"
14179 HTYPE = constants.HTYPE_GROUP
14182 def ExpandNames(self):
14183 # We need the new group's UUID here so that we can create and acquire the
14184 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14185 # that it should not check whether the UUID exists in the configuration.
14186 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14187 self.needed_locks = {}
14188 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14190 def CheckPrereq(self):
14191 """Check prerequisites.
14193 This checks that the given group name is not already in use as a node group name.
14198 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14199 except errors.OpPrereqError:
14202 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14203 " node group (UUID: %s)" %
14204 (self.op.group_name, existing_uuid),
14205 errors.ECODE_EXISTS)
14207 if self.op.ndparams:
14208 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14210 if self.op.hv_state:
14211 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
14213 self.new_hv_state = None
14215 if self.op.disk_state:
14216 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
14218 self.new_disk_state = None
14220 if self.op.diskparams:
14221 for templ in constants.DISK_TEMPLATES:
14222 if templ in self.op.diskparams:
14223 utils.ForceDictType(self.op.diskparams[templ],
14224 constants.DISK_DT_TYPES)
14225 self.new_diskparams = self.op.diskparams
14227 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14228 except errors.OpPrereqError, err:
14229 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14230 errors.ECODE_INVAL)
14232 self.new_diskparams = {}
14234 if self.op.ipolicy:
14235 cluster = self.cfg.GetClusterInfo()
14236 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14238 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14239 except errors.ConfigurationError, err:
14240 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14241 errors.ECODE_INVAL)
14243 def BuildHooksEnv(self):
14244 """Build hooks env.
14248 "GROUP_NAME": self.op.group_name,
14251 def BuildHooksNodes(self):
14252 """Build hooks nodes.
14255 mn = self.cfg.GetMasterNode()
14256 return ([mn], [mn])
14258 def Exec(self, feedback_fn):
14259 """Add the node group to the cluster.
14262 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14263 uuid=self.group_uuid,
14264 alloc_policy=self.op.alloc_policy,
14265 ndparams=self.op.ndparams,
14266 diskparams=self.new_diskparams,
14267 ipolicy=self.op.ipolicy,
14268 hv_state_static=self.new_hv_state,
14269 disk_state_static=self.new_disk_state)
14271 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14272 del self.remove_locks[locking.LEVEL_NODEGROUP]
14275 class LUGroupAssignNodes(NoHooksLU):
14276 """Logical unit for assigning nodes to groups.
14281 def ExpandNames(self):
14282 # These raise errors.OpPrereqError on their own:
14283 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14284 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14286 # We want to lock all the affected nodes and groups. We have readily
14287 # available the list of nodes, and the *destination* group. To gather the
14288 # list of "source" groups, we need to fetch node information later on.
14289 self.needed_locks = {
14290 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14291 locking.LEVEL_NODE: self.op.nodes,
14294 def DeclareLocks(self, level):
14295 if level == locking.LEVEL_NODEGROUP:
14296 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14298 # Try to get all affected nodes' groups without having the group or node
14299 # lock yet. Needs verification later in the code flow.
14300 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14302 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14304 def CheckPrereq(self):
14305 """Check prerequisites.
14308 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14309 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14310 frozenset(self.op.nodes))
14312 expected_locks = (set([self.group_uuid]) |
14313 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14314 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14315 if actual_locks != expected_locks:
14316 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14317 " current groups are '%s', used to be '%s'" %
14318 (utils.CommaJoin(expected_locks),
14319 utils.CommaJoin(actual_locks)))
14321 self.node_data = self.cfg.GetAllNodesInfo()
14322 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14323 instance_data = self.cfg.GetAllInstancesInfo()
14325 if self.group is None:
14326 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14327 (self.op.group_name, self.group_uuid))
14329 (new_splits, previous_splits) = \
14330 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14331 for node in self.op.nodes],
14332 self.node_data, instance_data)
14335 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14337 if not self.op.force:
14338 raise errors.OpExecError("The following instances get split by this"
14339 " change and --force was not given: %s" %
14342 self.LogWarning("This operation will split the following instances: %s",
14345 if previous_splits:
14346 self.LogWarning("In addition, these already-split instances continue"
14347 " to be split across groups: %s",
14348 utils.CommaJoin(utils.NiceSort(previous_splits)))
14350 def Exec(self, feedback_fn):
14351 """Assign nodes to a new group.
14354 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14356 self.cfg.AssignGroupNodes(mods)
14359 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14360 """Check for split instances after a node assignment.
14362 This method considers a series of node assignments as an atomic operation,
14363 and returns information about split instances after applying the set of
14366 In particular, it returns information about newly split instances, and
14367 instances that were already split, and remain so after the change.
14369 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14372 @type changes: list of (node_name, new_group_uuid) pairs.
14373 @param changes: list of node assignments to consider.
14374 @param node_data: a dict with data for all nodes
14375 @param instance_data: a dict with all instances to consider
14376 @rtype: a two-tuple
14377 @return: a list of instances that were previously okay and end up split as a
14378 consequence of this change, and a list of instances that were previously
14379 split and this change does not fix.
14382 changed_nodes = dict((node, group) for node, group in changes
14383 if node_data[node].group != group)
14385 all_split_instances = set()
14386 previously_split_instances = set()
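# Illustrative example: a DRBD instance on nodes (n1, n2), both currently in
# group G1, becomes "newly split" if only n2 is moved to group G2; if n1 and
# n2 were already in different groups beforehand, the instance is reported as
# "previously split" instead.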
14388 def InstanceNodes(instance):
14389 return [instance.primary_node] + list(instance.secondary_nodes)
14391 for inst in instance_data.values():
14392 if inst.disk_template not in constants.DTS_INT_MIRROR:
14395 instance_nodes = InstanceNodes(inst)
14397 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14398 previously_split_instances.add(inst.name)
14400 if len(set(changed_nodes.get(node, node_data[node].group)
14401 for node in instance_nodes)) > 1:
14402 all_split_instances.add(inst.name)
14404 return (list(all_split_instances - previously_split_instances),
14405 list(previously_split_instances & all_split_instances))
14408 class _GroupQuery(_QueryBase):
14409 FIELDS = query.GROUP_FIELDS
14411 def ExpandNames(self, lu):
14412 lu.needed_locks = {}
14414 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14415 self._cluster = lu.cfg.GetClusterInfo()
14416 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
14419 self.wanted = [name_to_uuid[name]
14420 for name in utils.NiceSort(name_to_uuid.keys())]
14422 # Accept names to be either names or UUIDs.
14425 all_uuid = frozenset(self._all_groups.keys())
14427 for name in self.names:
14428 if name in all_uuid:
14429 self.wanted.append(name)
14430 elif name in name_to_uuid:
14431 self.wanted.append(name_to_uuid[name])
14433 missing.append(name)
14436 raise errors.OpPrereqError("Some groups do not exist: %s" %
14437 utils.CommaJoin(missing),
14438 errors.ECODE_NOENT)
14440 def DeclareLocks(self, lu, level):
14443 def _GetQueryData(self, lu):
14444 """Computes the list of node groups and their attributes.
14447 do_nodes = query.GQ_NODE in self.requested_data
14448 do_instances = query.GQ_INST in self.requested_data
14450 group_to_nodes = None
14451 group_to_instances = None
14453 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14454 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14455 # latter GetAllInstancesInfo() is not enough, for we have to go through
14456 # instance->node. Hence, we will need to process nodes even if we only need
14457 # instance information.
14458 if do_nodes or do_instances:
14459 all_nodes = lu.cfg.GetAllNodesInfo()
14460 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
14463 for node in all_nodes.values():
14464 if node.group in group_to_nodes:
14465 group_to_nodes[node.group].append(node.name)
14466 node_to_group[node.name] = node.group
14469 all_instances = lu.cfg.GetAllInstancesInfo()
14470 group_to_instances = dict((uuid, []) for uuid in self.wanted)
14472 for instance in all_instances.values():
14473 node = instance.primary_node
14474 if node in node_to_group:
14475 group_to_instances[node_to_group[node]].append(instance.name)
14478 # Do not pass on node information if it was not requested.
14479 group_to_nodes = None
14481 return query.GroupQueryData(self._cluster,
14482 [self._all_groups[uuid]
14483 for uuid in self.wanted],
14484 group_to_nodes, group_to_instances,
14485 query.GQ_DISKPARAMS in self.requested_data)
14488 class LUGroupQuery(NoHooksLU):
14489 """Logical unit for querying node groups.
14494 def CheckArguments(self):
14495 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
14496 self.op.output_fields, False)
14498 def ExpandNames(self):
14499 self.gq.ExpandNames(self)
14501 def DeclareLocks(self, level):
14502 self.gq.DeclareLocks(self, level)
14504 def Exec(self, feedback_fn):
14505 return self.gq.OldStyleQuery(self)
14508 class LUGroupSetParams(LogicalUnit):
14509 """Modifies the parameters of a node group.
14512 HPATH = "group-modify"
14513 HTYPE = constants.HTYPE_GROUP
14516 def CheckArguments(self):
14519 self.op.diskparams,
14520 self.op.alloc_policy,
14522 self.op.disk_state,
14526 if all_changes.count(None) == len(all_changes):
14527 raise errors.OpPrereqError("Please pass at least one modification",
14528 errors.ECODE_INVAL)
14530 def ExpandNames(self):
14531 # This raises errors.OpPrereqError on its own:
14532 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14534 self.needed_locks = {
14535 locking.LEVEL_INSTANCE: [],
14536 locking.LEVEL_NODEGROUP: [self.group_uuid],
14539 self.share_locks[locking.LEVEL_INSTANCE] = 1
14541 def DeclareLocks(self, level):
14542 if level == locking.LEVEL_INSTANCE:
14543 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14545 # Lock instances optimistically, needs verification once group lock has
14547 self.needed_locks[locking.LEVEL_INSTANCE] = \
14548 self.cfg.GetNodeGroupInstances(self.group_uuid)
14551 def _UpdateAndVerifyDiskParams(old, new):
14552 """Updates and verifies disk parameters.
14555 new_params = _GetUpdatedParams(old, new)
14556 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
14559 def CheckPrereq(self):
14560 """Check prerequisites.
14563 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14565 # Check if locked instances are still correct
14566 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14568 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14569 cluster = self.cfg.GetClusterInfo()
14571 if self.group is None:
14572 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14573 (self.op.group_name, self.group_uuid))
14575 if self.op.ndparams:
14576 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
14577 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
14578 self.new_ndparams = new_ndparams
14580 if self.op.diskparams:
14581 diskparams = self.group.diskparams
14582 uavdp = self._UpdateAndVerifyDiskParams
14583 # For each disktemplate subdict update and verify the values
14584 new_diskparams = dict((dt,
14585 uavdp(diskparams.get(dt, {}),
14586 self.op.diskparams[dt]))
14587 for dt in constants.DISK_TEMPLATES
14588 if dt in self.op.diskparams)
14589 # Now that all subdicts of diskparams are ready, let's merge the actual
14590 # dict with all updated subdicts
14591 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
14593 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14594 except errors.OpPrereqError, err:
14595 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
14596 errors.ECODE_INVAL)
14598 if self.op.hv_state:
14599 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
14600 self.group.hv_state_static)
14602 if self.op.disk_state:
14603 self.new_disk_state = \
14604 _MergeAndVerifyDiskState(self.op.disk_state,
14605 self.group.disk_state_static)
14607 if self.op.ipolicy:
14608 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
14612 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
14613 inst_filter = lambda inst: inst.name in owned_instances
14614 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
14616 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
14618 new_ipolicy, instances)
14621 self.LogWarning("After the ipolicy change the following instances"
14622 " violate them: %s",
14623 utils.CommaJoin(violations))
14625 def BuildHooksEnv(self):
14626 """Build hooks env.
14630 "GROUP_NAME": self.op.group_name,
14631 "NEW_ALLOC_POLICY": self.op.alloc_policy,
14634 def BuildHooksNodes(self):
14635 """Build hooks nodes.
14638 mn = self.cfg.GetMasterNode()
14639 return ([mn], [mn])
14641 def Exec(self, feedback_fn):
14642 """Modifies the node group.
14647 if self.op.ndparams:
14648 self.group.ndparams = self.new_ndparams
14649 result.append(("ndparams", str(self.group.ndparams)))
14651 if self.op.diskparams:
14652 self.group.diskparams = self.new_diskparams
14653 result.append(("diskparams", str(self.group.diskparams)))
14655 if self.op.alloc_policy:
14656 self.group.alloc_policy = self.op.alloc_policy
14658 if self.op.hv_state:
14659 self.group.hv_state_static = self.new_hv_state
14661 if self.op.disk_state:
14662 self.group.disk_state_static = self.new_disk_state
14664 if self.op.ipolicy:
14665 self.group.ipolicy = self.new_ipolicy
14667 self.cfg.Update(self.group, feedback_fn)
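# Hedged example of the per-disk-template merge performed in CheckPrereq
# above (ignoring the special "default" handling done by _GetUpdatedParams):
# each requested subdict is layered over the group's current subdict, and the
# merged subdicts are then layered over the full diskparams dict.  The
# literal values below are illustrative only.
def _ExampleMergeDiskParams(current, requested):
  merged = dict(current)
  for template, new_values in requested.items():
    params = dict(current.get(template, {}))
    params.update(new_values)
    merged[template] = params
  return merged

# e.g. _ExampleMergeDiskParams({"drbd": {"resync-rate": 1024}},
#                              {"drbd": {"metavg": "xenvg"}})
# yields {"drbd": {"resync-rate": 1024, "metavg": "xenvg"}}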
14671 class LUGroupRemove(LogicalUnit):
14672 HPATH = "group-remove"
14673 HTYPE = constants.HTYPE_GROUP
14676 def ExpandNames(self):
14677 # This raises errors.OpPrereqError on its own:
14678 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14679 self.needed_locks = {
14680 locking.LEVEL_NODEGROUP: [self.group_uuid],
14683 def CheckPrereq(self):
14684 """Check prerequisites.
14686 This checks that the given group name exists as a node group, that it is
14687 empty (i.e., contains no nodes), and that it is not the last group of the
14691 # Verify that the group is empty.
14692 group_nodes = [node.name
14693 for node in self.cfg.GetAllNodesInfo().values()
14694 if node.group == self.group_uuid]
14697 raise errors.OpPrereqError("Group '%s' not empty, has the following"
14699 (self.op.group_name,
14700 utils.CommaJoin(utils.NiceSort(group_nodes))),
14701 errors.ECODE_STATE)
14703 # Verify the cluster would not be left group-less.
14704 if len(self.cfg.GetNodeGroupList()) == 1:
14705 raise errors.OpPrereqError("Group '%s' is the only group,"
14706 " cannot be removed" %
14707 self.op.group_name,
14708 errors.ECODE_STATE)
14710 def BuildHooksEnv(self):
14711 """Build hooks env.
14715 "GROUP_NAME": self.op.group_name,
14718 def BuildHooksNodes(self):
14719 """Build hooks nodes.
14722 mn = self.cfg.GetMasterNode()
14723 return ([mn], [mn])
14725 def Exec(self, feedback_fn):
14726 """Remove the node group.
14730 self.cfg.RemoveNodeGroup(self.group_uuid)
14731 except errors.ConfigurationError:
14732 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
14733 (self.op.group_name, self.group_uuid))
14735 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14738 class LUGroupRename(LogicalUnit):
14739 HPATH = "group-rename"
14740 HTYPE = constants.HTYPE_GROUP
14743 def ExpandNames(self):
14744 # This raises errors.OpPrereqError on its own:
14745 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14747 self.needed_locks = {
14748 locking.LEVEL_NODEGROUP: [self.group_uuid],
14751 def CheckPrereq(self):
14752 """Check prerequisites.
14754 Ensures requested new name is not yet used.
14758 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
14759 except errors.OpPrereqError:
14762 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
14763 " node group (UUID: %s)" %
14764 (self.op.new_name, new_name_uuid),
14765 errors.ECODE_EXISTS)
14767 def BuildHooksEnv(self):
14768 """Build hooks env.
14772 "OLD_NAME": self.op.group_name,
14773 "NEW_NAME": self.op.new_name,
14776 def BuildHooksNodes(self):
14777 """Build hooks nodes.
14780 mn = self.cfg.GetMasterNode()
14782 all_nodes = self.cfg.GetAllNodesInfo()
14783 all_nodes.pop(mn, None)
14786 run_nodes.extend(node.name for node in all_nodes.values()
14787 if node.group == self.group_uuid)
14789 return (run_nodes, run_nodes)
14791 def Exec(self, feedback_fn):
14792 """Rename the node group.
14795 group = self.cfg.GetNodeGroup(self.group_uuid)
14798 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14799 (self.op.group_name, self.group_uuid))
14801 group.name = self.op.new_name
14802 self.cfg.Update(group, feedback_fn)
14804 return self.op.new_name
14807 class LUGroupEvacuate(LogicalUnit):
14808 HPATH = "group-evacuate"
14809 HTYPE = constants.HTYPE_GROUP
14812 def ExpandNames(self):
14813 # This raises errors.OpPrereqError on its own:
14814 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14816 if self.op.target_groups:
14817 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
14818 self.op.target_groups)
14820 self.req_target_uuids = []
14822 if self.group_uuid in self.req_target_uuids:
14823 raise errors.OpPrereqError("Group to be evacuated (%s) cannot be used"
14824 " as a target group (targets are %s)" %
14826 utils.CommaJoin(self.req_target_uuids)),
14827 errors.ECODE_INVAL)
14829 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14831 self.share_locks = _ShareAll()
14832 self.needed_locks = {
14833 locking.LEVEL_INSTANCE: [],
14834 locking.LEVEL_NODEGROUP: [],
14835 locking.LEVEL_NODE: [],
14838 def DeclareLocks(self, level):
14839 if level == locking.LEVEL_INSTANCE:
14840 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14842 # Lock instances optimistically, needs verification once node and group
14843 # locks have been acquired
14844 self.needed_locks[locking.LEVEL_INSTANCE] = \
14845 self.cfg.GetNodeGroupInstances(self.group_uuid)
14847 elif level == locking.LEVEL_NODEGROUP:
14848 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14850 if self.req_target_uuids:
14851 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14853 # Lock all groups used by instances optimistically; this requires going
14854 # via the node before it's locked, requiring verification later on
14855 lock_groups.update(group_uuid
14856 for instance_name in
14857 self.owned_locks(locking.LEVEL_INSTANCE)
14859 self.cfg.GetInstanceNodeGroups(instance_name))
14861 # No target groups, need to lock all of them
14862 lock_groups = locking.ALL_SET
14864 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14866 elif level == locking.LEVEL_NODE:
14867 # This will only lock the nodes in the group to be evacuated which
14868 # contain actual instances
14869 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14870 self._LockInstancesNodes()
14872 # Lock all nodes in group to be evacuated and target groups
14873 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14874 assert self.group_uuid in owned_groups
14875 member_nodes = [node_name
14876 for group in owned_groups
14877 for node_name in self.cfg.GetNodeGroup(group).members]
14878 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14880 def CheckPrereq(self):
14881 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14882 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14883 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14885 assert owned_groups.issuperset(self.req_target_uuids)
14886 assert self.group_uuid in owned_groups
14888 # Check if locked instances are still correct
14889 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14891 # Get instance information
14892 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14894 # Check if node groups for locked instances are still correct
14895 _CheckInstancesNodeGroups(self.cfg, self.instances,
14896 owned_groups, owned_nodes, self.group_uuid)
14898 if self.req_target_uuids:
14899 # User requested specific target groups
14900 self.target_uuids = self.req_target_uuids
14902 # All groups except the one to be evacuated are potential targets
14903 self.target_uuids = [group_uuid for group_uuid in owned_groups
14904 if group_uuid != self.group_uuid]
14906 if not self.target_uuids:
14907 raise errors.OpPrereqError("There are no possible target groups",
14908 errors.ECODE_INVAL)
14910 def BuildHooksEnv(self):
14911 """Build hooks env.
14915 "GROUP_NAME": self.op.group_name,
14916 "TARGET_GROUPS": " ".join(self.target_uuids),
14919 def BuildHooksNodes(self):
14920 """Build hooks nodes.
14923 mn = self.cfg.GetMasterNode()
14925 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14927 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14929 return (run_nodes, run_nodes)
14931 def Exec(self, feedback_fn):
14932 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14934 assert self.group_uuid not in self.target_uuids
14936 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14937 instances=instances, target_groups=self.target_uuids)
14939 ial.Run(self.op.iallocator)
14941 if not ial.success:
14942 raise errors.OpPrereqError("Can't compute group evacuation using"
14943 " iallocator '%s': %s" %
14944 (self.op.iallocator, ial.info),
14945 errors.ECODE_NORES)
14947 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14949 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14950 len(jobs), self.op.group_name)
14952 return ResultWithJobs(jobs)
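# Hedged illustration of the request this LU hands to the iallocator (see
# IAllocator._AddChangeGroup and _BuildInputData below): the "request" part
# of the input only carries the mode, the affected instances and the
# candidate target groups.  The literal names are examples, not real
# configuration data.
def _ExampleChgGroupRequest():
  return {
    "type": constants.IALLOCATOR_MODE_CHG_GROUP,
    "instances": ["inst1.example.com", "inst2.example.com"],
    "target_groups": ["uuid-of-target-group"],
  }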
14955 class TagsLU(NoHooksLU): # pylint: disable=W0223
14956 """Generic tags LU.
14958 This is an abstract class which is the parent of all the other tags LUs.
14961 def ExpandNames(self):
14962 self.group_uuid = None
14963 self.needed_locks = {}
14965 if self.op.kind == constants.TAG_NODE:
14966 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14967 lock_level = locking.LEVEL_NODE
14968 lock_name = self.op.name
14969 elif self.op.kind == constants.TAG_INSTANCE:
14970 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14971 lock_level = locking.LEVEL_INSTANCE
14972 lock_name = self.op.name
14973 elif self.op.kind == constants.TAG_NODEGROUP:
14974 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14975 lock_level = locking.LEVEL_NODEGROUP
14976 lock_name = self.group_uuid
14977 elif self.op.kind == constants.TAG_NETWORK:
14978 self.network_uuid = self.cfg.LookupNetwork(self.op.name)
14979 lock_level = locking.LEVEL_NETWORK
14980 lock_name = self.network_uuid
14985 if lock_level and getattr(self.op, "use_locking", True):
14986 self.needed_locks[lock_level] = lock_name
14988 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14989 # not possible to acquire the BGL based on opcode parameters)
14991 def CheckPrereq(self):
14992 """Check prerequisites.
14995 if self.op.kind == constants.TAG_CLUSTER:
14996 self.target = self.cfg.GetClusterInfo()
14997 elif self.op.kind == constants.TAG_NODE:
14998 self.target = self.cfg.GetNodeInfo(self.op.name)
14999 elif self.op.kind == constants.TAG_INSTANCE:
15000 self.target = self.cfg.GetInstanceInfo(self.op.name)
15001 elif self.op.kind == constants.TAG_NODEGROUP:
15002 self.target = self.cfg.GetNodeGroup(self.group_uuid)
15003 elif self.op.kind == constants.TAG_NETWORK:
15004 self.target = self.cfg.GetNetwork(self.network_uuid)
15006 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
15007 str(self.op.kind), errors.ECODE_INVAL)
15010 class LUTagsGet(TagsLU):
15011 """Returns the tags of a given object.
15016 def ExpandNames(self):
15017 TagsLU.ExpandNames(self)
15019 # Share locks as this is only a read operation
15020 self.share_locks = _ShareAll()
15022 def Exec(self, feedback_fn):
15023 """Returns the tag list.
15026 return list(self.target.GetTags())
15029 class LUTagsSearch(NoHooksLU):
15030 """Searches the tags for a given pattern.
15035 def ExpandNames(self):
15036 self.needed_locks = {}
15038 def CheckPrereq(self):
15039 """Check prerequisites.
15041 This checks the pattern passed for validity by compiling it.
15045 self.re = re.compile(self.op.pattern)
15046 except re.error, err:
15047 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
15048 (self.op.pattern, err), errors.ECODE_INVAL)
15050 def Exec(self, feedback_fn):
15051 """Returns the tag list.
15055 tgts = [("/cluster", cfg.GetClusterInfo())]
15056 ilist = cfg.GetAllInstancesInfo().values()
15057 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
15058 nlist = cfg.GetAllNodesInfo().values()
15059 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
15060 tgts.extend(("/nodegroup/%s" % n.name, n)
15061 for n in cfg.GetAllNodeGroupsInfo().values())
15063 for path, target in tgts:
15064 for tag in target.GetTags():
15065 if self.re.search(tag):
15066 results.append((path, tag))
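# Hedged sketch of the search above: every taggable object is addressed by a
# pseudo-path and each of its tags is matched against the compiled pattern.
# The data below is illustrative only.
def _ExampleTagSearch(pattern, tagged_objects):
  """tagged_objects: iterable of (path, set_of_tags) pairs."""
  rx = re.compile(pattern)
  return [(path, tag)
          for (path, tags) in tagged_objects
          for tag in tags
          if rx.search(tag)]

# e.g. _ExampleTagSearch("^web", [("/instances/inst1.example.com",
#                                  set(["webserver", "production"]))])
# yields [("/instances/inst1.example.com", "webserver")]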
15070 class LUTagsSet(TagsLU):
15071 """Sets a tag on a given object.
15076 def CheckPrereq(self):
15077 """Check prerequisites.
15079 This checks the type and length of the tag name and value.
15082 TagsLU.CheckPrereq(self)
15083 for tag in self.op.tags:
15084 objects.TaggableObject.ValidateTag(tag)
15086 def Exec(self, feedback_fn):
15091 for tag in self.op.tags:
15092 self.target.AddTag(tag)
15093 except errors.TagError, err:
15094 raise errors.OpExecError("Error while setting tag: %s" % str(err))
15095 self.cfg.Update(self.target, feedback_fn)
15098 class LUTagsDel(TagsLU):
15099 """Delete a list of tags from a given object.
15104 def CheckPrereq(self):
15105 """Check prerequisites.
15107 This checks that we have the given tag.
15110 TagsLU.CheckPrereq(self)
15111 for tag in self.op.tags:
15112 objects.TaggableObject.ValidateTag(tag)
15113 del_tags = frozenset(self.op.tags)
15114 cur_tags = self.target.GetTags()
15116 diff_tags = del_tags - cur_tags
15118 diff_names = ("'%s'" % i for i in sorted(diff_tags))
15119 raise errors.OpPrereqError("Tag(s) %s not found" %
15120 (utils.CommaJoin(diff_names), ),
15121 errors.ECODE_NOENT)
15123 def Exec(self, feedback_fn):
15124 """Remove the tag from the object.
15127 for tag in self.op.tags:
15128 self.target.RemoveTag(tag)
15129 self.cfg.Update(self.target, feedback_fn)
15132 class LUTestDelay(NoHooksLU):
15133 """Sleep for a specified amount of time.
15135 This LU sleeps on the master and/or nodes for a specified amount of
15141 def ExpandNames(self):
15142 """Expand names and set required locks.
15144 This expands the node list, if any.
15147 self.needed_locks = {}
15148 if self.op.on_nodes:
15149 # _GetWantedNodes can be used here, but is not always appropriate to use
15150 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
15151 # more information.
15152 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
15153 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
15155 def _TestDelay(self):
15156 """Do the actual sleep.
15159 if self.op.on_master:
15160 if not utils.TestDelay(self.op.duration):
15161 raise errors.OpExecError("Error during master delay test")
15162 if self.op.on_nodes:
15163 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
15164 for node, node_result in result.items():
15165 node_result.Raise("Failure during rpc call to node %s" % node)
15167 def Exec(self, feedback_fn):
15168 """Execute the test delay opcode, with the wanted repetitions.
15171 if self.op.repeat == 0:
15174 top_value = self.op.repeat - 1
15175 for i in range(self.op.repeat):
15176 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
15180 class LUTestJqueue(NoHooksLU):
15181 """Utility LU to test some aspects of the job queue.
15186 # Must be lower than default timeout for WaitForJobChange to see whether it
15187 # notices changed jobs
15188 _CLIENT_CONNECT_TIMEOUT = 20.0
15189 _CLIENT_CONFIRM_TIMEOUT = 60.0
15192 def _NotifyUsingSocket(cls, cb, errcls):
15193 """Opens a Unix socket and waits for another program to connect.
15196 @param cb: Callback to send socket name to client
15197 @type errcls: class
15198 @param errcls: Exception class to use for errors
15201 # Using a temporary directory as there's no easy way to create temporary
15202 # sockets without writing a custom loop around tempfile.mktemp and
15204 tmpdir = tempfile.mkdtemp()
15206 tmpsock = utils.PathJoin(tmpdir, "sock")
15208 logging.debug("Creating temporary socket at %s", tmpsock)
15209 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
15214 # Send details to client
15217 # Wait for client to connect before continuing
15218 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
15220 (conn, _) = sock.accept()
15221 except socket.error, err:
15222 raise errcls("Client didn't connect in time (%s)" % err)
15226 # Remove as soon as client is connected
15227 shutil.rmtree(tmpdir)
15229 # Wait for client to close
15232 # pylint: disable=E1101
15233 # Instance of '_socketobject' has no ... member
15234 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
15236 except socket.error, err:
15237 raise errcls("Client failed to confirm notification (%s)" % err)
15241 def _SendNotification(self, test, arg, sockname):
15242 """Sends a notification to the client.
15245 @param test: Test name
15246 @param arg: Test argument (depends on test)
15247 @type sockname: string
15248 @param sockname: Socket path
15251 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
15253 def _Notify(self, prereq, test, arg):
15254 """Notifies the client of a test.
15257 @param prereq: Whether this is a prereq-phase test
15259 @param test: Test name
15260 @param arg: Test argument (depends on test)
15264 errcls = errors.OpPrereqError
15266 errcls = errors.OpExecError
15268 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
15272 def CheckArguments(self):
15273 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
15274 self.expandnames_calls = 0
15276 def ExpandNames(self):
15277 checkargs_calls = getattr(self, "checkargs_calls", 0)
15278 if checkargs_calls < 1:
15279 raise errors.ProgrammerError("CheckArguments was not called")
15281 self.expandnames_calls += 1
15283 if self.op.notify_waitlock:
15284 self._Notify(True, constants.JQT_EXPANDNAMES, None)
15286 self.LogInfo("Expanding names")
15288 # Get lock on master node (just to get a lock, not for a particular reason)
15289 self.needed_locks = {
15290 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
15293 def Exec(self, feedback_fn):
15294 if self.expandnames_calls < 1:
15295 raise errors.ProgrammerError("ExpandNames was not called")
15297 if self.op.notify_exec:
15298 self._Notify(False, constants.JQT_EXEC, None)
15300 self.LogInfo("Executing")
15302 if self.op.log_messages:
15303 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
15304 for idx, msg in enumerate(self.op.log_messages):
15305 self.LogInfo("Sending log message %s", idx + 1)
15306 feedback_fn(constants.JQT_MSGPREFIX + msg)
15307 # Report how many test messages have been sent
15308 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
15311 raise errors.OpExecError("Opcode failure was requested")
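# Hedged, self-contained sketch of the notification pattern used by
# _NotifyUsingSocket above: create a socket inside a private temporary
# directory, hand its path to the peer, wait for a connection with a
# timeout, and remove the directory once done.  The timeout value and the
# callback signature are assumptions for this example only.
def _ExampleWaitForClient(notify_fn, connect_timeout=20.0):
  tmpdir = tempfile.mkdtemp()
  try:
    sockpath = utils.PathJoin(tmpdir, "sock")
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
      sock.bind(sockpath)
      sock.listen(1)
      # Tell the peer where to connect
      notify_fn(sockpath)
      sock.settimeout(connect_timeout)
      (conn, _) = sock.accept()
      conn.close()
    finally:
      sock.close()
  finally:
    shutil.rmtree(tmpdir)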
15316 class IAllocator(object):
15317 """IAllocator framework.
15319 An IAllocator instance has several sets of attributes:
15320 - cfg that is needed to query the cluster
15321 - input data (all members of the _KEYS class attribute are required)
15322 - four buffer attributes (in|out_data|text), that represent the
15323 input (to the external script) in text and data structure format,
15324 and the output from it, again in two formats
15325 - the result variables from the script (success, info, nodes) for
15329 # pylint: disable=R0902
15330 # lots of instance attributes
15332 def __init__(self, cfg, rpc_runner, mode, **kwargs):
15334 self.rpc = rpc_runner
15335 # init buffer variables
15336 self.in_text = self.out_text = self.in_data = self.out_data = None
15337 # init all input fields so that pylint is happy
15339 self.memory = self.disks = self.disk_template = self.spindle_use = None
15340 self.os = self.tags = self.nics = self.vcpus = None
15341 self.hypervisor = None
15342 self.relocate_from = None
15344 self.instances = None
15345 self.evac_mode = None
15346 self.target_groups = []
15348 self.required_nodes = None
15349 # init result fields
15350 self.success = self.info = self.result = None
15353 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
15355 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
15356 " IAllocator" % self.mode)
15358 keyset = [n for (n, _) in keydata]
15361 if key not in keyset:
15362 raise errors.ProgrammerError("Invalid input parameter '%s' to"
15363 " IAllocator" % key)
15364 setattr(self, key, kwargs[key])
15367 if key not in kwargs:
15368 raise errors.ProgrammerError("Missing input parameter '%s' to"
15369 " IAllocator" % key)
15370 self._BuildInputData(compat.partial(fn, self), keydata)
15372 def _ComputeClusterData(self):
15373 """Compute the generic allocator input data.
15375 This is the data that is independent of the actual operation.
15379 cluster_info = cfg.GetClusterInfo()
15382 "version": constants.IALLOCATOR_VERSION,
15383 "cluster_name": cfg.GetClusterName(),
15384 "cluster_tags": list(cluster_info.GetTags()),
15385 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
15386 "ipolicy": cluster_info.ipolicy,
15388 ninfo = cfg.GetAllNodesInfo()
15389 iinfo = cfg.GetAllInstancesInfo().values()
15390 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
15393 node_list = [n.name for n in ninfo.values() if n.vm_capable]
15395 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
15396 hypervisor_name = self.hypervisor
15397 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
15398 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
15400 hypervisor_name = cluster_info.primary_hypervisor
15402 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
15405 self.rpc.call_all_instances_info(node_list,
15406 cluster_info.enabled_hypervisors)
15408 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
15410 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
15411 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
15412 i_list, config_ndata)
15413 assert len(data["nodes"]) == len(ninfo), \
15414 "Incomplete node data computed"
15416 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
15418 self.in_data = data
15421 def _ComputeNodeGroupData(cfg):
15422 """Compute node groups data.
15425 cluster = cfg.GetClusterInfo()
15426 ng = dict((guuid, {
15427 "name": gdata.name,
15428 "alloc_policy": gdata.alloc_policy,
15429 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
15431 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
15436 def _ComputeBasicNodeData(cfg, node_cfg):
15437 """Compute global node data.
15440 @returns: a dict of name: (node dict, node config)
15443 # fill in static (config-based) values
15444 node_results = dict((ninfo.name, {
15445 "tags": list(ninfo.GetTags()),
15446 "primary_ip": ninfo.primary_ip,
15447 "secondary_ip": ninfo.secondary_ip,
15448 "offline": ninfo.offline,
15449 "drained": ninfo.drained,
15450 "master_candidate": ninfo.master_candidate,
15451 "group": ninfo.group,
15452 "master_capable": ninfo.master_capable,
15453 "vm_capable": ninfo.vm_capable,
15454 "ndparams": cfg.GetNdParams(ninfo),
15456 for ninfo in node_cfg.values())
15458 return node_results
15461 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
15463 """Compute global node data.
15465 @param node_results: the basic node structures as filled from the config
15468 #TODO(dynmem): compute the right data on MAX and MIN memory
15469 # make a copy of the current dict
15470 node_results = dict(node_results)
15471 for nname, nresult in node_data.items():
15472 assert nname in node_results, "Missing basic data for node %s" % nname
15473 ninfo = node_cfg[nname]
15475 if not (ninfo.offline or ninfo.drained):
15476 nresult.Raise("Can't get data for node %s" % nname)
15477 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
15479 remote_info = _MakeLegacyNodeInfo(nresult.payload)
15481 for attr in ["memory_total", "memory_free", "memory_dom0",
15482 "vg_size", "vg_free", "cpu_total"]:
15483 if attr not in remote_info:
15484 raise errors.OpExecError("Node '%s' didn't return attribute"
15485 " '%s'" % (nname, attr))
15486 if not isinstance(remote_info[attr], int):
15487 raise errors.OpExecError("Node '%s' returned invalid value"
15489 (nname, attr, remote_info[attr]))
15490 # compute memory used by primary instances
15491 i_p_mem = i_p_up_mem = 0
15492 for iinfo, beinfo in i_list:
15493 if iinfo.primary_node == nname:
15494 i_p_mem += beinfo[constants.BE_MAXMEM]
15495 if iinfo.name not in node_iinfo[nname].payload:
15498 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
15499 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
15500 remote_info["memory_free"] -= max(0, i_mem_diff)
15502 if iinfo.admin_state == constants.ADMINST_UP:
15503 i_p_up_mem += beinfo[constants.BE_MAXMEM]
15505 # compute memory used by instances
15507 "total_memory": remote_info["memory_total"],
15508 "reserved_memory": remote_info["memory_dom0"],
15509 "free_memory": remote_info["memory_free"],
15510 "total_disk": remote_info["vg_size"],
15511 "free_disk": remote_info["vg_free"],
15512 "total_cpus": remote_info["cpu_total"],
15513 "i_pri_memory": i_p_mem,
15514 "i_pri_up_memory": i_p_up_mem,
15516 pnr_dyn.update(node_results[nname])
15517 node_results[nname] = pnr_dyn
15519 return node_results
15522 def _ComputeInstanceData(cluster_info, i_list):
15523 """Compute global instance data.
15527 for iinfo, beinfo in i_list:
15529 for nic in iinfo.nics:
15530 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
15534 "mode": filled_params[constants.NIC_MODE],
15535 "link": filled_params[constants.NIC_LINK],
15536 "network": nic.network,
15538 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
15539 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
15540 nic_data.append(nic_dict)
15542 "tags": list(iinfo.GetTags()),
15543 "admin_state": iinfo.admin_state,
15544 "vcpus": beinfo[constants.BE_VCPUS],
15545 "memory": beinfo[constants.BE_MAXMEM],
15546 "spindle_use": beinfo[constants.BE_SPINDLE_USE],
15548 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
15550 "disks": [{constants.IDISK_SIZE: dsk.size,
15551 constants.IDISK_MODE: dsk.mode}
15552 for dsk in iinfo.disks],
15553 "disk_template": iinfo.disk_template,
15554 "hypervisor": iinfo.hypervisor,
15556 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
15558 instance_data[iinfo.name] = pir
15560 return instance_data
15562 def _AddNewInstance(self):
15563 """Add new instance data to allocator structure.
15565 This in combination with _ComputeClusterData will create the
15566 correct structure needed as input for the allocator.
15568 The checks for the completeness of the opcode must have already been
15572 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
15574 if self.disk_template in constants.DTS_INT_MIRROR:
15575 self.required_nodes = 2
15577 self.required_nodes = 1
15581 "disk_template": self.disk_template,
15584 "vcpus": self.vcpus,
15585 "memory": self.memory,
15586 "spindle_use": self.spindle_use,
15587 "disks": self.disks,
15588 "disk_space_total": disk_space,
15590 "required_nodes": self.required_nodes,
15591 "hypervisor": self.hypervisor,
15596 def _AddRelocateInstance(self):
15597 """Add relocate instance data to allocator structure.
15599 This in combination with _ComputeClusterData will create the
15600 correct structure needed as input for the allocator.
15602 The checks for the completeness of the opcode must have already been
15606 instance = self.cfg.GetInstanceInfo(self.name)
15607 if instance is None:
15608 raise errors.ProgrammerError("Unknown instance '%s' passed to"
15609 " IAllocator" % self.name)
15611 if instance.disk_template not in constants.DTS_MIRRORED:
15612 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
15613 errors.ECODE_INVAL)
15615 if instance.disk_template in constants.DTS_INT_MIRROR and \
15616 len(instance.secondary_nodes) != 1:
15617 raise errors.OpPrereqError("Instance does not have exactly one secondary node",
15618 errors.ECODE_STATE)
15620 self.required_nodes = 1
15621 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
15622 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
15626 "disk_space_total": disk_space,
15627 "required_nodes": self.required_nodes,
15628 "relocate_from": self.relocate_from,
15632 def _AddNodeEvacuate(self):
15633 """Get data for node-evacuate requests.
15637 "instances": self.instances,
15638 "evac_mode": self.evac_mode,
15641 def _AddChangeGroup(self):
15642 """Get data for change-group requests.
15646 "instances": self.instances,
15647 "target_groups": self.target_groups,
15650 def _BuildInputData(self, fn, keydata):
15651 """Build input data structures.
15654 self._ComputeClusterData()
15657 request["type"] = self.mode
15658 for keyname, keytype in keydata:
15659 if keyname not in request:
15660 raise errors.ProgrammerError("Request parameter %s is missing" %
15662 val = request[keyname]
15663 if not keytype(val):
15664 raise errors.ProgrammerError("Request parameter %s doesn't pass"
15665 " validation, value %s, expected"
15666 " type %s" % (keyname, val, keytype))
15667 self.in_data["request"] = request
15669 self.in_text = serializer.Dump(self.in_data)
15671 _STRING_LIST = ht.TListOf(ht.TString)
15672 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
15673 # pylint: disable=E1101
15674 # Class '...' has no 'OP_ID' member
15675 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
15676 opcodes.OpInstanceMigrate.OP_ID,
15677 opcodes.OpInstanceReplaceDisks.OP_ID])
15681 ht.TListOf(ht.TAnd(ht.TIsLength(3),
15682 ht.TItems([ht.TNonEmptyString,
15683 ht.TNonEmptyString,
15684 ht.TListOf(ht.TNonEmptyString),
15687 ht.TListOf(ht.TAnd(ht.TIsLength(2),
15688 ht.TItems([ht.TNonEmptyString,
15691 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
15692 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
15695 constants.IALLOCATOR_MODE_ALLOC:
15698 ("name", ht.TString),
15699 ("memory", ht.TInt),
15700 ("spindle_use", ht.TInt),
15701 ("disks", ht.TListOf(ht.TDict)),
15702 ("disk_template", ht.TString),
15703 ("os", ht.TString),
15704 ("tags", _STRING_LIST),
15705 ("nics", ht.TListOf(ht.TDict)),
15706 ("vcpus", ht.TInt),
15707 ("hypervisor", ht.TString),
15709 constants.IALLOCATOR_MODE_RELOC:
15710 (_AddRelocateInstance,
15711 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
15713 constants.IALLOCATOR_MODE_NODE_EVAC:
15714 (_AddNodeEvacuate, [
15715 ("instances", _STRING_LIST),
15716 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
15718 constants.IALLOCATOR_MODE_CHG_GROUP:
15719 (_AddChangeGroup, [
15720 ("instances", _STRING_LIST),
15721 ("target_groups", _STRING_LIST),
15725 def Run(self, name, validate=True, call_fn=None):
15726 """Run an instance allocator and return the results.
15729 if call_fn is None:
15730 call_fn = self.rpc.call_iallocator_runner
15732 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
15733 result.Raise("Failure while running the iallocator script")
15735 self.out_text = result.payload
15737 self._ValidateResult()
15739 def _ValidateResult(self):
15740 """Process the allocator results.
15742 This will process and, if successful, save the result in
15743 self.out_data and the other result attributes.
15747 rdict = serializer.Load(self.out_text)
15748 except Exception, err:
15749 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
15751 if not isinstance(rdict, dict):
15752 raise errors.OpExecError("Can't parse iallocator results: not a dict")
15754 # TODO: remove backwards compatibility in later versions
15755 if "nodes" in rdict and "result" not in rdict:
15756 rdict["result"] = rdict["nodes"]
15759 for key in "success", "info", "result":
15760 if key not in rdict:
15761 raise errors.OpExecError("Can't parse iallocator results:"
15762 " missing key '%s'" % key)
15763 setattr(self, key, rdict[key])
15765 if not self._result_check(self.result):
15766 raise errors.OpExecError("Iallocator returned invalid result,"
15767 " expected %s, got %s" %
15768 (self._result_check, self.result),
15769 errors.ECODE_INVAL)
15771 if self.mode == constants.IALLOCATOR_MODE_RELOC:
15772 assert self.relocate_from is not None
15773 assert self.required_nodes == 1
15775 node2group = dict((name, ndata["group"])
15776 for (name, ndata) in self.in_data["nodes"].items())
15778 fn = compat.partial(self._NodesToGroups, node2group,
15779 self.in_data["nodegroups"])
15781 instance = self.cfg.GetInstanceInfo(self.name)
15782 request_groups = fn(self.relocate_from + [instance.primary_node])
15783 result_groups = fn(rdict["result"] + [instance.primary_node])
15785 if self.success and not set(result_groups).issubset(request_groups):
15786 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
15787 " differ from original groups (%s)" %
15788 (utils.CommaJoin(result_groups),
15789 utils.CommaJoin(request_groups)))
15791 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15792 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
15794 self.out_data = rdict
15797 def _NodesToGroups(node2group, groups, nodes):
15798 """Returns a list of unique group names for a list of nodes.
15800 @type node2group: dict
15801 @param node2group: Map from node name to group UUID
15803 @param groups: Group information
15805 @param nodes: Node names
15812 group_uuid = node2group[node]
15814 # Ignore unknown node
15818 group = groups[group_uuid]
15820 # Can't find group, let's use UUID
15821 group_name = group_uuid
15823 group_name = group["name"]
15825 result.add(group_name)
15827 return sorted(result)
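# Hedged usage example for _NodesToGroups above; the node and group data are
# illustrative only.
def _ExampleNodesToGroups():
  node2group = {"node1.example.com": "uuid-a", "node2.example.com": "uuid-b"}
  groups = {"uuid-a": {"name": "default"}, "uuid-b": {"name": "rack2"}}
  # Unknown nodes are ignored, unknown groups fall back to their UUID
  return IAllocator._NodesToGroups(node2group, groups,
                                   ["node1.example.com", "node2.example.com",
                                    "unknown.example.com"])
  # -> ["default", "rack2"]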
15830 class LUTestAllocator(NoHooksLU):
15831 """Run allocator tests.
15833 This LU runs the allocator tests
15836 def CheckPrereq(self):
15837 """Check prerequisites.
15839 This checks the opcode parameters depending on the requested direction and mode.
15842 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15843 for attr in ["memory", "disks", "disk_template",
15844 "os", "tags", "nics", "vcpus"]:
15845 if not hasattr(self.op, attr):
15846 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15847 attr, errors.ECODE_INVAL)
15848 iname = self.cfg.ExpandInstanceName(self.op.name)
15849 if iname is not None:
15850 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15851 iname, errors.ECODE_EXISTS)
15852 if not isinstance(self.op.nics, list):
15853 raise errors.OpPrereqError("Invalid parameter 'nics'",
15854 errors.ECODE_INVAL)
15855 if not isinstance(self.op.disks, list):
15856 raise errors.OpPrereqError("Invalid parameter 'disks'",
15857 errors.ECODE_INVAL)
15858 for row in self.op.disks:
15859 if (not isinstance(row, dict) or
15860 constants.IDISK_SIZE not in row or
15861 not isinstance(row[constants.IDISK_SIZE], int) or
15862 constants.IDISK_MODE not in row or
15863 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15864 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15865 " parameter", errors.ECODE_INVAL)
15866 if self.op.hypervisor is None:
15867 self.op.hypervisor = self.cfg.GetHypervisorType()
15868 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15869 fname = _ExpandInstanceName(self.cfg, self.op.name)
15870 self.op.name = fname
15871 self.relocate_from = \
15872 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15873 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15874 constants.IALLOCATOR_MODE_NODE_EVAC):
15875 if not self.op.instances:
15876 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15877 self.op.instances = _GetWantedInstances(self, self.op.instances)
15879 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15880 self.op.mode, errors.ECODE_INVAL)
15882 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15883 if self.op.allocator is None:
15884 raise errors.OpPrereqError("Missing allocator name",
15885 errors.ECODE_INVAL)
15886 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15887 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15888 self.op.direction, errors.ECODE_INVAL)
15890 def Exec(self, feedback_fn):
15891 """Run the allocator test.
15894 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15895 ial = IAllocator(self.cfg, self.rpc,
15898 memory=self.op.memory,
15899 disks=self.op.disks,
15900 disk_template=self.op.disk_template,
15904 vcpus=self.op.vcpus,
15905 hypervisor=self.op.hypervisor,
15906 spindle_use=self.op.spindle_use,
15908 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15909 ial = IAllocator(self.cfg, self.rpc,
15912 relocate_from=list(self.relocate_from),
15914 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15915 ial = IAllocator(self.cfg, self.rpc,
15917 instances=self.op.instances,
15918 target_groups=self.op.target_groups)
15919 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15920 ial = IAllocator(self.cfg, self.rpc,
15922 instances=self.op.instances,
15923 evac_mode=self.op.evac_mode)
15925 raise errors.ProgrammerError("Unhandled mode %s in"
15926 " LUTestAllocator.Exec" % self.op.mode)
15928 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15929 result = ial.in_text
15931 ial.Run(self.op.allocator, validate=False)
15932 result = ial.out_text
15936 class LUNetworkAdd(LogicalUnit):
15937 """Logical unit for creating networks.
15940 HPATH = "network-add"
15941 HTYPE = constants.HTYPE_NETWORK
15944 def BuildHooksNodes(self):
15945 """Build hooks nodes.
15948 mn = self.cfg.GetMasterNode()
15949 return ([mn], [mn])
15951 def ExpandNames(self):
15952 self.network_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
15953 self.needed_locks = {}
15954 if self.op.conflicts_check:
15955 self.needed_locks = {
15956 locking.LEVEL_NODE: locking.ALL_SET,
15958 self.share_locks[locking.LEVEL_NODE] = 1
15959 self.add_locks[locking.LEVEL_NETWORK] = self.network_uuid
15961 def CheckPrereq(self):
15962 """Check prerequisites.
15964 This checks that the given network name is not already defined as a network
15968 if self.op.network is None:
15969 raise errors.OpPrereqError("Network must be given",
15970 errors.ECODE_INVAL)
15972 uuid = self.cfg.LookupNetwork(self.op.network_name)
15975 raise errors.OpPrereqError("Network '%s' already defined" %
15976 self.op.network_name, errors.ECODE_EXISTS)
15978 if self.op.mac_prefix:
15979 utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
15981 # Check tag validity
15982 for tag in self.op.tags:
15983 objects.TaggableObject.ValidateTag(tag)
15986 def BuildHooksEnv(self):
15987 """Build hooks env.
15991 "name": self.op.network_name,
15992 "network": self.op.network,
15993 "gateway": self.op.gateway,
15994 "network6": self.op.network6,
15995 "gateway6": self.op.gateway6,
15996 "mac_prefix": self.op.mac_prefix,
15997 "network_type": self.op.network_type,
15998 "tags": self.op.tags,
16001 return _BuildNetworkHookEnv(**args)
16003 def Exec(self, feedback_fn):
16004 """Add the ip pool to the cluster.
16007 nobj = objects.Network(name=self.op.network_name,
16008 network=self.op.network,
16009 gateway=self.op.gateway,
16010 network6=self.op.network6,
16011 gateway6=self.op.gateway6,
16012 mac_prefix=self.op.mac_prefix,
16013 network_type=self.op.network_type,
16014 uuid=self.network_uuid,
16016 # Initialize the associated address pool
16018 pool = network.AddressPool.InitializeNetwork(nobj)
16019 except errors.AddressPoolError, e:
16020 raise errors.OpExecError("Cannot create IP pool for this network. %s" % e)
16022 # Check if we need to reserve the nodes and the cluster master IP
16023 # These may not be allocated to any instances in routed mode, as
16024 # they wouldn't function anyway.
16025 if self.op.conflicts_check:
16026 for node in self.cfg.GetAllNodesInfo().values():
16027 for ip in [node.primary_ip, node.secondary_ip]:
16030 self.LogInfo("Reserved node %s's IP (%s)", node.name, ip)
16032 except errors.AddressPoolError:
16035 master_ip = self.cfg.GetClusterInfo().master_ip
16037 pool.Reserve(master_ip)
16038 self.LogInfo("Reserved cluster master IP (%s)", master_ip)
16039 except errors.AddressPoolError:
16042 if self.op.add_reserved_ips:
16043 for ip in self.op.add_reserved_ips:
16045 pool.Reserve(ip, external=True)
16046 except errors.AddressPoolError, e:
16047 raise errors.OpExecError("Cannot reserve IP %s. %s" % (ip, e))
16050 for tag in self.op.tags:
16053 self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
16054 del self.remove_locks[locking.LEVEL_NETWORK]
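# Hedged sketch of the reservation loop in Exec above: node and master IPs
# that fall outside the pool (or are already reserved) simply raise
# errors.AddressPoolError and are skipped, while explicitly requested
# reservations are treated as fatal.  The helper name is an assumption for
# this example.
def _ExampleReserveIps(pool, best_effort_ips, mandatory_ips):
  for ip in best_effort_ips:
    try:
      pool.Reserve(ip)
    except errors.AddressPoolError:
      pass
  for ip in mandatory_ips:
    try:
      pool.Reserve(ip, external=True)
    except errors.AddressPoolError, err:
      raise errors.OpExecError("Cannot reserve IP %s: %s" % (ip, err))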
16057 class LUNetworkRemove(LogicalUnit):
16058 HPATH = "network-remove"
16059 HTYPE = constants.HTYPE_NETWORK
16062 def ExpandNames(self):
16063 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16065 if not self.network_uuid:
16066 raise errors.OpPrereqError("Network %s not found" % self.op.network_name,
16067 errors.ECODE_INVAL)
16068 self.needed_locks = {
16069 locking.LEVEL_NETWORK: [self.network_uuid],
16070 locking.LEVEL_NODEGROUP: locking.ALL_SET,
16072 self.share_locks[locking.LEVEL_NODEGROUP] = 1
16074 def CheckPrereq(self):
16075 """Check prerequisites.
16077 This checks that the given network name exists as a network and that it
16078 is not connected to any node group.
16083 # Verify that the network is not connected.
16084 node_groups = [group.name
16085 for group in self.cfg.GetAllNodeGroupsInfo().values()
16086 for net_uuid in group.networks.keys()
16087 if net_uuid == self.network_uuid]
16090 self.LogWarning("Network '%s' is connected to the following"
16091 " node groups: %s" % (self.op.network_name,
16092 utils.CommaJoin(utils.NiceSort(node_groups))))
16093 raise errors.OpPrereqError("Network still connected",
16094 errors.ECODE_STATE)
16096 def BuildHooksEnv(self):
16097 """Build hooks env.
16101 "NETWORK_NAME": self.op.network_name,
16104 def BuildHooksNodes(self):
16105 """Build hooks nodes.
16108 mn = self.cfg.GetMasterNode()
16109 return ([mn], [mn])
16111 def Exec(self, feedback_fn):
16112 """Remove the network.
16116 self.cfg.RemoveNetwork(self.network_uuid)
16117 except errors.ConfigurationError:
16118 raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
16119 (self.op.network_name, self.network_uuid))
16122 class LUNetworkSetParams(LogicalUnit):
16123 """Modifies the parameters of a network.
16126 HPATH = "network-modify"
16127 HTYPE = constants.HTYPE_NETWORK
16130 def CheckArguments(self):
16131 if (self.op.gateway and
16132 (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
16133 raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
16134 " at once", errors.ECODE_INVAL)
16137 def ExpandNames(self):
16138 self.network_uuid = self.cfg.LookupNetwork(self.op.network_name)
16139 self.network = self.cfg.GetNetwork(self.network_uuid)
16140 if self.network is None:
16141 raise errors.OpPrereqError("Could not retrieve network '%s' (UUID: %s)" %
16142 (self.op.network_name, self.network_uuid),
16143 errors.ECODE_INVAL)
16144 self.needed_locks = {
16145 locking.LEVEL_NETWORK: [self.network_uuid],
16148 def CheckPrereq(self):
16149 """Check prerequisites.
16152 self.gateway = self.network.gateway
16153 self.network_type = self.network.network_type
16154 self.mac_prefix = self.network.mac_prefix
16155 self.network6 = self.network.network6
16156 self.gateway6 = self.network.gateway6
16157 self.tags = self.network.tags
16159 self.pool = network.AddressPool(self.network)
16161 if self.op.gateway:
16162 if self.op.gateway == constants.VALUE_NONE:
16163 self.gateway = None
16165 self.gateway = self.op.gateway
16166 if self.pool.IsReserved(self.gateway):
16167 raise errors.OpPrereqError("%s is already reserved" %
16168 self.gateway, errors.ECODE_INVAL)
16170 if self.op.network_type:
16171 if self.op.network_type == constants.VALUE_NONE:
16172 self.network_type = None
16174 self.network_type = self.op.network_type
16176 if self.op.mac_prefix:
16177 if self.op.mac_prefix == constants.VALUE_NONE:
16178 self.mac_prefix = None
16180 utils.NormalizeAndValidateMac(self.op.mac_prefix + ":00:00:00")
16181 self.mac_prefix = self.op.mac_prefix
16183 if self.op.gateway6:
16184 if self.op.gateway6 == constants.VALUE_NONE:
16185 self.gateway6 = None
16187 self.gateway6 = self.op.gateway6
16189 if self.op.network6:
16190 if self.op.network6 == constants.VALUE_NONE:
16191 self.network6 = None
16193 self.network6 = self.op.network6
16197 def BuildHooksEnv(self):
16198 """Build hooks env.
16202 "name": self.op.network_name,
16203 "network": self.network.network,
16204 "gateway": self.gateway,
16205 "network6": self.network6,
16206 "gateway6": self.gateway6,
16207 "mac_prefix": self.mac_prefix,
16208 "network_type": self.network_type,
16210 "serial_no": self.network.serial_no,
16212 return _BuildNetworkHookEnv(**args)
16214 def BuildHooksNodes(self):
16215 """Build hooks nodes.
16218 mn = self.cfg.GetMasterNode()
16219 return ([mn], [mn])
16221 def Exec(self, feedback_fn):
16222 """Modifies the network.
16225 #TODO: reserve/release via temporary reservation manager
16226 # extend cfg.ReserveIp/ReleaseIp with the external flag
16227 if self.op.gateway:
16228 if self.gateway == self.network.gateway:
16229 self.LogWarning("Gateway is already %s" % self.gateway)
16232 self.pool.Reserve(self.gateway, external=True)
16233 if self.network.gateway:
16234 self.pool.Release(self.network.gateway, external=True)
16235 self.network.gateway = self.gateway
16237 if self.op.add_reserved_ips:
16238 for ip in self.op.add_reserved_ips:
16240 if self.pool.IsReserved(ip):
16241 self.LogWarning("IP %s is already reserved" % ip)
16243 self.pool.Reserve(ip, external=True)
16244 except errors.AddressPoolError, e:
16245 self.LogWarning("Cannot reserve ip %s. %s" % (ip, e))
16247 if self.op.remove_reserved_ips:
16248 for ip in self.op.remove_reserved_ips:
16249 if ip == self.network.gateway:
16250 self.LogWarning("Cannot unreserve Gateway's IP")
16253 if not self.pool.IsReserved(ip):
16254 self.LogWarning("IP %s is already unreserved" % ip)
16256 self.pool.Release(ip, external=True)
16257 except errors.AddressPoolError, e:
16258 self.LogWarning("Cannot release ip %s. %s" % (ip, e))
16260 if self.op.mac_prefix:
16261 self.network.mac_prefix = self.mac_prefix
16263 if self.op.network6:
16264 self.network.network6 = self.network6
16266 if self.op.gateway6:
16267 self.network.gateway6 = self.gateway6
16269 if self.op.network_type:
16270 self.network.network_type = self.network_type
16272 self.pool.Validate()
16274 self.cfg.Update(self.network, feedback_fn)
16277 class _NetworkQuery(_QueryBase):
16278 FIELDS = query.NETWORK_FIELDS
16280 def ExpandNames(self, lu):
16281 lu.needed_locks = {}
16283 self._all_networks = lu.cfg.GetAllNetworksInfo()
16284 name_to_uuid = dict((n.name, n.uuid) for n in self._all_networks.values())
16287 self.wanted = [name_to_uuid[name]
16288 for name in utils.NiceSort(name_to_uuid.keys())]
16290 # Accept the given names as either network names or UUIDs.
16293 all_uuid = frozenset(self._all_networks.keys())
16295 for name in self.names:
16296 if name in all_uuid:
16297 self.wanted.append(name)
16298 elif name in name_to_uuid:
16299 self.wanted.append(name_to_uuid[name])
16301 missing.append(name)
16304 raise errors.OpPrereqError("Some networks do not exist: %s" % missing,
16305 errors.ECODE_NOENT)
16307 def DeclareLocks(self, lu, level):
16310 def _GetQueryData(self, lu):
16311 """Computes the list of networks and their attributes.
16314 do_instances = query.NETQ_INST in self.requested_data
16315 do_groups = do_instances or (query.NETQ_GROUP in self.requested_data)
16316 do_stats = query.NETQ_STATS in self.requested_data
16317 cluster = lu.cfg.GetClusterInfo()
16319 network_to_groups = None
16320 network_to_instances = None
16323 # For NETQ_GROUP, we need to map network->[groups]
16325 all_groups = lu.cfg.GetAllNodeGroupsInfo()
16326 network_to_groups = dict((uuid, []) for uuid in self.wanted)
16327 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
16330 all_instances = lu.cfg.GetAllInstancesInfo()
16331 all_nodes = lu.cfg.GetAllNodesInfo()
16332 network_to_instances = dict((uuid, []) for uuid in self.wanted)
16335 for group in all_groups.values():
16337 group_nodes = [node.name for node in all_nodes.values() if
16338 node.group == group.uuid]
16339 group_instances = [instance for instance in all_instances.values()
16340 if instance.primary_node in group_nodes]
16342 for net_uuid in group.networks.keys():
16343 if net_uuid in network_to_groups:
16344 netparams = group.networks[net_uuid]
16345 mode = netparams[constants.NIC_MODE]
16346 link = netparams[constants.NIC_LINK]
16347 info = "%s(%s, %s)" % (group.name, mode, link)
16348 network_to_groups[net_uuid].append(info)
16351 for instance in group_instances:
16352 for nic in instance.nics:
16353 if nic.network == self._all_networks[net_uuid].name:
16354 network_to_instances[net_uuid].append(instance.name)
16359 for uuid, net in self._all_networks.items():
16360 if uuid in self.wanted:
16361 pool = network.AddressPool(net)
16363 "free_count": pool.GetFreeCount(),
16364 "reserved_count": pool.GetReservedCount(),
16365 "map": pool.GetMap(),
16366 "external_reservations": ", ".join(pool.GetExternalReservations()),
16369 return query.NetworkQueryData([self._all_networks[uuid]
16370 for uuid in self.wanted],
16372 network_to_instances,
16376 class LUNetworkQuery(NoHooksLU):
16377 """Logical unit for querying networks.
16382 def CheckArguments(self):
16383 self.nq = _NetworkQuery(qlang.MakeSimpleFilter("name", self.op.names),
16384 self.op.output_fields, False)
16386 def ExpandNames(self):
16387 self.nq.ExpandNames(self)
16389 def Exec(self, feedback_fn):
16390 return self.nq.OldStyleQuery(self)
16394 class LUNetworkConnect(LogicalUnit):
16395 """Connect a network to a nodegroup
16398 HPATH = "network-connect"
16399 HTYPE = constants.HTYPE_NETWORK
16402 def ExpandNames(self):
16403 self.network_name = self.op.network_name
16404 self.group_name = self.op.group_name
16405 self.network_mode = self.op.network_mode
16406 self.network_link = self.op.network_link
16408 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16409 self.network = self.cfg.GetNetwork(self.network_uuid)
16410 if self.network is None:
16411 raise errors.OpPrereqError("Network %s does not exist" %
16412 self.network_name, errors.ECODE_INVAL)
16414 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16415 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16416 if self.group is None:
16417 raise errors.OpPrereqError("Group %s does not exist" %
16418 self.group_name, errors.ECODE_INVAL)
16420 self.needed_locks = {
16421 locking.LEVEL_NODEGROUP: [self.group_uuid],
16423 self.share_locks[locking.LEVEL_INSTANCE] = 1
16425 def DeclareLocks(self, level):
16426 if level == locking.LEVEL_INSTANCE:
16427 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16429 # Lock instances optimistically, needs verification once group lock has
16431 if self.op.conflicts_check:
16432 self.needed_locks[locking.LEVEL_INSTANCE] = \
16433 self.cfg.GetNodeGroupInstances(self.group_uuid)
16435 def BuildHooksEnv(self):
16437 ret["GROUP_NAME"] = self.group_name
16438 ret["GROUP_NETWORK_MODE"] = self.network_mode
16439 ret["GROUP_NETWORK_LINK"] = self.network_link
16440 ret.update(_BuildNetworkHookEnvByObject(self, self.network))
16443 def BuildHooksNodes(self):
16444 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16445 return (nodes, nodes)
16448 def CheckPrereq(self):
16449 l = lambda value: ", ".join("%s: %s/%s" % (i[0], i[1], i[2])
16452 self.netparams = dict()
16453 self.netparams[constants.NIC_MODE] = self.network_mode
16454 self.netparams[constants.NIC_LINK] = self.network_link
16455 objects.NIC.CheckParameterSyntax(self.netparams)
16457 #if self.network_mode == constants.NIC_MODE_BRIDGED:
16458 # _CheckNodeGroupBridgesExist(self, self.network_link, self.group_uuid)
16459 self.connected = False
16460 if self.network_uuid in self.group.networks:
16461 self.LogWarning("Network '%s' is already mapped to group '%s'" %
16462 (self.network_name, self.group.name))
16463 self.connected = True
16466 pool = network.AddressPool(self.network)
16467 if self.op.conflicts_check:
16468 groupinstances = []
16469 for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
16470 groupinstances.append(self.cfg.GetInstanceInfo(n))
16471 instances = [(instance.name, idx, nic.ip)
16472 for instance in groupinstances
16473 for idx, nic in enumerate(instance.nics)
16474 if (not nic.network and pool._Contains(nic.ip))]
16476 self.LogWarning("Following occurrences use IPs from network %s"
16477 " that is about to be connected to nodegroup %s: %s" %
16478 (self.network_name, self.group.name,
16480 raise errors.OpPrereqError("Conflicting IPs found."
16481 " Please remove/modify"
16482 " corresponding NICs",
16483 errors.ECODE_INVAL)
16485 def Exec(self, feedback_fn):
16489 self.group.networks[self.network_uuid] = self.netparams
16490 self.cfg.Update(self.group, feedback_fn)
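# Hedged sketch of the conflict check in CheckPrereq above: a NIC conflicts
# if it is not yet attached to any network but its IP address lies inside
# the pool of the network being connected.  Parameter names are assumptions
# for this example only.
def _ExampleFindConflictingNics(instances, pool):
  return [(instance.name, idx, nic.ip)
          for instance in instances
          for idx, nic in enumerate(instance.nics)
          if not nic.network and nic.ip and pool._Contains(nic.ip)]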
16493 class LUNetworkDisconnect(LogicalUnit):
16494 """Disconnect a network from a nodegroup
16497 HPATH = "network-disconnect"
16498 HTYPE = constants.HTYPE_NETWORK
16501 def ExpandNames(self):
16502 self.network_name = self.op.network_name
16503 self.group_name = self.op.group_name
16505 self.network_uuid = self.cfg.LookupNetwork(self.network_name)
16506 self.network = self.cfg.GetNetwork(self.network_uuid)
16507 if self.network is None:
16508 raise errors.OpPrereqError("Network %s does not exist" %
16509 self.network_name, errors.ECODE_INVAL)
16511 self.group_uuid = self.cfg.LookupNodeGroup(self.group_name)
16512 self.group = self.cfg.GetNodeGroup(self.group_uuid)
16513 if self.group is None:
16514 raise errors.OpPrereqError("Group %s does not exist" %
16515 self.group_name, errors.ECODE_INVAL)
16517 self.needed_locks = {
16518 locking.LEVEL_NODEGROUP: [self.group_uuid],
16520 self.share_locks[locking.LEVEL_INSTANCE] = 1
16522 def DeclareLocks(self, level):
16523 if level == locking.LEVEL_INSTANCE:
16524 assert not self.needed_locks[locking.LEVEL_INSTANCE]
16526 # Lock instances optimistically, needs verification once group lock has
16528 if self.op.conflicts_check:
16529 self.needed_locks[locking.LEVEL_INSTANCE] = \
16530 self.cfg.GetNodeGroupInstances(self.group_uuid)
16532 def BuildHooksEnv(self):
16534 ret["GROUP_NAME"] = self.group_name
16535 ret.update(_BuildNetworkHookEnvByObject(self, self.network))
16538 def BuildHooksNodes(self):
16539 nodes = self.cfg.GetNodeGroup(self.group_uuid).members
16540 return (nodes, nodes)
16543 def CheckPrereq(self):
16544 l = lambda value: ", ".join("%s: %s/%s" % (i[0], i[1], i[2])
16547 self.connected = True
16548 if self.network_uuid not in self.group.networks:
16549 self.LogWarning("Network '%s' is"
16550 " not mapped to group '%s'" %
16551 (self.network_name, self.group.name))
16552 self.connected = False
16555 if self.op.conflicts_check:
16556 groupinstances = []
16557 for n in self.cfg.GetNodeGroupInstances(self.group_uuid):
16558 groupinstances.append(self.cfg.GetInstanceInfo(n))
16559 instances = [(instance.name, idx, nic.ip)
16560 for instance in groupinstances
16561 for idx, nic in enumerate(instance.nics)
16562 if nic.network == self.network_name]
16564 self.LogWarning("Following occurrences use IPs from network %s"
16565 " that is about to be disconnected from the nodegroup"
16567 (self.network_name, self.group.name,
16569 raise errors.OpPrereqError("Conflicting IPs."
16570 " Please remove/modify"
16571 " corresponding NICs",
16572 errors.ECODE_INVAL)
16574 def Exec(self, feedback_fn):
16575 if not self.connected:
16578 del self.group.networks[self.network_uuid]
16579 self.cfg.Update(self.group, feedback_fn)
16582 #: Query type implementations
16584 constants.QR_CLUSTER: _ClusterQuery,
16585 constants.QR_INSTANCE: _InstanceQuery,
16586 constants.QR_NODE: _NodeQuery,
16587 constants.QR_GROUP: _GroupQuery,
16588 constants.QR_NETWORK: _NetworkQuery,
16589 constants.QR_OS: _OsQuery,
16590 constants.QR_EXTSTORAGE: _ExtStorageQuery,
16591 constants.QR_EXPORT: _ExportQuery,
16594 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
16597 def _GetQueryImplementation(name):
16598 """Returns the implementation for a query type.
16600 @param name: Query type, must be one of L{constants.QR_VIA_OP}
16604 return _QUERY_IMPL[name]
16606 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
16607 errors.ECODE_INVAL)
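# Hedged usage example for the dispatch above: asking for the node group
# resource yields the _GroupQuery implementation defined earlier in this
# module.
def _ExampleQueryDispatch():
  impl = _GetQueryImplementation(constants.QR_GROUP)
  assert impl is _GroupQuery
  return impl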
16609 def _CheckForConflictingIp(lu, ip, node):
16610 """In case of a conflicting IP address, raise an error.
16613 @param ip: ip address
16615 @param node: node name
16618 (conf_net, conf_netparams) = lu.cfg.CheckIPInNodeGroup(ip, node)
16619 if conf_net is not None:
16620 raise errors.OpPrereqError("Conflicting IP found:"
16621 " %s <> %s." % (ip, conf_net),
16622 errors.ECODE_INVAL)
16624 return (None, None)