4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_UP = [constants.ADMINST_UP]
72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcode.OpCode}
93 @param jobs: A list of lists of opcode objects
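A minimal usage sketch (C{OpTestDelay} is used purely for illustration,
and C{custom_result} is a hypothetical extra return value)::

  return ResultWithJobs([[opcodes.OpTestDelay(duration=1)]],
                        custom_result="done")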
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
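A minimal concrete LU following the rules above might look like this
(an illustrative sketch only, modelled on the simple cluster LUs defined
later in this module)::

  class LUClusterExample(LogicalUnit):
    HPATH = "cluster-example"
    HTYPE = constants.HTYPE_CLUSTER

    def ExpandNames(self):
      self.needed_locks = {}

    def BuildHooksEnv(self):
      return {"OP_TARGET": self.cfg.GetClusterName()}

    def BuildHooksNodes(self):
      return ([], [self.cfg.GetMasterNode()])

    def CheckPrereq(self):
      pass

    def Exec(self, feedback_fn):
      return True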
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
168 This method is for doing a simple syntactic check and ensure
169 validity of opcode parameters, without any cluster-related
170 checks. While the same can be accomplished in ExpandNames and/or
171 CheckPrereq, doing these separate is better because:
173 - ExpandNames is left as a purely lock-related function
174 - CheckPrereq is run after we have acquired locks (and possibly
177 The function is allowed to change the self.op attribute so that
178 later methods need not worry about missing parameters.
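A typical implementation might normalize an optional parameter in place
(illustrative sketch only; the attribute shown is just an example)::

  def CheckArguments(self):
    if getattr(self.op, "debug_level", None) is None:
      self.op.debug_level = 0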
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
192 member, as a dict with lock levels as keys, and a list of needed lock names
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that level
197 - don't put anything for the BGL level
198 - if you want all locks at a level use locking.ALL_SET as a value
200 If you need to share locks (rather than acquire them exclusively) at one
201 level you can modify self.share_locks, setting a true value (usually 1) for
202 that level. By default locks are not shared.
204 This function can also define a list of tasklets, which then will be
205 executed in order instead of the usual LU-level CheckPrereq and Exec
206 functions, if those are not defined by the LU.
210 # Acquire all nodes and one instance
211 self.needed_locks = {
212 locking.LEVEL_NODE: locking.ALL_SET,
213 locking.LEVEL_INSTANCE: ['instance1.example.com'],
215 # Acquire just two nodes
216 self.needed_locks = {
217 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
220 self.needed_locks = {} # No, you can't leave it to the default value None
223 # The implementation of this method is mandatory only if the new LU is
224 # concurrent, so that old LUs don't need to be changed all at the same
227 self.needed_locks = {} # Exclusive LUs don't need locks.
229 raise NotImplementedError
231 def DeclareLocks(self, level):
232 """Declare LU locking needs for a level
234 While most LUs can just declare their locking needs at ExpandNames time,
235 sometimes there's the need to calculate some locks after having acquired
236 the ones before. This function is called just before acquiring locks at a
237 particular level, but after acquiring the ones at lower levels, and permits
238 such calculations. It can be used to modify self.needed_locks, and by
239 default it does nothing.
241 This function is only called if you have something already set in
242 self.needed_locks for the level.
244 @param level: Locking level which is going to be locked
245 @type level: member of ganeti.locking.LEVELS
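A typical override looks like the following sketch (mirroring the
L{_LockInstancesNodes} example below)::

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()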
249 def CheckPrereq(self):
250 """Check prerequisites for this LU.
252 This method should check that the prerequisites for the execution
253 of this LU are fulfilled. It can do internode communication, but
254 it should be idempotent - no cluster or system changes are
257 The method should raise errors.OpPrereqError in case something is
258 not fulfilled. Its return value is ignored.
260 This method should also update all the parameters of the opcode to
261 their canonical form if it hasn't been done by ExpandNames before.
264 if self.tasklets is not None:
265 for (idx, tl) in enumerate(self.tasklets):
266 logging.debug("Checking prerequisites for tasklet %s/%s",
267 idx + 1, len(self.tasklets))
272 def Exec(self, feedback_fn):
275 This method should implement the actual work. It should raise
276 errors.OpExecError for failures that are somewhat dealt with in
280 if self.tasklets is not None:
281 for (idx, tl) in enumerate(self.tasklets):
282 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
285 raise NotImplementedError
287 def BuildHooksEnv(self):
288 """Build hooks environment for this LU.
291 @return: Dictionary containing the environment that will be used for
292 running the hooks for this LU. The keys of the dict must not be prefixed
293 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294 will extend the environment with additional variables. If no environment
295 should be defined, an empty dictionary should be returned (not C{None}).
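Example (sketch, as used by the simple cluster LUs)::

  return {
    "OP_TARGET": self.cfg.GetClusterName(),
    }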
296 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
300 raise NotImplementedError
302 def BuildHooksNodes(self):
303 """Build list of nodes to run LU's hooks.
305 @rtype: tuple; (list, list)
306 @return: Tuple containing a list of node names on which the hook
307 should run before the execution and a list of node names on which the
308 hook should run after the execution. If no nodes are needed, an
309 empty list should be returned (and not None).
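Example (sketch; run the post hook only on the master node)::

  return ([], [self.cfg.GetMasterNode()])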
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317 """Notify the LU about the results of its hooks.
319 This method is called every time a hooks phase is executed, and notifies
320 the Logical Unit about the hooks' result. The LU can then use it to alter
321 its result based on the hooks. By default the method does nothing and the
322 previous result is passed back unchanged but any LU can define it if it
323 wants to use the local cluster hook-scripts somehow.
325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327 @param hook_results: the results of the multi-node hooks rpc call
328 @param feedback_fn: function used to send feedback back to the caller
329 @param lu_result: the previous Exec result this LU had, or None
331 @return: the new Exec result, based on the previous result
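An overriding implementation could, for instance, report on the post
phase (illustrative sketch only)::

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    if phase == constants.HOOKS_PHASE_POST:
      feedback_fn("Post hooks have run")
    return lu_result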
335 # API must be kept, thus we ignore the unused argument and "could
336 # be a function" warnings
337 # pylint: disable=W0613,R0201
340 def _ExpandAndLockInstance(self):
341 """Helper function to expand and lock an instance.
343 Many LUs that work on an instance take its name in self.op.instance_name
344 and need to expand it and then declare the expanded name for locking. This
345 function does it, and then updates self.op.instance_name to the expanded
346 name. It also initializes needed_locks as a dict, if this hasn't been done
350 if self.needed_locks is None:
351 self.needed_locks = {}
353 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354 "_ExpandAndLockInstance called with instance-level locks set"
355 self.op.instance_name = _ExpandInstanceName(self.cfg,
356 self.op.instance_name)
357 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
359 def _LockInstancesNodes(self, primary_only=False,
360 level=locking.LEVEL_NODE):
361 """Helper function to declare instances' nodes for locking.
363 This function should be called after locking one or more instances to lock
364 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365 with all primary or secondary nodes for instances already locked and
366 present in self.needed_locks[locking.LEVEL_INSTANCE].
368 It should be called from DeclareLocks, and for safety only works if
369 self.recalculate_locks[locking.LEVEL_NODE] is set.
371 In the future it may grow parameters to just lock some instance's nodes, or
372 to just lock primary or secondary nodes, if needed.
374 It should be called in DeclareLocks in a way similar to::
376 if level == locking.LEVEL_NODE:
377 self._LockInstancesNodes()
379 @type primary_only: boolean
380 @param primary_only: only lock primary nodes of locked instances
381 @param level: Which lock level to use for locking nodes
384 assert level in self.recalculate_locks, \
385 "_LockInstancesNodes helper function called with no nodes to recalculate"
387 # TODO: check whether we've really been called with the instance locks held
389 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390 # future we might want to have different behaviors depending on the value
391 # of self.recalculate_locks[locking.LEVEL_NODE]
393 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
394 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
395 wanted_nodes.append(instance.primary_node)
397 wanted_nodes.extend(instance.secondary_nodes)
399 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400 self.needed_locks[level] = wanted_nodes
401 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402 self.needed_locks[level].extend(wanted_nodes)
404 raise errors.ProgrammerError("Unknown recalculation mode")
406 del self.recalculate_locks[level]
409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410 """Simple LU which runs no hooks.
412 This LU is intended as a parent for other LogicalUnits which will
413 run no hooks, in order to reduce duplicate code.
419 def BuildHooksEnv(self):
420 """Empty BuildHooksEnv for NoHooksLu.
422 This just raises an error.
425 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
427 def BuildHooksNodes(self):
428 """Empty BuildHooksNodes for NoHooksLU.
431 raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklets.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or
477 raise NotImplementedError
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
524 raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names
530 def ExpandNames(self, lu):
531 """Expand names for this query.
533 See L{LogicalUnit.ExpandNames}.
536 raise NotImplementedError()
538 def DeclareLocks(self, lu, level):
539 """Declare locks for this query.
541 See L{LogicalUnit.DeclareLocks}.
544 raise NotImplementedError()
546 def _GetQueryData(self, lu):
547 """Collects all data for this query.
549 @return: Query data object
552 raise NotImplementedError()
554 def NewStyleQuery(self, lu):
555 """Collect data and execute query.
558 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559 sort_by_name=self.sort_by_name)
561 def OldStyleQuery(self, lu):
562 """Collect data and execute query.
565 return self.query.OldStyleQuery(self._GetQueryData(lu),
566 sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)
576 def _MakeLegacyNodeInfo(data):
577 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
579 Converts the data into a single dictionary. This is fine for most use cases,
580 but some require information from more than one volume group or hypervisor.
583 (bootid, (vg_info, ), (hv_info, )) = data
585 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
590 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
591 """Checks if the owned node groups are still correct for an instance.
593 @type cfg: L{config.ConfigWriter}
594 @param cfg: The cluster configuration
595 @type instance_name: string
596 @param instance_name: Instance name
597 @type owned_groups: set or frozenset
598 @param owned_groups: List of currently owned node groups
601 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
603 if not owned_groups.issuperset(inst_groups):
604 raise errors.OpPrereqError("Instance %s's node groups changed since"
605 " locks were acquired, current groups are"
606 " are '%s', owning groups '%s'; retry the"
609 utils.CommaJoin(inst_groups),
610 utils.CommaJoin(owned_groups)),
616 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
617 """Checks if the instances in a node group are still correct.
619 @type cfg: L{config.ConfigWriter}
620 @param cfg: The cluster configuration
621 @type group_uuid: string
622 @param group_uuid: Node group UUID
623 @type owned_instances: set or frozenset
624 @param owned_instances: List of currently owned instances
627 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
628 if owned_instances != wanted_instances:
629 raise errors.OpPrereqError("Instances in node group '%s' changed since"
630 " locks were acquired, wanted '%s', have '%s';"
631 " retry the operation" %
633 utils.CommaJoin(wanted_instances),
634 utils.CommaJoin(owned_instances)),
637 return wanted_instances
640 def _SupportsOob(cfg, node):
641 """Tells if node supports OOB.
643 @type cfg: L{config.ConfigWriter}
644 @param cfg: The cluster configuration
645 @type node: L{objects.Node}
646 @param node: The node
647 @return: The OOB script if supported or an empty string otherwise
650 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
653 def _GetWantedNodes(lu, nodes):
654 """Returns list of checked and expanded node names.
656 @type lu: L{LogicalUnit}
657 @param lu: the logical unit on whose behalf we execute
659 @param nodes: list of node names or None for all nodes
661 @return: the list of nodes, sorted
662 @raise errors.ProgrammerError: if the nodes parameter is wrong type
666 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
668 return utils.NiceSort(lu.cfg.GetNodeList())
671 def _GetWantedInstances(lu, instances):
672 """Returns list of checked and expanded instance names.
674 @type lu: L{LogicalUnit}
675 @param lu: the logical unit on whose behalf we execute
676 @type instances: list
677 @param instances: list of instance names or None for all instances
679 @return: the list of instances, sorted
680 @raise errors.OpPrereqError: if the instances parameter is wrong type
681 @raise errors.OpPrereqError: if any of the passed instances is not found
685 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
687 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
691 def _GetUpdatedParams(old_params, update_dict,
692 use_default=True, use_none=False):
693 """Return the new version of a parameter dictionary.
695 @type old_params: dict
696 @param old_params: old parameters
697 @type update_dict: dict
698 @param update_dict: dict containing new parameter values, or
699 constants.VALUE_DEFAULT to reset the parameter to its default
701 @type use_default: boolean
702 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
703 values as 'to be deleted' values
704 @type use_none: boolean
705 @param use_none: whether to recognise C{None} values as 'to be
708 @return: the new parameter dictionary
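Example (illustrative; with the default flags, C{VALUE_DEFAULT} removes
a key)::

  >>> _GetUpdatedParams({"a": 1, "b": 2}, {"b": constants.VALUE_DEFAULT})
  {'a': 1}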
711 params_copy = copy.deepcopy(old_params)
712 for key, val in update_dict.iteritems():
713 if ((use_default and val == constants.VALUE_DEFAULT) or
714 (use_none and val is None)):
720 params_copy[key] = val
724 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
725 """Return the new version of a instance policy.
727 @param group_policy: whether this policy applies to a group and thus
728 we should support removal of policy entries
731 use_none = use_default = group_policy
732 ipolicy = copy.deepcopy(old_ipolicy)
733 for key, value in new_ipolicy.items():
734 if key not in constants.IPOLICY_ALL_KEYS:
735 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
737 if key in constants.IPOLICY_ISPECS:
738 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
739 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
741 use_default=use_default)
743 # FIXME: we assume all others are lists; this should be redone
745 if not value or value == [constants.VALUE_DEFAULT]:
749 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
750 " on the cluster'" % key,
753 ipolicy[key] = list(value)
755 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
756 except errors.ConfigurationError, err:
757 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
762 def _UpdateAndVerifySubDict(base, updates, type_check):
763 """Updates and verifies a dict with sub dicts of the same type.
765 @param base: The dict with the old data
766 @param updates: The dict with the new data
767 @param type_check: Dict suitable to ForceDictType to verify correct types
768 @return: A new dict with updated and verified values
772 new = _GetUpdatedParams(old, value)
773 utils.ForceDictType(new, type_check)
776 ret = copy.deepcopy(base)
777 ret.update(dict((key, fn(base.get(key, {}), value))
778 for key, value in updates.items()))
782 def _MergeAndVerifyHvState(op_input, obj_input):
783 """Combines the hv state from an opcode with the one of the object
785 @param op_input: The input dict from the opcode
786 @param obj_input: The input dict from the objects
787 @return: The verified and updated dict
791 invalid_hvs = set(op_input) - constants.HYPER_TYPES
793 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
794 " %s" % utils.CommaJoin(invalid_hvs),
796 if obj_input is None:
798 type_check = constants.HVSTS_PARAMETER_TYPES
799 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
804 def _MergeAndVerifyDiskState(op_input, obj_input):
805 """Combines the disk state from an opcode with the one of the object
807 @param op_input: The input dict from the opcode
808 @param obj_input: The input dict from the objects
809 @return: The verified and updated dict
812 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
814 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
815 utils.CommaJoin(invalid_dst),
817 type_check = constants.DSS_PARAMETER_TYPES
818 if obj_input is None:
820 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
822 for key, value in op_input.items())
827 def _ReleaseLocks(lu, level, names=None, keep=None):
828 """Releases locks owned by an LU.
830 @type lu: L{LogicalUnit}
831 @param level: Lock level
832 @type names: list or None
833 @param names: Names of locks to release
834 @type keep: list or None
835 @param keep: Names of locks to retain
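Example (sketch; keep only the lock on the instance's primary node)::

  _ReleaseLocks(lu, locking.LEVEL_NODE, keep=[instance.primary_node])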
838 assert not (keep is not None and names is not None), \
839 "Only one of the 'names' and the 'keep' parameters can be given"
841 if names is not None:
842 should_release = names.__contains__
844 should_release = lambda name: name not in keep
846 should_release = None
848 owned = lu.owned_locks(level)
850 # Not owning any lock at this level, do nothing
857 # Determine which locks to release
859 if should_release(name):
864 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
866 # Release just some locks
867 lu.glm.release(level, names=release)
869 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
872 lu.glm.release(level)
874 assert not lu.glm.is_owned(level), "No locks should be owned"
877 def _MapInstanceDisksToNodes(instances):
878 """Creates a map from (node, volume) to instance name.
880 @type instances: list of L{objects.Instance}
881 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
884 return dict(((node, vol), inst.name)
885 for inst in instances
886 for (node, vols) in inst.MapLVsByNode().items()
890 def _RunPostHook(lu, node_name):
891 """Runs the post-hook for an opcode on a single node.
894 hm = lu.proc.BuildHooksManager(lu)
896 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
898 # pylint: disable=W0702
899 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
902 def _CheckOutputFields(static, dynamic, selected):
903 """Checks whether all selected fields are valid.
905 @type static: L{utils.FieldSet}
906 @param static: static fields set
907 @type dynamic: L{utils.FieldSet}
908 @param dynamic: dynamic fields set
915 delta = f.NonMatching(selected)
917 raise errors.OpPrereqError("Unknown output fields selected: %s"
918 % ",".join(delta), errors.ECODE_INVAL)
921 def _CheckGlobalHvParams(params):
922 """Validates that given hypervisor params are not global ones.
924 This will ensure that instances don't get customised versions of
928 used_globals = constants.HVC_GLOBALS.intersection(params)
930 msg = ("The following hypervisor parameters are global and cannot"
931 " be customized at instance level, please modify them at"
932 " cluster level: %s" % utils.CommaJoin(used_globals))
933 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
936 def _CheckNodeOnline(lu, node, msg=None):
937 """Ensure that a given node is online.
939 @param lu: the LU on behalf of which we make the check
940 @param node: the node to check
941 @param msg: if passed, should be a message to replace the default one
942 @raise errors.OpPrereqError: if the node is offline
946 msg = "Can't use offline node"
947 if lu.cfg.GetNodeInfo(node).offline:
948 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
951 def _CheckNodeNotDrained(lu, node):
952 """Ensure that a given node is not drained.
954 @param lu: the LU on behalf of which we make the check
955 @param node: the node to check
956 @raise errors.OpPrereqError: if the node is drained
959 if lu.cfg.GetNodeInfo(node).drained:
960 raise errors.OpPrereqError("Can't use drained node %s" % node,
964 def _CheckNodeVmCapable(lu, node):
965 """Ensure that a given node is vm capable.
967 @param lu: the LU on behalf of which we make the check
968 @param node: the node to check
969 @raise errors.OpPrereqError: if the node is not vm capable
972 if not lu.cfg.GetNodeInfo(node).vm_capable:
973 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
977 def _CheckNodeHasOS(lu, node, os_name, force_variant):
978 """Ensure that a node supports a given OS.
980 @param lu: the LU on behalf of which we make the check
981 @param node: the node to check
982 @param os_name: the OS to query about
983 @param force_variant: whether to ignore variant errors
984 @raise errors.OpPrereqError: if the node is not supporting the OS
987 result = lu.rpc.call_os_get(node, os_name)
988 result.Raise("OS '%s' not in supported OS list for node %s" %
990 prereq=True, ecode=errors.ECODE_INVAL)
991 if not force_variant:
992 _CheckOSVariant(result.payload, os_name)
995 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
996 """Ensure that a node has the given secondary ip.
998 @type lu: L{LogicalUnit}
999 @param lu: the LU on behalf of which we make the check
1001 @param node: the node to check
1002 @type secondary_ip: string
1003 @param secondary_ip: the ip to check
1004 @type prereq: boolean
1005 @param prereq: whether to throw a prerequisite or an execute error
1006 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1007 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1010 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1011 result.Raise("Failure checking secondary ip on node %s" % node,
1012 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1013 if not result.payload:
1014 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1015 " please fix and re-run this command" % secondary_ip)
1017 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1019 raise errors.OpExecError(msg)
1022 def _GetClusterDomainSecret():
1023 """Reads the cluster domain secret.
1026 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1030 def _CheckInstanceState(lu, instance, req_states, msg=None):
1031 """Ensure that an instance is in one of the required states.
1033 @param lu: the LU on behalf of which we make the check
1034 @param instance: the instance to check
1035 @param msg: if passed, should be a message to replace the default one
1036 @raise errors.OpPrereqError: if the instance is not in the required state
1040 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1041 if instance.admin_state not in req_states:
1042 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1043 (instance.name, instance.admin_state, msg),
1046 if constants.ADMINST_UP not in req_states:
1047 pnode = instance.primary_node
1048 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1049 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1050 prereq=True, ecode=errors.ECODE_ENVIRON)
1052 if instance.name in ins_l.payload:
1053 raise errors.OpPrereqError("Instance %s is running, %s" %
1054 (instance.name, msg), errors.ECODE_STATE)
1057 def _ComputeMinMaxSpec(name, ipolicy, value):
1058 """Computes if value is in the desired range.
1060 @param name: name of the parameter for which we perform the check
1061 @param ipolicy: dictionary containing min, max and std values
1062 @param value: actual value that we want to use
1063 @return: None or element not meeting the criteria
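Example (illustrative)::

  policy = {
    constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
    constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 1024},
    }
  _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, policy, 2048) # -> message
  _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, policy, 512)  # -> None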
1067 if value in [None, constants.VALUE_AUTO]:
1069 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1070 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1071 if value > max_v or min_v > value:
1072 return ("%s value %s is not in range [%s, %s]" %
1073 (name, value, min_v, max_v))
1077 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1078 nic_count, disk_sizes,
1079 _compute_fn=_ComputeMinMaxSpec):
1080 """Verifies ipolicy against provided specs.
1083 @param ipolicy: The ipolicy
1085 @param mem_size: The memory size
1086 @type cpu_count: int
1087 @param cpu_count: Used cpu cores
1088 @type disk_count: int
1089 @param disk_count: Number of disks used
1090 @type nic_count: int
1091 @param nic_count: Number of nics used
1092 @type disk_sizes: list of ints
1093 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1094 @param _compute_fn: The compute function (unittest only)
1095 @return: A list of violations, or an empty list if no violations are found
1098 assert disk_count == len(disk_sizes)
1101 (constants.ISPEC_MEM_SIZE, mem_size),
1102 (constants.ISPEC_CPU_COUNT, cpu_count),
1103 (constants.ISPEC_DISK_COUNT, disk_count),
1104 (constants.ISPEC_NIC_COUNT, nic_count),
1105 ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
1108 (_compute_fn(name, ipolicy, value)
1109 for (name, value) in test_settings))
1112 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1113 _compute_fn=_ComputeIPolicySpecViolation):
1114 """Compute if instance meets the specs of ipolicy.
1117 @param ipolicy: The ipolicy to verify against
1118 @type instance: L{objects.Instance}
1119 @param instance: The instance to verify
1120 @param _compute_fn: The function to verify ipolicy (unittest only)
1121 @see: L{_ComputeIPolicySpecViolation}
1124 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1125 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1126 disk_count = len(instance.disks)
1127 disk_sizes = [disk.size for disk in instance.disks]
1128 nic_count = len(instance.nics)
1130 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1134 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1135 _compute_fn=_ComputeIPolicySpecViolation):
1136 """Compute if instance specs meets the specs of ipolicy.
1139 @param ipolicy: The ipolicy to verify against
1140 @type instance_spec: dict
1141 @param instance_spec: The instance spec to verify
1142 @param _compute_fn: The function to verify ipolicy (unittest only)
1143 @see: L{_ComputeIPolicySpecViolation}
1146 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1147 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1148 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1149 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1150 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1152 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1156 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1158 _compute_fn=_ComputeIPolicyInstanceViolation):
1159 """Compute if instance meets the specs of the new target group.
1161 @param ipolicy: The ipolicy to verify
1162 @param instance: The instance object to verify
1163 @param current_group: The current group of the instance
1164 @param target_group: The new group of the instance
1165 @param _compute_fn: The function to verify ipolicy (unittest only)
1166 @see: L{_ComputeIPolicySpecViolation}
1169 if current_group == target_group:
1172 return _compute_fn(ipolicy, instance)
1175 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1176 _compute_fn=_ComputeIPolicyNodeViolation):
1177 """Checks that the target node is correct in terms of instance policy.
1179 @param ipolicy: The ipolicy to verify
1180 @param instance: The instance object to verify
1181 @param node: The new node to relocate
1182 @param ignore: Ignore violations of the ipolicy
1183 @param _compute_fn: The function to verify ipolicy (unittest only)
1184 @see: L{_ComputeIPolicySpecViolation}
1187 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1188 res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1191 msg = ("Instance does not meet target node group's (%s) instance"
1192 " policy: %s") % (node.group, utils.CommaJoin(res))
1196 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1199 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1200 """Computes a set of any instances that would violate the new ipolicy.
1202 @param old_ipolicy: The current (still in-place) ipolicy
1203 @param new_ipolicy: The new (to become) ipolicy
1204 @param instances: List of instances to verify
1205 @return: A set of instances which violate the new ipolicy but did not before
1208 return (_ComputeViolatingInstances(new_ipolicy, instances) -
1209 _ComputeViolatingInstances(old_ipolicy, instances))
1212 def _ExpandItemName(fn, name, kind):
1213 """Expand an item name.
1215 @param fn: the function to use for expansion
1216 @param name: requested item name
1217 @param kind: text description ('Node' or 'Instance')
1218 @return: the resolved (full) name
1219 @raise errors.OpPrereqError: if the item is not found
1222 full_name = fn(name)
1223 if full_name is None:
1224 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1229 def _ExpandNodeName(cfg, name):
1230 """Wrapper over L{_ExpandItemName} for nodes."""
1231 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1234 def _ExpandInstanceName(cfg, name):
1235 """Wrapper over L{_ExpandItemName} for instance."""
1236 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1239 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1240 minmem, maxmem, vcpus, nics, disk_template, disks,
1241 bep, hvp, hypervisor_name, tags):
1242 """Builds instance related env variables for hooks
1244 This builds the hook environment from individual variables.
1247 @param name: the name of the instance
1248 @type primary_node: string
1249 @param primary_node: the name of the instance's primary node
1250 @type secondary_nodes: list
1251 @param secondary_nodes: list of secondary nodes as strings
1252 @type os_type: string
1253 @param os_type: the name of the instance's OS
1254 @type status: string
1255 @param status: the desired status of the instance
1256 @type minmem: string
1257 @param minmem: the minimum memory size of the instance
1258 @type maxmem: string
1259 @param maxmem: the maximum memory size of the instance
1261 @param vcpus: the count of VCPUs the instance has
1263 @param nics: list of tuples (ip, mac, mode, link) representing
1264 the NICs the instance has
1265 @type disk_template: string
1266 @param disk_template: the disk template of the instance
1268 @param disks: the list of (size, mode) pairs
1270 @param bep: the backend parameters for the instance
1272 @param hvp: the hypervisor parameters for the instance
1273 @type hypervisor_name: string
1274 @param hypervisor_name: the hypervisor for the instance
1276 @param tags: list of instance tags as strings
1278 @return: the hook environment for this instance
1283 "INSTANCE_NAME": name,
1284 "INSTANCE_PRIMARY": primary_node,
1285 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1286 "INSTANCE_OS_TYPE": os_type,
1287 "INSTANCE_STATUS": status,
1288 "INSTANCE_MINMEM": minmem,
1289 "INSTANCE_MAXMEM": maxmem,
1290 # TODO(2.7) remove deprecated "memory" value
1291 "INSTANCE_MEMORY": maxmem,
1292 "INSTANCE_VCPUS": vcpus,
1293 "INSTANCE_DISK_TEMPLATE": disk_template,
1294 "INSTANCE_HYPERVISOR": hypervisor_name,
1297 nic_count = len(nics)
1298 for idx, (ip, mac, mode, link) in enumerate(nics):
1301 env["INSTANCE_NIC%d_IP" % idx] = ip
1302 env["INSTANCE_NIC%d_MAC" % idx] = mac
1303 env["INSTANCE_NIC%d_MODE" % idx] = mode
1304 env["INSTANCE_NIC%d_LINK" % idx] = link
1305 if mode == constants.NIC_MODE_BRIDGED:
1306 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1310 env["INSTANCE_NIC_COUNT"] = nic_count
1313 disk_count = len(disks)
1314 for idx, (size, mode) in enumerate(disks):
1315 env["INSTANCE_DISK%d_SIZE" % idx] = size
1316 env["INSTANCE_DISK%d_MODE" % idx] = mode
1320 env["INSTANCE_DISK_COUNT"] = disk_count
1325 env["INSTANCE_TAGS"] = " ".join(tags)
1327 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1328 for key, value in source.items():
1329 env["INSTANCE_%s_%s" % (kind, key)] = value
1334 def _NICListToTuple(lu, nics):
1335 """Build a list of nic information tuples.
1337 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1338 value in LUInstanceQueryData.
1340 @type lu: L{LogicalUnit}
1341 @param lu: the logical unit on whose behalf we execute
1342 @type nics: list of L{objects.NIC}
1343 @param nics: list of nics to convert to hooks tuples
1347 cluster = lu.cfg.GetClusterInfo()
1351 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1352 mode = filled_params[constants.NIC_MODE]
1353 link = filled_params[constants.NIC_LINK]
1354 hooks_nics.append((ip, mac, mode, link))
1358 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1359 """Builds instance related env variables for hooks from an object.
1361 @type lu: L{LogicalUnit}
1362 @param lu: the logical unit on whose behalf we execute
1363 @type instance: L{objects.Instance}
1364 @param instance: the instance for which we should build the
1366 @type override: dict
1367 @param override: dictionary with key/values that will override
1370 @return: the hook environment dictionary
1373 cluster = lu.cfg.GetClusterInfo()
1374 bep = cluster.FillBE(instance)
1375 hvp = cluster.FillHV(instance)
1377 "name": instance.name,
1378 "primary_node": instance.primary_node,
1379 "secondary_nodes": instance.secondary_nodes,
1380 "os_type": instance.os,
1381 "status": instance.admin_state,
1382 "maxmem": bep[constants.BE_MAXMEM],
1383 "minmem": bep[constants.BE_MINMEM],
1384 "vcpus": bep[constants.BE_VCPUS],
1385 "nics": _NICListToTuple(lu, instance.nics),
1386 "disk_template": instance.disk_template,
1387 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1390 "hypervisor_name": instance.hypervisor,
1391 "tags": instance.tags,
1394 args.update(override)
1395 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1398 def _AdjustCandidatePool(lu, exceptions):
1399 """Adjust the candidate pool after node operations.
1402 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1404 lu.LogInfo("Promoted nodes to master candidate role: %s",
1405 utils.CommaJoin(node.name for node in mod_list))
1406 for name in mod_list:
1407 lu.context.ReaddNode(name)
1408 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1410 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1414 def _DecideSelfPromotion(lu, exceptions=None):
1415 """Decide whether I should promote myself as a master candidate.
1418 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1419 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1420 # the new node will increase mc_max with one, so:
1421 mc_should = min(mc_should + 1, cp_size)
1422 return mc_now < mc_should
1425 def _CalculateGroupIPolicy(cluster, group):
1426 """Calculate instance policy for group.
1429 return cluster.SimpleFillIPolicy(group.ipolicy)
1432 def _ComputeViolatingInstances(ipolicy, instances):
1433 """Computes a set of instances who violates given ipolicy.
1435 @param ipolicy: The ipolicy to verify
1436 @type instances: list of L{objects.Instance}
1437 @param instances: List of instances to verify
1438 @return: A frozenset of instance names violating the ipolicy
1441 return frozenset([inst.name for inst in instances
1442 if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1445 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1446 """Check that the brigdes needed by a list of nics exist.
1449 cluster = lu.cfg.GetClusterInfo()
1450 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1451 brlist = [params[constants.NIC_LINK] for params in paramslist
1452 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1454 result = lu.rpc.call_bridges_exist(target_node, brlist)
1455 result.Raise("Error checking bridges on destination node '%s'" %
1456 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1459 def _CheckInstanceBridgesExist(lu, instance, node=None):
1460 """Check that the brigdes needed by an instance exist.
1464 node = instance.primary_node
1465 _CheckNicsBridgesExist(lu, instance.nics, node)
1468 def _CheckOSVariant(os_obj, name):
1469 """Check whether an OS name conforms to the os variants specification.
1471 @type os_obj: L{objects.OS}
1472 @param os_obj: OS object to check
1474 @param name: OS name passed by the user, to check for validity
1477 variant = objects.OS.GetVariant(name)
1478 if not os_obj.supported_variants:
1480 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1481 " passed)" % (os_obj.name, variant),
1485 raise errors.OpPrereqError("OS name must include a variant",
1488 if variant not in os_obj.supported_variants:
1489 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1492 def _GetNodeInstancesInner(cfg, fn):
1493 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1496 def _GetNodeInstances(cfg, node_name):
1497 """Returns a list of all primary and secondary instances on a node.
1501 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1504 def _GetNodePrimaryInstances(cfg, node_name):
1505 """Returns primary instances on a node.
1508 return _GetNodeInstancesInner(cfg,
1509 lambda inst: node_name == inst.primary_node)
1512 def _GetNodeSecondaryInstances(cfg, node_name):
1513 """Returns secondary instances on a node.
1516 return _GetNodeInstancesInner(cfg,
1517 lambda inst: node_name in inst.secondary_nodes)
1520 def _GetStorageTypeArgs(cfg, storage_type):
1521 """Returns the arguments for a storage type.
1524 # Special case for file storage
1525 if storage_type == constants.ST_FILE:
1526 # storage.FileStorage wants a list of storage directories
1527 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1532 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1535 for dev in instance.disks:
1536 cfg.SetDiskID(dev, node_name)
1538 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1539 result.Raise("Failed to get disk status from node %s" % node_name,
1540 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1542 for idx, bdev_status in enumerate(result.payload):
1543 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1549 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1550 """Check the sanity of iallocator and node arguments and use the
1551 cluster-wide iallocator if appropriate.
1553 Check that at most one of (iallocator, node) is specified. If none is
1554 specified, then the LU's opcode's iallocator slot is filled with the
1555 cluster-wide default iallocator.
1557 @type iallocator_slot: string
1558 @param iallocator_slot: the name of the opcode iallocator slot
1559 @type node_slot: string
1560 @param node_slot: the name of the opcode target node slot
1563 node = getattr(lu.op, node_slot, None)
1564 iallocator = getattr(lu.op, iallocator_slot, None)
1566 if node is not None and iallocator is not None:
1567 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1569 elif node is None and iallocator is None:
1570 default_iallocator = lu.cfg.GetDefaultIAllocator()
1571 if default_iallocator:
1572 setattr(lu.op, iallocator_slot, default_iallocator)
1574 raise errors.OpPrereqError("No iallocator or node given and no"
1575 " cluster-wide default iallocator found;"
1576 " please specify either an iallocator or a"
1577 " node, or set a cluster-wide default"
1581 def _GetDefaultIAllocator(cfg, iallocator):
1582 """Decides on which iallocator to use.
1584 @type cfg: L{config.ConfigWriter}
1585 @param cfg: Cluster configuration object
1586 @type iallocator: string or None
1587 @param iallocator: Iallocator specified in opcode
1589 @return: Iallocator name
1593 # Use default iallocator
1594 iallocator = cfg.GetDefaultIAllocator()
1597 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1598 " opcode nor as a cluster-wide default",
1604 class LUClusterPostInit(LogicalUnit):
1605 """Logical unit for running hooks after cluster initialization.
1608 HPATH = "cluster-init"
1609 HTYPE = constants.HTYPE_CLUSTER
1611 def BuildHooksEnv(self):
1616 "OP_TARGET": self.cfg.GetClusterName(),
1619 def BuildHooksNodes(self):
1620 """Build hooks nodes.
1623 return ([], [self.cfg.GetMasterNode()])
1625 def Exec(self, feedback_fn):
1632 class LUClusterDestroy(LogicalUnit):
1633 """Logical unit for destroying the cluster.
1636 HPATH = "cluster-destroy"
1637 HTYPE = constants.HTYPE_CLUSTER
1639 def BuildHooksEnv(self):
1644 "OP_TARGET": self.cfg.GetClusterName(),
1647 def BuildHooksNodes(self):
1648 """Build hooks nodes.
1653 def CheckPrereq(self):
1654 """Check prerequisites.
1656 This checks whether the cluster is empty.
1658 Any errors are signaled by raising errors.OpPrereqError.
1661 master = self.cfg.GetMasterNode()
1663 nodelist = self.cfg.GetNodeList()
1664 if len(nodelist) != 1 or nodelist[0] != master:
1665 raise errors.OpPrereqError("There are still %d node(s) in"
1666 " this cluster." % (len(nodelist) - 1),
1668 instancelist = self.cfg.GetInstanceList()
1670 raise errors.OpPrereqError("There are still %d instance(s) in"
1671 " this cluster." % len(instancelist),
1674 def Exec(self, feedback_fn):
1675 """Destroys the cluster.
1678 master_params = self.cfg.GetMasterNetworkParameters()
1680 # Run post hooks on master node before it's removed
1681 _RunPostHook(self, master_params.name)
1683 ems = self.cfg.GetUseExternalMipScript()
1684 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1687 self.LogWarning("Error disabling the master IP address: %s",
1690 return master_params.name
1693 def _VerifyCertificate(filename):
1694 """Verifies a certificate for L{LUClusterVerifyConfig}.
1696 @type filename: string
1697 @param filename: Path to PEM file
1701 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1702 utils.ReadFile(filename))
1703 except Exception, err: # pylint: disable=W0703
1704 return (LUClusterVerifyConfig.ETYPE_ERROR,
1705 "Failed to load X509 certificate %s: %s" % (filename, err))
1708 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1709 constants.SSL_CERT_EXPIRATION_ERROR)
1712 fnamemsg = "While verifying %s: %s" % (filename, msg)
1717 return (None, fnamemsg)
1718 elif errcode == utils.CERT_WARNING:
1719 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1720 elif errcode == utils.CERT_ERROR:
1721 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1723 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1726 def _GetAllHypervisorParameters(cluster, instances):
1727 """Compute the set of all hypervisor parameters.
1729 @type cluster: L{objects.Cluster}
1730 @param cluster: the cluster object
1731 @type instances: list of L{objects.Instance}
1732 @param instances: additional instances from which to obtain parameters
1733 @rtype: list of (origin, hypervisor, parameters)
1734 @return: a list with all parameters found, indicating the hypervisor they
1735 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1740 for hv_name in cluster.enabled_hypervisors:
1741 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1743 for os_name, os_hvp in cluster.os_hvp.items():
1744 for hv_name, hv_params in os_hvp.items():
1746 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1747 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1749 # TODO: collapse identical parameter values in a single one
1750 for instance in instances:
1751 if instance.hvparams:
1752 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1753 cluster.FillHV(instance)))
1758 class _VerifyErrors(object):
1759 """Mix-in for cluster/group verify LUs.
1761 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1762 self.op and self._feedback_fn to be available.)
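LUs mixing this in typically report problems via (sketch, mirroring the
calls in the verification LUs below)::

  self._ErrorIf(test, constants.CV_ECLUSTERCFG, None, "bad config: %s", msg)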
1766 ETYPE_FIELD = "code"
1767 ETYPE_ERROR = "ERROR"
1768 ETYPE_WARNING = "WARNING"
1770 def _Error(self, ecode, item, msg, *args, **kwargs):
1771 """Format an error message.
1773 Based on the opcode's error_codes parameter, either format a
1774 parseable error code, or a simpler error string.
1776 This must be called only from Exec and functions called from Exec.
1779 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1780 itype, etxt, _ = ecode
1781 # first complete the msg
1784 # then format the whole message
1785 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1786 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1792 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1793 # and finally report it via the feedback_fn
1794 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1796 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1797 """Log an error message if the passed condition is True.
1801 or self.op.debug_simulate_errors) # pylint: disable=E1101
1803 # If the error code is in the list of ignored errors, demote the error to a
1805 (_, etxt, _) = ecode
1806 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1807 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1810 self._Error(ecode, *args, **kwargs)
1812 # do not mark the operation as failed for WARN cases only
1813 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1814 self.bad = self.bad or cond
1817 class LUClusterVerify(NoHooksLU):
1818 """Submits all jobs necessary to verify the cluster.
1823 def ExpandNames(self):
1824 self.needed_locks = {}
1826 def Exec(self, feedback_fn):
1829 if self.op.group_name:
1830 groups = [self.op.group_name]
1831 depends_fn = lambda: None
1833 groups = self.cfg.GetNodeGroupList()
1835 # Verify global configuration
1837 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1840 # Always depend on global verification
1841 depends_fn = lambda: [(-len(jobs), [])]
1843 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1844 ignore_errors=self.op.ignore_errors,
1845 depends=depends_fn())]
1846 for group in groups)
1848 # Fix up all parameters
1849 for op in itertools.chain(*jobs): # pylint: disable=W0142
1850 op.debug_simulate_errors = self.op.debug_simulate_errors
1851 op.verbose = self.op.verbose
1852 op.error_codes = self.op.error_codes
1854 op.skip_checks = self.op.skip_checks
1855 except AttributeError:
1856 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1858 return ResultWithJobs(jobs)
1861 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1862 """Verifies the cluster config.
1867 def _VerifyHVP(self, hvp_data):
1868 """Verifies locally the syntax of the hypervisor parameters.
1871 for item, hv_name, hv_params in hvp_data:
1872 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1875 hv_class = hypervisor.GetHypervisor(hv_name)
1876 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1877 hv_class.CheckParameterSyntax(hv_params)
1878 except errors.GenericError, err:
1879 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1881 def ExpandNames(self):
1882 # Information can be safely retrieved as the BGL is acquired in exclusive
1884 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1885 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1886 self.all_node_info = self.cfg.GetAllNodesInfo()
1887 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1888 self.needed_locks = {}
1890 def Exec(self, feedback_fn):
1891 """Verify integrity of cluster, performing various test on nodes.
1895 self._feedback_fn = feedback_fn
1897 feedback_fn("* Verifying cluster config")
1899 for msg in self.cfg.VerifyConfig():
1900 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1902 feedback_fn("* Verifying cluster certificate files")
1904 for cert_filename in constants.ALL_CERT_FILES:
1905 (errcode, msg) = _VerifyCertificate(cert_filename)
1906 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1908 feedback_fn("* Verifying hypervisor parameters")
1910 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1911 self.all_inst_info.values()))
1913 feedback_fn("* Verifying all nodes belong to an existing group")
1915 # We do this verification here because, should this bogus circumstance
1916 # occur, it would never be caught by VerifyGroup, which only acts on
1917 # nodes/instances reachable from existing node groups.
1919 dangling_nodes = set(node.name for node in self.all_node_info.values()
1920 if node.group not in self.all_group_info)
1922 dangling_instances = {}
1923 no_node_instances = []
1925 for inst in self.all_inst_info.values():
1926 if inst.primary_node in dangling_nodes:
1927 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1928 elif inst.primary_node not in self.all_node_info:
1929 no_node_instances.append(inst.name)
1934 utils.CommaJoin(dangling_instances.get(node.name,
1936 for node in dangling_nodes]
1938 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1940 "the following nodes (and their instances) belong to a non"
1941 " existing group: %s", utils.CommaJoin(pretty_dangling))
1943 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1945 "the following instances have a non-existing primary-node:"
1946 " %s", utils.CommaJoin(no_node_instances))
1951 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1952 """Verifies the status of a node group.
1955 HPATH = "cluster-verify"
1956 HTYPE = constants.HTYPE_CLUSTER
1959 _HOOKS_INDENT_RE = re.compile("^", re.M)
1961 class NodeImage(object):
1962 """A class representing the logical and physical status of a node.
1965 @ivar name: the node name to which this object refers
1966 @ivar volumes: a structure as returned from
1967 L{ganeti.backend.GetVolumeList} (runtime)
1968 @ivar instances: a list of running instances (runtime)
1969 @ivar pinst: list of configured primary instances (config)
1970 @ivar sinst: list of configured secondary instances (config)
1971 @ivar sbp: dictionary of {primary-node: list of instances} for all
1972 instances for which this node is secondary (config)
1973 @ivar mfree: free memory, as reported by hypervisor (runtime)
1974 @ivar dfree: free disk, as reported by the node (runtime)
1975 @ivar offline: the offline status (config)
1976 @type rpc_fail: boolean
1977 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1978 not whether the individual keys were correct) (runtime)
1979 @type lvm_fail: boolean
1980 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1981 @type hyp_fail: boolean
1982 @ivar hyp_fail: whether the RPC call didn't return the instance list
1983 @type ghost: boolean
1984 @ivar ghost: whether this node is unknown to the configuration, i.e. a ghost node (config)
1985 @type os_fail: boolean
1986 @ivar os_fail: whether the RPC call didn't return valid OS data
1988 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1989 @type vm_capable: boolean
1990 @ivar vm_capable: whether the node can host instances
1993 def __init__(self, offline=False, name=None, vm_capable=True):
2002 self.offline = offline
2003 self.vm_capable = vm_capable
2004 self.rpc_fail = False
2005 self.lvm_fail = False
2006 self.hyp_fail = False
2008 self.os_fail = False
2011 def ExpandNames(self):
2012 # This raises errors.OpPrereqError on its own:
2013 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2015 # Get instances in node group; this is unsafe and needs verification later
2016 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
2018 self.needed_locks = {
2019 locking.LEVEL_INSTANCE: inst_names,
2020 locking.LEVEL_NODEGROUP: [self.group_uuid],
2021 locking.LEVEL_NODE: [],
2024 self.share_locks = _ShareAll()
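# All lock levels are requested in shared mode: group verification only
# reads configuration and node state, so it does not need to block
# concurrent jobs.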
2026 def DeclareLocks(self, level):
2027 if level == locking.LEVEL_NODE:
2028 # Get members of node group; this is unsafe and needs verification later
2029 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2031 all_inst_info = self.cfg.GetAllInstancesInfo()
2033 # In Exec(), we warn about mirrored instances that have primary and
2034 # secondary living in separate node groups. To fully verify that
2035 # volumes for these instances are healthy, we will need to do an
2036 # extra call to their secondaries. We ensure here those nodes will be locked.
2038 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2039 # Important: access only the instances whose lock is owned
2040 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2041 nodes.update(all_inst_info[inst].secondary_nodes)
2043 self.needed_locks[locking.LEVEL_NODE] = nodes
2045 def CheckPrereq(self):
2046 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2047 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2049 group_nodes = set(self.group_info.members)
2050 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2053 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2055 unlocked_instances = \
2056 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2059 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2060 utils.CommaJoin(unlocked_nodes))
2062 if unlocked_instances:
2063 raise errors.OpPrereqError("Missing lock for instances: %s" %
2064 utils.CommaJoin(unlocked_instances))
2066 self.all_node_info = self.cfg.GetAllNodesInfo()
2067 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2069 self.my_node_names = utils.NiceSort(group_nodes)
2070 self.my_inst_names = utils.NiceSort(group_instances)
2072 self.my_node_info = dict((name, self.all_node_info[name])
2073 for name in self.my_node_names)
2075 self.my_inst_info = dict((name, self.all_inst_info[name])
2076 for name in self.my_inst_names)
2078 # We detect here the nodes that will need the extra RPC calls for verifying
2079 # split LV volumes; they should be locked.
2080 extra_lv_nodes = set()
2082 for inst in self.my_inst_info.values():
2083 if inst.disk_template in constants.DTS_INT_MIRROR:
2084 group = self.my_node_info[inst.primary_node].group
2085 for nname in inst.secondary_nodes:
2086 if self.all_node_info[nname].group != group:
2087 extra_lv_nodes.add(nname)
2089 unlocked_lv_nodes = \
2090 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2092 if unlocked_lv_nodes:
2093 raise errors.OpPrereqError("Missing node locks for LV verification: %s" %
2094 utils.CommaJoin(unlocked_lv_nodes))
2095 self.extra_lv_nodes = list(extra_lv_nodes)
2097 def _VerifyNode(self, ninfo, nresult):
2098 """Perform some basic validation on data returned from a node.
2100 - check the result data structure is well formed and has all the mandatory fields
2102 - check ganeti version
2104 @type ninfo: L{objects.Node}
2105 @param ninfo: the node to check
2106 @param nresult: the results from the node
2108 @return: whether overall this call was successful (and we can expect
2109 reasonable values in the response)
2113 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2115 # main result, nresult should be a non-empty dict
2116 test = not nresult or not isinstance(nresult, dict)
2117 _ErrorIf(test, constants.CV_ENODERPC, node,
2118 "unable to verify node: no data returned")
2122 # compares ganeti version
2123 local_version = constants.PROTOCOL_VERSION
2124 remote_version = nresult.get("version", None)
2125 test = not (remote_version and
2126 isinstance(remote_version, (list, tuple)) and
2127 len(remote_version) == 2)
2128 _ErrorIf(test, constants.CV_ENODERPC, node,
2129 "connection to node returned invalid data")
2133 test = local_version != remote_version[0]
2134 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2135 "incompatible protocol versions: master %s,"
2136 " node %s", local_version, remote_version[0])
2140 # node seems compatible, we can actually try to look into its results
2142 # full package version
2143 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2144 constants.CV_ENODEVERSION, node,
2145 "software version mismatch: master %s, node %s",
2146 constants.RELEASE_VERSION, remote_version[1],
2147 code=self.ETYPE_WARNING)
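# Note the asymmetry: a PROTOCOL_VERSION mismatch above is a hard error,
# while a differing RELEASE_VERSION only produces a warning here,
# presumably because mixed package versions can still interoperate as long
# as the protocol matches.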
2149 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2150 if ninfo.vm_capable and isinstance(hyp_result, dict):
2151 for hv_name, hv_result in hyp_result.iteritems():
2152 test = hv_result is not None
2153 _ErrorIf(test, constants.CV_ENODEHV, node,
2154 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2156 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2157 if ninfo.vm_capable and isinstance(hvp_result, list):
2158 for item, hv_name, hv_result in hvp_result:
2159 _ErrorIf(True, constants.CV_ENODEHV, node,
2160 "hypervisor %s parameter verify failure (source %s): %s",
2161 hv_name, item, hv_result)
2163 test = nresult.get(constants.NV_NODESETUP,
2164 ["Missing NODESETUP results"])
2165 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2170 def _VerifyNodeTime(self, ninfo, nresult,
2171 nvinfo_starttime, nvinfo_endtime):
2172 """Check the node time.
2174 @type ninfo: L{objects.Node}
2175 @param ninfo: the node to check
2176 @param nresult: the remote results for the node
2177 @param nvinfo_starttime: the start time of the RPC call
2178 @param nvinfo_endtime: the end time of the RPC call
2182 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2184 ntime = nresult.get(constants.NV_TIME, None)
2186 ntime_merged = utils.MergeTime(ntime)
2187 except (ValueError, TypeError):
2188 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2191 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2192 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2193 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2194 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
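# When the node's clock falls inside the allowed window
# [start - NODE_MAX_CLOCK_SKEW, end + NODE_MAX_CLOCK_SKEW], ntime_diff is
# expected to remain None and the check below reports no CV_ENODETIME error.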
2198 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2199 "Node time diverges by at least %s from master node time",
2202 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2203 """Check the node LVM results.
2205 @type ninfo: L{objects.Node}
2206 @param ninfo: the node to check
2207 @param nresult: the remote results for the node
2208 @param vg_name: the configured VG name
2215 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2217 # checks vg existence and size > 20G
2218 vglist = nresult.get(constants.NV_VGLIST, None)
2220 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2222 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2223 constants.MIN_VG_SIZE)
2224 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2227 pvlist = nresult.get(constants.NV_PVLIST, None)
2228 test = pvlist is None
2229 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2231 # check that ':' is not present in PV names, since it's a
2232 # special character for lvcreate (denotes the range of PEs to use on the PV)
2234 for _, pvname, owner_vg in pvlist:
2235 test = ":" in pvname
2236 _ErrorIf(test, constants.CV_ENODELVM, node,
2237 "Invalid character ':' in PV '%s' of VG '%s'",
2240 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2241 """Check the node bridges.
2243 @type ninfo: L{objects.Node}
2244 @param ninfo: the node to check
2245 @param nresult: the remote results for the node
2246 @param bridges: the expected list of bridges
2253 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2255 missing = nresult.get(constants.NV_BRIDGES, None)
2256 test = not isinstance(missing, list)
2257 _ErrorIf(test, constants.CV_ENODENET, node,
2258 "did not return valid bridge information")
2260 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2261 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2263 def _VerifyNodeUserScripts(self, ninfo, nresult):
2264 """Check the presence and executability of user scripts on the node.
2266 @type ninfo: L{objects.Node}
2267 @param ninfo: the node to check
2268 @param nresult: the remote results for the node
2273 test = constants.NV_USERSCRIPTS not in nresult
2274 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2275 "did not return user scripts information")
2277 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2279 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2280 "user scripts not present or not executable: %s" %
2281 utils.CommaJoin(sorted(broken_scripts)))
2283 def _VerifyNodeNetwork(self, ninfo, nresult):
2284 """Check the node network connectivity results.
2286 @type ninfo: L{objects.Node}
2287 @param ninfo: the node to check
2288 @param nresult: the remote results for the node
2292 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2294 test = constants.NV_NODELIST not in nresult
2295 _ErrorIf(test, constants.CV_ENODESSH, node,
2296 "node hasn't returned node ssh connectivity data")
2298 if nresult[constants.NV_NODELIST]:
2299 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2300 _ErrorIf(True, constants.CV_ENODESSH, node,
2301 "ssh communication with node '%s': %s", a_node, a_msg)
2303 test = constants.NV_NODENETTEST not in nresult
2304 _ErrorIf(test, constants.CV_ENODENET, node,
2305 "node hasn't returned node tcp connectivity data")
2307 if nresult[constants.NV_NODENETTEST]:
2308 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2310 _ErrorIf(True, constants.CV_ENODENET, node,
2311 "tcp communication with node '%s': %s",
2312 anode, nresult[constants.NV_NODENETTEST][anode])
2314 test = constants.NV_MASTERIP not in nresult
2315 _ErrorIf(test, constants.CV_ENODENET, node,
2316 "node hasn't returned node master IP reachability data")
2318 if not nresult[constants.NV_MASTERIP]:
2319 if node == self.master_node:
2320 msg = "the master node cannot reach the master IP (not configured?)"
2322 msg = "cannot reach the master IP"
2323 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2325 def _VerifyInstance(self, instance, instanceconfig, node_image,
2327 """Verify an instance.
2329 This function checks to see if the required block devices are
2330 available on the instance's node.
2333 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2334 node_current = instanceconfig.primary_node
2336 node_vol_should = {}
2337 instanceconfig.MapLVsByNode(node_vol_should)
2339 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2340 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2341 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2343 for node in node_vol_should:
2344 n_img = node_image[node]
2345 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2346 # ignore missing volumes on offline or broken nodes
2348 for volume in node_vol_should[node]:
2349 test = volume not in n_img.volumes
2350 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2351 "volume %s missing on node %s", volume, node)
2353 if instanceconfig.admin_state == constants.ADMINST_UP:
2354 pri_img = node_image[node_current]
2355 test = instance not in pri_img.instances and not pri_img.offline
2356 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2357 "instance not running on its primary node %s",
2360 diskdata = [(nname, success, status, idx)
2361 for (nname, disks) in diskstatus.items()
2362 for idx, (success, status) in enumerate(disks)]
2364 for nname, success, bdev_status, idx in diskdata:
2366 # the 'ghost node' construction in Exec() ensures that we have a node image entry here
2367 snode = node_image[nname]
2368 bad_snode = snode.ghost or snode.offline
2369 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2370 not success and not bad_snode,
2371 constants.CV_EINSTANCEFAULTYDISK, instance,
2372 "couldn't retrieve status for disk/%s on %s: %s",
2373 idx, nname, bdev_status)
2374 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2375 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2376 constants.CV_EINSTANCEFAULTYDISK, instance,
2377 "disk/%s on %s is faulty", idx, nname)
2379 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2380 """Verify if there are any unknown volumes in the cluster.
2382 The .os, .swap and backup volumes are ignored. All other volumes are
2383 reported as unknown.
2385 @type reserved: L{ganeti.utils.FieldSet}
2386 @param reserved: a FieldSet of reserved volume names
2389 for node, n_img in node_image.items():
2390 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2391 # skip non-healthy nodes
2393 for volume in n_img.volumes:
2394 test = ((node not in node_vol_should or
2395 volume not in node_vol_should[node]) and
2396 not reserved.Matches(volume))
2397 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2398 "volume %s is unknown", volume)
2400 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2401 """Verify N+1 Memory Resilience.
2403 Check that if one single node dies we can still start all the
2404 instances it was primary for.
2407 cluster_info = self.cfg.GetClusterInfo()
2408 for node, n_img in node_image.items():
2409 # This code checks that every node which is now listed as
2410 # secondary has enough memory to host all instances it is
2411 # supposed to, should a single other node in the cluster fail.
2412 # FIXME: not ready for failover to an arbitrary node
2413 # FIXME: does not support file-backed instances
2414 # WARNING: we currently take into account down instances as well
2415 # as up ones, considering that even if they're down someone
2416 # might want to start them even in the event of a node failure.
2418 # we're skipping offline nodes from the N+1 warning, since
2419 # most likely we don't have good memory information from them;
2420 # we already list instances living on such nodes, and that's enough warning
2423 #TODO(dynmem): use MINMEM for checking
2424 #TODO(dynmem): also consider ballooning out other instances
2425 for prinode, instances in n_img.sbp.items():
2427 for instance in instances:
2428 bep = cluster_info.FillBE(instance_cfg[instance])
2429 if bep[constants.BE_AUTO_BALANCE]:
2430 needed_mem += bep[constants.BE_MAXMEM]
2431 test = n_img.mfree < needed_mem
2432 self._ErrorIf(test, constants.CV_ENODEN1, node,
2433 "not enough memory to accommodate instance failovers"
2434 " should node %s fail (%dMiB needed, %dMiB available)",
2435 prinode, needed_mem, n_img.mfree)
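# Worked example (hypothetical numbers): if this node is secondary for
# instances whose primary is node B and their auto-balanced BE_MAXMEM
# values add up to 6144 MiB while the node reports mfree=4096 MiB, a
# CV_ENODEN1 error is raised for it with prinode B in the message.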
2438 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2439 (files_all, files_opt, files_mc, files_vm)):
2440 """Verifies file checksums collected from all nodes.
2442 @param errorif: Callback for reporting errors
2443 @param nodeinfo: List of L{objects.Node} objects
2444 @param master_node: Name of master node
2445 @param all_nvinfo: RPC results
2448 # Define functions determining which nodes to consider for a file
2451 (files_mc, lambda node: (node.master_candidate or
2452 node.name == master_node)),
2453 (files_vm, lambda node: node.vm_capable),
2456 # Build mapping from filename to list of nodes which should have the file
2458 for (files, fn) in files2nodefn:
2460 filenodes = nodeinfo
2462 filenodes = filter(fn, nodeinfo)
2463 nodefiles.update((filename,
2464 frozenset(map(operator.attrgetter("name"), filenodes)))
2465 for filename in files)
2467 assert set(nodefiles) == (files_all | files_mc | files_vm)
2469 fileinfo = dict((filename, {}) for filename in nodefiles)
2470 ignore_nodes = set()
2472 for node in nodeinfo:
2474 ignore_nodes.add(node.name)
2477 nresult = all_nvinfo[node.name]
2479 if nresult.fail_msg or not nresult.payload:
2482 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2484 test = not (node_files and isinstance(node_files, dict))
2485 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2486 "Node did not return file checksum data")
2488 ignore_nodes.add(node.name)
2491 # Build per-checksum mapping from filename to nodes having it
2492 for (filename, checksum) in node_files.items():
2493 assert filename in nodefiles
2494 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2496 for (filename, checksums) in fileinfo.items():
2497 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2499 # Nodes having the file
2500 with_file = frozenset(node_name
2501 for nodes in fileinfo[filename].values()
2502 for node_name in nodes) - ignore_nodes
2504 expected_nodes = nodefiles[filename] - ignore_nodes
2506 # Nodes missing file
2507 missing_file = expected_nodes - with_file
2509 if filename in files_opt:
2511 errorif(missing_file and missing_file != expected_nodes,
2512 constants.CV_ECLUSTERFILECHECK, None,
2513 "File %s is optional, but it must exist on all or no"
2514 " nodes (not found on %s)",
2515 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2517 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2518 "File %s is missing from node(s) %s", filename,
2519 utils.CommaJoin(utils.NiceSort(missing_file)))
2521 # Warn if a node has a file it shouldn't
2522 unexpected = with_file - expected_nodes
2524 constants.CV_ECLUSTERFILECHECK, None,
2525 "File %s should not exist on node(s) %s",
2526 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2528 # See if there are multiple versions of the file
2529 test = len(checksums) > 1
2531 variants = ["variant %s on %s" %
2532 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2533 for (idx, (checksum, nodes)) in
2534 enumerate(sorted(checksums.items()))]
2538 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2539 "File %s found with %s different checksums (%s)",
2540 filename, len(checksums), "; ".join(variants))
2542 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2544 """Verifies the node DRBD status.
2546 @type ninfo: L{objects.Node}
2547 @param ninfo: the node to check
2548 @param nresult: the remote results for the node
2549 @param instanceinfo: the dict of instances
2550 @param drbd_helper: the configured DRBD usermode helper
2551 @param drbd_map: the DRBD map as returned by
2552 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2556 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2559 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2560 test = (helper_result is None)
2561 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2562 "no drbd usermode helper returned")
2564 status, payload = helper_result
2566 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2567 "drbd usermode helper check unsuccessful: %s", payload)
2568 test = status and (payload != drbd_helper)
2569 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2570 "wrong drbd usermode helper: %s", payload)
2572 # compute the DRBD minors
2574 for minor, instance in drbd_map[node].items():
2575 test = instance not in instanceinfo
2576 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2577 "ghost instance '%s' in temporary DRBD map", instance)
2578 # ghost instance should not be running, but otherwise we
2579 # don't give double warnings (both ghost instance and
2580 # unallocated minor in use)
2582 node_drbd[minor] = (instance, False)
2584 instance = instanceinfo[instance]
2585 node_drbd[minor] = (instance.name,
2586 instance.admin_state == constants.ADMINST_UP)
2588 # and now check them
2589 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2590 test = not isinstance(used_minors, (tuple, list))
2591 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2592 "cannot parse drbd status file: %s", str(used_minors))
2594 # we cannot check drbd status
2597 for minor, (iname, must_exist) in node_drbd.items():
2598 test = minor not in used_minors and must_exist
2599 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2600 "drbd minor %d of instance %s is not active", minor, iname)
2601 for minor in used_minors:
2602 test = minor not in node_drbd
2603 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2604 "unallocated drbd minor %d is in use", minor)
2606 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2607 """Builds the node OS structures.
2609 @type ninfo: L{objects.Node}
2610 @param ninfo: the node to check
2611 @param nresult: the remote results for the node
2612 @param nimg: the node image object
2616 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2618 remote_os = nresult.get(constants.NV_OSLIST, None)
2619 test = (not isinstance(remote_os, list) or
2620 not compat.all(isinstance(v, list) and len(v) == 7
2621 for v in remote_os))
2623 _ErrorIf(test, constants.CV_ENODEOS, node,
2624 "node hasn't returned valid OS data")
2633 for (name, os_path, status, diagnose,
2634 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2636 if name not in os_dict:
2639 # parameters is a list of lists instead of list of tuples due to
2640 # JSON lacking a real tuple type, fix it:
2641 parameters = [tuple(v) for v in parameters]
2642 os_dict[name].append((os_path, status, diagnose,
2643 set(variants), set(parameters), set(api_ver)))
2645 nimg.oslist = os_dict
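# Illustrative shape of nimg.oslist (hypothetical OS name and path):
#   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                     set(["default"]), set(), set([20]))]}
# i.e. OS name -> list of (path, status, diagnose, variants, parameters,
# api_versions); more than one entry means shadowed OS directories.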
2647 def _VerifyNodeOS(self, ninfo, nimg, base):
2648 """Verifies the node OS list.
2650 @type ninfo: L{objects.Node}
2651 @param ninfo: the node to check
2652 @param nimg: the node image object
2653 @param base: the 'template' node we match against (e.g. from the master)
2657 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2659 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2661 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2662 for os_name, os_data in nimg.oslist.items():
2663 assert os_data, "Empty OS status for OS %s?!" % os_name
2664 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2665 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2666 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2667 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2668 "OS '%s' has multiple entries (first one shadows the rest): %s",
2669 os_name, utils.CommaJoin([v[0] for v in os_data]))
2670 # comparisons with the 'base' image
2671 test = os_name not in base.oslist
2672 _ErrorIf(test, constants.CV_ENODEOS, node,
2673 "Extra OS %s not present on reference node (%s)",
2677 assert base.oslist[os_name], "Base node has empty OS status?"
2678 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2680 # base OS is invalid, skipping
2682 for kind, a, b in [("API version", f_api, b_api),
2683 ("variants list", f_var, b_var),
2684 ("parameters", beautify_params(f_param),
2685 beautify_params(b_param))]:
2686 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2687 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2688 kind, os_name, base.name,
2689 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2691 # check any missing OSes
2692 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2693 _ErrorIf(missing, constants.CV_ENODEOS, node,
2694 "OSes present on reference node %s but missing on this node: %s",
2695 base.name, utils.CommaJoin(missing))
2697 def _VerifyOob(self, ninfo, nresult):
2698 """Verifies out of band functionality of a node.
2700 @type ninfo: L{objects.Node}
2701 @param ninfo: the node to check
2702 @param nresult: the remote results for the node
2706 # We just have to verify the paths on master and/or master candidates
2707 # as the oob helper is invoked on the master
2708 if ((ninfo.master_candidate or ninfo.master_capable) and
2709 constants.NV_OOB_PATHS in nresult):
2710 for path_result in nresult[constants.NV_OOB_PATHS]:
2711 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2713 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2714 """Verifies and updates the node volume data.
2716 This function will update a L{NodeImage}'s internal structures
2717 with data from the remote call.
2719 @type ninfo: L{objects.Node}
2720 @param ninfo: the node to check
2721 @param nresult: the remote results for the node
2722 @param nimg: the node image object
2723 @param vg_name: the configured VG name
2727 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2729 nimg.lvm_fail = True
2730 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2733 elif isinstance(lvdata, basestring):
2734 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2735 utils.SafeEncode(lvdata))
2736 elif not isinstance(lvdata, dict):
2737 _ErrorIf(True, constants.CV_ENODELVM, node,
2738 "rpc call to node failed (lvlist)")
2740 nimg.volumes = lvdata
2741 nimg.lvm_fail = False
2743 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2744 """Verifies and updates the node instance list.
2746 If the listing was successful, then updates this node's instance
2747 list. Otherwise, it marks the RPC call as failed for the instance list key.
2750 @type ninfo: L{objects.Node}
2751 @param ninfo: the node to check
2752 @param nresult: the remote results for the node
2753 @param nimg: the node image object
2756 idata = nresult.get(constants.NV_INSTANCELIST, None)
2757 test = not isinstance(idata, list)
2758 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2759 "rpc call to node failed (instancelist): %s",
2760 utils.SafeEncode(str(idata)))
2762 nimg.hyp_fail = True
2764 nimg.instances = idata
2766 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2767 """Verifies and computes a node information map
2769 @type ninfo: L{objects.Node}
2770 @param ninfo: the node to check
2771 @param nresult: the remote results for the node
2772 @param nimg: the node image object
2773 @param vg_name: the configured VG name
2777 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2779 # try to read free memory (from the hypervisor)
2780 hv_info = nresult.get(constants.NV_HVINFO, None)
2781 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2782 _ErrorIf(test, constants.CV_ENODEHV, node,
2783 "rpc call to node failed (hvinfo)")
2786 nimg.mfree = int(hv_info["memory_free"])
2787 except (ValueError, TypeError):
2788 _ErrorIf(True, constants.CV_ENODERPC, node,
2789 "node returned invalid nodeinfo, check hypervisor")
2791 # FIXME: devise a free space model for file based instances as well
2792 if vg_name is not None:
2793 test = (constants.NV_VGLIST not in nresult or
2794 vg_name not in nresult[constants.NV_VGLIST])
2795 _ErrorIf(test, constants.CV_ENODELVM, node,
2796 "node didn't return data for the volume group '%s'"
2797 " - it is either missing or broken", vg_name)
2800 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2801 except (ValueError, TypeError):
2802 _ErrorIf(True, constants.CV_ENODERPC, node,
2803 "node returned invalid LVM info, check LVM status")
2805 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2806 """Gets per-disk status information for all instances.
2808 @type nodelist: list of strings
2809 @param nodelist: Node names
2810 @type node_image: dict of (name, L{NodeImage})
2811 @param node_image: Node objects
2812 @type instanceinfo: dict of (name, L{objects.Instance})
2813 @param instanceinfo: Instance objects
2814 @rtype: {instance: {node: [(success, payload)]}}
2815 @return: a dictionary of per-instance dictionaries with nodes as
2816 keys and disk information as values; the disk information is a
2817 list of tuples (success, payload)
2820 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2823 node_disks_devonly = {}
2824 diskless_instances = set()
2825 diskless = constants.DT_DISKLESS
2827 for nname in nodelist:
2828 node_instances = list(itertools.chain(node_image[nname].pinst,
2829 node_image[nname].sinst))
2830 diskless_instances.update(inst for inst in node_instances
2831 if instanceinfo[inst].disk_template == diskless)
2832 disks = [(inst, disk)
2833 for inst in node_instances
2834 for disk in instanceinfo[inst].disks]
2837 # No need to collect data
2840 node_disks[nname] = disks
2842 # Creating copies as SetDiskID below will modify the objects and that can
2843 # lead to incorrect data returned from nodes
2844 devonly = [dev.Copy() for (_, dev) in disks]
2847 self.cfg.SetDiskID(dev, nname)
2849 node_disks_devonly[nname] = devonly
2851 assert len(node_disks) == len(node_disks_devonly)
2853 # Collect data from all nodes with disks
2854 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2857 assert len(result) == len(node_disks)
2861 for (nname, nres) in result.items():
2862 disks = node_disks[nname]
2865 # No data from this node
2866 data = len(disks) * [(False, "node offline")]
2869 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2870 "while getting disk information: %s", msg)
2872 # No data from this node
2873 data = len(disks) * [(False, msg)]
2876 for idx, i in enumerate(nres.payload):
2877 if isinstance(i, (tuple, list)) and len(i) == 2:
2880 logging.warning("Invalid result from node %s, entry %d: %s",
2882 data.append((False, "Invalid result from the remote node"))
2884 for ((inst, _), status) in zip(disks, data):
2885 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2887 # Add empty entries for diskless instances.
2888 for inst in diskless_instances:
2889 assert inst not in instdisk
2892 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2893 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2894 compat.all(isinstance(s, (tuple, list)) and
2895 len(s) == 2 for s in statuses)
2896 for inst, nnames in instdisk.items()
2897 for nname, statuses in nnames.items())
2898 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
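# instdisk, which this method hands back to Exec(), is shaped like
# (hypothetical names):
#   {"inst1": {"nodeA": [(True, status0), (True, status1)]},
#    "inst2": {"nodeB": [(False, "node offline")]}}
# i.e. instance -> node -> list of (success, payload) entries, one per disk.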
2903 def _SshNodeSelector(group_uuid, all_nodes):
2904 """Create endless iterators for all potential SSH check hosts.
2907 nodes = [node for node in all_nodes
2908 if (node.group != group_uuid and
2910 keyfunc = operator.attrgetter("group")
2912 return map(itertools.cycle,
2913 [sorted(map(operator.attrgetter("name"), names))
2914 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2918 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2919 """Choose which nodes should talk to which other nodes.
2921 We will make nodes contact all nodes in their group, and one node from each other node group.
2924 @warning: This algorithm has a known issue if one node group is much
2925 smaller than others (e.g. just one node). In such a case all other
2926 nodes will talk to the single node.
2929 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2930 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2932 return (online_nodes,
2933 dict((name, sorted([i.next() for i in sel]))
2934 for name in online_nodes))
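# Illustrative result for three node groups (hypothetical names):
#   (["g1-n1", "g1-n2"],
#    {"g1-n1": ["g2-n1", "g3-n1"], "g1-n2": ["g2-n2", "g3-n2"]})
# i.e. every online node of the verified group is paired with one node from
# each other group, cycling through them so the cross-group checks spread out.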
2936 def BuildHooksEnv(self):
2939 Cluster-Verify hooks run only in the post phase; if they fail, their
2940 output is logged in the verify output and the verification fails.
2944 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2947 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2948 for node in self.my_node_info.values())
2952 def BuildHooksNodes(self):
2953 """Build hooks nodes.
2956 return ([], self.my_node_names)
2958 def Exec(self, feedback_fn):
2959 """Verify integrity of the node group, performing various tests on nodes.
2962 # This method has too many local variables. pylint: disable=R0914
2963 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2965 if not self.my_node_names:
2967 feedback_fn("* Empty node group, skipping verification")
2971 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2972 verbose = self.op.verbose
2973 self._feedback_fn = feedback_fn
2975 vg_name = self.cfg.GetVGName()
2976 drbd_helper = self.cfg.GetDRBDHelper()
2977 cluster = self.cfg.GetClusterInfo()
2978 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2979 hypervisors = cluster.enabled_hypervisors
2980 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2982 i_non_redundant = [] # Non redundant instances
2983 i_non_a_balanced = [] # Non auto-balanced instances
2984 i_offline = 0 # Count of offline instances
2985 n_offline = 0 # Count of offline nodes
2986 n_drained = 0 # Count of nodes being drained
2987 node_vol_should = {}
2989 # FIXME: verify OS list
2992 filemap = _ComputeAncillaryFiles(cluster, False)
2994 # do local checksums
2995 master_node = self.master_node = self.cfg.GetMasterNode()
2996 master_ip = self.cfg.GetMasterIP()
2998 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3001 if self.cfg.GetUseExternalMipScript():
3002 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3004 node_verify_param = {
3005 constants.NV_FILELIST:
3006 utils.UniqueSequence(filename
3007 for files in filemap
3008 for filename in files),
3009 constants.NV_NODELIST:
3010 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3011 self.all_node_info.values()),
3012 constants.NV_HYPERVISOR: hypervisors,
3013 constants.NV_HVPARAMS:
3014 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3015 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3016 for node in node_data_list
3017 if not node.offline],
3018 constants.NV_INSTANCELIST: hypervisors,
3019 constants.NV_VERSION: None,
3020 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3021 constants.NV_NODESETUP: None,
3022 constants.NV_TIME: None,
3023 constants.NV_MASTERIP: (master_node, master_ip),
3024 constants.NV_OSLIST: None,
3025 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3026 constants.NV_USERSCRIPTS: user_scripts,
3029 if vg_name is not None:
3030 node_verify_param[constants.NV_VGLIST] = None
3031 node_verify_param[constants.NV_LVLIST] = vg_name
3032 node_verify_param[constants.NV_PVLIST] = [vg_name]
3033 node_verify_param[constants.NV_DRBDLIST] = None
3036 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3039 # FIXME: this needs to be changed per node-group, not cluster-wide
3041 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3042 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3043 bridges.add(default_nicpp[constants.NIC_LINK])
3044 for instance in self.my_inst_info.values():
3045 for nic in instance.nics:
3046 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3047 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3048 bridges.add(full_nic[constants.NIC_LINK])
3051 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3053 # Build our expected cluster state
3054 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3056 vm_capable=node.vm_capable))
3057 for node in node_data_list)
3061 for node in self.all_node_info.values():
3062 path = _SupportsOob(self.cfg, node)
3063 if path and path not in oob_paths:
3064 oob_paths.append(path)
3067 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3069 for instance in self.my_inst_names:
3070 inst_config = self.my_inst_info[instance]
3072 for nname in inst_config.all_nodes:
3073 if nname not in node_image:
3074 gnode = self.NodeImage(name=nname)
3075 gnode.ghost = (nname not in self.all_node_info)
3076 node_image[nname] = gnode
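# Such a "ghost" image is a placeholder for a node referenced by an
# instance but not covered by this group's data (possibly absent from the
# configuration entirely); it keeps the per-node lookups below from failing
# and is reported separately through the ghost attribute.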
3078 inst_config.MapLVsByNode(node_vol_should)
3080 pnode = inst_config.primary_node
3081 node_image[pnode].pinst.append(instance)
3083 for snode in inst_config.secondary_nodes:
3084 nimg = node_image[snode]
3085 nimg.sinst.append(instance)
3086 if pnode not in nimg.sbp:
3087 nimg.sbp[pnode] = []
3088 nimg.sbp[pnode].append(instance)
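# After this loop nimg.sbp maps, for every node acting as secondary,
# primary node -> list of instances, e.g. (hypothetical)
# {"pnodeA": ["inst1", "inst2"]}; this is the layout consumed by
# _VerifyNPlusOneMemory().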
3090 # At this point, we have the in-memory data structures complete,
3091 # except for the runtime information, which we'll gather next
3093 # Due to the way our RPC system works, exact response times cannot be
3094 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3095 # time before and after executing the request, we can at least have a time window.
3097 nvinfo_starttime = time.time()
3098 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3100 self.cfg.GetClusterName())
3101 nvinfo_endtime = time.time()
3103 if self.extra_lv_nodes and vg_name is not None:
3105 self.rpc.call_node_verify(self.extra_lv_nodes,
3106 {constants.NV_LVLIST: vg_name},
3107 self.cfg.GetClusterName())
3109 extra_lv_nvinfo = {}
3111 all_drbd_map = self.cfg.ComputeDRBDMap()
3113 feedback_fn("* Gathering disk information (%s nodes)" %
3114 len(self.my_node_names))
3115 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3118 feedback_fn("* Verifying configuration file consistency")
3120 # If not all nodes are being checked, we need to make sure the master node
3121 # and a non-checked vm_capable node are in the list.
3122 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3124 vf_nvinfo = all_nvinfo.copy()
3125 vf_node_info = list(self.my_node_info.values())
3126 additional_nodes = []
3127 if master_node not in self.my_node_info:
3128 additional_nodes.append(master_node)
3129 vf_node_info.append(self.all_node_info[master_node])
3130 # Add the first vm_capable node we find which is not included
3131 for node in absent_nodes:
3132 nodeinfo = self.all_node_info[node]
3133 if nodeinfo.vm_capable and not nodeinfo.offline:
3134 additional_nodes.append(node)
3135 vf_node_info.append(self.all_node_info[node])
3137 key = constants.NV_FILELIST
3138 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3139 {key: node_verify_param[key]},
3140 self.cfg.GetClusterName()))
3142 vf_nvinfo = all_nvinfo
3143 vf_node_info = self.my_node_info.values()
3145 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3147 feedback_fn("* Verifying node status")
3151 for node_i in node_data_list:
3153 nimg = node_image[node]
3157 feedback_fn("* Skipping offline node %s" % (node,))
3161 if node == master_node:
3163 elif node_i.master_candidate:
3164 ntype = "master candidate"
3165 elif node_i.drained:
3171 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3173 msg = all_nvinfo[node].fail_msg
3174 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3177 nimg.rpc_fail = True
3180 nresult = all_nvinfo[node].payload
3182 nimg.call_ok = self._VerifyNode(node_i, nresult)
3183 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3184 self._VerifyNodeNetwork(node_i, nresult)
3185 self._VerifyNodeUserScripts(node_i, nresult)
3186 self._VerifyOob(node_i, nresult)
3189 self._VerifyNodeLVM(node_i, nresult, vg_name)
3190 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3193 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3194 self._UpdateNodeInstances(node_i, nresult, nimg)
3195 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3196 self._UpdateNodeOS(node_i, nresult, nimg)
3198 if not nimg.os_fail:
3199 if refos_img is None:
3201 self._VerifyNodeOS(node_i, nimg, refos_img)
3202 self._VerifyNodeBridges(node_i, nresult, bridges)
3204 # Check whether all running instances are primary for the node. (This
3205 # can no longer be done from _VerifyInstance below, since some of the
3206 # wrong instances could be from other node groups.)
3207 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3209 for inst in non_primary_inst:
3210 # FIXME: investigate best way to handle offline insts
3211 if inst.admin_state == constants.ADMINST_OFFLINE:
3213 feedback_fn("* Skipping offline instance %s" % inst.name)
3216 test = inst in self.all_inst_info
3217 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3218 "instance should not run on node %s", node_i.name)
3219 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3220 "node is running unknown instance %s", inst)
3222 for node, result in extra_lv_nvinfo.items():
3223 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3224 node_image[node], vg_name)
3226 feedback_fn("* Verifying instance status")
3227 for instance in self.my_inst_names:
3229 feedback_fn("* Verifying instance %s" % instance)
3230 inst_config = self.my_inst_info[instance]
3231 self._VerifyInstance(instance, inst_config, node_image,
3233 inst_nodes_offline = []
3235 pnode = inst_config.primary_node
3236 pnode_img = node_image[pnode]
3237 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3238 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3239 " primary node failed", instance)
3241 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3243 constants.CV_EINSTANCEBADNODE, instance,
3244 "instance is marked as running and lives on offline node %s",
3245 inst_config.primary_node)
3247 # If the instance is non-redundant we cannot survive losing its primary
3248 # node, so we are not N+1 compliant. On the other hand we have no disk
3249 # templates with more than one secondary so that situation is not well supported either.
3251 # FIXME: does not support file-backed instances
3252 if not inst_config.secondary_nodes:
3253 i_non_redundant.append(instance)
3255 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3256 constants.CV_EINSTANCELAYOUT,
3257 instance, "instance has multiple secondary nodes: %s",
3258 utils.CommaJoin(inst_config.secondary_nodes),
3259 code=self.ETYPE_WARNING)
3261 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3262 pnode = inst_config.primary_node
3263 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3264 instance_groups = {}
3266 for node in instance_nodes:
3267 instance_groups.setdefault(self.all_node_info[node].group,
3271 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3272 # Sort so that we always list the primary node first.
3273 for group, nodes in sorted(instance_groups.items(),
3274 key=lambda (_, nodes): pnode in nodes,
3277 self._ErrorIf(len(instance_groups) > 1,
3278 constants.CV_EINSTANCESPLITGROUPS,
3279 instance, "instance has primary and secondary nodes in"
3280 " different groups: %s", utils.CommaJoin(pretty_list),
3281 code=self.ETYPE_WARNING)
3283 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3284 i_non_a_balanced.append(instance)
3286 for snode in inst_config.secondary_nodes:
3287 s_img = node_image[snode]
3288 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3289 snode, "instance %s, connection to secondary node failed",
3293 inst_nodes_offline.append(snode)
3295 # warn that the instance lives on offline nodes
3296 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3297 "instance has offline secondary node(s) %s",
3298 utils.CommaJoin(inst_nodes_offline))
3299 # ... or ghost/non-vm_capable nodes
3300 for node in inst_config.all_nodes:
3301 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3302 instance, "instance lives on ghost node %s", node)
3303 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3304 instance, "instance lives on non-vm_capable node %s", node)
3306 feedback_fn("* Verifying orphan volumes")
3307 reserved = utils.FieldSet(*cluster.reserved_lvs)
3309 # We will get spurious "unknown volume" warnings if any node of this group
3310 # is secondary for an instance whose primary is in another group. To avoid
3311 # them, we find these instances and add their volumes to node_vol_should.
3312 for inst in self.all_inst_info.values():
3313 for secondary in inst.secondary_nodes:
3314 if (secondary in self.my_node_info
3315 and inst.name not in self.my_inst_info):
3316 inst.MapLVsByNode(node_vol_should)
3319 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3321 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3322 feedback_fn("* Verifying N+1 Memory redundancy")
3323 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3325 feedback_fn("* Other Notes")
3327 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3328 % len(i_non_redundant))
3330 if i_non_a_balanced:
3331 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3332 % len(i_non_a_balanced))
3335 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3338 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3341 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3345 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3346 """Analyze the post-hooks' result
3348 This method analyses the hook result, handles it, and sends some
3349 nicely-formatted feedback back to the user.
3351 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3352 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3353 @param hooks_results: the results of the multi-node hooks rpc call
3354 @param feedback_fn: function used to send feedback back to the caller
3355 @param lu_result: previous Exec result
3356 @return: the new Exec result, based on the previous result
3360 # We only really run POST phase hooks, only for non-empty groups,
3361 # and are only interested in their results
3362 if not self.my_node_names:
3365 elif phase == constants.HOOKS_PHASE_POST:
3366 # Used to change hooks' output to proper indentation
3367 feedback_fn("* Hooks Results")
3368 assert hooks_results, "invalid result from hooks"
3370 for node_name in hooks_results:
3371 res = hooks_results[node_name]
3373 test = msg and not res.offline
3374 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3375 "Communication failure in hooks execution: %s", msg)
3376 if res.offline or msg:
3377 # No need to investigate payload if node is offline or gave an error
3380 for script, hkr, output in res.payload:
3381 test = hkr == constants.HKR_FAIL
3382 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3383 "Script %s failed, output:", script)
3385 output = self._HOOKS_INDENT_RE.sub(" ", output)
3386 feedback_fn("%s" % output)
3392 class LUClusterVerifyDisks(NoHooksLU):
3393 """Verifies the cluster disks status.
3398 def ExpandNames(self):
3399 self.share_locks = _ShareAll()
3400 self.needed_locks = {
3401 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3404 def Exec(self, feedback_fn):
3405 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3407 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3408 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3409 for group in group_names])
3412 class LUGroupVerifyDisks(NoHooksLU):
3413 """Verifies the status of all disks in a node group.
3418 def ExpandNames(self):
3419 # Raises errors.OpPrereqError on its own if group can't be found
3420 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3422 self.share_locks = _ShareAll()
3423 self.needed_locks = {
3424 locking.LEVEL_INSTANCE: [],
3425 locking.LEVEL_NODEGROUP: [],
3426 locking.LEVEL_NODE: [],
3429 def DeclareLocks(self, level):
3430 if level == locking.LEVEL_INSTANCE:
3431 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3433 # Lock instances optimistically, needs verification once node and group
3434 # locks have been acquired
3435 self.needed_locks[locking.LEVEL_INSTANCE] = \
3436 self.cfg.GetNodeGroupInstances(self.group_uuid)
3438 elif level == locking.LEVEL_NODEGROUP:
3439 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3441 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3442 set([self.group_uuid] +
3443 # Lock all groups used by instances optimistically; this requires
3444 # going via the node before it's locked, requiring verification
3447 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3448 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3450 elif level == locking.LEVEL_NODE:
3451 # This will only lock the nodes in the group to be verified which contain actual instances
3453 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3454 self._LockInstancesNodes()
3456 # Lock all nodes in group to be verified
3457 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3458 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3459 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3461 def CheckPrereq(self):
3462 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3463 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3464 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3466 assert self.group_uuid in owned_groups
3468 # Check if locked instances are still correct
3469 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3471 # Get instance information
3472 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3474 # Check if node groups for locked instances are still correct
3475 for (instance_name, inst) in self.instances.items():
3476 assert owned_nodes.issuperset(inst.all_nodes), \
3477 "Instance %s's nodes changed while we kept the lock" % instance_name
3479 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3482 assert self.group_uuid in inst_groups, \
3483 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3485 def Exec(self, feedback_fn):
3486 """Verify integrity of cluster disks.
3488 @rtype: tuple of three items
3489 @return: a tuple of (dict of node-to-node_error, list of instances
3490 which need activate-disks, dict of instance: (node, volume) for
3495 res_instances = set()
3498 nv_dict = _MapInstanceDisksToNodes([inst
3499 for inst in self.instances.values()
3500 if inst.admin_state == constants.ADMINST_UP])
3503 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3504 set(self.cfg.GetVmCapableNodeList()))
3506 node_lvs = self.rpc.call_lv_list(nodes, [])
3508 for (node, node_res) in node_lvs.items():
3509 if node_res.offline:
3512 msg = node_res.fail_msg
3514 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3515 res_nodes[node] = msg
3518 for lv_name, (_, _, lv_online) in node_res.payload.items():
3519 inst = nv_dict.pop((node, lv_name), None)
3520 if not (lv_online or inst is None):
3521 res_instances.add(inst)
3523 # any leftover items in nv_dict are missing LVs, let's arrange the data
3525 for key, inst in nv_dict.iteritems():
3526 res_missing.setdefault(inst, []).append(list(key))
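# Illustrative return value (hypothetical names):
#   ({"node3": "Error enumerating LVs ..."},      # per-node errors
#    ["inst2"],                                   # need activate-disks
#    {"inst5": [["node1", "xenvg/disk0_data"]]})  # missing LVs per instance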
3528 return (res_nodes, list(res_instances), res_missing)
3531 class LUClusterRepairDiskSizes(NoHooksLU):
3532 """Verifies the cluster disk sizes.
3537 def ExpandNames(self):
3538 if self.op.instances:
3539 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3540 self.needed_locks = {
3541 locking.LEVEL_NODE_RES: [],
3542 locking.LEVEL_INSTANCE: self.wanted_names,
3544 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3546 self.wanted_names = None
3547 self.needed_locks = {
3548 locking.LEVEL_NODE_RES: locking.ALL_SET,
3549 locking.LEVEL_INSTANCE: locking.ALL_SET,
3551 self.share_locks = {
3552 locking.LEVEL_NODE_RES: 1,
3553 locking.LEVEL_INSTANCE: 0,
3556 def DeclareLocks(self, level):
3557 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3558 self._LockInstancesNodes(primary_only=True, level=level)
3560 def CheckPrereq(self):
3561 """Check prerequisites.
3563 This only checks the optional instance list against the existing names.
3566 if self.wanted_names is None:
3567 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3569 self.wanted_instances = \
3570 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3572 def _EnsureChildSizes(self, disk):
3573 """Ensure children of the disk have the needed disk size.
3575 This is valid mainly for DRBD8 and fixes an issue where the
3576 children have a smaller disk size.
3578 @param disk: an L{ganeti.objects.Disk} object
3581 if disk.dev_type == constants.LD_DRBD8:
3582 assert disk.children, "Empty children for DRBD8?"
3583 fchild = disk.children[0]
3584 mismatch = fchild.size < disk.size
3586 self.LogInfo("Child disk has size %d, parent %d, fixing",
3587 fchild.size, disk.size)
3588 fchild.size = disk.size
3590 # and we recurse on this child only, not on the metadev
3591 return self._EnsureChildSizes(fchild) or mismatch
3595 def Exec(self, feedback_fn):
3596 """Verify the size of cluster disks.
3599 # TODO: check child disks too
3600 # TODO: check differences in size between primary/secondary nodes
3602 for instance in self.wanted_instances:
3603 pnode = instance.primary_node
3604 if pnode not in per_node_disks:
3605 per_node_disks[pnode] = []
3606 for idx, disk in enumerate(instance.disks):
3607 per_node_disks[pnode].append((instance, idx, disk))
3609 assert not (frozenset(per_node_disks.keys()) -
3610 self.owned_locks(locking.LEVEL_NODE_RES)), \
3611 "Not owning correct locks"
3612 assert not self.owned_locks(locking.LEVEL_NODE)
3615 for node, dskl in per_node_disks.items():
3616 newl = [v[2].Copy() for v in dskl]
3618 self.cfg.SetDiskID(dsk, node)
3619 result = self.rpc.call_blockdev_getsize(node, newl)
3621 self.LogWarning("Failure in blockdev_getsize call to node"
3622 " %s, ignoring", node)
3624 if len(result.payload) != len(dskl):
3625 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3626 " result.payload=%s", node, len(dskl), result.payload)
3627 self.LogWarning("Invalid result from node %s, ignoring node results",
3630 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3632 self.LogWarning("Disk %d of instance %s did not return size"
3633 " information, ignoring", idx, instance.name)
3635 if not isinstance(size, (int, long)):
3636 self.LogWarning("Disk %d of instance %s did not return valid"
3637 " size information, ignoring", idx, instance.name)
3640 if size != disk.size:
3641 self.LogInfo("Disk %d of instance %s has mismatched size,"
3642 " correcting: recorded %d, actual %d", idx,
3643 instance.name, disk.size, size)
3645 self.cfg.Update(instance, feedback_fn)
3646 changed.append((instance.name, idx, size))
3647 if self._EnsureChildSizes(disk):
3648 self.cfg.Update(instance, feedback_fn)
3649 changed.append((instance.name, idx, disk.size))
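# Each entry appended to 'changed' is an (instance_name, disk_index, size)
# tuple describing a correction written back to the configuration; the
# accumulated list is what this LU ultimately returns to the caller.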
3653 class LUClusterRename(LogicalUnit):
3654 """Rename the cluster.
3657 HPATH = "cluster-rename"
3658 HTYPE = constants.HTYPE_CLUSTER
3660 def BuildHooksEnv(self):
3665 "OP_TARGET": self.cfg.GetClusterName(),
3666 "NEW_NAME": self.op.name,
3669 def BuildHooksNodes(self):
3670 """Build hooks nodes.
3673 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3675 def CheckPrereq(self):
3676 """Verify that the passed name is a valid one.
3679 hostname = netutils.GetHostname(name=self.op.name,
3680 family=self.cfg.GetPrimaryIPFamily())
3682 new_name = hostname.name
3683 self.ip = new_ip = hostname.ip
3684 old_name = self.cfg.GetClusterName()
3685 old_ip = self.cfg.GetMasterIP()
3686 if new_name == old_name and new_ip == old_ip:
3687 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3688 " cluster has changed",
3690 if new_ip != old_ip:
3691 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3692 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3693 " reachable on the network" %
3694 new_ip, errors.ECODE_NOTUNIQUE)
3696 self.op.name = new_name
3698 def Exec(self, feedback_fn):
3699 """Rename the cluster.
3702 clustername = self.op.name
3705 # shutdown the master IP
3706 master_params = self.cfg.GetMasterNetworkParameters()
3707 ems = self.cfg.GetUseExternalMipScript()
3708 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3710 result.Raise("Could not disable the master role")
3713 cluster = self.cfg.GetClusterInfo()
3714 cluster.cluster_name = clustername
3715 cluster.master_ip = new_ip
3716 self.cfg.Update(cluster, feedback_fn)
3718 # update the known hosts file
3719 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3720 node_list = self.cfg.GetOnlineNodeList()
3722 node_list.remove(master_params.name)
3725 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3727 master_params.ip = new_ip
3728 result = self.rpc.call_node_activate_master_ip(master_params.name,
3730 msg = result.fail_msg
3732 self.LogWarning("Could not re-enable the master role on"
3733 " the master, please restart manually: %s", msg)
3738 def _ValidateNetmask(cfg, netmask):
3739 """Checks if a netmask is valid.
3741 @type cfg: L{config.ConfigWriter}
3742 @param cfg: The cluster configuration
3744 @param netmask: the netmask to be verified
3745 @raise errors.OpPrereqError: if the validation fails
3748 ip_family = cfg.GetPrimaryIPFamily()
3750 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3751 except errors.ProgrammerError:
3752 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3754 if not ipcls.ValidateNetmask(netmask):
3755 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
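
# Illustrative sketch only (not part of the original module): how the netmask
# helper above is meant to be used from an LU's CheckArguments; the wrapper
# name is hypothetical and simply turns the exception into a boolean.
def _ExampleNetmaskIsValid(cfg, netmask):
  """Returns whether NETMASK would be accepted by L{_ValidateNetmask}.

  """
  try:
    _ValidateNetmask(cfg, netmask)
  except errors.OpPrereqError:
    return False
  return True
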
3759 class LUClusterSetParams(LogicalUnit):
3760 """Change the parameters of the cluster.
3763 HPATH = "cluster-modify"
3764 HTYPE = constants.HTYPE_CLUSTER
3767 def CheckArguments(self):
3771 if self.op.uid_pool:
3772 uidpool.CheckUidPool(self.op.uid_pool)
3774 if self.op.add_uids:
3775 uidpool.CheckUidPool(self.op.add_uids)
3777 if self.op.remove_uids:
3778 uidpool.CheckUidPool(self.op.remove_uids)
3780 if self.op.master_netmask is not None:
3781 _ValidateNetmask(self.cfg, self.op.master_netmask)
3783 if self.op.diskparams:
3784 for dt_params in self.op.diskparams.values():
3785 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3787 def ExpandNames(self):
3788 # FIXME: in the future maybe other cluster params won't require checking on
3789 # all nodes to be modified.
3790 self.needed_locks = {
3791 locking.LEVEL_NODE: locking.ALL_SET,
3792 locking.LEVEL_INSTANCE: locking.ALL_SET,
3793 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3795 self.share_locks = {
3796 locking.LEVEL_NODE: 1,
3797 locking.LEVEL_INSTANCE: 1,
3798 locking.LEVEL_NODEGROUP: 1,
3801 def BuildHooksEnv(self):
3806 "OP_TARGET": self.cfg.GetClusterName(),
3807 "NEW_VG_NAME": self.op.vg_name,
3810 def BuildHooksNodes(self):
3811 """Build hooks nodes.
3814 mn = self.cfg.GetMasterNode()
3817 def CheckPrereq(self):
3818 """Check prerequisites.
3820 This checks that the given parameters do not conflict and that
3821 the given volume group is valid.
3824 if self.op.vg_name is not None and not self.op.vg_name:
3825 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3826 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3827 " instances exist", errors.ECODE_INVAL)
3829 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3830 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3831 raise errors.OpPrereqError("Cannot disable drbd helper while"
3832 " drbd-based instances exist",
3835 node_list = self.owned_locks(locking.LEVEL_NODE)
3837 # if vg_name not None, checks given volume group on all nodes
3839 vglist = self.rpc.call_vg_list(node_list)
3840 for node in node_list:
3841 msg = vglist[node].fail_msg
3843 # ignoring down node
3844 self.LogWarning("Error while gathering data on node %s"
3845 " (ignoring node): %s", node, msg)
3847 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3849 constants.MIN_VG_SIZE)
3851 raise errors.OpPrereqError("Error on node '%s': %s" %
3852 (node, vgstatus), errors.ECODE_ENVIRON)
3854 if self.op.drbd_helper:
3855 # checks given drbd helper on all nodes
3856 helpers = self.rpc.call_drbd_helper(node_list)
3857 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3859 self.LogInfo("Not checking drbd helper on offline node %s", node)
3861 msg = helpers[node].fail_msg
3863 raise errors.OpPrereqError("Error checking drbd helper on node"
3864 " '%s': %s" % (node, msg),
3865 errors.ECODE_ENVIRON)
3866 node_helper = helpers[node].payload
3867 if node_helper != self.op.drbd_helper:
3868 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3869 (node, node_helper), errors.ECODE_ENVIRON)
3871 self.cluster = cluster = self.cfg.GetClusterInfo()
3872 # validate params changes
3873 if self.op.beparams:
3874 objects.UpgradeBeParams(self.op.beparams)
3875 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3876 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3878 if self.op.ndparams:
3879 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3880 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3882 # TODO: we need a more general way to handle resetting
3883 # cluster-level parameters to default values
3884 if self.new_ndparams["oob_program"] == "":
3885 self.new_ndparams["oob_program"] = \
3886 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3888 if self.op.hv_state:
3889 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3890 self.cluster.hv_state_static)
3891 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3892 for hv, values in new_hv_state.items())
3894 if self.op.disk_state:
3895 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3896 self.cluster.disk_state_static)
3897 self.new_disk_state = \
3898 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3899 for name, values in svalues.items()))
3900 for storage, svalues in new_disk_state.items())
3903 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3906 all_instances = self.cfg.GetAllInstancesInfo().values()
3908 for group in self.cfg.GetAllNodeGroupsInfo().values():
3909 instances = frozenset([inst for inst in all_instances
3910 if compat.any(node in group.members
3911 for node in inst.all_nodes)])
3912 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3913 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3915 new_ipolicy, instances)
3917 violations.update(new)
3920 self.LogWarning("After the ipolicy change the following instances"
3921 " violate them: %s",
3922 utils.CommaJoin(violations))
3924 if self.op.nicparams:
3925 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3926 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3927 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3930 # check all instances for consistency
3931 for instance in self.cfg.GetAllInstancesInfo().values():
3932 for nic_idx, nic in enumerate(instance.nics):
3933 params_copy = copy.deepcopy(nic.nicparams)
3934 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3936 # check parameter syntax
3938 objects.NIC.CheckParameterSyntax(params_filled)
3939 except errors.ConfigurationError, err:
3940 nic_errors.append("Instance %s, nic/%d: %s" %
3941 (instance.name, nic_idx, err))
3943 # if we're moving instances to routed, check that they have an ip
3944 target_mode = params_filled[constants.NIC_MODE]
3945 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3946 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3947 " address" % (instance.name, nic_idx))
3949 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3950 "\n".join(nic_errors))
3952 # hypervisor list/parameters
3953 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3954 if self.op.hvparams:
3955 for hv_name, hv_dict in self.op.hvparams.items():
3956 if hv_name not in self.new_hvparams:
3957 self.new_hvparams[hv_name] = hv_dict
3959 self.new_hvparams[hv_name].update(hv_dict)
3961 # disk template parameters
3962 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3963 if self.op.diskparams:
3964 for dt_name, dt_params in self.op.diskparams.items():
3965 if dt_name not in self.new_diskparams:
3966 self.new_diskparams[dt_name] = dt_params
3968 self.new_diskparams[dt_name].update(dt_params)
3970 # os hypervisor parameters
3971 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3973 for os_name, hvs in self.op.os_hvp.items():
3974 if os_name not in self.new_os_hvp:
3975 self.new_os_hvp[os_name] = hvs
3977 for hv_name, hv_dict in hvs.items():
3978 if hv_name not in self.new_os_hvp[os_name]:
3979 self.new_os_hvp[os_name][hv_name] = hv_dict
3981 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3984 self.new_osp = objects.FillDict(cluster.osparams, {})
3985 if self.op.osparams:
3986 for os_name, osp in self.op.osparams.items():
3987 if os_name not in self.new_osp:
3988 self.new_osp[os_name] = {}
3990 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3993 if not self.new_osp[os_name]:
3994 # we removed all parameters
3995 del self.new_osp[os_name]
3997 # check the parameter validity (remote check)
3998 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3999 os_name, self.new_osp[os_name])
4001 # changes to the hypervisor list
4002 if self.op.enabled_hypervisors is not None:
4003 self.hv_list = self.op.enabled_hypervisors
4004 for hv in self.hv_list:
4005 # if the hypervisor doesn't already exist in the cluster
4006 # hvparams, we initialize it to empty, and then (in both
4007 # cases) we make sure to fill the defaults, as we might not
4008 # have a complete defaults list if the hypervisor wasn't enabled before
4010 if hv not in new_hvp:
4011 new_hvp[hv] = {}
4012 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4013 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4015 self.hv_list = cluster.enabled_hypervisors
4017 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4018 # either the enabled list has changed, or the parameters have, validate
4019 for hv_name, hv_params in self.new_hvparams.items():
4020 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4021 (self.op.enabled_hypervisors and
4022 hv_name in self.op.enabled_hypervisors)):
4023 # either this is a new hypervisor, or its parameters have changed
4024 hv_class = hypervisor.GetHypervisor(hv_name)
4025 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4026 hv_class.CheckParameterSyntax(hv_params)
4027 _CheckHVParams(self, node_list, hv_name, hv_params)
4030 # no need to check any newly-enabled hypervisors, since the
4031 # defaults have already been checked in the above code-block
4032 for os_name, os_hvp in self.new_os_hvp.items():
4033 for hv_name, hv_params in os_hvp.items():
4034 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4035 # we need to fill in the new os_hvp on top of the actual hv_p
4036 cluster_defaults = self.new_hvparams.get(hv_name, {})
4037 new_osp = objects.FillDict(cluster_defaults, hv_params)
4038 hv_class = hypervisor.GetHypervisor(hv_name)
4039 hv_class.CheckParameterSyntax(new_osp)
4040 _CheckHVParams(self, node_list, hv_name, new_osp)
4042 if self.op.default_iallocator:
4043 alloc_script = utils.FindFile(self.op.default_iallocator,
4044 constants.IALLOCATOR_SEARCH_PATH,
4046 if alloc_script is None:
4047 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4048 " specified" % self.op.default_iallocator,
4051 def Exec(self, feedback_fn):
4052 """Change the parameters of the cluster.
4055 if self.op.vg_name is not None:
4056 new_volume = self.op.vg_name
4059 if new_volume != self.cfg.GetVGName():
4060 self.cfg.SetVGName(new_volume)
4062 feedback_fn("Cluster LVM configuration already in desired"
4063 " state, not changing")
4064 if self.op.drbd_helper is not None:
4065 new_helper = self.op.drbd_helper
4068 if new_helper != self.cfg.GetDRBDHelper():
4069 self.cfg.SetDRBDHelper(new_helper)
4071 feedback_fn("Cluster DRBD helper already in desired state,"
4073 if self.op.hvparams:
4074 self.cluster.hvparams = self.new_hvparams
4076 self.cluster.os_hvp = self.new_os_hvp
4077 if self.op.enabled_hypervisors is not None:
4078 self.cluster.hvparams = self.new_hvparams
4079 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4080 if self.op.beparams:
4081 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4082 if self.op.nicparams:
4083 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4085 self.cluster.ipolicy = self.new_ipolicy
4086 if self.op.osparams:
4087 self.cluster.osparams = self.new_osp
4088 if self.op.ndparams:
4089 self.cluster.ndparams = self.new_ndparams
4090 if self.op.diskparams:
4091 self.cluster.diskparams = self.new_diskparams
4092 if self.op.hv_state:
4093 self.cluster.hv_state_static = self.new_hv_state
4094 if self.op.disk_state:
4095 self.cluster.disk_state_static = self.new_disk_state
4097 if self.op.candidate_pool_size is not None:
4098 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4099 # we need to update the pool size here, otherwise the save will fail
4100 _AdjustCandidatePool(self, [])
4102 if self.op.maintain_node_health is not None:
4103 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4104 feedback_fn("Note: CONFD was disabled at build time, node health"
4105 " maintenance is not useful (still enabling it)")
4106 self.cluster.maintain_node_health = self.op.maintain_node_health
4108 if self.op.prealloc_wipe_disks is not None:
4109 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4111 if self.op.add_uids is not None:
4112 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4114 if self.op.remove_uids is not None:
4115 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4117 if self.op.uid_pool is not None:
4118 self.cluster.uid_pool = self.op.uid_pool
4120 if self.op.default_iallocator is not None:
4121 self.cluster.default_iallocator = self.op.default_iallocator
4123 if self.op.reserved_lvs is not None:
4124 self.cluster.reserved_lvs = self.op.reserved_lvs
4126 if self.op.use_external_mip_script is not None:
4127 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4129 def helper_os(aname, mods, desc):
4131 lst = getattr(self.cluster, aname)
4132 for key, val in mods:
4133 if key == constants.DDM_ADD:
4135 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4138 elif key == constants.DDM_REMOVE:
4142 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4144 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4146 if self.op.hidden_os:
4147 helper_os("hidden_os", self.op.hidden_os, "hidden")
4149 if self.op.blacklisted_os:
4150 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4152 if self.op.master_netdev:
4153 master_params = self.cfg.GetMasterNetworkParameters()
4154 ems = self.cfg.GetUseExternalMipScript()
4155 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4156 self.cluster.master_netdev)
4157 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4159 result.Raise("Could not disable the master ip")
4160 feedback_fn("Changing master_netdev from %s to %s" %
4161 (master_params.netdev, self.op.master_netdev))
4162 self.cluster.master_netdev = self.op.master_netdev
4164 if self.op.master_netmask:
4165 master_params = self.cfg.GetMasterNetworkParameters()
4166 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4167 result = self.rpc.call_node_change_master_netmask(master_params.name,
4168 master_params.netmask,
4169 self.op.master_netmask,
4171 master_params.netdev)
4173 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4176 self.cluster.master_netmask = self.op.master_netmask
4178 self.cfg.Update(self.cluster, feedback_fn)
4180 if self.op.master_netdev:
4181 master_params = self.cfg.GetMasterNetworkParameters()
4182 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4183 self.op.master_netdev)
4184 ems = self.cfg.GetUseExternalMipScript()
4185 result = self.rpc.call_node_activate_master_ip(master_params.name,
4188 self.LogWarning("Could not re-enable the master ip on"
4189 " the master, please restart manually: %s",
4193 def _UploadHelper(lu, nodes, fname):
4194 """Helper for uploading a file and showing warnings.
4197 if os.path.exists(fname):
4198 result = lu.rpc.call_upload_file(nodes, fname)
4199 for to_node, to_result in result.items():
4200 msg = to_result.fail_msg
4202 msg = ("Copy of file %s to node %s failed: %s" %
4203 (fname, to_node, msg))
4204 lu.proc.LogWarning(msg)
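
# Illustrative sketch only (not part of the original module): the usual calling
# pattern for _UploadHelper, mirroring how LUClusterRename pushes the
# known_hosts file to every online node except the master.  The helper name is
# hypothetical.
def _ExamplePushKnownHosts(lu):
  """Uploads the cluster known_hosts file to all online non-master nodes.

  """
  node_list = lu.cfg.GetOnlineNodeList()
  master_name = lu.cfg.GetMasterNode()
  if master_name in node_list:
    node_list.remove(master_name)
  _UploadHelper(lu, node_list, constants.SSH_KNOWN_HOSTS_FILE)
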
4207 def _ComputeAncillaryFiles(cluster, redist):
4208 """Compute files external to Ganeti which need to be consistent.
4210 @type redist: boolean
4211 @param redist: Whether to include files which need to be redistributed
4214 # Compute files for all nodes
4216 constants.SSH_KNOWN_HOSTS_FILE,
4217 constants.CONFD_HMAC_KEY,
4218 constants.CLUSTER_DOMAIN_SECRET_FILE,
4219 constants.SPICE_CERT_FILE,
4220 constants.SPICE_CACERT_FILE,
4221 constants.RAPI_USERS_FILE,
4225 files_all.update(constants.ALL_CERT_FILES)
4226 files_all.update(ssconf.SimpleStore().GetFileList())
4228 # we need to ship at least the RAPI certificate
4229 files_all.add(constants.RAPI_CERT_FILE)
4231 if cluster.modify_etc_hosts:
4232 files_all.add(constants.ETC_HOSTS)
4234 # Files which are optional; these must:
4235 # - be present in one other category as well
4236 # - either exist or not exist on all nodes of that category (mc, vm all)
4238 constants.RAPI_USERS_FILE,
4241 # Files which should only be on master candidates
4245 files_mc.add(constants.CLUSTER_CONF_FILE)
4247 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4249 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4251 # Files which should only be on VM-capable nodes
4252 files_vm = set(filename
4253 for hv_name in cluster.enabled_hypervisors
4254 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4256 files_opt |= set(filename
4257 for hv_name in cluster.enabled_hypervisors
4258 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4260 # Filenames in each category must be unique
4261 all_files_set = files_all | files_mc | files_vm
4262 assert (len(all_files_set) ==
4263 sum(map(len, [files_all, files_mc, files_vm]))), \
4264 "Found file listed in more than one file list"
4266 # Optional files must be present in one other category
4267 assert all_files_set.issuperset(files_opt), \
4268 "Optional file not in a different required list"
4270 return (files_all, files_opt, files_mc, files_vm)
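
# Illustrative sketch only (not part of the original module): how the file
# categories computed above are typically consumed.  Optional files are always
# a subset of another category, so a flat list of everything that may be
# shipped is just the union of the three required sets.  The helper name is
# hypothetical.
def _ExampleAllDistributedFiles(cluster):
  """Returns the set of all files that redistribution may touch.

  """
  (files_all, _, files_mc, files_vm) = _ComputeAncillaryFiles(cluster, True)
  return files_all | files_mc | files_vm
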
4273 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4274 """Distribute additional files which are part of the cluster configuration.
4276 ConfigWriter takes care of distributing the config and ssconf files, but
4277 there are more files which should be distributed to all nodes. This function
4278 makes sure those are copied.
4280 @param lu: calling logical unit
4281 @param additional_nodes: list of nodes not in the config to distribute to
4282 @type additional_vm: boolean
4283 @param additional_vm: whether the additional nodes are vm-capable or not
4286 # Gather target nodes
4287 cluster = lu.cfg.GetClusterInfo()
4288 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4290 online_nodes = lu.cfg.GetOnlineNodeList()
4291 vm_nodes = lu.cfg.GetVmCapableNodeList()
4293 if additional_nodes is not None:
4294 online_nodes.extend(additional_nodes)
4296 vm_nodes.extend(additional_nodes)
4298 # Never distribute to master node
4299 for nodelist in [online_nodes, vm_nodes]:
4300 if master_info.name in nodelist:
4301 nodelist.remove(master_info.name)
4304 (files_all, _, files_mc, files_vm) = \
4305 _ComputeAncillaryFiles(cluster, True)
4307 # Never re-distribute configuration file from here
4308 assert not (constants.CLUSTER_CONF_FILE in files_all or
4309 constants.CLUSTER_CONF_FILE in files_vm)
4310 assert not files_mc, "Master candidates not handled in this function"
4313 (online_nodes, files_all),
4314 (vm_nodes, files_vm),
4318 for (node_list, files) in filemap:
4320 _UploadHelper(lu, node_list, fname)
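
# Illustrative note (not part of the original module): typical call sites of
# the helper above.  A plain redistribution calls it without arguments (see
# LUClusterRedistConf below), while node addition lists the new node
# explicitly, e.g.:
#
#   _RedistributeAncillaryFiles(self, additional_nodes=[node],
#                               additional_vm=self.op.vm_capable)
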
4323 class LUClusterRedistConf(NoHooksLU):
4324 """Force the redistribution of cluster configuration.
4326 This is a very simple LU.
4331 def ExpandNames(self):
4332 self.needed_locks = {
4333 locking.LEVEL_NODE: locking.ALL_SET,
4335 self.share_locks[locking.LEVEL_NODE] = 1
4337 def Exec(self, feedback_fn):
4338 """Redistribute the configuration.
4341 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4342 _RedistributeAncillaryFiles(self)
4345 class LUClusterActivateMasterIp(NoHooksLU):
4346 """Activate the master IP on the master node.
4349 def Exec(self, feedback_fn):
4350 """Activate the master IP.
4353 master_params = self.cfg.GetMasterNetworkParameters()
4354 ems = self.cfg.GetUseExternalMipScript()
4355 result = self.rpc.call_node_activate_master_ip(master_params.name,
4357 result.Raise("Could not activate the master IP")
4360 class LUClusterDeactivateMasterIp(NoHooksLU):
4361 """Deactivate the master IP on the master node.
4364 def Exec(self, feedback_fn):
4365 """Deactivate the master IP.
4368 master_params = self.cfg.GetMasterNetworkParameters()
4369 ems = self.cfg.GetUseExternalMipScript()
4370 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4372 result.Raise("Could not deactivate the master IP")
4375 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4376 """Sleep and poll for an instance's disks to sync.
4379 if not instance.disks or disks is not None and not disks:
4382 disks = _ExpandCheckDisks(instance, disks)
4385 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4387 node = instance.primary_node
4390 lu.cfg.SetDiskID(dev, node)
4392 # TODO: Convert to utils.Retry
4395 degr_retries = 10 # in seconds, as we sleep 1 second each time
4399 cumul_degraded = False
4400 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4401 msg = rstats.fail_msg
4403 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4406 raise errors.RemoteError("Can't contact node %s for mirror data,"
4407 " aborting." % node)
4410 rstats = rstats.payload
4412 for i, mstat in enumerate(rstats):
4414 lu.LogWarning("Can't compute data for node %s/%s",
4415 node, disks[i].iv_name)
4418 cumul_degraded = (cumul_degraded or
4419 (mstat.is_degraded and mstat.sync_percent is None))
4420 if mstat.sync_percent is not None:
4422 if mstat.estimated_time is not None:
4423 rem_time = ("%s remaining (estimated)" %
4424 utils.FormatSeconds(mstat.estimated_time))
4425 max_time = mstat.estimated_time
4427 rem_time = "no time estimate"
4428 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4429 (disks[i].iv_name, mstat.sync_percent, rem_time))
4431 # if we're done but degraded, let's do a few small retries, to
4432 # make sure we see a stable and not transient situation; therefore
4433 # we force restart of the loop
4434 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4435 logging.info("Degraded disks found, %d retries left", degr_retries)
4443 time.sleep(min(60, max_time))
4446 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4447 return not cumul_degraded
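
# Illustrative sketch only (not part of the original module): the
# "done but still degraded" retry used by _WaitForSync above, shown in
# isolation with a caller-supplied probe callable.  All names are hypothetical.
def _ExampleWaitStable(probe_fn, retries=10, delay=1):
  """Polls PROBE_FN until it reports a stable, non-degraded state.

  @param probe_fn: callable returning a (done, degraded) tuple of booleans
  @return: True if the final state was not degraded

  """
  while True:
    (done, degraded) = probe_fn()
    if not done:
      time.sleep(delay)
    elif degraded and retries > 0:
      # done but degraded: retry a few times to rule out a transient state
      retries -= 1
      time.sleep(delay)
    else:
      return not degraded
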
4450 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4451 """Check that mirrors are not degraded.
4453 The ldisk parameter, if True, will change the test from the
4454 is_degraded attribute (which represents overall non-ok status for
4455 the device(s)) to the ldisk (representing the local storage status).
4458 lu.cfg.SetDiskID(dev, node)
4462 if on_primary or dev.AssembleOnSecondary():
4463 rstats = lu.rpc.call_blockdev_find(node, dev)
4464 msg = rstats.fail_msg
4466 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4468 elif not rstats.payload:
4469 lu.LogWarning("Can't find disk on node %s", node)
4473 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4475 result = result and not rstats.payload.is_degraded
4478 for child in dev.children:
4479 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
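
# Illustrative sketch only (not part of the original module): checking every
# disk of an instance on its primary node with the helper above.  The helper
# name is hypothetical.
def _ExampleInstanceDisksConsistent(lu, instance):
  """Returns True if all disks of INSTANCE look healthy on its primary node.

  """
  node = instance.primary_node
  return all(_CheckDiskConsistency(lu, dev, node, True)
             for dev in instance.disks)
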
4484 class LUOobCommand(NoHooksLU):
4485 """Logical unit for OOB handling.
4489 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4491 def ExpandNames(self):
4492 """Gather locks we need.
4495 if self.op.node_names:
4496 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4497 lock_names = self.op.node_names
4499 lock_names = locking.ALL_SET
4501 self.needed_locks = {
4502 locking.LEVEL_NODE: lock_names,
4505 def CheckPrereq(self):
4506 """Check prerequisites.
4509 - the node exists in the configuration
4512 Any errors are signaled by raising errors.OpPrereqError.
4516 self.master_node = self.cfg.GetMasterNode()
4518 assert self.op.power_delay >= 0.0
4520 if self.op.node_names:
4521 if (self.op.command in self._SKIP_MASTER and
4522 self.master_node in self.op.node_names):
4523 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4524 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4526 if master_oob_handler:
4527 additional_text = ("run '%s %s %s' if you want to operate on the"
4528 " master regardless") % (master_oob_handler,
4532 additional_text = "it does not support out-of-band operations"
4534 raise errors.OpPrereqError(("Operating on the master node %s is not"
4535 " allowed for %s; %s") %
4536 (self.master_node, self.op.command,
4537 additional_text), errors.ECODE_INVAL)
4539 self.op.node_names = self.cfg.GetNodeList()
4540 if self.op.command in self._SKIP_MASTER:
4541 self.op.node_names.remove(self.master_node)
4543 if self.op.command in self._SKIP_MASTER:
4544 assert self.master_node not in self.op.node_names
4546 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4548 raise errors.OpPrereqError("Node %s not found" % node_name,
4551 self.nodes.append(node)
4553 if (not self.op.ignore_status and
4554 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4555 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4556 " not marked offline") % node_name,
4559 def Exec(self, feedback_fn):
4560 """Execute OOB and return result if we expect any.
4563 master_node = self.master_node
4566 for idx, node in enumerate(utils.NiceSort(self.nodes,
4567 key=lambda node: node.name)):
4568 node_entry = [(constants.RS_NORMAL, node.name)]
4569 ret.append(node_entry)
4571 oob_program = _SupportsOob(self.cfg, node)
4574 node_entry.append((constants.RS_UNAVAIL, None))
4577 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4578 self.op.command, oob_program, node.name)
4579 result = self.rpc.call_run_oob(master_node, oob_program,
4580 self.op.command, node.name,
4584 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4585 node.name, result.fail_msg)
4586 node_entry.append((constants.RS_NODATA, None))
4589 self._CheckPayload(result)
4590 except errors.OpExecError, err:
4591 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4593 node_entry.append((constants.RS_NODATA, None))
4595 if self.op.command == constants.OOB_HEALTH:
4596 # For health we should log important events
4597 for item, status in result.payload:
4598 if status in [constants.OOB_STATUS_WARNING,
4599 constants.OOB_STATUS_CRITICAL]:
4600 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4601 item, node.name, status)
4603 if self.op.command == constants.OOB_POWER_ON:
4605 elif self.op.command == constants.OOB_POWER_OFF:
4606 node.powered = False
4607 elif self.op.command == constants.OOB_POWER_STATUS:
4608 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4609 if powered != node.powered:
4610 logging.warning(("Recorded power state (%s) of node '%s' does not"
4611 " match actual power state (%s)"), node.powered,
4614 # For configuration changing commands we should update the node
4615 if self.op.command in (constants.OOB_POWER_ON,
4616 constants.OOB_POWER_OFF):
4617 self.cfg.Update(node, feedback_fn)
4619 node_entry.append((constants.RS_NORMAL, result.payload))
4621 if (self.op.command == constants.OOB_POWER_ON and
4622 idx < len(self.nodes) - 1):
4623 time.sleep(self.op.power_delay)
4627 def _CheckPayload(self, result):
4628 """Checks if the payload is valid.
4630 @param result: RPC result
4631 @raises errors.OpExecError: If payload is not valid
4635 if self.op.command == constants.OOB_HEALTH:
4636 if not isinstance(result.payload, list):
4637 errs.append("command 'health' is expected to return a list but got %s" %
4638 type(result.payload))
4640 for item, status in result.payload:
4641 if status not in constants.OOB_STATUSES:
4642 errs.append("health item '%s' has invalid status '%s'" %
4645 if self.op.command == constants.OOB_POWER_STATUS:
4646 if not isinstance(result.payload, dict):
4647 errs.append("power-status is expected to return a dict but got %s" %
4648 type(result.payload))
4650 if self.op.command in [
4651 constants.OOB_POWER_ON,
4652 constants.OOB_POWER_OFF,
4653 constants.OOB_POWER_CYCLE,
4655 if result.payload is not None:
4656 errs.append("%s is expected to not return payload but got '%s'" %
4657 (self.op.command, result.payload))
4660 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4661 utils.CommaJoin(errs))
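
# Illustrative note (not part of the original module): the value returned by
# LUOobCommand.Exec above is a list with one entry per node; each entry starts
# with the node name and is followed by the command result, each wrapped in a
# (status, data) tuple, roughly:
#
#   [[(constants.RS_NORMAL, "node1.example.com"),
#     (constants.RS_NORMAL, <payload>)],
#    [(constants.RS_NORMAL, "node2.example.com"),
#     (constants.RS_NODATA, None)]]
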
4664 class _OsQuery(_QueryBase):
4665 FIELDS = query.OS_FIELDS
4667 def ExpandNames(self, lu):
4668 # Lock all nodes in shared mode
4669 # Temporary removal of locks, should be reverted later
4670 # TODO: reintroduce locks when they are lighter-weight
4671 lu.needed_locks = {}
4672 #self.share_locks[locking.LEVEL_NODE] = 1
4673 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4675 # The following variables interact with _QueryBase._GetNames
4677 self.wanted = self.names
4679 self.wanted = locking.ALL_SET
4681 self.do_locking = self.use_locking
4683 def DeclareLocks(self, lu, level):
4687 def _DiagnoseByOS(rlist):
4688 """Remaps a per-node return list into a per-os per-node dictionary
4690 @param rlist: a map with node names as keys and OS objects as values
4693 @return: a dictionary with osnames as keys and as value another
4694 map, with nodes as keys and tuples of (path, status, diagnose,
4695 variants, parameters, api_versions) as values, eg::
4697 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4698 (/srv/..., False, "invalid api")],
4699 "node2": [(/srv/..., True, "", [], [])]}
4704 # we build here the list of nodes that didn't fail the RPC (at RPC
4705 # level), so that nodes with a non-responding node daemon don't
4706 # make all OSes invalid
4707 good_nodes = [node_name for node_name in rlist
4708 if not rlist[node_name].fail_msg]
4709 for node_name, nr in rlist.items():
4710 if nr.fail_msg or not nr.payload:
4712 for (name, path, status, diagnose, variants,
4713 params, api_versions) in nr.payload:
4714 if name not in all_os:
4715 # build a list of nodes for this os containing empty lists
4716 # for each node in node_list
4718 for nname in good_nodes:
4719 all_os[name][nname] = []
4720 # convert params from [name, help] to (name, help)
4721 params = [tuple(v) for v in params]
4722 all_os[name][node_name].append((path, status, diagnose,
4723 variants, params, api_versions))
4726 def _GetQueryData(self, lu):
4727 """Computes the list of nodes and their attributes.
4730 # Locking is not used
4731 assert not (compat.any(lu.glm.is_owned(level)
4732 for level in locking.LEVELS
4733 if level != locking.LEVEL_CLUSTER) or
4734 self.do_locking or self.use_locking)
4736 valid_nodes = [node.name
4737 for node in lu.cfg.GetAllNodesInfo().values()
4738 if not node.offline and node.vm_capable]
4739 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4740 cluster = lu.cfg.GetClusterInfo()
4744 for (os_name, os_data) in pol.items():
4745 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4746 hidden=(os_name in cluster.hidden_os),
4747 blacklisted=(os_name in cluster.blacklisted_os))
4751 api_versions = set()
4753 for idx, osl in enumerate(os_data.values()):
4754 info.valid = bool(info.valid and osl and osl[0][1])
4758 (node_variants, node_params, node_api) = osl[0][3:6]
4761 variants.update(node_variants)
4762 parameters.update(node_params)
4763 api_versions.update(node_api)
4765 # Filter out inconsistent values
4766 variants.intersection_update(node_variants)
4767 parameters.intersection_update(node_params)
4768 api_versions.intersection_update(node_api)
4770 info.variants = list(variants)
4771 info.parameters = list(parameters)
4772 info.api_versions = list(api_versions)
4774 data[os_name] = info
4776 # Prepare data in requested order
4777 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
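
# Illustrative sketch only (not part of the original module): the
# "take the first node's values, then intersect with every other node" pattern
# used by _GetQueryData above to keep only variants/parameters/API versions
# that all nodes agree on.  The helper name is hypothetical.
def _ExampleCommonValues(per_node_values):
  """Returns only the values reported by every entry in PER_NODE_VALUES.

  @param per_node_values: sequence of iterables, one per node

  """
  common = set()
  for idx, values in enumerate(per_node_values):
    if idx == 0:
      common.update(values)
    else:
      common.intersection_update(values)
  return common
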
4781 class LUOsDiagnose(NoHooksLU):
4782 """Logical unit for OS diagnose/query.
4788 def _BuildFilter(fields, names):
4789 """Builds a filter for querying OSes.
4792 name_filter = qlang.MakeSimpleFilter("name", names)
4794 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4795 # respective field is not requested
4796 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4797 for fname in ["hidden", "blacklisted"]
4798 if fname not in fields]
4799 if "valid" not in fields:
4800 status_filter.append([qlang.OP_TRUE, "valid"])
4803 status_filter.insert(0, qlang.OP_AND)
4805 status_filter = None
4807 if name_filter and status_filter:
4808 return [qlang.OP_AND, name_filter, status_filter]
4812 return status_filter
4814 def CheckArguments(self):
4815 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4816 self.op.output_fields, False)
4818 def ExpandNames(self):
4819 self.oq.ExpandNames(self)
4821 def Exec(self, feedback_fn):
4822 return self.oq.OldStyleQuery(self)
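
# Illustrative note (not part of the original module): for a query that
# requests only ["name", "variants"] and gives no names, _BuildFilter above
# yields (approximately) the following qlang filter, hiding hidden,
# blacklisted and invalid OSes:
#
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]
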
4825 class LUNodeRemove(LogicalUnit):
4826 """Logical unit for removing a node.
4829 HPATH = "node-remove"
4830 HTYPE = constants.HTYPE_NODE
4832 def BuildHooksEnv(self):
4835 This doesn't run on the target node in the pre phase as a failed
4836 node would then be impossible to remove.
4840 "OP_TARGET": self.op.node_name,
4841 "NODE_NAME": self.op.node_name,
4844 def BuildHooksNodes(self):
4845 """Build hooks nodes.
4848 all_nodes = self.cfg.GetNodeList()
4850 all_nodes.remove(self.op.node_name)
4852 logging.warning("Node '%s', which is about to be removed, was not found"
4853 " in the list of all nodes", self.op.node_name)
4854 return (all_nodes, all_nodes)
4856 def CheckPrereq(self):
4857 """Check prerequisites.
4860 - the node exists in the configuration
4861 - it does not have primary or secondary instances
4862 - it's not the master
4864 Any errors are signaled by raising errors.OpPrereqError.
4867 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4868 node = self.cfg.GetNodeInfo(self.op.node_name)
4869 assert node is not None
4871 masternode = self.cfg.GetMasterNode()
4872 if node.name == masternode:
4873 raise errors.OpPrereqError("Node is the master node, failover to another"
4874 " node is required", errors.ECODE_INVAL)
4876 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4877 if node.name in instance.all_nodes:
4878 raise errors.OpPrereqError("Instance %s is still running on the node,"
4879 " please remove first" % instance_name,
4881 self.op.node_name = node.name
4884 def Exec(self, feedback_fn):
4885 """Removes the node from the cluster.
4889 logging.info("Stopping the node daemon and removing configs from node %s",
4892 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4894 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4897 # Promote nodes to master candidate as needed
4898 _AdjustCandidatePool(self, exceptions=[node.name])
4899 self.context.RemoveNode(node.name)
4901 # Run post hooks on the node before it's removed
4902 _RunPostHook(self, node.name)
4904 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4905 msg = result.fail_msg
4907 self.LogWarning("Errors encountered on the remote node while leaving"
4908 " the cluster: %s", msg)
4910 # Remove node from our /etc/hosts
4911 if self.cfg.GetClusterInfo().modify_etc_hosts:
4912 master_node = self.cfg.GetMasterNode()
4913 result = self.rpc.call_etc_hosts_modify(master_node,
4914 constants.ETC_HOSTS_REMOVE,
4916 result.Raise("Can't update hosts file with new host data")
4917 _RedistributeAncillaryFiles(self)
4920 class _NodeQuery(_QueryBase):
4921 FIELDS = query.NODE_FIELDS
4923 def ExpandNames(self, lu):
4924 lu.needed_locks = {}
4925 lu.share_locks = _ShareAll()
4928 self.wanted = _GetWantedNodes(lu, self.names)
4930 self.wanted = locking.ALL_SET
4932 self.do_locking = (self.use_locking and
4933 query.NQ_LIVE in self.requested_data)
4936 # If any non-static field is requested we need to lock the nodes
4937 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4939 def DeclareLocks(self, lu, level):
4942 def _GetQueryData(self, lu):
4943 """Computes the list of nodes and their attributes.
4946 all_info = lu.cfg.GetAllNodesInfo()
4948 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4950 # Gather data as requested
4951 if query.NQ_LIVE in self.requested_data:
4952 # filter out non-vm_capable nodes
4953 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4955 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4956 [lu.cfg.GetHypervisorType()])
4957 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4958 for (name, nresult) in node_data.items()
4959 if not nresult.fail_msg and nresult.payload)
4963 if query.NQ_INST in self.requested_data:
4964 node_to_primary = dict([(name, set()) for name in nodenames])
4965 node_to_secondary = dict([(name, set()) for name in nodenames])
4967 inst_data = lu.cfg.GetAllInstancesInfo()
4969 for inst in inst_data.values():
4970 if inst.primary_node in node_to_primary:
4971 node_to_primary[inst.primary_node].add(inst.name)
4972 for secnode in inst.secondary_nodes:
4973 if secnode in node_to_secondary:
4974 node_to_secondary[secnode].add(inst.name)
4976 node_to_primary = None
4977 node_to_secondary = None
4979 if query.NQ_OOB in self.requested_data:
4980 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4981 for name, node in all_info.iteritems())
4985 if query.NQ_GROUP in self.requested_data:
4986 groups = lu.cfg.GetAllNodeGroupsInfo()
4990 return query.NodeQueryData([all_info[name] for name in nodenames],
4991 live_data, lu.cfg.GetMasterNode(),
4992 node_to_primary, node_to_secondary, groups,
4993 oob_support, lu.cfg.GetClusterInfo())
4996 class LUNodeQuery(NoHooksLU):
4997 """Logical unit for querying nodes.
5000 # pylint: disable=W0142
5003 def CheckArguments(self):
5004 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5005 self.op.output_fields, self.op.use_locking)
5007 def ExpandNames(self):
5008 self.nq.ExpandNames(self)
5010 def DeclareLocks(self, level):
5011 self.nq.DeclareLocks(self, level)
5013 def Exec(self, feedback_fn):
5014 return self.nq.OldStyleQuery(self)
5017 class LUNodeQueryvols(NoHooksLU):
5018 """Logical unit for getting volumes on node(s).
5022 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5023 _FIELDS_STATIC = utils.FieldSet("node")
5025 def CheckArguments(self):
5026 _CheckOutputFields(static=self._FIELDS_STATIC,
5027 dynamic=self._FIELDS_DYNAMIC,
5028 selected=self.op.output_fields)
5030 def ExpandNames(self):
5031 self.share_locks = _ShareAll()
5032 self.needed_locks = {}
5034 if not self.op.nodes:
5035 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5037 self.needed_locks[locking.LEVEL_NODE] = \
5038 _GetWantedNodes(self, self.op.nodes)
5040 def Exec(self, feedback_fn):
5041 """Computes the list of nodes and their attributes.
5044 nodenames = self.owned_locks(locking.LEVEL_NODE)
5045 volumes = self.rpc.call_node_volumes(nodenames)
5047 ilist = self.cfg.GetAllInstancesInfo()
5048 vol2inst = _MapInstanceDisksToNodes(ilist.values())
5051 for node in nodenames:
5052 nresult = volumes[node]
5055 msg = nresult.fail_msg
5057 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5060 node_vols = sorted(nresult.payload,
5061 key=operator.itemgetter("dev"))
5063 for vol in node_vols:
5065 for field in self.op.output_fields:
5068 elif field == "phys":
5072 elif field == "name":
5074 elif field == "size":
5075 val = int(float(vol["size"]))
5076 elif field == "instance":
5077 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5079 raise errors.ParameterError(field)
5080 node_output.append(str(val))
5082 output.append(node_output)
5087 class LUNodeQueryStorage(NoHooksLU):
5088 """Logical unit for getting information on storage units on node(s).
5091 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5094 def CheckArguments(self):
5095 _CheckOutputFields(static=self._FIELDS_STATIC,
5096 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5097 selected=self.op.output_fields)
5099 def ExpandNames(self):
5100 self.share_locks = _ShareAll()
5101 self.needed_locks = {}
5104 self.needed_locks[locking.LEVEL_NODE] = \
5105 _GetWantedNodes(self, self.op.nodes)
5107 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5109 def Exec(self, feedback_fn):
5110 """Computes the list of nodes and their attributes.
5113 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5115 # Always get name to sort by
5116 if constants.SF_NAME in self.op.output_fields:
5117 fields = self.op.output_fields[:]
5119 fields = [constants.SF_NAME] + self.op.output_fields
5121 # Never ask for node or type as it's only known to the LU
5122 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5123 while extra in fields:
5124 fields.remove(extra)
5126 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5127 name_idx = field_idx[constants.SF_NAME]
5129 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5130 data = self.rpc.call_storage_list(self.nodes,
5131 self.op.storage_type, st_args,
5132 self.op.name, fields)
5136 for node in utils.NiceSort(self.nodes):
5137 nresult = data[node]
5141 msg = nresult.fail_msg
5143 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5146 rows = dict([(row[name_idx], row) for row in nresult.payload])
5148 for name in utils.NiceSort(rows.keys()):
5153 for field in self.op.output_fields:
5154 if field == constants.SF_NODE:
5156 elif field == constants.SF_TYPE:
5157 val = self.op.storage_type
5158 elif field in field_idx:
5159 val = row[field_idx[field]]
5161 raise errors.ParameterError(field)
5170 class _InstanceQuery(_QueryBase):
5171 FIELDS = query.INSTANCE_FIELDS
5173 def ExpandNames(self, lu):
5174 lu.needed_locks = {}
5175 lu.share_locks = _ShareAll()
5178 self.wanted = _GetWantedInstances(lu, self.names)
5180 self.wanted = locking.ALL_SET
5182 self.do_locking = (self.use_locking and
5183 query.IQ_LIVE in self.requested_data)
5185 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5186 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5187 lu.needed_locks[locking.LEVEL_NODE] = []
5188 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5190 self.do_grouplocks = (self.do_locking and
5191 query.IQ_NODES in self.requested_data)
5193 def DeclareLocks(self, lu, level):
5195 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5196 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5198 # Lock all groups used by instances optimistically; this requires going
5199 # via the node before it's locked, requiring verification later on
5200 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5202 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5203 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5204 elif level == locking.LEVEL_NODE:
5205 lu._LockInstancesNodes() # pylint: disable=W0212
5208 def _CheckGroupLocks(lu):
5209 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5210 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5212 # Check if node groups for locked instances are still correct
5213 for instance_name in owned_instances:
5214 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5216 def _GetQueryData(self, lu):
5217 """Computes the list of instances and their attributes.
5220 if self.do_grouplocks:
5221 self._CheckGroupLocks(lu)
5223 cluster = lu.cfg.GetClusterInfo()
5224 all_info = lu.cfg.GetAllInstancesInfo()
5226 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5228 instance_list = [all_info[name] for name in instance_names]
5229 nodes = frozenset(itertools.chain(*(inst.all_nodes
5230 for inst in instance_list)))
5231 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5234 wrongnode_inst = set()
5236 # Gather data as requested
5237 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5239 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5241 result = node_data[name]
5243 # offline nodes will be in both lists
5244 assert result.fail_msg
5245 offline_nodes.append(name)
5247 bad_nodes.append(name)
5248 elif result.payload:
5249 for inst in result.payload:
5250 if inst in all_info:
5251 if all_info[inst].primary_node == name:
5252 live_data.update(result.payload)
5254 wrongnode_inst.add(inst)
5256 # orphan instance; we don't list it here as we don't
5257 # handle this case yet in the output of instance listing
5258 logging.warning("Orphan instance '%s' found on node %s",
5260 # else no instance is alive
5264 if query.IQ_DISKUSAGE in self.requested_data:
5265 disk_usage = dict((inst.name,
5266 _ComputeDiskSize(inst.disk_template,
5267 [{constants.IDISK_SIZE: disk.size}
5268 for disk in inst.disks]))
5269 for inst in instance_list)
5273 if query.IQ_CONSOLE in self.requested_data:
5275 for inst in instance_list:
5276 if inst.name in live_data:
5277 # Instance is running
5278 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5280 consinfo[inst.name] = None
5281 assert set(consinfo.keys()) == set(instance_names)
5285 if query.IQ_NODES in self.requested_data:
5286 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5288 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5289 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5290 for uuid in set(map(operator.attrgetter("group"),
5296 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5297 disk_usage, offline_nodes, bad_nodes,
5298 live_data, wrongnode_inst, consinfo,
5302 class LUQuery(NoHooksLU):
5303 """Query for resources/items of a certain kind.
5306 # pylint: disable=W0142
5309 def CheckArguments(self):
5310 qcls = _GetQueryImplementation(self.op.what)
5312 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5314 def ExpandNames(self):
5315 self.impl.ExpandNames(self)
5317 def DeclareLocks(self, level):
5318 self.impl.DeclareLocks(self, level)
5320 def Exec(self, feedback_fn):
5321 return self.impl.NewStyleQuery(self)
5324 class LUQueryFields(NoHooksLU):
5325 """Query for resources/items of a certain kind.
5328 # pylint: disable=W0142
5331 def CheckArguments(self):
5332 self.qcls = _GetQueryImplementation(self.op.what)
5334 def ExpandNames(self):
5335 self.needed_locks = {}
5337 def Exec(self, feedback_fn):
5338 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5341 class LUNodeModifyStorage(NoHooksLU):
5342 """Logical unit for modifying a storage volume on a node.
5347 def CheckArguments(self):
5348 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5350 storage_type = self.op.storage_type
5353 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5355 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5356 " modified" % storage_type,
5359 diff = set(self.op.changes.keys()) - modifiable
5361 raise errors.OpPrereqError("The following fields can not be modified for"
5362 " storage units of type '%s': %r" %
5363 (storage_type, list(diff)),
5366 def ExpandNames(self):
5367 self.needed_locks = {
5368 locking.LEVEL_NODE: self.op.node_name,
5371 def Exec(self, feedback_fn):
5372 """Computes the list of nodes and their attributes.
5375 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5376 result = self.rpc.call_storage_modify(self.op.node_name,
5377 self.op.storage_type, st_args,
5378 self.op.name, self.op.changes)
5379 result.Raise("Failed to modify storage unit '%s' on %s" %
5380 (self.op.name, self.op.node_name))
5383 class LUNodeAdd(LogicalUnit):
5384 """Logical unit for adding node to the cluster.
5388 HTYPE = constants.HTYPE_NODE
5389 _NFLAGS = ["master_capable", "vm_capable"]
5391 def CheckArguments(self):
5392 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5393 # validate/normalize the node name
5394 self.hostname = netutils.GetHostname(name=self.op.node_name,
5395 family=self.primary_ip_family)
5396 self.op.node_name = self.hostname.name
5398 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5399 raise errors.OpPrereqError("Cannot readd the master node",
5402 if self.op.readd and self.op.group:
5403 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5404 " being readded", errors.ECODE_INVAL)
5406 def BuildHooksEnv(self):
5409 This will run on all nodes before, and on all nodes + the new node after.
5413 "OP_TARGET": self.op.node_name,
5414 "NODE_NAME": self.op.node_name,
5415 "NODE_PIP": self.op.primary_ip,
5416 "NODE_SIP": self.op.secondary_ip,
5417 "MASTER_CAPABLE": str(self.op.master_capable),
5418 "VM_CAPABLE": str(self.op.vm_capable),
5421 def BuildHooksNodes(self):
5422 """Build hooks nodes.
5425 # Exclude added node
5426 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5427 post_nodes = pre_nodes + [self.op.node_name, ]
5429 return (pre_nodes, post_nodes)
5431 def CheckPrereq(self):
5432 """Check prerequisites.
5435 - the new node is not already in the config
5437 - its parameters (single/dual homed) match the cluster
5439 Any errors are signaled by raising errors.OpPrereqError.
5443 hostname = self.hostname
5444 node = hostname.name
5445 primary_ip = self.op.primary_ip = hostname.ip
5446 if self.op.secondary_ip is None:
5447 if self.primary_ip_family == netutils.IP6Address.family:
5448 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5449 " IPv4 address must be given as secondary",
5451 self.op.secondary_ip = primary_ip
5453 secondary_ip = self.op.secondary_ip
5454 if not netutils.IP4Address.IsValid(secondary_ip):
5455 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5456 " address" % secondary_ip, errors.ECODE_INVAL)
5458 node_list = cfg.GetNodeList()
5459 if not self.op.readd and node in node_list:
5460 raise errors.OpPrereqError("Node %s is already in the configuration" %
5461 node, errors.ECODE_EXISTS)
5462 elif self.op.readd and node not in node_list:
5463 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5466 self.changed_primary_ip = False
5468 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5469 if self.op.readd and node == existing_node_name:
5470 if existing_node.secondary_ip != secondary_ip:
5471 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5472 " address configuration as before",
5474 if existing_node.primary_ip != primary_ip:
5475 self.changed_primary_ip = True
5479 if (existing_node.primary_ip == primary_ip or
5480 existing_node.secondary_ip == primary_ip or
5481 existing_node.primary_ip == secondary_ip or
5482 existing_node.secondary_ip == secondary_ip):
5483 raise errors.OpPrereqError("New node ip address(es) conflict with"
5484 " existing node %s" % existing_node.name,
5485 errors.ECODE_NOTUNIQUE)
5487 # After this 'if' block, None is no longer a valid value for the
5488 # _capable op attributes
5490 old_node = self.cfg.GetNodeInfo(node)
5491 assert old_node is not None, "Can't retrieve locked node %s" % node
5492 for attr in self._NFLAGS:
5493 if getattr(self.op, attr) is None:
5494 setattr(self.op, attr, getattr(old_node, attr))
5496 for attr in self._NFLAGS:
5497 if getattr(self.op, attr) is None:
5498 setattr(self.op, attr, True)
5500 if self.op.readd and not self.op.vm_capable:
5501 pri, sec = cfg.GetNodeInstances(node)
5503 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5504 " flag set to false, but it already holds"
5505 " instances" % node,
5508 # check that the type of the node (single versus dual homed) is the
5509 # same as for the master
5510 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5511 master_singlehomed = myself.secondary_ip == myself.primary_ip
5512 newbie_singlehomed = secondary_ip == primary_ip
5513 if master_singlehomed != newbie_singlehomed:
5514 if master_singlehomed:
5515 raise errors.OpPrereqError("The master has no secondary ip but the"
5516 " new node has one",
5519 raise errors.OpPrereqError("The master has a secondary ip but the"
5520 " new node doesn't have one",
5523 # checks reachability
5524 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5525 raise errors.OpPrereqError("Node not reachable by ping",
5526 errors.ECODE_ENVIRON)
5528 if not newbie_singlehomed:
5529 # check reachability from my secondary ip to newbie's secondary ip
5530 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5531 source=myself.secondary_ip):
5532 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5533 " based ping to node daemon port",
5534 errors.ECODE_ENVIRON)
5541 if self.op.master_capable:
5542 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5544 self.master_candidate = False
5547 self.new_node = old_node
5549 node_group = cfg.LookupNodeGroup(self.op.group)
5550 self.new_node = objects.Node(name=node,
5551 primary_ip=primary_ip,
5552 secondary_ip=secondary_ip,
5553 master_candidate=self.master_candidate,
5554 offline=False, drained=False,
5557 if self.op.ndparams:
5558 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5560 if self.op.hv_state:
5561 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5563 if self.op.disk_state:
5564 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5566 def Exec(self, feedback_fn):
5567 """Adds the new node to the cluster.
5570 new_node = self.new_node
5571 node = new_node.name
5573 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5576 # We are adding a new node, so we assume it's powered
5577 new_node.powered = True
5579 # for re-adds, reset the offline/drained/master-candidate flags;
5580 # we need to reset here, otherwise offline would prevent RPC calls
5581 # later in the procedure; this also means that if the re-add
5582 # fails, we are left with a non-offlined, broken node
5584 new_node.drained = new_node.offline = False # pylint: disable=W0201
5585 self.LogInfo("Readding a node, the offline/drained flags were reset")
5586 # if we demote the node, we do cleanup later in the procedure
5587 new_node.master_candidate = self.master_candidate
5588 if self.changed_primary_ip:
5589 new_node.primary_ip = self.op.primary_ip
5591 # copy the master/vm_capable flags
5592 for attr in self._NFLAGS:
5593 setattr(new_node, attr, getattr(self.op, attr))
5595 # notify the user about any possible mc promotion
5596 if new_node.master_candidate:
5597 self.LogInfo("Node will be a master candidate")
5599 if self.op.ndparams:
5600 new_node.ndparams = self.op.ndparams
5602 new_node.ndparams = {}
5604 if self.op.hv_state:
5605 new_node.hv_state_static = self.new_hv_state
5607 if self.op.disk_state:
5608 new_node.disk_state_static = self.new_disk_state
5610 # check connectivity
5611 result = self.rpc.call_version([node])[node]
5612 result.Raise("Can't get version information from node %s" % node)
5613 if constants.PROTOCOL_VERSION == result.payload:
5614 logging.info("Communication to node %s fine, sw version %s match",
5615 node, result.payload)
5617 raise errors.OpExecError("Version mismatch master version %s,"
5618 " node version %s" %
5619 (constants.PROTOCOL_VERSION, result.payload))
5621 # Add node to our /etc/hosts, and add key to known_hosts
5622 if self.cfg.GetClusterInfo().modify_etc_hosts:
5623 master_node = self.cfg.GetMasterNode()
5624 result = self.rpc.call_etc_hosts_modify(master_node,
5625 constants.ETC_HOSTS_ADD,
5628 result.Raise("Can't update hosts file with new host data")
5630 if new_node.secondary_ip != new_node.primary_ip:
5631 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5634 node_verify_list = [self.cfg.GetMasterNode()]
5635 node_verify_param = {
5636 constants.NV_NODELIST: ([node], {}),
5637 # TODO: do a node-net-test as well?
5640 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5641 self.cfg.GetClusterName())
5642 for verifier in node_verify_list:
5643 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5644 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5646 for failed in nl_payload:
5647 feedback_fn("ssh/hostname verification failed"
5648 " (checking from %s): %s" %
5649 (verifier, nl_payload[failed]))
5650 raise errors.OpExecError("ssh/hostname verification failed")
5653 _RedistributeAncillaryFiles(self)
5654 self.context.ReaddNode(new_node)
5655 # make sure we redistribute the config
5656 self.cfg.Update(new_node, feedback_fn)
5657 # and make sure the new node will not have old files around
5658 if not new_node.master_candidate:
5659 result = self.rpc.call_node_demote_from_mc(new_node.name)
5660 msg = result.fail_msg
5662 self.LogWarning("Node failed to demote itself from master"
5663 " candidate status: %s" % msg)
5665 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5666 additional_vm=self.op.vm_capable)
5667 self.context.AddNode(new_node, self.proc.GetECId())
5670 class LUNodeSetParams(LogicalUnit):
5671 """Modifies the parameters of a node.
5673 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5674 to the node role (as _ROLE_*)
5675 @cvar _R2F: a dictionary from node role to tuples of flags
5676 @cvar _FLAGS: a list of attribute names corresponding to the flags
5679 HPATH = "node-modify"
5680 HTYPE = constants.HTYPE_NODE
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
5690 _FLAGS = ["master_candidate", "drained", "offline"]
5692 def CheckArguments(self):
5693 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5694 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5695 self.op.master_capable, self.op.vm_capable,
5696 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5698 if all_mods.count(None) == len(all_mods):
5699 raise errors.OpPrereqError("Please pass at least one modification",
5701 if all_mods.count(True) > 1:
5702 raise errors.OpPrereqError("Can't set the node into more than one"
5703 " state at the same time",
5706 # Boolean value that tells us whether we might be demoting from MC
5707 self.might_demote = (self.op.master_candidate == False or
5708 self.op.offline == True or
5709 self.op.drained == True or
5710 self.op.master_capable == False)
5712 if self.op.secondary_ip:
5713 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5714 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5715 " address" % self.op.secondary_ip,
5718 self.lock_all = self.op.auto_promote and self.might_demote
5719 self.lock_instances = self.op.secondary_ip is not None
5721 def _InstanceFilter(self, instance):
5722 """Filter for getting affected instances.
5725 return (instance.disk_template in constants.DTS_INT_MIRROR and
5726 self.op.node_name in instance.all_nodes)
  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5734 # Since modifying a node can have severe effects on currently running
5735 # operations the resource lock is at least acquired in shared mode
5736 self.needed_locks[locking.LEVEL_NODE_RES] = \
5737 self.needed_locks[locking.LEVEL_NODE]
5739 # Get node resource and instance locks in shared mode; they are not used
5740 # for anything but read-only access
5741 self.share_locks[locking.LEVEL_NODE_RES] = 1
5742 self.share_locks[locking.LEVEL_INSTANCE] = 1
5744 if self.lock_instances:
5745 self.needed_locks[locking.LEVEL_INSTANCE] = \
5746 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5748 def BuildHooksEnv(self):
5751 This runs on the master node.
5755 "OP_TARGET": self.op.node_name,
5756 "MASTER_CANDIDATE": str(self.op.master_candidate),
5757 "OFFLINE": str(self.op.offline),
5758 "DRAINED": str(self.op.drained),
5759 "MASTER_CAPABLE": str(self.op.master_capable),
5760 "VM_CAPABLE": str(self.op.vm_capable),
5763 def BuildHooksNodes(self):
5764 """Build hooks nodes.
5767 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5770 def CheckPrereq(self):
5771 """Check prerequisites.
5773 This only checks the instance list against the existing names.
5776 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5778 if self.lock_instances:
5779 affected_instances = \
5780 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5782 # Verify instance locks
5783 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5784 wanted_instances = frozenset(affected_instances.keys())
5785 if wanted_instances - owned_instances:
5786 raise errors.OpPrereqError("Instances affected by changing node %s's"
5787 " secondary IP address have changed since"
5788 " locks were acquired, wanted '%s', have"
5789 " '%s'; retry the operation" %
5791 utils.CommaJoin(wanted_instances),
5792 utils.CommaJoin(owned_instances)),
5795 affected_instances = None
5797 if (self.op.master_candidate is not None or
5798 self.op.drained is not None or
5799 self.op.offline is not None):
5800 # we can't change the master's node flags
5801 if self.op.node_name == self.cfg.GetMasterNode():
5802 raise errors.OpPrereqError("The master role can be changed"
5803 " only via master-failover",
5806 if self.op.master_candidate and not node.master_capable:
5807 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5808 " it a master candidate" % node.name,
5811 if self.op.vm_capable == False:
5812 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5814 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5815 " the vm_capable flag" % node.name,
5818 if node.master_candidate and self.might_demote and not self.lock_all:
5819 assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
5822 (mc_remaining, mc_should, _) = \
5823 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5824 if mc_remaining < mc_should:
5825 raise errors.OpPrereqError("Not enough master candidates, please"
5826 " pass auto promote option to allow"
5827 " promotion", errors.ECODE_STATE)
5829 self.old_flags = old_flags = (node.master_candidate,
5830 node.drained, node.offline)
5831 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5832 self.old_role = old_role = self._F2R[old_flags]
5834 # Check for ineffective changes
5835 for attr in self._FLAGS:
5836 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5837 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5838 setattr(self.op, attr, None)
5840 # Past this point, any flag change to False means a transition
5841 # away from the respective state, as only real changes are kept
5843 # TODO: We might query the real power state if it supports OOB
5844 if _SupportsOob(self.cfg, node):
5845 if self.op.offline is False and not (node.powered or
5846 self.op.powered == True):
5847 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5848 " offline status can be reset") %
5850 elif self.op.powered is not None:
5851 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5852 " as it does not support out-of-band"
5853 " handling") % self.op.node_name)
5855 # If we're being deofflined/drained, we'll MC ourself if needed
5856 if (self.op.drained == False or self.op.offline == False or
5857 (self.op.master_capable and not node.master_capable)):
5858 if _DecideSelfPromotion(self):
5859 self.op.master_candidate = True
5860 self.LogInfo("Auto-promoting node to master candidate")
5862 # If we're no longer master capable, we'll demote ourselves from MC
5863 if self.op.master_capable == False and node.master_candidate:
5864 self.LogInfo("Demoting from master candidate")
5865 self.op.master_candidate = False
5868 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5869 if self.op.master_candidate:
5870 new_role = self._ROLE_CANDIDATE
5871 elif self.op.drained:
5872 new_role = self._ROLE_DRAINED
5873 elif self.op.offline:
5874 new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # previously-set flag), so the node goes back to the regular role
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role
5882 self.new_role = new_role
5884 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5885 # Trying to transition out of offline status
5886 # TODO: Use standard RPC runner, but make sure it works when the node is
5887 # still marked offline
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5891 " to report its version: %s" %
5892 (node.name, result.fail_msg),
5895 self.LogWarning("Transitioning node from offline to online state"
5896 " without using re-add. Please make sure the node"
5899 if self.op.secondary_ip:
5900 # Ok even without locking, because this can't be changed by any LU
5901 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5902 master_singlehomed = master.secondary_ip == master.primary_ip
5903 if master_singlehomed and self.op.secondary_ip:
5904 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5905 " homed cluster", errors.ECODE_INVAL)
5907 assert not (frozenset(affected_instances) -
5908 self.owned_locks(locking.LEVEL_INSTANCE))
      if node.offline:
        if affected_instances:
5912 raise errors.OpPrereqError("Cannot change secondary IP address:"
5913 " offline node has instances (%s)"
5914 " configured to use it" %
5915 utils.CommaJoin(affected_instances.keys()))
      else:
        # On online nodes, check that no instances are running, and that
5918 # the node has the new ip and we can reach it.
5919 for instance in affected_instances.values():
5920 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5921 msg="cannot change secondary ip")
5923 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5924 if master.name != node.name:
5925 # check reachability from master secondary ip to new secondary ip
5926 if not netutils.TcpPing(self.op.secondary_ip,
5927 constants.DEFAULT_NODED_PORT,
5928 source=master.secondary_ip):
5929 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5930 " based ping to node daemon port",
5931 errors.ECODE_ENVIRON)
5933 if self.op.ndparams:
5934 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5935 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5936 self.new_ndparams = new_ndparams
5938 if self.op.hv_state:
5939 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5940 self.node.hv_state_static)
5942 if self.op.disk_state:
5943 self.new_disk_state = \
5944 _MergeAndVerifyDiskState(self.op.disk_state,
5945 self.node.disk_state_static)
  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
5958 node.ndparams = self.new_ndparams
5960 if self.op.powered is not None:
5961 node.powered = self.op.powered
5963 if self.op.hv_state:
5964 node.hv_state_static = self.new_hv_state
5966 if self.op.disk_state:
5967 node.disk_state_static = self.new_disk_state
    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))
5975 if new_role != old_role:
5976 # Tell the node to demote itself, if no longer MC and not offline
5977 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5978 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5980 self.LogWarning("Node failed to demote itself: %s", msg)
5982 new_flags = self._R2F[new_role]
5983 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5985 result.append((desc, str(nf)))
5986 (node.master_candidate, node.drained, node.offline) = new_flags
5988 # we locked all nodes, we adjust the CP before updating this node
5990 _AdjustCandidatePool(self, [node.name])
5992 if self.op.secondary_ip:
5993 node.secondary_ip = self.op.secondary_ip
5994 result.append(("secondary_ip", self.op.secondary_ip))
5996 # this will trigger configuration file update, if needed
5997 self.cfg.Update(node, feedback_fn)
    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
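
# Illustrative sketch only: the _F2R/_R2F tables of LUNodeSetParams above map
# the (master_candidate, drained, offline) flag triple to a single role and
# back. The hypothetical module-level copies and helpers below show the same
# round trip on plain tuples, independent of the class and the opcode
# machinery; the role names used here are assumptions for the sketch.
_EXAMPLE_F2R = {
  (True, False, False): "candidate",
  (False, True, False): "drained",
  (False, False, True): "offline",
  (False, False, False): "regular",
  }
_EXAMPLE_R2F = dict((v, k) for k, v in _EXAMPLE_F2R.items())


def _ExampleFlagsToRole(master_candidate, drained, offline):
  """Map a flag triple to a role name (sketch of the _F2R lookup)."""
  return _EXAMPLE_F2R[(master_candidate, drained, offline)]


def _ExampleRoleToFlags(role):
  """Map a role name back to its flag triple (sketch of the _R2F lookup)."""
  return _EXAMPLE_R2F[role]
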
6007 class LUNodePowercycle(NoHooksLU):
6008 """Powercycles a node.
6013 def CheckArguments(self):
6014 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6015 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6016 raise errors.OpPrereqError("The node is the master and the force"
6017 " parameter was not set",
6020 def ExpandNames(self):
6021 """Locking for PowercycleNode.
6023 This is a last-resort option and shouldn't block on other
6024 jobs. Therefore, we grab no locks.
6027 self.needed_locks = {}
6029 def Exec(self, feedback_fn):
6033 result = self.rpc.call_node_powercycle(self.op.node_name,
6034 self.cfg.GetHypervisorType())
6035 result.Raise("Failed to schedule the reboot")
6036 return result.payload
6039 class LUClusterQuery(NoHooksLU):
6040 """Query cluster configuration.
6045 def ExpandNames(self):
6046 self.needed_locks = {}
6048 def Exec(self, feedback_fn):
6049 """Return cluster config.
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
6056 for os_name, hv_dict in cluster.os_hvp.items():
6057 os_hvp[os_name] = {}
6058 for hv_name, hv_params in hv_dict.items():
6059 if hv_name in cluster.enabled_hypervisors:
6060 os_hvp[os_name][hv_name] = hv_params
6062 # Convert ip_family to ip_version
6063 primary_ip_version = constants.IP4_VERSION
6064 if cluster.primary_ip_family == netutils.IP6Address.family:
6065 primary_ip_version = constants.IP6_VERSION
6068 "software_version": constants.RELEASE_VERSION,
6069 "protocol_version": constants.PROTOCOL_VERSION,
6070 "config_version": constants.CONFIG_VERSION,
6071 "os_api_version": max(constants.OS_API_VERSIONS),
6072 "export_version": constants.EXPORT_VERSION,
6073 "architecture": (platform.architecture()[0], platform.machine()),
6074 "name": cluster.cluster_name,
6075 "master": cluster.master_node,
6076 "default_hypervisor": cluster.primary_hypervisor,
6077 "enabled_hypervisors": cluster.enabled_hypervisors,
6078 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6079 for hypervisor_name in cluster.enabled_hypervisors]),
6081 "beparams": cluster.beparams,
6082 "osparams": cluster.osparams,
6083 "ipolicy": cluster.ipolicy,
6084 "nicparams": cluster.nicparams,
6085 "ndparams": cluster.ndparams,
6086 "candidate_pool_size": cluster.candidate_pool_size,
6087 "master_netdev": cluster.master_netdev,
6088 "master_netmask": cluster.master_netmask,
6089 "use_external_mip_script": cluster.use_external_mip_script,
6090 "volume_group_name": cluster.volume_group_name,
6091 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6092 "file_storage_dir": cluster.file_storage_dir,
6093 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6094 "maintain_node_health": cluster.maintain_node_health,
6095 "ctime": cluster.ctime,
6096 "mtime": cluster.mtime,
6097 "uuid": cluster.uuid,
6098 "tags": list(cluster.GetTags()),
6099 "uid_pool": cluster.uid_pool,
6100 "default_iallocator": cluster.default_iallocator,
6101 "reserved_lvs": cluster.reserved_lvs,
6102 "primary_ip_version": primary_ip_version,
6103 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6104 "hidden_os": cluster.hidden_os,
6105 "blacklisted_os": cluster.blacklisted_os,
6111 class LUClusterConfigQuery(NoHooksLU):
6112 """Return configuration values.
6116 _FIELDS_DYNAMIC = utils.FieldSet()
6117 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6118 "watcher_pause", "volume_group_name")
6120 def CheckArguments(self):
6121 _CheckOutputFields(static=self._FIELDS_STATIC,
6122 dynamic=self._FIELDS_DYNAMIC,
6123 selected=self.op.output_fields)
6125 def ExpandNames(self):
6126 self.needed_locks = {}
6128 def Exec(self, feedback_fn):
6129 """Dump a representation of the cluster config to the standard output.
    values = []
    for field in self.op.output_fields:
6134 if field == "cluster_name":
6135 entry = self.cfg.GetClusterName()
6136 elif field == "master_node":
6137 entry = self.cfg.GetMasterNode()
6138 elif field == "drain_flag":
6139 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6140 elif field == "watcher_pause":
6141 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6142 elif field == "volume_group_name":
6143 entry = self.cfg.GetVGName()
6145 raise errors.ParameterError(field)
      values.append(entry)

    return values
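
# Illustrative sketch only: LUClusterConfigQuery.Exec above resolves each
# requested output field with an if/elif chain and rejects unknown fields.
# The hypothetical helper below expresses the same dispatch as a lookup table
# of callables; "getters" is assumed to map field names to zero-argument
# functions (e.g. a bound self.cfg.GetClusterName), which is not how the LU
# itself is written.
def _ExampleQueryFields(getters, output_fields):
  """Resolve output fields via a lookup table, mimicking the LU above."""
  values = []
  for field in output_fields:
    try:
      getter = getters[field]
    except KeyError:
      raise errors.ParameterError(field)
    values.append(getter())
  return values
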
6150 class LUInstanceActivateDisks(NoHooksLU):
6151 """Bring up an instance's disks.
6156 def ExpandNames(self):
6157 self._ExpandAndLockInstance()
6158 self.needed_locks[locking.LEVEL_NODE] = []
6159 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6161 def DeclareLocks(self, level):
6162 if level == locking.LEVEL_NODE:
6163 self._LockInstancesNodes()
6165 def CheckPrereq(self):
6166 """Check prerequisites.
6168 This checks that the instance is in the cluster.
6171 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6172 assert self.instance is not None, \
6173 "Cannot retrieve locked instance %s" % self.op.instance_name
6174 _CheckNodeOnline(self, self.instance.primary_node)
6176 def Exec(self, feedback_fn):
6177 """Activate the disks.
6180 disks_ok, disks_info = \
6181 _AssembleInstanceDisks(self, self.instance,
6182 ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
6191 """Prepare the block devices for an instance.
6193 This sets up the block devices on all nodes.
6195 @type lu: L{LogicalUnit}
6196 @param lu: the logical unit on whose behalf we execute
6197 @type instance: L{objects.Instance}
6198 @param instance: the instance for whose disks we assemble
6199 @type disks: list of L{objects.Disk} or None
6200 @param disks: which disks to assemble (or all, if None)
6201 @type ignore_secondaries: boolean
6202 @param ignore_secondaries: if true, errors on secondary nodes
6203 won't result in an error return from the function
6204 @type ignore_size: boolean
6205 @param ignore_size: if true, the current known size of the disk
6206 will not be used during the disk activation, useful for cases
6207 when the size is wrong
6208 @return: False if the operation failed, otherwise a list of
6209 (host, instance_visible_name, node_visible_name)
6210 with the mapping from node devices to instance devices
  device_info = []
  disks_ok = True
  iname = instance.name
6216 disks = _ExpandCheckDisks(instance, disks)
6218 # With the two passes mechanism we try to reduce the window of
6219 # opportunity for the race condition of switching DRBD to primary
6220 # before handshaking occured, but we do not eliminate it
6222 # The proper fix would be to wait (with some limits) until the
6223 # connection has been made and drbd transitions from WFConnection
6224 # into any other network-connected state (Connected, SyncTarget,
6227 # 1st pass, assemble on all nodes in secondary mode
6228 for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False
6243 # FIXME: race condition on drbd migration to primary
6245 # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload
6266 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6268 # leave the disks configured for the primary node
6269 # this is a workaround that would be fixed better by
6270 # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
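
# Illustrative sketch only: _AssembleInstanceDisks above activates every disk
# twice, first on all nodes in secondary mode and only afterwards on the
# primary node, to narrow the DRBD WFConnection race described in the comments.
# The hypothetical helper below shows just that ordering for a generic
# "assemble" callback; assemble_fn(node, as_primary) is an assumption for the
# sketch, not a real Ganeti RPC.
def _ExampleTwoPassAssemble(primary_node, all_nodes, assemble_fn):
  """Call assemble_fn in secondary-first, primary-last order."""
  # 1st pass: every node (including the primary) in secondary mode
  for node in all_nodes:
    assemble_fn(node, False)
  # 2nd pass: only the primary node, now as primary
  assemble_fn(primary_node, True)
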
6277 def _StartInstanceDisks(lu, instance, force):
6278 """Start the disks of an instance.
6281 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6282 ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")
6292 class LUInstanceDeactivateDisks(NoHooksLU):
6293 """Shutdown an instance's disks.
6298 def ExpandNames(self):
6299 self._ExpandAndLockInstance()
6300 self.needed_locks[locking.LEVEL_NODE] = []
6301 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6303 def DeclareLocks(self, level):
6304 if level == locking.LEVEL_NODE:
6305 self._LockInstancesNodes()
6307 def CheckPrereq(self):
6308 """Check prerequisites.
6310 This checks that the instance is in the cluster.
6313 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6314 assert self.instance is not None, \
6315 "Cannot retrieve locked instance %s" % self.op.instance_name
6317 def Exec(self, feedback_fn):
6318 """Deactivate the disks
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)
6328 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6329 """Shutdown block devices of an instance.
6331 This function checks if an instance is running, before calling
6332 _ShutdownInstanceDisks.
6335 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6336 _ShutdownInstanceDisks(lu, instance, disks=disks)
6339 def _ExpandCheckDisks(instance, disks):
6340 """Return the instance disks selected by the disks list
6342 @type disks: list of L{objects.Disk} or None
6343 @param disks: selected disks
6344 @rtype: list of L{objects.Disk}
6345 @return: selected instance disks to act on
  if not disks:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " owned instance")
    return disks
6357 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6358 """Shutdown block devices of an instance.
6360 This does the shutdown on all nodes of the instance.
  If the ignore_primary is false, errors on the primary node are not
  ignored (they cause the function to return False).

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6371 lu.cfg.SetDiskID(top_disk, node)
6372 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False

  return all_result
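
# Illustrative sketch only: when _ShutdownInstanceDisks above hits an error,
# that error only makes the overall result False if it happened on the primary
# node (and ignore_primary is unset) or on an online secondary node. The
# hypothetical predicate below restates that rule for a single node result;
# "node_offline" stands in for the offline flag carried by the RPC result.
def _ExampleShutdownErrorIsFatal(node, primary_node, node_offline,
                                 ignore_primary):
  """Decide whether a per-node shutdown failure should fail the operation."""
  if node == primary_node:
    return not ignore_primary
  # failures on offline secondaries are only logged, not counted
  return not node_offline
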
6383 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6384 """Checks if a node has enough free memory.
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6391 @type lu: C{LogicalUnit}
6392 @param lu: a logical unit from which we get configuration data
6394 @param node: the node to check
6395 @type reason: C{str}
6396 @param reason: string to use in the error message
6397 @type requested: C{int}
6398 @param requested: the amount of memory in MiB to check for
6399 @type hypervisor_name: C{str}
6400 @param hypervisor_name: the hypervisor to ask for memory stats
6401 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6402 we cannot check the node
6405 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6406 nodeinfo[node].Raise("Can't get data from node %s" % node,
6407 prereq=True, ecode=errors.ECODE_ENVIRON)
6408 (_, _, (hv_info, )) = nodeinfo[node].payload
6410 free_mem = hv_info.get("memory_free", None)
6411 if not isinstance(free_mem, int):
6412 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6413 " was '%s'" % (node, free_mem),
6414 errors.ECODE_ENVIRON)
6415 if requested > free_mem:
6416 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6417 " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
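
# Illustrative sketch only: _CheckNodeFreeMemory above asks the node for its
# hypervisor memory statistics and rejects the operation when "memory_free"
# is missing, malformed or too small. The hypothetical helper below performs
# the same validation on an already-fetched hv_info dictionary, so no RPC is
# involved; the payload layout is assumed to match the code above.
def _ExampleCheckFreeMemory(node, hv_info, requested, reason):
  """Validate a memory_free value the way _CheckNodeFreeMemory does."""
  free_mem = hv_info.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
  return free_mem
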
6422 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6423 """Checks if nodes have enough free disk space in the all VGs.
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6430 @type lu: C{LogicalUnit}
6431 @param lu: a logical unit from which we get configuration data
6432 @type nodenames: C{list}
6433 @param nodenames: the list of node names to check
6434 @type req_sizes: C{dict}
6435 @param req_sizes: the hash of vg and corresponding amount of disk in
6437 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6438 or we cannot check the node
6441 for vg, req_size in req_sizes.items():
6442 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6445 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6446 """Checks if nodes have enough free disk space in the specified VG.
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6453 @type lu: C{LogicalUnit}
6454 @param lu: a logical unit from which we get configuration data
6455 @type nodenames: C{list}
6456 @param nodenames: the list of node names to check
6458 @param vg: the volume group to check
6459 @type requested: C{int}
6460 @param requested: the amount of disk in MiB to check for
6461 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6462 or we cannot check the node
6465 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6466 for node in nodenames:
6467 info = nodeinfo[node]
6468 info.Raise("Cannot get current information from node %s" % node,
6469 prereq=True, ecode=errors.ECODE_ENVIRON)
6470 (_, (vg_info, ), _) = info.payload
6471 vg_free = vg_info.get("vg_free", None)
6472 if not isinstance(vg_free, int):
6473 raise errors.OpPrereqError("Can't compute free disk space on node"
6474 " %s for vg %s, result was '%s'" %
6475 (node, vg, vg_free), errors.ECODE_ENVIRON)
6476 if requested > vg_free:
6477 raise errors.OpPrereqError("Not enough disk space on target node %s"
6478 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)
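
# Illustrative sketch only: _CheckNodesFreeDiskPerVG above simply fans out one
# _CheckNodesFreeDiskOnVG call per (vg, size) pair. The hypothetical helper
# below shows the complementary step of aggregating per-VG requirements from a
# list of (vg, size_in_mib) disk requests, producing a dict shaped like the
# req_sizes argument used above; the input format is an assumption.
def _ExampleSumDiskRequestsPerVG(disk_requests):
  """Aggregate per-VG disk requirements from (vg, size) pairs."""
  req_sizes = {}
  for vg, size in disk_requests:
    req_sizes[vg] = req_sizes.get(vg, 0) + size
  return req_sizes
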
6483 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6484 """Checks if nodes have enough physical CPUs
  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has fewer CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6491 @type lu: C{LogicalUnit}
6492 @param lu: a logical unit from which we get configuration data
6493 @type nodenames: C{list}
6494 @param nodenames: the list of node names to check
6495 @type requested: C{int}
6496 @param requested: the minimum acceptable number of physical CPUs
6497 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6498 or we cannot check the node
6501 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6502 for node in nodenames:
6503 info = nodeinfo[node]
6504 info.Raise("Cannot get current information from node %s" % node,
6505 prereq=True, ecode=errors.ECODE_ENVIRON)
6506 (_, _, (hv_info, )) = info.payload
6507 num_cpus = hv_info.get("cpu_total", None)
6508 if not isinstance(num_cpus, int):
6509 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6510 " on node %s, result was '%s'" %
6511 (node, num_cpus), errors.ECODE_ENVIRON)
6512 if requested > num_cpus:
6513 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6514 "required" % (node, num_cpus, requested),
6518 class LUInstanceStartup(LogicalUnit):
6519 """Starts an instance.
6522 HPATH = "instance-start"
6523 HTYPE = constants.HTYPE_INSTANCE
6526 def CheckArguments(self):
6528 if self.op.beparams:
6529 # fill the beparams dict
6530 objects.UpgradeBeParams(self.op.beparams)
6531 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6533 def ExpandNames(self):
6534 self._ExpandAndLockInstance()
6535 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6537 def DeclareLocks(self, level):
6538 if level == locking.LEVEL_NODE_RES:
6539 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6541 def BuildHooksEnv(self):
6544 This runs on master, primary and secondary nodes of the instance.
6548 "FORCE": self.op.force,
6551 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6555 def BuildHooksNodes(self):
6556 """Build hooks nodes.
6559 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6562 def CheckPrereq(self):
6563 """Check prerequisites.
6565 This checks that the instance is in the cluster.
6568 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6569 assert self.instance is not None, \
6570 "Cannot retrieve locked instance %s" % self.op.instance_name
6573 if self.op.hvparams:
6574 # check hypervisor parameter syntax (locally)
6575 cluster = self.cfg.GetClusterInfo()
6576 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6577 filled_hvp = cluster.FillHV(instance)
6578 filled_hvp.update(self.op.hvparams)
6579 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6580 hv_type.CheckParameterSyntax(filled_hvp)
6581 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6583 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6585 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6587 if self.primary_offline and self.op.ignore_offline_nodes:
6588 self.proc.LogWarning("Ignoring offline primary node")
6590 if self.op.hvparams or self.op.beparams:
6591 self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)
6595 bep = self.cfg.GetClusterInfo().FillBE(instance)
6596 bep.update(self.op.beparams)
6598 # check bridges existence
6599 _CheckInstanceBridgesExist(self, instance)
6601 remote_info = self.rpc.call_instance_info(instance.primary_node,
6603 instance.hypervisor)
6604 remote_info.Raise("Error checking node %s" % instance.primary_node,
6605 prereq=True, ecode=errors.ECODE_ENVIRON)
6606 if not remote_info.payload: # not running already
6607 _CheckNodeFreeMemory(self, instance.primary_node,
6608 "starting instance %s" % instance.name,
6609 bep[constants.BE_MINMEM], instance.hypervisor)
6611 def Exec(self, feedback_fn):
6612 """Start the instance.
6615 instance = self.instance
6616 force = self.op.force
6618 if not self.op.no_remember:
6619 self.cfg.MarkInstanceUp(instance.name)
6621 if self.primary_offline:
6622 assert self.op.ignore_offline_nodes
6623 self.proc.LogInfo("Primary node offline, marked instance as started")
6625 node_current = instance.primary_node
6627 _StartInstanceDisks(self, instance, force)
6630 self.rpc.call_instance_start(node_current,
6631 (instance, self.op.hvparams,
6633 self.op.startup_paused)
6634 msg = result.fail_msg
6636 _ShutdownInstanceDisks(self, instance)
6637 raise errors.OpExecError("Could not start instance: %s" % msg)
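
# Illustrative sketch only: LUInstanceStartup above fills the instance's
# hypervisor parameters from the cluster defaults (cluster.FillHV) and then
# layers the one-off overrides from the opcode on top before validating them.
# The hypothetical helper below shows that layering as plain dict merging;
# "cluster_defaults", "instance_params" and "overrides" are assumed plain
# dicts, not Ganeti objects.
def _ExampleFillStartupParams(cluster_defaults, instance_params, overrides):
  """Merge parameter dicts with increasing priority, last one wins."""
  filled = {}
  for source in (cluster_defaults, instance_params, overrides):
    if source:
      filled.update(source)
  return filled
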
6640 class LUInstanceReboot(LogicalUnit):
6641 """Reboot an instance.
6644 HPATH = "instance-reboot"
6645 HTYPE = constants.HTYPE_INSTANCE
6648 def ExpandNames(self):
6649 self._ExpandAndLockInstance()
6651 def BuildHooksEnv(self):
6654 This runs on master, primary and secondary nodes of the instance.
6658 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6659 "REBOOT_TYPE": self.op.reboot_type,
6660 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6663 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6667 def BuildHooksNodes(self):
6668 """Build hooks nodes.
6671 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6674 def CheckPrereq(self):
6675 """Check prerequisites.
6677 This checks that the instance is in the cluster.
6680 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6681 assert self.instance is not None, \
6682 "Cannot retrieve locked instance %s" % self.op.instance_name
6683 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6684 _CheckNodeOnline(self, instance.primary_node)
6686 # check bridges existence
6687 _CheckInstanceBridgesExist(self, instance)
6689 def Exec(self, feedback_fn):
6690 """Reboot the instance.
6693 instance = self.instance
6694 ignore_secondaries = self.op.ignore_secondaries
6695 reboot_type = self.op.reboot_type
6697 remote_info = self.rpc.call_instance_info(instance.primary_node,
6699 instance.hypervisor)
6700 remote_info.Raise("Error checking node %s" % instance.primary_node)
6701 instance_running = bool(remote_info.payload)
6703 node_current = instance.primary_node
6705 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6706 constants.INSTANCE_REBOOT_HARD]:
6707 for disk in instance.disks:
6708 self.cfg.SetDiskID(disk, node_current)
6709 result = self.rpc.call_instance_reboot(node_current, instance,
6711 self.op.shutdown_timeout)
6712 result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)

      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)
6731 self.cfg.MarkInstanceUp(instance.name)
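
# Illustrative sketch only: LUInstanceReboot.Exec above performs an in-place
# reboot only for a running instance with a soft or hard reboot type; in every
# other case it falls back to a full stop/start cycle. The hypothetical
# predicate below captures that decision; the reboot type constants are the
# ones already used in the code above.
def _ExampleNeedsFullReboot(instance_running, reboot_type):
  """Return True when the reboot must be done as shutdown + startup."""
  in_place_types = frozenset([constants.INSTANCE_REBOOT_SOFT,
                              constants.INSTANCE_REBOOT_HARD])
  return not (instance_running and reboot_type in in_place_types)
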
6734 class LUInstanceShutdown(LogicalUnit):
6735 """Shutdown an instance.
6738 HPATH = "instance-stop"
6739 HTYPE = constants.HTYPE_INSTANCE
6742 def ExpandNames(self):
6743 self._ExpandAndLockInstance()
6745 def BuildHooksEnv(self):
6748 This runs on master, primary and secondary nodes of the instance.
6751 env = _BuildInstanceHookEnvByObject(self, self.instance)
6752 env["TIMEOUT"] = self.op.timeout
6755 def BuildHooksNodes(self):
6756 """Build hooks nodes.
6759 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6762 def CheckPrereq(self):
6763 """Check prerequisites.
6765 This checks that the instance is in the cluster.
6768 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6769 assert self.instance is not None, \
6770 "Cannot retrieve locked instance %s" % self.op.instance_name
6772 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6774 self.primary_offline = \
6775 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6777 if self.primary_offline and self.op.ignore_offline_nodes:
6778 self.proc.LogWarning("Ignoring offline primary node")
6780 _CheckNodeOnline(self, self.instance.primary_node)
6782 def Exec(self, feedback_fn):
6783 """Shutdown the instance.
6786 instance = self.instance
6787 node_current = instance.primary_node
6788 timeout = self.op.timeout
6790 if not self.op.no_remember:
6791 self.cfg.MarkInstanceDown(instance.name)
6793 if self.primary_offline:
6794 assert self.op.ignore_offline_nodes
6795 self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)
6805 class LUInstanceReinstall(LogicalUnit):
6806 """Reinstall an instance.
6809 HPATH = "instance-reinstall"
6810 HTYPE = constants.HTYPE_INSTANCE
6813 def ExpandNames(self):
6814 self._ExpandAndLockInstance()
6816 def BuildHooksEnv(self):
6819 This runs on master, primary and secondary nodes of the instance.
6822 return _BuildInstanceHookEnvByObject(self, self.instance)
6824 def BuildHooksNodes(self):
6825 """Build hooks nodes.
6828 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6831 def CheckPrereq(self):
6832 """Check prerequisites.
6834 This checks that the instance is in the cluster and is not running.
6837 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6838 assert instance is not None, \
6839 "Cannot retrieve locked instance %s" % self.op.instance_name
6840 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6841 " offline, cannot reinstall")
6842 for node in instance.secondary_nodes:
6843 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6844 " cannot reinstall")
6846 if instance.disk_template == constants.DT_DISKLESS:
6847 raise errors.OpPrereqError("Instance '%s' has no disks" %
6848 self.op.instance_name,
6850 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6852 if self.op.os_type is not None:
6854 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6855 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6856 instance_os = self.op.os_type
6858 instance_os = instance.os
6860 nodelist = list(instance.all_nodes)
6862 if self.op.osparams:
6863 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6864 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6865 self.os_inst = i_osdict # the new dict (without defaults)
6869 self.instance = instance
6871 def Exec(self, feedback_fn):
6872 """Reinstall the instance.
6875 inst = self.instance
6877 if self.op.os_type is not None:
6878 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6879 inst.os = self.op.os_type
6880 # Write to configuration
6881 self.cfg.Update(inst, feedback_fn)
6883 _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
6886 # FIXME: pass debug option from opcode to backend
6887 result = self.rpc.call_instance_os_add(inst.primary_node,
6888 (inst, self.os_inst), True,
6889 self.op.debug_level)
6890 result.Raise("Could not install OS for instance %s on node %s" %
6891 (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)
6896 class LUInstanceRecreateDisks(LogicalUnit):
6897 """Recreate an instance's missing disks.
6900 HPATH = "instance-recreate-disks"
6901 HTYPE = constants.HTYPE_INSTANCE
6904 _MODIFYABLE = frozenset([
6905 constants.IDISK_SIZE,
6906 constants.IDISK_MODE,
6909 # New or changed disk parameters may have different semantics
6910 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
6911 constants.IDISK_ADOPT,
6913 # TODO: Implement support changing VG while recreating
6915 constants.IDISK_METAVG,
6918 def CheckArguments(self):
6919 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
6920 # Normalize and convert deprecated list of disk indices
6921 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
6923 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
    if duplicates:
      raise errors.OpPrereqError("Some disks have been specified more than"
6926 " once: %s" % utils.CommaJoin(duplicates),
6929 for (idx, params) in self.op.disks:
6930 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
6931 unsupported = frozenset(params.keys()) - self._MODIFYABLE
      if unsupported:
        raise errors.OpPrereqError("Parameters for disk %s try to change"
                                   " unmodifiable parameter(s): %s" %
6935 (idx, utils.CommaJoin(unsupported)),
6938 def ExpandNames(self):
6939 self._ExpandAndLockInstance()
6940 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
6946 self.needed_locks[locking.LEVEL_NODE_RES] = []
6948 def DeclareLocks(self, level):
6949 if level == locking.LEVEL_NODE:
6950 # if we replace the nodes, we only need to lock the old primary,
6951 # otherwise we need to lock all nodes for disk re-creation
6952 primary_only = bool(self.op.nodes)
6953 self._LockInstancesNodes(primary_only=primary_only)
6954 elif level == locking.LEVEL_NODE_RES:
6956 self.needed_locks[locking.LEVEL_NODE_RES] = \
6957 self.needed_locks[locking.LEVEL_NODE][:]
6959 def BuildHooksEnv(self):
6962 This runs on master, primary and secondary nodes of the instance.
6965 return _BuildInstanceHookEnvByObject(self, self.instance)
6967 def BuildHooksNodes(self):
6968 """Build hooks nodes.
6971 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6974 def CheckPrereq(self):
6975 """Check prerequisites.
6977 This checks that the instance is in the cluster and is not running.
6980 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6981 assert instance is not None, \
6982 "Cannot retrieve locked instance %s" % self.op.instance_name
6984 if len(self.op.nodes) != len(instance.all_nodes):
6985 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6986 " %d replacement nodes were specified" %
6987 (instance.name, len(instance.all_nodes),
6988 len(self.op.nodes)),
6990 assert instance.disk_template != constants.DT_DRBD8 or \
6991 len(self.op.nodes) == 2
6992 assert instance.disk_template != constants.DT_PLAIN or \
6993 len(self.op.nodes) == 1
6994 primary_node = self.op.nodes[0]
6996 primary_node = instance.primary_node
6997 _CheckNodeOnline(self, primary_node)
6999 if instance.disk_template == constants.DT_DISKLESS:
7000 raise errors.OpPrereqError("Instance '%s' has no disks" %
7001 self.op.instance_name, errors.ECODE_INVAL)
7003 # if we replace nodes *and* the old primary is offline, we don't
7005 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7006 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7007 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7008 if not (self.op.nodes and old_pnode.offline):
7009 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7010 msg="cannot recreate disks")
7013 self.disks = dict(self.op.disks)
7015 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7017 maxidx = max(self.disks.keys())
7018 if maxidx >= len(instance.disks):
7019 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7022 if (self.op.nodes and
7023 sorted(self.disks.keys()) != range(len(instance.disks))):
7024 raise errors.OpPrereqError("Can't recreate disks partially and"
7025 " change the nodes at the same time",
7028 self.instance = instance
7030 def Exec(self, feedback_fn):
7031 """Recreate the disks.
7034 instance = self.instance
7036 assert (self.owned_locks(locking.LEVEL_NODE) ==
7037 self.owned_locks(locking.LEVEL_NODE_RES))
    to_skip = []
    mods = [] # keeps track of needed changes

    for idx, disk in enumerate(instance.disks):
      try:
        changes = self.disks[idx]
      except KeyError:
        # Disk should not be recreated
        to_skip.append(idx)
        continue
7050 # update secondaries for disks, if needed
7051 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7052 # need to update the nodes and minors
7053 assert len(self.op.nodes) == 2
7054 assert len(disk.logical_id) == 6 # otherwise disk internals
7056 (_, _, old_port, _, _, old_secret) = disk.logical_id
7057 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7058 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7059 new_minors[0], new_minors[1], old_secret)
7060 assert len(disk.logical_id) == len(new_id)
7064 mods.append((idx, new_id, changes))
7066 # now that we have passed all asserts above, we can apply the mods
7067 # in a single run (to avoid partial changes)
7068 for idx, new_id, changes in mods:
7069 disk = instance.disks[idx]
7070 if new_id is not None:
7071 assert disk.dev_type == constants.LD_DRBD8
7072 disk.logical_id = new_id
7074 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7075 mode=changes.get(constants.IDISK_MODE, None))
7077 # change primary node, if needed
7079 instance.primary_node = self.op.nodes[0]
7080 self.LogWarning("Changing the instance's nodes, you will have to"
7081 " remove any disks left on the older nodes manually")
7084 self.cfg.Update(instance, feedback_fn)
7086 _CreateDisks(self, instance, to_skip=to_skip)
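
# Illustrative sketch only: LUInstanceRecreateDisks.CheckArguments above
# accepts either the deprecated flat list of disk indices or a list of
# (index, params) pairs, and rejects duplicate indices. The hypothetical
# helper below normalizes such input the same way for plain integers and
# dicts, without the opcode/ht type machinery used by the LU.
def _ExampleNormalizeDiskSpecs(disks):
  """Normalize a disk argument into sorted, unique (index, params) pairs."""
  if disks and isinstance(disks[0], int):
    # deprecated form: plain list of indices
    return [(idx, {}) for idx in sorted(frozenset(disks))]
  indices = [idx for (idx, _) in disks]
  duplicates = utils.FindDuplicates(indices)
  if duplicates:
    raise errors.OpPrereqError("Some disks have been specified more than"
                               " once: %s" % utils.CommaJoin(duplicates),
                               errors.ECODE_INVAL)
  return list(disks)
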
7089 class LUInstanceRename(LogicalUnit):
7090 """Rename an instance.
7093 HPATH = "instance-rename"
7094 HTYPE = constants.HTYPE_INSTANCE
7096 def CheckArguments(self):
7100 if self.op.ip_check and not self.op.name_check:
7101 # TODO: make the ip check more flexible and not depend on the name check
7102 raise errors.OpPrereqError("IP address check requires a name check",
7105 def BuildHooksEnv(self):
7108 This runs on master, primary and secondary nodes of the instance.
7111 env = _BuildInstanceHookEnvByObject(self, self.instance)
7112 env["INSTANCE_NEW_NAME"] = self.op.new_name
7115 def BuildHooksNodes(self):
7116 """Build hooks nodes.
7119 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7122 def CheckPrereq(self):
7123 """Check prerequisites.
7125 This checks that the instance is in the cluster and is not running.
7128 self.op.instance_name = _ExpandInstanceName(self.cfg,
7129 self.op.instance_name)
7130 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7131 assert instance is not None
7132 _CheckNodeOnline(self, instance.primary_node)
7133 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7134 msg="cannot rename")
7135 self.instance = instance
7137 new_name = self.op.new_name
7138 if self.op.name_check:
7139 hostname = netutils.GetHostname(name=new_name)
7140 if hostname.name != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
7143 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7144 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7145 " same as given hostname '%s'") %
7146 (hostname.name, self.op.new_name),
7148 new_name = self.op.new_name = hostname.name
7149 if (self.op.ip_check and
7150 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7151 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7152 (hostname.ip, new_name),
7153 errors.ECODE_NOTUNIQUE)
7155 instance_list = self.cfg.GetInstanceList()
7156 if new_name in instance_list and new_name != instance.name:
7157 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7158 new_name, errors.ECODE_EXISTS)
7160 def Exec(self, feedback_fn):
7161 """Rename the instance.
7164 inst = self.instance
7165 old_name = inst.name
7167 rename_file_storage = False
7168 if (inst.disk_template in constants.DTS_FILEBASED and
7169 self.op.new_name != inst.name):
7170 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7171 rename_file_storage = True
7173 self.cfg.RenameInstance(inst.name, self.op.new_name)
7174 # Change the instance lock. This is definitely safe while we hold the BGL.
7175 # Otherwise the new lock would have to be added in acquired mode.
7177 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7178 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7180 # re-read the instance from the configuration after rename
7181 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7183 if rename_file_storage:
7184 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7185 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7186 old_file_storage_dir,
7187 new_file_storage_dir)
7188 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7189 " (but the instance has been renamed in Ganeti)" %
7190 (inst.primary_node, old_file_storage_dir,
7191 new_file_storage_dir))
7193 _StartInstanceDisks(self, inst, None)
7195 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7196 old_name, self.op.debug_level)
7197 msg = result.fail_msg
7199 msg = ("Could not run OS rename script for instance %s on node %s"
7200 " (but the instance has been renamed in Ganeti): %s" %
7201 (inst.name, inst.primary_node, msg))
7202 self.proc.LogWarning(msg)
7204 _ShutdownInstanceDisks(self, inst)
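
# Illustrative sketch only: LUInstanceRename.CheckPrereq above resolves the
# requested name, insists that the resolved hostname still matches what the
# user asked for, and refuses names that already exist in the cluster. The
# hypothetical helper below reproduces those prerequisite checks for already
# resolved values, so no DNS lookup or configuration access is needed; the
# error codes chosen here are assumptions for the sketch.
def _ExampleCheckNewInstanceName(old_name, new_name, resolved_name,
                                 existing_names):
  """Validate a rename target the way LUInstanceRename does."""
  if not utils.MatchNameComponent(new_name, [resolved_name]):
    raise errors.OpPrereqError("Resolved hostname '%s' does not look the"
                               " same as given hostname '%s'" %
                               (resolved_name, new_name), errors.ECODE_INVAL)
  if resolved_name in existing_names and resolved_name != old_name:
    raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                               resolved_name, errors.ECODE_EXISTS)
  return resolved_name
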
7209 class LUInstanceRemove(LogicalUnit):
7210 """Remove an instance.
7213 HPATH = "instance-remove"
7214 HTYPE = constants.HTYPE_INSTANCE
7217 def ExpandNames(self):
7218 self._ExpandAndLockInstance()
7219 self.needed_locks[locking.LEVEL_NODE] = []
7220 self.needed_locks[locking.LEVEL_NODE_RES] = []
7221 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7223 def DeclareLocks(self, level):
7224 if level == locking.LEVEL_NODE:
7225 self._LockInstancesNodes()
7226 elif level == locking.LEVEL_NODE_RES:
7228 self.needed_locks[locking.LEVEL_NODE_RES] = \
7229 self.needed_locks[locking.LEVEL_NODE][:]
7231 def BuildHooksEnv(self):
7234 This runs on master, primary and secondary nodes of the instance.
7237 env = _BuildInstanceHookEnvByObject(self, self.instance)
7238 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7241 def BuildHooksNodes(self):
7242 """Build hooks nodes.
7245 nl = [self.cfg.GetMasterNode()]
7246 nl_post = list(self.instance.all_nodes) + nl
7247 return (nl, nl_post)
7249 def CheckPrereq(self):
7250 """Check prerequisites.
7252 This checks that the instance is in the cluster.
7255 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7256 assert self.instance is not None, \
7257 "Cannot retrieve locked instance %s" % self.op.instance_name
7259 def Exec(self, feedback_fn):
7260 """Remove the instance.
7263 instance = self.instance
7264 logging.info("Shutting down instance %s on node %s",
7265 instance.name, instance.primary_node)
7267 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7268 self.op.shutdown_timeout)
7269 msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))
7278 assert (self.owned_locks(locking.LEVEL_NODE) ==
7279 self.owned_locks(locking.LEVEL_NODE_RES))
7280 assert not (set(instance.all_nodes) -
7281 self.owned_locks(locking.LEVEL_NODE)), \
7282 "Not owning correct locks"
7284 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7287 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7288 """Utility function to remove an instance.
7291 logging.info("Removing block devices for instance %s", instance.name)
7293 if not _RemoveDisks(lu, instance):
7294 if not ignore_failures:
7295 raise errors.OpExecError("Can't remove instance's disks")
7296 feedback_fn("Warning: can't remove instance's disks")
7298 logging.info("Removing instance %s out of cluster config", instance.name)
7300 lu.cfg.RemoveInstance(instance.name)
7302 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7303 "Instance lock removal conflict"
7305 # Remove lock for the instance
7306 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7309 class LUInstanceQuery(NoHooksLU):
7310 """Logical unit for querying instances.
7313 # pylint: disable=W0142
7316 def CheckArguments(self):
7317 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7318 self.op.output_fields, self.op.use_locking)
7320 def ExpandNames(self):
7321 self.iq.ExpandNames(self)
7323 def DeclareLocks(self, level):
7324 self.iq.DeclareLocks(self, level)
7326 def Exec(self, feedback_fn):
7327 return self.iq.OldStyleQuery(self)
7330 class LUInstanceFailover(LogicalUnit):
7331 """Failover an instance.
7334 HPATH = "instance-failover"
7335 HTYPE = constants.HTYPE_INSTANCE
7338 def CheckArguments(self):
7339 """Check the arguments.
7342 self.iallocator = getattr(self.op, "iallocator", None)
7343 self.target_node = getattr(self.op, "target_node", None)
7345 def ExpandNames(self):
7346 self._ExpandAndLockInstance()
7348 if self.op.target_node is not None:
7349 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7351 self.needed_locks[locking.LEVEL_NODE] = []
7352 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7354 ignore_consistency = self.op.ignore_consistency
7355 shutdown_timeout = self.op.shutdown_timeout
7356 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7359 ignore_consistency=ignore_consistency,
7360 shutdown_timeout=shutdown_timeout,
7361 ignore_ipolicy=self.op.ignore_ipolicy)
7362 self.tasklets = [self._migrater]
7364 def DeclareLocks(self, level):
7365 if level == locking.LEVEL_NODE:
7366 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7367 if instance.disk_template in constants.DTS_EXT_MIRROR:
7368 if self.op.target_node is None:
7369 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7371 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7372 self.op.target_node]
7373 del self.recalculate_locks[locking.LEVEL_NODE]
7375 self._LockInstancesNodes()
7377 def BuildHooksEnv(self):
7380 This runs on master, primary and secondary nodes of the instance.
7383 instance = self._migrater.instance
7384 source_node = instance.primary_node
7385 target_node = self.op.target_node
7387 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7388 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7389 "OLD_PRIMARY": source_node,
7390 "NEW_PRIMARY": target_node,
7393 if instance.disk_template in constants.DTS_INT_MIRROR:
7394 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7395 env["NEW_SECONDARY"] = source_node
7397 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7399 env.update(_BuildInstanceHookEnvByObject(self, instance))
7403 def BuildHooksNodes(self):
7404 """Build hooks nodes.
7407 instance = self._migrater.instance
7408 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7409 return (nl, nl + [instance.primary_node])
7412 class LUInstanceMigrate(LogicalUnit):
7413 """Migrate an instance.
7415 This is migration without shutting down, compared to the failover,
7416 which is done with shutdown.
7419 HPATH = "instance-migrate"
7420 HTYPE = constants.HTYPE_INSTANCE
7423 def ExpandNames(self):
7424 self._ExpandAndLockInstance()
7426 if self.op.target_node is not None:
7427 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7429 self.needed_locks[locking.LEVEL_NODE] = []
7430 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7432 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7433 cleanup=self.op.cleanup,
7435 fallback=self.op.allow_failover,
7436 ignore_ipolicy=self.op.ignore_ipolicy)
7437 self.tasklets = [self._migrater]
7439 def DeclareLocks(self, level):
7440 if level == locking.LEVEL_NODE:
7441 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7442 if instance.disk_template in constants.DTS_EXT_MIRROR:
7443 if self.op.target_node is None:
7444 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7446 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7447 self.op.target_node]
7448 del self.recalculate_locks[locking.LEVEL_NODE]
7450 self._LockInstancesNodes()
7452 def BuildHooksEnv(self):
7455 This runs on master, primary and secondary nodes of the instance.
7458 instance = self._migrater.instance
7459 source_node = instance.primary_node
7460 target_node = self.op.target_node
7461 env = _BuildInstanceHookEnvByObject(self, instance)
7463 "MIGRATE_LIVE": self._migrater.live,
7464 "MIGRATE_CLEANUP": self.op.cleanup,
7465 "OLD_PRIMARY": source_node,
7466 "NEW_PRIMARY": target_node,
7469 if instance.disk_template in constants.DTS_INT_MIRROR:
7470 env["OLD_SECONDARY"] = target_node
7471 env["NEW_SECONDARY"] = source_node
7473 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7477 def BuildHooksNodes(self):
7478 """Build hooks nodes.
7481 instance = self._migrater.instance
7482 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7483 return (nl, nl + [instance.primary_node])
7486 class LUInstanceMove(LogicalUnit):
7487 """Move an instance by data-copying.
7490 HPATH = "instance-move"
7491 HTYPE = constants.HTYPE_INSTANCE
7494 def ExpandNames(self):
7495 self._ExpandAndLockInstance()
7496 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7497 self.op.target_node = target_node
7498 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7499 self.needed_locks[locking.LEVEL_NODE_RES] = []
7500 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7502 def DeclareLocks(self, level):
7503 if level == locking.LEVEL_NODE:
7504 self._LockInstancesNodes(primary_only=True)
7505 elif level == locking.LEVEL_NODE_RES:
7507 self.needed_locks[locking.LEVEL_NODE_RES] = \
7508 self.needed_locks[locking.LEVEL_NODE][:]
7510 def BuildHooksEnv(self):
7513 This runs on master, primary and secondary nodes of the instance.
7517 "TARGET_NODE": self.op.target_node,
7518 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7520 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7523 def BuildHooksNodes(self):
7524 """Build hooks nodes.
7528 self.cfg.GetMasterNode(),
7529 self.instance.primary_node,
7530 self.op.target_node,
7534 def CheckPrereq(self):
7535 """Check prerequisites.
7537 This checks that the instance is in the cluster.
7540 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7541 assert self.instance is not None, \
7542 "Cannot retrieve locked instance %s" % self.op.instance_name
7544 node = self.cfg.GetNodeInfo(self.op.target_node)
7545 assert node is not None, \
7546 "Cannot retrieve locked node %s" % self.op.target_node
7548 self.target_node = target_node = node.name
7550 if target_node == instance.primary_node:
7551 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7552 (instance.name, target_node),
7555 bep = self.cfg.GetClusterInfo().FillBE(instance)
7557 for idx, dsk in enumerate(instance.disks):
7558 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7559 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7560 " cannot copy" % idx, errors.ECODE_STATE)
7562 _CheckNodeOnline(self, target_node)
7563 _CheckNodeNotDrained(self, target_node)
7564 _CheckNodeVmCapable(self, target_node)
7565 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7566 self.cfg.GetNodeGroup(node.group))
7567 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7568 ignore=self.op.ignore_ipolicy)
7570 if instance.admin_state == constants.ADMINST_UP:
7571 # check memory requirements on the secondary node
7572 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7573 instance.name, bep[constants.BE_MAXMEM],
7574 instance.hypervisor)
7576 self.LogInfo("Not checking memory on the secondary node as"
7577 " instance will not be started")
7579 # check bridge existence
7580 _CheckInstanceBridgesExist(self, instance, node=target_node)
7582 def Exec(self, feedback_fn):
7583 """Move an instance.
7585 The move is done by shutting it down on its present node, copying
7586 the data over (slow) and starting it on the new node.
7589 instance = self.instance
7591 source_node = instance.primary_node
7592 target_node = self.target_node
7594 self.LogInfo("Shutting down instance %s on source node %s",
7595 instance.name, source_node)
7597 assert (self.owned_locks(locking.LEVEL_NODE) ==
7598 self.owned_locks(locking.LEVEL_NODE_RES))
7600 result = self.rpc.call_instance_shutdown(source_node, instance,
7601 self.op.shutdown_timeout)
7602 msg = result.fail_msg
7604 if self.op.ignore_consistency:
7605 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7606 " Proceeding anyway. Please make sure node"
7607 " %s is down. Error details: %s",
7608 instance.name, source_node, source_node, msg)
7610 raise errors.OpExecError("Could not shutdown instance %s on"
7612 (instance.name, source_node, msg))
7614 # create the target disks
7616 _CreateDisks(self, instance, target_node=target_node)
7617 except errors.OpExecError:
7618 self.LogWarning("Device creation failed, reverting...")
7620 _RemoveDisks(self, instance, target_node=target_node)
7622 self.cfg.ReleaseDRBDMinors(instance.name)
7625 cluster_name = self.cfg.GetClusterInfo().cluster_name
7628 # activate, get path, copy the data over
7629 for idx, disk in enumerate(instance.disks):
7630 self.LogInfo("Copying data for disk %d", idx)
7631 result = self.rpc.call_blockdev_assemble(target_node, disk,
7632 instance.name, True, idx)
7634 self.LogWarning("Can't assemble newly created disk %d: %s",
7635 idx, result.fail_msg)
7636 errs.append(result.fail_msg)
7638 dev_path = result.payload
7639 result = self.rpc.call_blockdev_export(source_node, disk,
7640 target_node, dev_path,
7643 self.LogWarning("Can't copy data over for disk %d: %s",
7644 idx, result.fail_msg)
7645 errs.append(result.fail_msg)
7649 self.LogWarning("Some disks failed to copy, aborting")
7651 _RemoveDisks(self, instance, target_node=target_node)
7653 self.cfg.ReleaseDRBDMinors(instance.name)
7654 raise errors.OpExecError("Errors during disk copy: %s" %
7657 instance.primary_node = target_node
7658 self.cfg.Update(instance, feedback_fn)
7660 self.LogInfo("Removing the disks on the original node")
7661 _RemoveDisks(self, instance, target_node=source_node)
7663 # Only start the instance if it's marked as up
7664 if instance.admin_state == constants.ADMINST_UP:
7665 self.LogInfo("Starting instance %s on node %s",
7666 instance.name, target_node)
7668 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7669 ignore_secondaries=True)
7671 _ShutdownInstanceDisks(self, instance)
7672 raise errors.OpExecError("Can't activate the instance's disks")
7674 result = self.rpc.call_instance_start(target_node,
7675 (instance, None, None), False)
7676 msg = result.fail_msg
7678 _ShutdownInstanceDisks(self, instance)
7679 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7680 (instance.name, target_node, msg))
7683 class LUNodeMigrate(LogicalUnit):
7684 """Migrate all instances from a node.
7687 HPATH = "node-migrate"
7688 HTYPE = constants.HTYPE_NODE
7691 def CheckArguments(self):
7694 def ExpandNames(self):
7695 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7697 self.share_locks = _ShareAll()
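# Only shared locks are needed here: this LU just reads the
# configuration and submits one migration job per primary instance of
# the node (see Exec below).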
7698 self.needed_locks = {
7699 locking.LEVEL_NODE: [self.op.node_name],
7702 def BuildHooksEnv(self):
7705 This runs on the master, the primary and all the secondaries.
7709 "NODE_NAME": self.op.node_name,
7712 def BuildHooksNodes(self):
7713 """Build hooks nodes.
7716 nl = [self.cfg.GetMasterNode()]
7719 def CheckPrereq(self):
7722 def Exec(self, feedback_fn):
7723 # Prepare jobs for migration instances
7725 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7728 iallocator=self.op.iallocator,
7729 target_node=self.op.target_node,
7730 ignore_ipolicy=self.op.ignore_ipolicy)]
7731 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7734 # TODO: Run iallocator in this opcode and pass correct placement options to
7735 # OpInstanceMigrate. Since other jobs can modify the cluster between
7736 # running the iallocator and the actual migration, a good consistency model
7737 # will have to be found.
7739 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7740 frozenset([self.op.node_name]))
7742 return ResultWithJobs(jobs)
7745 class TLMigrateInstance(Tasklet):
7746 """Tasklet class for instance migration.
7749 @ivar live: whether the migration will be done live or non-live;
7750 this variable is initialized only after CheckPrereq has run
7751 @type cleanup: boolean
7752 @ivar cleanup: Whether we clean up from a failed migration
7753 @type iallocator: string
7754 @ivar iallocator: The iallocator used to determine target_node
7755 @type target_node: string
7756 @ivar target_node: If given, the target_node to reallocate the instance to
7757 @type failover: boolean
7758 @ivar failover: Whether operation results in failover or migration
7759 @type fallback: boolean
7760 @ivar fallback: Whether fallback to failover is allowed if migration not
7762 @type ignore_consistency: boolean
7763 @ivar ignore_consistency: Whether we should ignore consistency between source
7765 @type shutdown_timeout: int
7766 @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
7767 @type ignore_ipolicy: bool
7768 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7773 _MIGRATION_POLL_INTERVAL = 1 # seconds
7774 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
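# _ExecMigration polls the hypervisor for migration status every
# _MIGRATION_POLL_INTERVAL seconds and emits progress feedback at most
# every _MIGRATION_FEEDBACK_INTERVAL seconds.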
7776 def __init__(self, lu, instance_name, cleanup=False,
7777 failover=False, fallback=False,
7778 ignore_consistency=False,
7779 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7780 ignore_ipolicy=False):
7781 """Initializes this class.
7784 Tasklet.__init__(self, lu)
7787 self.instance_name = instance_name
7788 self.cleanup = cleanup
7789 self.live = False # will be overridden later
7790 self.failover = failover
7791 self.fallback = fallback
7792 self.ignore_consistency = ignore_consistency
7793 self.shutdown_timeout = shutdown_timeout
7794 self.ignore_ipolicy = ignore_ipolicy
7796 def CheckPrereq(self):
7797 """Check prerequisites.
7799 This checks that the instance is in the cluster.
7802 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7803 instance = self.cfg.GetInstanceInfo(instance_name)
7804 assert instance is not None
7805 self.instance = instance
7806 cluster = self.cfg.GetClusterInfo()
7808 if (not self.cleanup and
7809 not instance.admin_state == constants.ADMINST_UP and
7810 not self.failover and self.fallback):
7811 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7812 " switching to failover")
7813 self.failover = True
7815 if instance.disk_template not in constants.DTS_MIRRORED:
7820 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7821 " %s" % (instance.disk_template, text),
7824 if instance.disk_template in constants.DTS_EXT_MIRROR:
7825 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7827 if self.lu.op.iallocator:
7828 self._RunAllocator()
7830 # We set self.target_node as it is required by
7832 self.target_node = self.lu.op.target_node
7834 # Check that the target node is correct in terms of instance policy
7835 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7836 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7837 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7838 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7839 ignore=self.ignore_ipolicy)
7841 # self.target_node is already populated, either directly or by the
7843 target_node = self.target_node
7844 if self.target_node == instance.primary_node:
7845 raise errors.OpPrereqError("Cannot migrate instance %s"
7846 " to its primary (%s)" %
7847 (instance.name, instance.primary_node))
7849 if len(self.lu.tasklets) == 1:
7850 # It is safe to release locks only when we're the only tasklet
7852 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7853 keep=[instance.primary_node, self.target_node])
7856 secondary_nodes = instance.secondary_nodes
7857 if not secondary_nodes:
7858 raise errors.ConfigurationError("No secondary node but using"
7859 " %s disk template" %
7860 instance.disk_template)
7861 target_node = secondary_nodes[0]
7862 if self.lu.op.iallocator or (self.lu.op.target_node and
7863 self.lu.op.target_node != target_node):
7865 text = "failed over"
7868 raise errors.OpPrereqError("Instances with disk template %s cannot"
7869 " be %s to arbitrary nodes"
7870 " (neither an iallocator nor a target"
7871 " node can be passed)" %
7872 (instance.disk_template, text),
7874 nodeinfo = self.cfg.GetNodeInfo(target_node)
7875 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7876 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7877 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7878 ignore=self.ignore_ipolicy)
7880 i_be = cluster.FillBE(instance)
7882 # check memory requirements on the secondary node
7883 if (not self.cleanup and
7884 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7885 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7886 instance.name, i_be[constants.BE_MAXMEM],
7887 instance.hypervisor)
7889 self.lu.LogInfo("Not checking memory on the secondary node as"
7890 " instance will not be started")
7892 # check if failover must be forced instead of migration
7893 if (not self.cleanup and not self.failover and
7894 i_be[constants.BE_ALWAYS_FAILOVER]):
7896 self.lu.LogInfo("Instance configured to always failover; fallback"
7898 self.failover = True
7900 raise errors.OpPrereqError("This instance has been configured to"
7901 " always failover, please allow failover",
7904 # check bridge existence
7905 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7907 if not self.cleanup:
7908 _CheckNodeNotDrained(self.lu, target_node)
7909 if not self.failover:
7910 result = self.rpc.call_instance_migratable(instance.primary_node,
7912 if result.fail_msg and self.fallback:
7913 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7915 self.failover = True
7917 result.Raise("Can't migrate, please use failover",
7918 prereq=True, ecode=errors.ECODE_STATE)
7920 assert not (self.failover and self.cleanup)
7922 if not self.failover:
7923 if self.lu.op.live is not None and self.lu.op.mode is not None:
7924 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7925 " parameters are accepted",
7927 if self.lu.op.live is not None:
7929 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7931 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7932 # reset the 'live' parameter to None so that repeated
7933 # invocations of CheckPrereq do not raise an exception
7934 self.lu.op.live = None
7935 elif self.lu.op.mode is None:
7936 # read the default value from the hypervisor
7937 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7938 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7940 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7942 # Failover is never live
7945 def _RunAllocator(self):
7946 """Run the allocator based on input opcode.
7949 # FIXME: add a self.ignore_ipolicy option
7950 ial = IAllocator(self.cfg, self.rpc,
7951 mode=constants.IALLOCATOR_MODE_RELOC,
7952 name=self.instance_name,
7953 # TODO See why hail breaks with a single node below
7954 relocate_from=[self.instance.primary_node,
7955 self.instance.primary_node],
7958 ial.Run(self.lu.op.iallocator)
7961 raise errors.OpPrereqError("Can't compute nodes using"
7962 " iallocator '%s': %s" %
7963 (self.lu.op.iallocator, ial.info),
7965 if len(ial.result) != ial.required_nodes:
7966 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7967 " of nodes (%s), required %s" %
7968 (self.lu.op.iallocator, len(ial.result),
7969 ial.required_nodes), errors.ECODE_FAULT)
7970 self.target_node = ial.result[0]
7971 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7972 self.instance_name, self.lu.op.iallocator,
7973 utils.CommaJoin(ial.result))
7975 def _WaitUntilSync(self):
7976 """Poll with custom rpc for disk sync.
7978 This uses our own step-based rpc call.
7981 self.feedback_fn("* wait until resync is done")
7985 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7987 self.instance.disks)
7989 for node, nres in result.items():
7990 nres.Raise("Cannot resync disks on node %s" % node)
7991 node_done, node_percent = nres.payload
7992 all_done = all_done and node_done
7993 if node_percent is not None:
7994 min_percent = min(min_percent, node_percent)
7996 if min_percent < 100:
7997 self.feedback_fn(" - progress: %.1f%%" % min_percent)
8000 def _EnsureSecondary(self, node):
8001 """Demote a node to secondary.
8004 self.feedback_fn("* switching node %s to secondary mode" % node)
8006 for dev in self.instance.disks:
8007 self.cfg.SetDiskID(dev, node)
8009 result = self.rpc.call_blockdev_close(node, self.instance.name,
8010 self.instance.disks)
8011 result.Raise("Cannot change disk to secondary on node %s" % node)
8013 def _GoStandalone(self):
8014 """Disconnect from the network.
8017 self.feedback_fn("* changing into standalone mode")
8018 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8019 self.instance.disks)
8020 for node, nres in result.items():
8021 nres.Raise("Cannot disconnect disks node %s" % node)
8023 def _GoReconnect(self, multimaster):
8024 """Reconnect to the network.
8030 msg = "single-master"
8031 self.feedback_fn("* changing disks into %s mode" % msg)
8032 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8033 self.instance.disks,
8034 self.instance.name, multimaster)
8035 for node, nres in result.items():
8036 nres.Raise("Cannot change disks config on node %s" % node)
8038 def _ExecCleanup(self):
8039 """Try to cleanup after a failed migration.
8041 The cleanup is done by:
8042 - check that the instance is running only on one node
8043 (and update the config if needed)
8044 - change disks on its secondary node to secondary
8045 - wait until disks are fully synchronized
8046 - disconnect from the network
8047 - change disks into single-master mode
8048 - wait again until disks are fully synchronized
8051 instance = self.instance
8052 target_node = self.target_node
8053 source_node = self.source_node
8055 # check running on only one node
8056 self.feedback_fn("* checking where the instance actually runs"
8057 " (if this hangs, the hypervisor might be in"
8059 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8060 for node, result in ins_l.items():
8061 result.Raise("Can't contact node %s" % node)
8063 runningon_source = instance.name in ins_l[source_node].payload
8064 runningon_target = instance.name in ins_l[target_node].payload
8066 if runningon_source and runningon_target:
8067 raise errors.OpExecError("Instance seems to be running on two nodes,"
8068 " or the hypervisor is confused; you will have"
8069 " to ensure manually that it runs only on one"
8070 " and restart this operation")
8072 if not (runningon_source or runningon_target):
8073 raise errors.OpExecError("Instance does not seem to be running at all;"
8074 " in this case it's safer to repair by"
8075 " running 'gnt-instance stop' to ensure disk"
8076 " shutdown, and then restarting it")
8078 if runningon_target:
8079 # the migration has actually succeeded, we need to update the config
8080 self.feedback_fn("* instance running on secondary node (%s),"
8081 " updating config" % target_node)
8082 instance.primary_node = target_node
8083 self.cfg.Update(instance, self.feedback_fn)
8084 demoted_node = source_node
8086 self.feedback_fn("* instance confirmed to be running on its"
8087 " primary node (%s)" % source_node)
8088 demoted_node = target_node
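# demoted_node is whichever node the instance is *not* running on; for
# internally mirrored (DRBD) templates it is switched back to the
# secondary role below before re-syncing the disks.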
8090 if instance.disk_template in constants.DTS_INT_MIRROR:
8091 self._EnsureSecondary(demoted_node)
8093 self._WaitUntilSync()
8094 except errors.OpExecError:
8095 # we ignore errors here, since if the device is standalone, it
8096 # won't be able to sync
8098 self._GoStandalone()
8099 self._GoReconnect(False)
8100 self._WaitUntilSync()
8102 self.feedback_fn("* done")
8104 def _RevertDiskStatus(self):
8105 """Try to revert the disk status after a failed migration.
8108 target_node = self.target_node
8109 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8113 self._EnsureSecondary(target_node)
8114 self._GoStandalone()
8115 self._GoReconnect(False)
8116 self._WaitUntilSync()
8117 except errors.OpExecError, err:
8118 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8119 " please try to recover the instance manually;"
8120 " error '%s'" % str(err))
8122 def _AbortMigration(self):
8123 """Call the hypervisor code to abort a started migration.
8126 instance = self.instance
8127 target_node = self.target_node
8128 source_node = self.source_node
8129 migration_info = self.migration_info
8131 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8135 abort_msg = abort_result.fail_msg
8137 logging.error("Aborting migration failed on target node %s: %s",
8138 target_node, abort_msg)
8139 # Don't raise an exception here, as we still have to try to revert the
8140 # disk status, even if this step failed.
8142 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8143 instance, False, self.live)
8144 abort_msg = abort_result.fail_msg
8146 logging.error("Aborting migration failed on source node %s: %s",
8147 source_node, abort_msg)
8149 def _ExecMigration(self):
8150 """Migrate an instance.
8152 The migrate is done by:
8153 - change the disks into dual-master mode
8154 - wait until disks are fully synchronized again
8155 - migrate the instance
8156 - change disks on the new secondary node (the old primary) to secondary
8157 - wait until disks are fully synchronized
8158 - change disks into single-master mode
8161 instance = self.instance
8162 target_node = self.target_node
8163 source_node = self.source_node
8165 # Check for hypervisor version mismatch and warn the user.
8166 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8167 None, [self.instance.hypervisor])
8168 for ninfo in nodeinfo.values():
8169 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8171 (_, _, (src_info, )) = nodeinfo[source_node].payload
8172 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8174 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8175 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8176 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8177 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8178 if src_version != dst_version:
8179 self.feedback_fn("* warning: hypervisor version mismatch between"
8180 " source (%s) and target (%s) node" %
8181 (src_version, dst_version))
8183 self.feedback_fn("* checking disk consistency between source and target")
8184 for dev in instance.disks:
8185 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8186 raise errors.OpExecError("Disk %s is degraded or not fully"
8187 " synchronized on target node,"
8188 " aborting migration" % dev.iv_name)
8190 # First get the migration information from the remote node
8191 result = self.rpc.call_migration_info(source_node, instance)
8192 msg = result.fail_msg
8194 log_err = ("Failed fetching source migration information from %s: %s" %
8196 logging.error(log_err)
8197 raise errors.OpExecError(log_err)
8199 self.migration_info = migration_info = result.payload
8201 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8202 # Then switch the disks to master/master mode
8203 self._EnsureSecondary(target_node)
8204 self._GoStandalone()
8205 self._GoReconnect(True)
8206 self._WaitUntilSync()
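# At this point internally mirrored (DRBD) disks are connected in
# dual-primary mode and fully synced, which is required for live
# migration; externally mirrored templates skip this step.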
8208 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8209 result = self.rpc.call_accept_instance(target_node,
8212 self.nodes_ip[target_node])
8214 msg = result.fail_msg
8216 logging.error("Instance pre-migration failed, trying to revert"
8217 " disk status: %s", msg)
8218 self.feedback_fn("Pre-migration failed, aborting")
8219 self._AbortMigration()
8220 self._RevertDiskStatus()
8221 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8222 (instance.name, msg))
8224 self.feedback_fn("* migrating instance to %s" % target_node)
8225 result = self.rpc.call_instance_migrate(source_node, instance,
8226 self.nodes_ip[target_node],
8228 msg = result.fail_msg
8230 logging.error("Instance migration failed, trying to revert"
8231 " disk status: %s", msg)
8232 self.feedback_fn("Migration failed, aborting")
8233 self._AbortMigration()
8234 self._RevertDiskStatus()
8235 raise errors.OpExecError("Could not migrate instance %s: %s" %
8236 (instance.name, msg))
8238 self.feedback_fn("* starting memory transfer")
8239 last_feedback = time.time()
8241 result = self.rpc.call_instance_get_migration_status(source_node,
8243 msg = result.fail_msg
8244 ms = result.payload # MigrationStatus instance
8245 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8246 logging.error("Instance migration failed, trying to revert"
8247 " disk status: %s", msg)
8248 self.feedback_fn("Migration failed, aborting")
8249 self._AbortMigration()
8250 self._RevertDiskStatus()
8251 raise errors.OpExecError("Could not migrate instance %s: %s" %
8252 (instance.name, msg))
8254 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8255 self.feedback_fn("* memory transfer complete")
8258 if (utils.TimeoutExpired(last_feedback,
8259 self._MIGRATION_FEEDBACK_INTERVAL) and
8260 ms.transferred_ram is not None):
8261 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8262 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8263 last_feedback = time.time()
8265 time.sleep(self._MIGRATION_POLL_INTERVAL)
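# The memory transfer is complete; ask the source node to finalize its
# side of the migration before the instance's primary node is switched
# in the configuration below.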
8267 result = self.rpc.call_instance_finalize_migration_src(source_node,
8271 msg = result.fail_msg
8273 logging.error("Instance migration succeeded, but finalization failed"
8274 " on the source node: %s", msg)
8275 raise errors.OpExecError("Could not finalize instance migration: %s" %
8278 instance.primary_node = target_node
8280 # distribute new instance config to the other nodes
8281 self.cfg.Update(instance, self.feedback_fn)
8283 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8287 msg = result.fail_msg
8289 logging.error("Instance migration succeeded, but finalization failed"
8290 " on the target node: %s", msg)
8291 raise errors.OpExecError("Could not finalize instance migration: %s" %
8294 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8295 self._EnsureSecondary(source_node)
8296 self._WaitUntilSync()
8297 self._GoStandalone()
8298 self._GoReconnect(False)
8299 self._WaitUntilSync()
8301 # If the instance's disk template is `rbd' and there was a successful
8302 # migration, unmap the device from the source node.
8303 if self.instance.disk_template == constants.DT_RBD:
8304 disks = _ExpandCheckDisks(instance, instance.disks)
8305 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8307 result = self.rpc.call_blockdev_shutdown(source_node, disk)
8308 msg = result.fail_msg
8310 logging.error("Migration was successful, but couldn't unmap the"
8311 " block device %s on source node %s: %s",
8312 disk.iv_name, source_node, msg)
8313 logging.error("You need to unmap the device %s manually on %s",
8314 disk.iv_name, source_node)
8316 self.feedback_fn("* done")
8318 def _ExecFailover(self):
8319 """Failover an instance.
8321 The failover is done by shutting it down on its present node and
8322 starting it on the secondary.
8325 instance = self.instance
8326 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8328 source_node = instance.primary_node
8329 target_node = self.target_node
8331 if instance.admin_state == constants.ADMINST_UP:
8332 self.feedback_fn("* checking disk consistency between source and target")
8333 for dev in instance.disks:
8334 # for drbd, these are drbd over lvm
8335 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8336 if primary_node.offline:
8337 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8339 (primary_node.name, dev.iv_name, target_node))
8340 elif not self.ignore_consistency:
8341 raise errors.OpExecError("Disk %s is degraded on target node,"
8342 " aborting failover" % dev.iv_name)
8344 self.feedback_fn("* not checking disk consistency as instance is not"
8347 self.feedback_fn("* shutting down instance on source node")
8348 logging.info("Shutting down instance %s on node %s",
8349 instance.name, source_node)
8351 result = self.rpc.call_instance_shutdown(source_node, instance,
8352 self.shutdown_timeout)
8353 msg = result.fail_msg
8355 if self.ignore_consistency or primary_node.offline:
8356 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8357 " proceeding anyway; please make sure node"
8358 " %s is down; error details: %s",
8359 instance.name, source_node, source_node, msg)
8361 raise errors.OpExecError("Could not shutdown instance %s on"
8363 (instance.name, source_node, msg))
8365 self.feedback_fn("* deactivating the instance's disks on source node")
8366 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8367 raise errors.OpExecError("Can't shut down the instance's disks")
8369 instance.primary_node = target_node
8370 # distribute new instance config to the other nodes
8371 self.cfg.Update(instance, self.feedback_fn)
8373 # Only start the instance if it's marked as up
8374 if instance.admin_state == constants.ADMINST_UP:
8375 self.feedback_fn("* activating the instance's disks on target node %s" %
8377 logging.info("Starting instance %s on node %s",
8378 instance.name, target_node)
8380 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8381 ignore_secondaries=True)
8383 _ShutdownInstanceDisks(self.lu, instance)
8384 raise errors.OpExecError("Can't activate the instance's disks")
8386 self.feedback_fn("* starting the instance on the target node %s" %
8388 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8390 msg = result.fail_msg
8392 _ShutdownInstanceDisks(self.lu, instance)
8393 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8394 (instance.name, target_node, msg))
8396 def Exec(self, feedback_fn):
8397 """Perform the migration.
8400 self.feedback_fn = feedback_fn
8401 self.source_node = self.instance.primary_node
8403 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8404 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8405 self.target_node = self.instance.secondary_nodes[0]
8406 # Otherwise self.target_node has been populated either
8407 # directly, or through an iallocator.
8409 self.all_nodes = [self.source_node, self.target_node]
8410 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8411 in self.cfg.GetMultiNodeInfo(self.all_nodes))
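# nodes_ip maps each involved node to its secondary (replication) IP;
# it is used for the DRBD disconnect/attach RPCs and for telling the
# source node where to migrate the instance to.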
8414 feedback_fn("Failover instance %s" % self.instance.name)
8415 self._ExecFailover()
8417 feedback_fn("Migrating instance %s" % self.instance.name)
8420 return self._ExecCleanup()
8422 return self._ExecMigration()
8425 def _CreateBlockDev(lu, node, instance, device, force_create,
8427 """Create a tree of block devices on a given node.
8429 If this device type has to be created on secondaries, create it and
8432 If not, just recurse to children keeping the same 'force' value.
8434 @param lu: the lu on whose behalf we execute
8435 @param node: the node on which to create the device
8436 @type instance: L{objects.Instance}
8437 @param instance: the instance which owns the device
8438 @type device: L{objects.Disk}
8439 @param device: the device to create
8440 @type force_create: boolean
8441 @param force_create: whether to force creation of this device; this
8442 will be changed to True whenever we find a device which has
8443 CreateOnSecondary() attribute
8444 @param info: the extra 'metadata' we should attach to the device
8445 (this will be represented as a LVM tag)
8446 @type force_open: boolean
8447 @param force_open: this parameter will be passed to the
8448 L{backend.BlockdevCreate} function where it specifies
8449 whether we run on primary or not, and it affects both
8450 the child assembly and the device's own Open() execution
8453 if device.CreateOnSecondary():
8457 for child in device.children:
8458 _CreateBlockDev(lu, node, instance, child, force_create,
8461 if not force_create:
8464 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8467 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8468 """Create a single block device on a given node.
8470 This will not recurse over children of the device, so they must be
8473 @param lu: the lu on whose behalf we execute
8474 @param node: the node on which to create the device
8475 @type instance: L{objects.Instance}
8476 @param instance: the instance which owns the device
8477 @type device: L{objects.Disk}
8478 @param device: the device to create
8479 @param info: the extra 'metadata' we should attach to the device
8480 (this will be represented as a LVM tag)
8481 @type force_open: boolean
8482 @param force_open: this parameter will be passed to the
8483 L{backend.BlockdevCreate} function where it specifies
8484 whether we run on primary or not, and it affects both
8485 the child assembly and the device's own Open() execution
8488 lu.cfg.SetDiskID(device, node)
8489 result = lu.rpc.call_blockdev_create(node, device, device.size,
8490 instance.name, force_open, info)
8491 result.Raise("Can't create block device %s on"
8492 " node %s for instance %s" % (device, node, instance.name))
8493 if device.physical_id is None:
8494 device.physical_id = result.payload
8497 def _GenerateUniqueNames(lu, exts):
8498 """Generate a suitable LV name.
8500 This will generate a logical volume name for the given instance.
8505 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8506 results.append("%s%s" % (new_id, val))
8510 def _ComputeLDParams(disk_template, disk_params):
8511 """Computes Logical Disk parameters from Disk Template parameters.
8513 @type disk_template: string
8514 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8515 @type disk_params: dict
8516 @param disk_params: disk template parameters; dict(template_name -> parameters)
8518 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8519 contains the LD parameters of the node. The tree is flattened in-order.
8522 if disk_template not in constants.DISK_TEMPLATES:
8523 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8526 dt_params = disk_params[disk_template]
8527 if disk_template == constants.DT_DRBD8:
8529 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8530 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8531 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8532 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8533 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8534 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8535 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8536 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8537 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8538 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8539 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8540 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8544 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8547 result.append(drbd_params)
8551 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8554 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8556 result.append(data_params)
8560 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8563 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8565 result.append(meta_params)
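# For DRBD8 the flattened result is [drbd_params, data_lv_params,
# meta_lv_params], matching the in-order layout of the disk tree that
# _GenerateDiskTemplate unpacks.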
8567 elif (disk_template == constants.DT_FILE or
8568 disk_template == constants.DT_SHARED_FILE):
8569 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8571 elif disk_template == constants.DT_PLAIN:
8573 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8576 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8578 result.append(params)
8580 elif disk_template == constants.DT_BLOCK:
8581 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8583 elif disk_template == constants.DT_RBD:
8585 constants.LDP_POOL: dt_params[constants.RBD_POOL]
8588 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
8590 result.append(params)
8595 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8596 iv_name, p_minor, s_minor, drbd_params, data_params,
8598 """Generate a drbd8 device complete with its children.
8601 assert len(vgnames) == len(names) == 2
8602 port = lu.cfg.AllocatePort()
8603 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
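# Build the two backing logical volumes (data plus DRBD_META_SIZE worth
# of metadata) and stack a DRBD8 device on top of them; its logical_id
# records both nodes, the allocated port and the DRBD identifiers.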
8605 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8606 logical_id=(vgnames[0], names[0]),
8608 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8609 logical_id=(vgnames[1], names[1]),
8611 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8612 logical_id=(primary, secondary, port,
8615 children=[dev_data, dev_meta],
8616 iv_name=iv_name, params=drbd_params)
8620 def _GenerateDiskTemplate(lu, template_name,
8621 instance_name, primary_node,
8622 secondary_nodes, disk_info,
8623 file_storage_dir, file_driver,
8624 base_index, feedback_fn, disk_params):
8625 """Generate the entire disk layout for a given template type.
8628 #TODO: compute space requirements
8630 vgname = lu.cfg.GetVGName()
8631 disk_count = len(disk_info)
8633 ld_params = _ComputeLDParams(template_name, disk_params)
8634 if template_name == constants.DT_DISKLESS:
8636 elif template_name == constants.DT_PLAIN:
8638 raise errors.ProgrammerError("Wrong template configuration")
8640 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8641 for i in range(disk_count)])
8642 for idx, disk in enumerate(disk_info):
8643 disk_index = idx + base_index
8644 vg = disk.get(constants.IDISK_VG, vgname)
8645 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8646 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8647 size=disk[constants.IDISK_SIZE],
8648 logical_id=(vg, names[idx]),
8649 iv_name="disk/%d" % disk_index,
8650 mode=disk[constants.IDISK_MODE],
8651 params=ld_params[0])
8652 disks.append(disk_dev)
8653 elif template_name == constants.DT_DRBD8:
8654 drbd_params, data_params, meta_params = ld_params
8655 if len(secondary_nodes) != 1:
8656 raise errors.ProgrammerError("Wrong template configuration")
8657 remote_node = secondary_nodes[0]
8658 minors = lu.cfg.AllocateDRBDMinor(
8659 [primary_node, remote_node] * len(disk_info), instance_name)
8662 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8663 for i in range(disk_count)]):
8664 names.append(lv_prefix + "_data")
8665 names.append(lv_prefix + "_meta")
8666 for idx, disk in enumerate(disk_info):
8667 disk_index = idx + base_index
8668 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8669 data_vg = disk.get(constants.IDISK_VG, vgname)
8670 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8671 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8672 disk[constants.IDISK_SIZE],
8674 names[idx * 2:idx * 2 + 2],
8675 "disk/%d" % disk_index,
8676 minors[idx * 2], minors[idx * 2 + 1],
8677 drbd_params, data_params, meta_params)
8678 disk_dev.mode = disk[constants.IDISK_MODE]
8679 disks.append(disk_dev)
8680 elif template_name == constants.DT_FILE:
8682 raise errors.ProgrammerError("Wrong template configuration")
8684 opcodes.RequireFileStorage()
8686 for idx, disk in enumerate(disk_info):
8687 disk_index = idx + base_index
8688 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8689 size=disk[constants.IDISK_SIZE],
8690 iv_name="disk/%d" % disk_index,
8691 logical_id=(file_driver,
8692 "%s/disk%d" % (file_storage_dir,
8694 mode=disk[constants.IDISK_MODE],
8695 params=ld_params[0])
8696 disks.append(disk_dev)
8697 elif template_name == constants.DT_SHARED_FILE:
8699 raise errors.ProgrammerError("Wrong template configuration")
8701 opcodes.RequireSharedFileStorage()
8703 for idx, disk in enumerate(disk_info):
8704 disk_index = idx + base_index
8705 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8706 size=disk[constants.IDISK_SIZE],
8707 iv_name="disk/%d" % disk_index,
8708 logical_id=(file_driver,
8709 "%s/disk%d" % (file_storage_dir,
8711 mode=disk[constants.IDISK_MODE],
8712 params=ld_params[0])
8713 disks.append(disk_dev)
8714 elif template_name == constants.DT_BLOCK:
8716 raise errors.ProgrammerError("Wrong template configuration")
8718 for idx, disk in enumerate(disk_info):
8719 disk_index = idx + base_index
8720 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8721 size=disk[constants.IDISK_SIZE],
8722 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8723 disk[constants.IDISK_ADOPT]),
8724 iv_name="disk/%d" % disk_index,
8725 mode=disk[constants.IDISK_MODE],
8726 params=ld_params[0])
8727 disks.append(disk_dev)
8728 elif template_name == constants.DT_RBD:
8730 raise errors.ProgrammerError("Wrong template configuration")
8732 names = _GenerateUniqueNames(lu, [".rbd.disk%d" % (base_index + i)
8733 for i in range(disk_count)])
8735 for idx, disk in enumerate(disk_info):
8736 disk_index = idx + base_index
8737 disk_dev = objects.Disk(dev_type=constants.LD_RBD,
8738 size=disk[constants.IDISK_SIZE],
8739 logical_id=("rbd", names[idx]),
8740 iv_name="disk/%d" % disk_index,
8741 mode=disk[constants.IDISK_MODE],
8742 params=ld_params[0])
8743 disks.append(disk_dev)
8746 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8750 def _GetInstanceInfoText(instance):
8751 Compute the text that should be added to the disk's metadata.
8754 return "originstname+%s" % instance.name
8757 def _CalcEta(time_taken, written, total_size):
8758 """Calculates the ETA based on size written and total size.
8760 @param time_taken: The time taken so far
8761 @param written: amount written so far
8762 @param total_size: The total size of data to be written
8763 @return: The remaining time in seconds
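For example, if 256 MiB out of a total of 1024 MiB have been written in
2.0 seconds, the average is 2.0 / 256 seconds per MiB, giving an ETA of
(1024 - 256) * (2.0 / 256) = 6.0 seconds.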
8766 avg_time = time_taken / float(written)
8767 return (total_size - written) * avg_time
8770 def _WipeDisks(lu, instance):
8771 """Wipes instance disks.
8773 @type lu: L{LogicalUnit}
8774 @param lu: the logical unit on whose behalf we execute
8775 @type instance: L{objects.Instance}
8776 @param instance: the instance whose disks we should wipe
8777 @return: the success of the wipe
8780 node = instance.primary_node
8782 for device in instance.disks:
8783 lu.cfg.SetDiskID(device, node)
8785 logging.info("Pause sync of instance %s disks", instance.name)
8786 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8788 for idx, success in enumerate(result.payload):
8790 logging.warn("pause-sync of instance %s for disks %d failed",
8794 for idx, device in enumerate(instance.disks):
8795 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk size,
8796 # but at most MAX_WIPE_CHUNK
8797 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8798 constants.MIN_WIPE_CHUNK_PERCENT)
8799 # we _must_ make this an int, otherwise rounding errors will
8801 wipe_chunk_size = int(wipe_chunk_size)
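# Illustration (assuming, e.g., MIN_WIPE_CHUNK_PERCENT = 10 and
# MAX_WIPE_CHUNK = 1024 MiB; see constants.py for the real values): a
# 4096 MiB disk is wiped in 409 MiB chunks, while a 1 TiB disk is
# capped at 1024 MiB chunks.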
8803 lu.LogInfo("* Wiping disk %d", idx)
8804 logging.info("Wiping disk %d for instance %s, node %s using"
8805 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8810 start_time = time.time()
8812 while offset < size:
8813 wipe_size = min(wipe_chunk_size, size - offset)
8814 logging.debug("Wiping disk %d, offset %s, chunk %s",
8815 idx, offset, wipe_size)
8816 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8817 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8818 (idx, offset, wipe_size))
8821 if now - last_output >= 60:
8822 eta = _CalcEta(now - start_time, offset, size)
8823 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8824 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8827 logging.info("Resume sync of instance %s disks", instance.name)
8829 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8831 for idx, success in enumerate(result.payload):
8833 lu.LogWarning("Resume sync of disk %d failed, please have a"
8834 " look at the status and troubleshoot the issue", idx)
8835 logging.warn("resume-sync of instance %s for disks %d failed",
8839 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8840 """Create all disks for an instance.
8842 This abstracts away some work from AddInstance.
8844 @type lu: L{LogicalUnit}
8845 @param lu: the logical unit on whose behalf we execute
8846 @type instance: L{objects.Instance}
8847 @param instance: the instance whose disks we should create
8849 @param to_skip: list of indices to skip
8850 @type target_node: string
8851 @param target_node: if passed, overrides the target node for creation
8853 @return: the success of the creation
8856 info = _GetInstanceInfoText(instance)
8857 if target_node is None:
8858 pnode = instance.primary_node
8859 all_nodes = instance.all_nodes
8864 if instance.disk_template in constants.DTS_FILEBASED:
8865 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8866 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8868 result.Raise("Failed to create directory '%s' on"
8869 " node %s" % (file_storage_dir, pnode))
8871 # Note: this needs to be kept in sync with adding of disks in
8872 # LUInstanceSetParams
8873 for idx, device in enumerate(instance.disks):
8874 if to_skip and idx in to_skip:
8876 logging.info("Creating volume %s for instance %s",
8877 device.iv_name, instance.name)
8879 for node in all_nodes:
8880 f_create = node == pnode
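# Force creation (and opening) only on the primary node; on the other
# nodes _CreateBlockDev creates just the pieces whose device type
# requires presence on secondaries (CreateOnSecondary()).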
8881 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8884 def _RemoveDisks(lu, instance, target_node=None):
8885 """Remove all disks for an instance.
8887 This abstracts away some work from `AddInstance()` and
8888 `RemoveInstance()`. Note that in case some of the devices couldn't
8889 be removed, the removal will continue with the other ones (compare
8890 with `_CreateDisks()`).
8892 @type lu: L{LogicalUnit}
8893 @param lu: the logical unit on whose behalf we execute
8894 @type instance: L{objects.Instance}
8895 @param instance: the instance whose disks we should remove
8896 @type target_node: string
8897 @param target_node: used to override the node on which to remove the disks
8899 @return: the success of the removal
8902 logging.info("Removing block devices for instance %s", instance.name)
8905 for device in instance.disks:
8907 edata = [(target_node, device)]
8909 edata = device.ComputeNodeTree(instance.primary_node)
8910 for node, disk in edata:
8911 lu.cfg.SetDiskID(disk, node)
8912 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8914 lu.LogWarning("Could not remove block device %s on node %s,"
8915 " continuing anyway: %s", device.iv_name, node, msg)
8918 # if this is a DRBD disk, return its port to the pool
8919 if device.dev_type in constants.LDS_DRBD:
8920 tcp_port = device.logical_id[2]
8921 lu.cfg.AddTcpUdpPort(tcp_port)
8923 if instance.disk_template == constants.DT_FILE:
8924 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8928 tgt = instance.primary_node
8929 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8931 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8932 file_storage_dir, instance.primary_node, result.fail_msg)
8938 def _ComputeDiskSizePerVG(disk_template, disks):
8939 """Compute disk size requirements in the volume group
8942 def _compute(disks, payload):
8943 """Universal algorithm.
8948 vgs[disk[constants.IDISK_VG]] = \
8949 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8953 # Required free disk space as a function of disk and swap space
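# (returned as a dict mapping each volume group name to the megabytes
# required in it)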
8955 constants.DT_DISKLESS: {},
8956 constants.DT_PLAIN: _compute(disks, 0),
8957 # 128 MB are added for drbd metadata for each disk
8958 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8959 constants.DT_FILE: {},
8960 constants.DT_SHARED_FILE: {},
8963 if disk_template not in req_size_dict:
8964 raise errors.ProgrammerError("Disk template '%s' size requirement"
8965 " is unknown" % disk_template)
8967 return req_size_dict[disk_template]
8970 def _ComputeDiskSize(disk_template, disks):
8971 """Compute disk size requirements in the volume group
8974 # Required free disk space as a function of disk and swap space
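# (e.g. two DRBD8 disks of 10240 MiB each require
# 2 * (10240 + 128) = 20736 MiB, while file-, block- and rbd-based
# templates need no volume group space)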
8976 constants.DT_DISKLESS: None,
8977 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8978 # 128 MB are added for drbd metadata for each disk
8980 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8981 constants.DT_FILE: None,
8982 constants.DT_SHARED_FILE: 0,
8983 constants.DT_BLOCK: 0,
8984 constants.DT_RBD: 0,
8987 if disk_template not in req_size_dict:
8988 raise errors.ProgrammerError("Disk template '%s' size requirement"
8989 " is unknown" % disk_template)
8991 return req_size_dict[disk_template]
8994 def _FilterVmNodes(lu, nodenames):
8995 """Filters out non-vm_capable nodes from a list.
8997 @type lu: L{LogicalUnit}
8998 @param lu: the logical unit for which we check
8999 @type nodenames: list
9000 @param nodenames: the list of nodes on which we should check
9002 @return: the list of vm-capable nodes
9005 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9006 return [name for name in nodenames if name not in non_vm_nodes]
9009 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9010 """Hypervisor parameter validation.
9012 This function abstracts the hypervisor parameter validation to be
9013 used in both instance create and instance modify.
9015 @type lu: L{LogicalUnit}
9016 @param lu: the logical unit for which we check
9017 @type nodenames: list
9018 @param nodenames: the list of nodes on which we should check
9019 @type hvname: string
9020 @param hvname: the name of the hypervisor we should use
9021 @type hvparams: dict
9022 @param hvparams: the parameters which we need to check
9023 @raise errors.OpPrereqError: if the parameters are not valid
9026 nodenames = _FilterVmNodes(lu, nodenames)
9028 cluster = lu.cfg.GetClusterInfo()
9029 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9031 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9032 for node in nodenames:
9036 info.Raise("Hypervisor parameter validation failed on node %s" % node)
9039 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9040 """OS parameters validation.
9042 @type lu: L{LogicalUnit}
9043 @param lu: the logical unit for which we check
9044 @type required: boolean
9045 @param required: whether the validation should fail if the OS is not
9047 @type nodenames: list
9048 @param nodenames: the list of nodes on which we should check
9049 @type osname: string
9050 @param osname: the name of the OS we should use
9051 @type osparams: dict
9052 @param osparams: the parameters which we need to check
9053 @raise errors.OpPrereqError: if the parameters are not valid
9056 nodenames = _FilterVmNodes(lu, nodenames)
9057 result = lu.rpc.call_os_validate(nodenames, required, osname,
9058 [constants.OS_VALIDATE_PARAMETERS],
9060 for node, nres in result.items():
9061 # we don't check for offline cases since this should be run only
9062 # against the master node and/or an instance's nodes
9063 nres.Raise("OS Parameters validation failed on node %s" % node)
9064 if not nres.payload:
9065 lu.LogInfo("OS %s not found on node %s, validation skipped",
9069 class LUInstanceCreate(LogicalUnit):
9070 """Create an instance.
9073 HPATH = "instance-add"
9074 HTYPE = constants.HTYPE_INSTANCE
9077 def CheckArguments(self):
9081 # do not require name_check to ease forward/backward compatibility
9083 if self.op.no_install and self.op.start:
9084 self.LogInfo("No-installation mode selected, disabling startup")
9085 self.op.start = False
9086 # validate/normalize the instance name
9087 self.op.instance_name = \
9088 netutils.Hostname.GetNormalizedName(self.op.instance_name)
9090 if self.op.ip_check and not self.op.name_check:
9091 # TODO: make the ip check more flexible and not depend on the name check
9092 raise errors.OpPrereqError("Cannot do IP address check without a name"
9093 " check", errors.ECODE_INVAL)
9095 # check nics' parameter names
9096 for nic in self.op.nics:
9097 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9099 # check disks: parameter names and consistent adopt/no-adopt strategy
9100 has_adopt = has_no_adopt = False
9101 for disk in self.op.disks:
9102 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9103 if constants.IDISK_ADOPT in disk:
9107 if has_adopt and has_no_adopt:
9108 raise errors.OpPrereqError("Either all disks are adopted or none is",
9111 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9112 raise errors.OpPrereqError("Disk adoption is not supported for the"
9113 " '%s' disk template" %
9114 self.op.disk_template,
9116 if self.op.iallocator is not None:
9117 raise errors.OpPrereqError("Disk adoption not allowed with an"
9118 " iallocator script", errors.ECODE_INVAL)
9119 if self.op.mode == constants.INSTANCE_IMPORT:
9120 raise errors.OpPrereqError("Disk adoption not allowed for"
9121 " instance import", errors.ECODE_INVAL)
9123 if self.op.disk_template in constants.DTS_MUST_ADOPT:
9124 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9125 " but no 'adopt' parameter given" %
9126 self.op.disk_template,
9129 self.adopt_disks = has_adopt
9131 # instance name verification
9132 if self.op.name_check:
9133 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9134 self.op.instance_name = self.hostname1.name
9135 # used in CheckPrereq for ip ping check
9136 self.check_ip = self.hostname1.ip
9138 self.check_ip = None
9140 # file storage checks
9141 if (self.op.file_driver and
9142 not self.op.file_driver in constants.FILE_DRIVER):
9143 raise errors.OpPrereqError("Invalid file driver name '%s'" %
9144 self.op.file_driver, errors.ECODE_INVAL)
9146 if self.op.disk_template == constants.DT_FILE:
9147 opcodes.RequireFileStorage()
9148 elif self.op.disk_template == constants.DT_SHARED_FILE:
9149 opcodes.RequireSharedFileStorage()
9151 ### Node/iallocator related checks
9152 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9154 if self.op.pnode is not None:
9155 if self.op.disk_template in constants.DTS_INT_MIRROR:
9156 if self.op.snode is None:
9157 raise errors.OpPrereqError("The networked disk templates need"
9158 " a mirror node", errors.ECODE_INVAL)
9160 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9162 self.op.snode = None
9164 self._cds = _GetClusterDomainSecret()
9166 if self.op.mode == constants.INSTANCE_IMPORT:
9167 # On import force_variant must be True, because if we forced it at
9168 # initial install, our only chance when importing it back is that it
9170 self.op.force_variant = True
9172 if self.op.no_install:
9173 self.LogInfo("No-installation mode has no effect during import")
9175 elif self.op.mode == constants.INSTANCE_CREATE:
9176 if self.op.os_type is None:
9177 raise errors.OpPrereqError("No guest OS specified",
9179 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9180 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9181 " installation" % self.op.os_type,
9183 if self.op.disk_template is None:
9184 raise errors.OpPrereqError("No disk template specified",
9187 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9188 # Check handshake to ensure both clusters have the same domain secret
9189 src_handshake = self.op.source_handshake
9190 if not src_handshake:
9191 raise errors.OpPrereqError("Missing source handshake",
9194 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9197 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9200 # Load and check source CA
9201 self.source_x509_ca_pem = self.op.source_x509_ca
9202 if not self.source_x509_ca_pem:
9203 raise errors.OpPrereqError("Missing source X509 CA",
9207 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9209 except OpenSSL.crypto.Error, err:
9210 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9211 (err, ), errors.ECODE_INVAL)
9213 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9214 if errcode is not None:
9215 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9218 self.source_x509_ca = cert
9220 src_instance_name = self.op.source_instance_name
9221 if not src_instance_name:
9222 raise errors.OpPrereqError("Missing source instance name",
9225 self.source_instance_name = \
9226 netutils.GetHostname(name=src_instance_name).name
9229 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9230 self.op.mode, errors.ECODE_INVAL)
9232 def ExpandNames(self):
9233 """ExpandNames for CreateInstance.
9235 Figure out the right locks for instance creation.
9238 self.needed_locks = {}
9240 instance_name = self.op.instance_name
9241 # this is just a preventive check, but someone might still add this
9242 # instance in the meantime, and creation will fail at lock-add time
9243 if instance_name in self.cfg.GetInstanceList():
9244 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9245 instance_name, errors.ECODE_EXISTS)
9247 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9249 if self.op.iallocator:
9250 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9251 # specifying a group on instance creation and then selecting nodes from
9253 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9254 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9256 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9257 nodelist = [self.op.pnode]
9258 if self.op.snode is not None:
9259 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9260 nodelist.append(self.op.snode)
9261 self.needed_locks[locking.LEVEL_NODE] = nodelist
9262 # Lock resources of instance's primary and secondary nodes (copy to
9263 # prevent accidental modification)
9264 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9266 # in case of import lock the source node too
9267 if self.op.mode == constants.INSTANCE_IMPORT:
9268 src_node = self.op.src_node
9269 src_path = self.op.src_path
9271 if src_path is None:
9272 self.op.src_path = src_path = self.op.instance_name
9274 if src_node is None:
9275 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9276 self.op.src_node = None
9277 if os.path.isabs(src_path):
9278 raise errors.OpPrereqError("Importing an instance from a path"
9279 " requires a source node option",
9282 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9283 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9284 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9285 if not os.path.isabs(src_path):
9286 self.op.src_path = src_path = \
9287 utils.PathJoin(constants.EXPORT_DIR, src_path)
9289 def _RunAllocator(self):
9290 """Run the allocator based on input opcode.
9293 nics = [n.ToDict() for n in self.nics]
9294 ial = IAllocator(self.cfg, self.rpc,
9295 mode=constants.IALLOCATOR_MODE_ALLOC,
9296 name=self.op.instance_name,
9297 disk_template=self.op.disk_template,
9300 vcpus=self.be_full[constants.BE_VCPUS],
9301 memory=self.be_full[constants.BE_MAXMEM],
9304 hypervisor=self.op.hypervisor,
9307 ial.Run(self.op.iallocator)
9310 raise errors.OpPrereqError("Can't compute nodes using"
9311 " iallocator '%s': %s" %
9312 (self.op.iallocator, ial.info),
9314 if len(ial.result) != ial.required_nodes:
9315 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9316 " of nodes (%s), required %s" %
9317 (self.op.iallocator, len(ial.result),
9318 ial.required_nodes), errors.ECODE_FAULT)
9319 self.op.pnode = ial.result[0]
9320 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9321 self.op.instance_name, self.op.iallocator,
9322 utils.CommaJoin(ial.result))
9323 if ial.required_nodes == 2:
9324 self.op.snode = ial.result[1]
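# From this point on the opcode carries concrete node names (pnode, and
# snode for two-node allocations), so the later checks need not distinguish
# between iallocator-driven and manually specified placement.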
9326 def BuildHooksEnv(self):
9329 This runs on master, primary and secondary nodes of the instance.
9333 "ADD_MODE": self.op.mode,
9335 if self.op.mode == constants.INSTANCE_IMPORT:
9336 env["SRC_NODE"] = self.op.src_node
9337 env["SRC_PATH"] = self.op.src_path
9338 env["SRC_IMAGES"] = self.src_images
9340 env.update(_BuildInstanceHookEnv(
9341 name=self.op.instance_name,
9342 primary_node=self.op.pnode,
9343 secondary_nodes=self.secondaries,
9344 status=self.op.start,
9345 os_type=self.op.os_type,
9346 minmem=self.be_full[constants.BE_MINMEM],
9347 maxmem=self.be_full[constants.BE_MAXMEM],
9348 vcpus=self.be_full[constants.BE_VCPUS],
9349 nics=_NICListToTuple(self, self.nics),
9350 disk_template=self.op.disk_template,
9351 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9352 for d in self.disks],
9355 hypervisor_name=self.op.hypervisor,
9361 def BuildHooksNodes(self):
9362 """Build hooks nodes.
9365 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9368 def _ReadExportInfo(self):
9369 """Reads the export information from disk.
9371 It will override the opcode source node and path with the actual
9372 information, if these two were not specified before.
9374 @return: the export information
9377 assert self.op.mode == constants.INSTANCE_IMPORT
9379 src_node = self.op.src_node
9380 src_path = self.op.src_path
9382 if src_node is None:
9383 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9384 exp_list = self.rpc.call_export_list(locked_nodes)
9386 for node in exp_list:
9387 if exp_list[node].fail_msg:
9389 if src_path in exp_list[node].payload:
9391 self.op.src_node = src_node = node
9392 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9396 raise errors.OpPrereqError("No export found for relative path %s" %
9397 src_path, errors.ECODE_INVAL)
9399 _CheckNodeOnline(self, src_node)
9400 result = self.rpc.call_export_info(src_node, src_path)
9401 result.Raise("No export or invalid export found in dir %s" % src_path)
9403 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9404 if not export_info.has_section(constants.INISECT_EXP):
9405 raise errors.ProgrammerError("Corrupted export config",
9406 errors.ECODE_ENVIRON)
9408 ei_version = export_info.get(constants.INISECT_EXP, "version")
9409 if (int(ei_version) != constants.EXPORT_VERSION):
9410 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9411 (ei_version, constants.EXPORT_VERSION),
9412 errors.ECODE_ENVIRON)
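# The parsed export information (a SerializableConfigParser) is consumed by
# CheckPrereq, which passes it on to _ReadExportParams below.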
9415 def _ReadExportParams(self, einfo):
9416 """Use export parameters as defaults.
9418 In case the opcode doesn't specify (as in override) some instance
9419 parameters, then try to use them from the export information, if
9423 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9425 if self.op.disk_template is None:
9426 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9427 self.op.disk_template = einfo.get(constants.INISECT_INS,
9429 if self.op.disk_template not in constants.DISK_TEMPLATES:
9430 raise errors.OpPrereqError("Disk template specified in configuration"
9431 " file is not one of the allowed values:"
9432 " %s" % " ".join(constants.DISK_TEMPLATES))
9434 raise errors.OpPrereqError("No disk template specified and the export"
9435 " is missing the disk_template information",
9438 if not self.op.disks:
9440 # TODO: import the disk iv_name too
9441 for idx in range(constants.MAX_DISKS):
9442 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9443 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9444 disks.append({constants.IDISK_SIZE: disk_sz})
9445 self.op.disks = disks
9446 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9447 raise errors.OpPrereqError("No disk info specified and the export"
9448 " is missing the disk information",
9451 if not self.op.nics:
9453 for idx in range(constants.MAX_NICS):
9454 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9456 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9457 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9464 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9465 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9467 if (self.op.hypervisor is None and
9468 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9469 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9471 if einfo.has_section(constants.INISECT_HYP):
9472 # use the export parameters but do not override the ones
9473 # specified by the user
9474 for name, value in einfo.items(constants.INISECT_HYP):
9475 if name not in self.op.hvparams:
9476 self.op.hvparams[name] = value
9478 if einfo.has_section(constants.INISECT_BEP):
9479 # use the parameters, without overriding
9480 for name, value in einfo.items(constants.INISECT_BEP):
9481 if name not in self.op.beparams:
9482 self.op.beparams[name] = value
9483 # Compatibility for the old "memory" be param
9484 if name == constants.BE_MEMORY:
9485 if constants.BE_MAXMEM not in self.op.beparams:
9486 self.op.beparams[constants.BE_MAXMEM] = value
9487 if constants.BE_MINMEM not in self.op.beparams:
9488 self.op.beparams[constants.BE_MINMEM] = value
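# Illustrative example (assumed values): an export carrying only the old
# "memory" parameter, e.g. memory=512, ends up as maxmem=512 and minmem=512
# unless the opcode already specified those explicitly.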
9490 # try to read the parameters old style, from the main section
9491 for name in constants.BES_PARAMETERS:
9492 if (name not in self.op.beparams and
9493 einfo.has_option(constants.INISECT_INS, name)):
9494 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9496 if einfo.has_section(constants.INISECT_OSP):
9497 # use the parameters, without overriding
9498 for name, value in einfo.items(constants.INISECT_OSP):
9499 if name not in self.op.osparams:
9500 self.op.osparams[name] = value
9502 def _RevertToDefaults(self, cluster):
9503 """Revert the instance parameters to the default values.
9507 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9508 for name in self.op.hvparams.keys():
9509 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9510 del self.op.hvparams[name]
9512 be_defs = cluster.SimpleFillBE({})
9513 for name in self.op.beparams.keys():
9514 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9515 del self.op.beparams[name]
9517 nic_defs = cluster.SimpleFillNIC({})
9518 for nic in self.op.nics:
9519 for name in constants.NICS_PARAMETERS:
9520 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9523 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9524 for name in self.op.osparams.keys():
9525 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9526 del self.op.osparams[name]
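# Net effect: hv/be/os parameters (and per-NIC settings) whose requested
# value equals the current cluster default are dropped again, so only
# genuine overrides are kept.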
9528 def _CalculateFileStorageDir(self):
9529 """Calculate final instance file storage dir.
9532 # file storage dir calculation/check
9533 self.instance_file_storage_dir = None
9534 if self.op.disk_template in constants.DTS_FILEBASED:
9535 # build the full file storage dir path
9538 if self.op.disk_template == constants.DT_SHARED_FILE:
9539 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9541 get_fsd_fn = self.cfg.GetFileStorageDir
9543 cfg_storagedir = get_fsd_fn()
9544 if not cfg_storagedir:
9545 raise errors.OpPrereqError("Cluster file storage dir not defined")
9546 joinargs.append(cfg_storagedir)
9548 if self.op.file_storage_dir is not None:
9549 joinargs.append(self.op.file_storage_dir)
9551 joinargs.append(self.op.instance_name)
9553 # pylint: disable=W0142
9554 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
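# Resulting layout (sketch): <cluster storage dir>[/<op.file_storage_dir>]/
# <instance name>, where the optional middle component comes from the opcode.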
9556 def CheckPrereq(self): # pylint: disable=R0914
9557 """Check prerequisites.
9560 self._CalculateFileStorageDir()
9562 if self.op.mode == constants.INSTANCE_IMPORT:
9563 export_info = self._ReadExportInfo()
9564 self._ReadExportParams(export_info)
9566 if (not self.cfg.GetVGName() and
9567 self.op.disk_template not in constants.DTS_NOT_LVM):
9568 raise errors.OpPrereqError("Cluster does not support lvm-based"
9569 " instances", errors.ECODE_STATE)
9571 if (self.op.hypervisor is None or
9572 self.op.hypervisor == constants.VALUE_AUTO):
9573 self.op.hypervisor = self.cfg.GetHypervisorType()
9575 cluster = self.cfg.GetClusterInfo()
9576 enabled_hvs = cluster.enabled_hypervisors
9577 if self.op.hypervisor not in enabled_hvs:
9578 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9579 " cluster (%s)" % (self.op.hypervisor,
9580 ",".join(enabled_hvs)),
9583 # Check tag validity
9584 for tag in self.op.tags:
9585 objects.TaggableObject.ValidateTag(tag)
9587 # check hypervisor parameter syntax (locally)
9588 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9589 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9591 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9592 hv_type.CheckParameterSyntax(filled_hvp)
9593 self.hv_full = filled_hvp
9594 # check that we don't specify global parameters on an instance
9595 _CheckGlobalHvParams(self.op.hvparams)
9597 # fill and remember the beparams dict
9598 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9599 for param, value in self.op.beparams.iteritems():
9600 if value == constants.VALUE_AUTO:
9601 self.op.beparams[param] = default_beparams[param]
9602 objects.UpgradeBeParams(self.op.beparams)
9603 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9604 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9606 # build os parameters
9607 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9609 # now that hvp/bep are in final format, let's reset to defaults,
9611 if self.op.identify_defaults:
9612 self._RevertToDefaults(cluster)
9616 for idx, nic in enumerate(self.op.nics):
9617 nic_mode_req = nic.get(constants.INIC_MODE, None)
9618 nic_mode = nic_mode_req
9619 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9620 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9622 # in routed mode, for the first nic, the default ip is 'auto'
9623 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9624 default_ip_mode = constants.VALUE_AUTO
9626 default_ip_mode = constants.VALUE_NONE
9628 # ip validity checks
9629 ip = nic.get(constants.INIC_IP, default_ip_mode)
9630 if ip is None or ip.lower() == constants.VALUE_NONE:
9632 elif ip.lower() == constants.VALUE_AUTO:
9633 if not self.op.name_check:
9634 raise errors.OpPrereqError("IP address set to auto but name checks"
9635 " have been skipped",
9637 nic_ip = self.hostname1.ip
9639 if not netutils.IPAddress.IsValid(ip):
9640 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9644 # TODO: check the ip address for uniqueness
9645 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9646 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9649 # MAC address verification
9650 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9651 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9652 mac = utils.NormalizeAndValidateMac(mac)
9655 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9656 except errors.ReservationError:
9657 raise errors.OpPrereqError("MAC address %s already in use"
9658 " in cluster" % mac,
9659 errors.ECODE_NOTUNIQUE)
9661 # Build nic parameters
9662 link = nic.get(constants.INIC_LINK, None)
9663 if link == constants.VALUE_AUTO:
9664 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9667 nicparams[constants.NIC_MODE] = nic_mode
9669 nicparams[constants.NIC_LINK] = link
9671 check_params = cluster.SimpleFillNIC(nicparams)
9672 objects.NIC.CheckParameterSyntax(check_params)
9673 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9675 # disk checks/pre-build
9676 default_vg = self.cfg.GetVGName()
9678 for disk in self.op.disks:
9679 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9680 if mode not in constants.DISK_ACCESS_SET:
9681 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9682 mode, errors.ECODE_INVAL)
9683 size = disk.get(constants.IDISK_SIZE, None)
9685 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9688 except (TypeError, ValueError):
9689 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9692 data_vg = disk.get(constants.IDISK_VG, default_vg)
9694 constants.IDISK_SIZE: size,
9695 constants.IDISK_MODE: mode,
9696 constants.IDISK_VG: data_vg,
9698 if constants.IDISK_METAVG in disk:
9699 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9700 if constants.IDISK_ADOPT in disk:
9701 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9702 self.disks.append(new_disk)
9704 if self.op.mode == constants.INSTANCE_IMPORT:
9706 for idx in range(len(self.disks)):
9707 option = "disk%d_dump" % idx
9708 if export_info.has_option(constants.INISECT_INS, option):
9709 # FIXME: are the old OSes, disk sizes, etc. useful?
9710 export_name = export_info.get(constants.INISECT_INS, option)
9711 image = utils.PathJoin(self.op.src_path, export_name)
9712 disk_images.append(image)
9714 disk_images.append(False)
9716 self.src_images = disk_images
9718 old_name = export_info.get(constants.INISECT_INS, "name")
9719 if self.op.instance_name == old_name:
9720 for idx, nic in enumerate(self.nics):
9721 if nic.mac == constants.VALUE_AUTO:
9722 nic_mac_ini = "nic%d_mac" % idx
9723 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9725 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9727 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9728 if self.op.ip_check:
9729 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9730 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9731 (self.check_ip, self.op.instance_name),
9732 errors.ECODE_NOTUNIQUE)
9734 #### mac address generation
9735 # By generating the MAC address here, both the allocator and the hooks get
9736 # the real final MAC address rather than the 'auto' or 'generate' value.
9737 # There is a race condition between the generation and the instance object
9738 # creation, which means that we know the mac is valid now, but we're not
9739 # sure it will be when we actually add the instance. If things go bad
9740 # adding the instance will abort because of a duplicate mac, and the
9741 # creation job will fail.
9742 for nic in self.nics:
9743 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9744 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9748 if self.op.iallocator is not None:
9749 self._RunAllocator()
9751 # Release all unneeded node locks
9752 _ReleaseLocks(self, locking.LEVEL_NODE,
9753 keep=filter(None, [self.op.pnode, self.op.snode,
9755 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9756 keep=filter(None, [self.op.pnode, self.op.snode,
9759 #### node related checks
9761 # check primary node
9762 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9763 assert self.pnode is not None, \
9764 "Cannot retrieve locked node %s" % self.op.pnode
9766 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9767 pnode.name, errors.ECODE_STATE)
9769 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9770 pnode.name, errors.ECODE_STATE)
9771 if not pnode.vm_capable:
9772 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9773 " '%s'" % pnode.name, errors.ECODE_STATE)
9775 self.secondaries = []
9777 # mirror node verification
9778 if self.op.disk_template in constants.DTS_INT_MIRROR:
9779 if self.op.snode == pnode.name:
9780 raise errors.OpPrereqError("The secondary node cannot be the"
9781 " primary node", errors.ECODE_INVAL)
9782 _CheckNodeOnline(self, self.op.snode)
9783 _CheckNodeNotDrained(self, self.op.snode)
9784 _CheckNodeVmCapable(self, self.op.snode)
9785 self.secondaries.append(self.op.snode)
9787 snode = self.cfg.GetNodeInfo(self.op.snode)
9788 if pnode.group != snode.group:
9789 self.LogWarning("The primary and secondary nodes are in two"
9790 " different node groups; the disk parameters"
9791 " from the first disk's node group will be"
9794 nodenames = [pnode.name] + self.secondaries
9796 # Verify instance specs
9798 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9799 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9800 constants.ISPEC_DISK_COUNT: len(self.disks),
9801 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9802 constants.ISPEC_NIC_COUNT: len(self.nics),
9805 group_info = self.cfg.GetNodeGroup(pnode.group)
9806 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9807 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9808 if not self.op.ignore_ipolicy and res:
9809 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9810 " policy: %s") % (pnode.group,
9811 utils.CommaJoin(res)),
9814 # disk parameters (not customizable at instance or node level)
9815 # just use the primary node parameters, ignoring the secondary.
9816 self.diskparams = group_info.diskparams
9818 if not self.adopt_disks:
9819 if self.op.disk_template == constants.DT_RBD:
9820 # _CheckRADOSFreeSpace() is just a placeholder.
9821 # Any function that checks prerequisites can be placed here.
9822 # Check if there is enough space on the RADOS cluster.
9823 _CheckRADOSFreeSpace()
9825 # Check lv size requirements, if not adopting
9826 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9827 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9829 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9830 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9831 disk[constants.IDISK_ADOPT])
9832 for disk in self.disks])
9833 if len(all_lvs) != len(self.disks):
9834 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9836 for lv_name in all_lvs:
9838 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
9839 # to ReserveLV use the same syntax
9840 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9841 except errors.ReservationError:
9842 raise errors.OpPrereqError("LV named %s used by another instance" %
9843 lv_name, errors.ECODE_NOTUNIQUE)
9845 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9846 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9848 node_lvs = self.rpc.call_lv_list([pnode.name],
9849 vg_names.payload.keys())[pnode.name]
9850 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9851 node_lvs = node_lvs.payload
9853 delta = all_lvs.difference(node_lvs.keys())
9855 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9856 utils.CommaJoin(delta),
9858 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9860 raise errors.OpPrereqError("Online logical volumes found, cannot"
9861 " adopt: %s" % utils.CommaJoin(online_lvs),
9863 # update the disk sizes based on what was found
9864 for dsk in self.disks:
9865 dsk[constants.IDISK_SIZE] = \
9866 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9867 dsk[constants.IDISK_ADOPT])][0]))
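# Note (informational): the size reported for each adopted LV goes through
# int(float(...)) because the RPC payload may carry a fractional value; the
# recorded disk size is therefore the truncated whole number
# (assuming, as for disk sizes elsewhere, that the unit is mebibytes).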
9869 elif self.op.disk_template == constants.DT_BLOCK:
9870 # Normalize and de-duplicate device paths
9871 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9872 for disk in self.disks])
9873 if len(all_disks) != len(self.disks):
9874 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9876 baddisks = [d for d in all_disks
9877 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9879 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9880 " cannot be adopted" %
9881 (", ".join(baddisks),
9882 constants.ADOPTABLE_BLOCKDEV_ROOT),
9885 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9886 list(all_disks))[pnode.name]
9887 node_disks.Raise("Cannot get block device information from node %s" %
9889 node_disks = node_disks.payload
9890 delta = all_disks.difference(node_disks.keys())
9892 raise errors.OpPrereqError("Missing block device(s): %s" %
9893 utils.CommaJoin(delta),
9895 for dsk in self.disks:
9896 dsk[constants.IDISK_SIZE] = \
9897 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9899 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9901 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9902 # check OS parameters (remotely)
9903 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9905 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9907 # memory check on primary node
9908 # TODO(dynmem): use MINMEM for checking
9910 _CheckNodeFreeMemory(self, self.pnode.name,
9911 "creating instance %s" % self.op.instance_name,
9912 self.be_full[constants.BE_MAXMEM],
9915 self.dry_run_result = list(nodenames)
9917 def Exec(self, feedback_fn):
9918 """Create and add the instance to the cluster.
9921 instance = self.op.instance_name
9922 pnode_name = self.pnode.name
9924 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9925 self.owned_locks(locking.LEVEL_NODE)), \
9926 "Node locks differ from node resource locks"
9928 ht_kind = self.op.hypervisor
9929 if ht_kind in constants.HTS_REQ_PORT:
9930 network_port = self.cfg.AllocatePort()
9934 disks = _GenerateDiskTemplate(self,
9935 self.op.disk_template,
9936 instance, pnode_name,
9939 self.instance_file_storage_dir,
9940 self.op.file_driver,
9945 iobj = objects.Instance(name=instance, os=self.op.os_type,
9946 primary_node=pnode_name,
9947 nics=self.nics, disks=disks,
9948 disk_template=self.op.disk_template,
9949 admin_state=constants.ADMINST_DOWN,
9950 network_port=network_port,
9951 beparams=self.op.beparams,
9952 hvparams=self.op.hvparams,
9953 hypervisor=self.op.hypervisor,
9954 osparams=self.op.osparams,
9958 for tag in self.op.tags:
9961 if self.adopt_disks:
9962 if self.op.disk_template == constants.DT_PLAIN:
9963 # rename LVs to the newly-generated names; we need to construct
9964 # 'fake' LV disks with the old data, plus the new unique_id
9965 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9967 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9968 rename_to.append(t_dsk.logical_id)
9969 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9970 self.cfg.SetDiskID(t_dsk, pnode_name)
9971 result = self.rpc.call_blockdev_rename(pnode_name,
9972 zip(tmp_disks, rename_to))
9973 result.Raise("Failed to rename adopted LVs")
9975 feedback_fn("* creating instance disks...")
9977 _CreateDisks(self, iobj)
9978 except errors.OpExecError:
9979 self.LogWarning("Device creation failed, reverting...")
9981 _RemoveDisks(self, iobj)
9983 self.cfg.ReleaseDRBDMinors(instance)
9986 feedback_fn("adding instance %s to cluster config" % instance)
9988 self.cfg.AddInstance(iobj, self.proc.GetECId())
9990 # Declare that we don't want to remove the instance lock anymore, as we've
9991 # added the instance to the config
9992 del self.remove_locks[locking.LEVEL_INSTANCE]
9994 if self.op.mode == constants.INSTANCE_IMPORT:
9995 # Release unused nodes
9996 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9999 _ReleaseLocks(self, locking.LEVEL_NODE)
10002 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10003 feedback_fn("* wiping instance disks...")
10005 _WipeDisks(self, iobj)
10006 except errors.OpExecError, err:
10007 logging.exception("Wiping disks failed")
10008 self.LogWarning("Wiping instance disks failed (%s)", err)
10012 # Something is already wrong with the disks, don't do anything else
10014 elif self.op.wait_for_sync:
10015 disk_abort = not _WaitForSync(self, iobj)
10016 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10017 # make sure the disks are not degraded (still sync-ing is ok)
10018 feedback_fn("* checking mirrors status")
10019 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10024 _RemoveDisks(self, iobj)
10025 self.cfg.RemoveInstance(iobj.name)
10026 # Make sure the instance lock gets removed
10027 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10028 raise errors.OpExecError("There are some degraded disks for"
10031 # Release all node resource locks
10032 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10034 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10035 if self.op.mode == constants.INSTANCE_CREATE:
10036 if not self.op.no_install:
10037 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10038 not self.op.wait_for_sync)
10040 feedback_fn("* pausing disk sync to install instance OS")
10041 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10043 for idx, success in enumerate(result.payload):
10045 logging.warn("pause-sync of instance %s for disk %d failed",
10048 feedback_fn("* running the instance OS create scripts...")
10049 # FIXME: pass debug option from opcode to backend
10051 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10052 self.op.debug_level)
10054 feedback_fn("* resuming disk sync")
10055 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10057 for idx, success in enumerate(result.payload):
10059 logging.warn("resume-sync of instance %s for disk %d failed",
10062 os_add_result.Raise("Could not add os for instance %s"
10063 " on node %s" % (instance, pnode_name))
10065 elif self.op.mode == constants.INSTANCE_IMPORT:
10066 feedback_fn("* running the instance OS import scripts...")
10070 for idx, image in enumerate(self.src_images):
10074 # FIXME: pass debug option from opcode to backend
10075 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10076 constants.IEIO_FILE, (image, ),
10077 constants.IEIO_SCRIPT,
10078 (iobj.disks[idx], idx),
10080 transfers.append(dt)
10083 masterd.instance.TransferInstanceData(self, feedback_fn,
10084 self.op.src_node, pnode_name,
10085 self.pnode.secondary_ip,
10087 if not compat.all(import_result):
10088 self.LogWarning("Some disks for instance %s on node %s were not"
10089 " imported successfully" % (instance, pnode_name))
10091 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10092 feedback_fn("* preparing remote import...")
10093 # The source cluster will stop the instance before attempting to make a
10094 # connection. In some cases stopping an instance can take a long time,
10095 # hence the shutdown timeout is added to the connection timeout.
10096 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10097 self.op.source_shutdown_timeout)
10098 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10100 assert iobj.primary_node == self.pnode.name
10102 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10103 self.source_x509_ca,
10104 self._cds, timeouts)
10105 if not compat.all(disk_results):
10106 # TODO: Should the instance still be started, even if some disks
10107 # failed to import (valid for local imports, too)?
10108 self.LogWarning("Some disks for instance %s on node %s were not"
10109 " imported successfully" % (instance, pnode_name))
10111 # Run rename script on newly imported instance
10112 assert iobj.name == instance
10113 feedback_fn("Running rename script for %s" % instance)
10114 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10115 self.source_instance_name,
10116 self.op.debug_level)
10117 if result.fail_msg:
10118 self.LogWarning("Failed to run rename script for %s on node"
10119 " %s: %s" % (instance, pnode_name, result.fail_msg))
10122 # also checked in the prereq part
10123 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10126 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10129 iobj.admin_state = constants.ADMINST_UP
10130 self.cfg.Update(iobj, feedback_fn)
10131 logging.info("Starting instance %s on node %s", instance, pnode_name)
10132 feedback_fn("* starting instance...")
10133 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10135 result.Raise("Could not start instance")
10137 return list(iobj.all_nodes)
10140 def _CheckRADOSFreeSpace():
10141 """Compute disk size requirements inside the RADOS cluster.
10144 # For the RADOS cluster we assume there is always enough space.
10148 class LUInstanceConsole(NoHooksLU):
10149 """Connect to an instance's console.
10151 This is somewhat special in that it returns the command line that
10152 you need to run on the master node in order to connect to the
10158 def ExpandNames(self):
10159 self.share_locks = _ShareAll()
10160 self._ExpandAndLockInstance()
10162 def CheckPrereq(self):
10163 """Check prerequisites.
10165 This checks that the instance is in the cluster.
10168 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10169 assert self.instance is not None, \
10170 "Cannot retrieve locked instance %s" % self.op.instance_name
10171 _CheckNodeOnline(self, self.instance.primary_node)
10173 def Exec(self, feedback_fn):
10174 """Connect to the console of an instance
10177 instance = self.instance
10178 node = instance.primary_node
10180 node_insts = self.rpc.call_instance_list([node],
10181 [instance.hypervisor])[node]
10182 node_insts.Raise("Can't get node information from %s" % node)
10184 if instance.name not in node_insts.payload:
10185 if instance.admin_state == constants.ADMINST_UP:
10186 state = constants.INSTST_ERRORDOWN
10187 elif instance.admin_state == constants.ADMINST_DOWN:
10188 state = constants.INSTST_ADMINDOWN
10190 state = constants.INSTST_ADMINOFFLINE
10191 raise errors.OpExecError("Instance %s is not running (state %s)" %
10192 (instance.name, state))
10194 logging.debug("Connecting to console of %s on %s", instance.name, node)
10196 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10199 def _GetInstanceConsole(cluster, instance):
10200 """Returns console information for an instance.
10202 @type cluster: L{objects.Cluster}
10203 @type instance: L{objects.Instance}
10207 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10208 # beparams and hvparams are passed separately, to avoid editing the
10209 # instance and then saving the defaults in the instance itself.
10210 hvparams = cluster.FillHV(instance)
10211 beparams = cluster.FillBE(instance)
10212 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10214 assert console.instance == instance.name
10215 assert console.Validate()
10217 return console.ToDict()
10220 class LUInstanceReplaceDisks(LogicalUnit):
10221 """Replace the disks of an instance.
10224 HPATH = "mirrors-replace"
10225 HTYPE = constants.HTYPE_INSTANCE
10228 def CheckArguments(self):
10229 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10230 self.op.iallocator)
10232 def ExpandNames(self):
10233 self._ExpandAndLockInstance()
10235 assert locking.LEVEL_NODE not in self.needed_locks
10236 assert locking.LEVEL_NODE_RES not in self.needed_locks
10237 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10239 assert self.op.iallocator is None or self.op.remote_node is None, \
10240 "Conflicting options"
10242 if self.op.remote_node is not None:
10243 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10245 # Warning: do not remove the locking of the new secondary here
10246 # unless DRBD8.AddChildren is changed to work in parallel;
10247 # currently it doesn't since parallel invocations of
10248 # FindUnusedMinor will conflict
10249 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10250 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10252 self.needed_locks[locking.LEVEL_NODE] = []
10253 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10255 if self.op.iallocator is not None:
10256 # iallocator will select a new node in the same group
10257 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10259 self.needed_locks[locking.LEVEL_NODE_RES] = []
10261 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10262 self.op.iallocator, self.op.remote_node,
10263 self.op.disks, False, self.op.early_release,
10264 self.op.ignore_ipolicy)
10266 self.tasklets = [self.replacer]
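# The actual disk replacement work is carried out by the TLReplaceDisks
# tasklet created above; this LU itself only handles argument checking,
# locking and hooks.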
10268 def DeclareLocks(self, level):
10269 if level == locking.LEVEL_NODEGROUP:
10270 assert self.op.remote_node is None
10271 assert self.op.iallocator is not None
10272 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10274 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10275 # Lock all groups used by the instance optimistically; this requires going
10276 # via the node before it's locked, requiring verification later on
10277 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10278 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10280 elif level == locking.LEVEL_NODE:
10281 if self.op.iallocator is not None:
10282 assert self.op.remote_node is None
10283 assert not self.needed_locks[locking.LEVEL_NODE]
10285 # Lock member nodes of all locked groups
10286 self.needed_locks[locking.LEVEL_NODE] = [node_name
10287 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10288 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10290 self._LockInstancesNodes()
10291 elif level == locking.LEVEL_NODE_RES:
10293 self.needed_locks[locking.LEVEL_NODE_RES] = \
10294 self.needed_locks[locking.LEVEL_NODE]
10296 def BuildHooksEnv(self):
10297 """Build hooks env.
10299 This runs on the master, the primary and all the secondaries.
10302 instance = self.replacer.instance
10304 "MODE": self.op.mode,
10305 "NEW_SECONDARY": self.op.remote_node,
10306 "OLD_SECONDARY": instance.secondary_nodes[0],
10308 env.update(_BuildInstanceHookEnvByObject(self, instance))
10311 def BuildHooksNodes(self):
10312 """Build hooks nodes.
10315 instance = self.replacer.instance
10317 self.cfg.GetMasterNode(),
10318 instance.primary_node,
10320 if self.op.remote_node is not None:
10321 nl.append(self.op.remote_node)
10324 def CheckPrereq(self):
10325 """Check prerequisites.
10328 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10329 self.op.iallocator is None)
10331 # Verify if node group locks are still correct
10332 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10334 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10336 return LogicalUnit.CheckPrereq(self)
10339 class TLReplaceDisks(Tasklet):
10340 """Replaces disks for an instance.
10342 Note: Locking is not within the scope of this class.
10345 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10346 disks, delay_iallocator, early_release, ignore_ipolicy):
10347 """Initializes this class.
10350 Tasklet.__init__(self, lu)
10353 self.instance_name = instance_name
10355 self.iallocator_name = iallocator_name
10356 self.remote_node = remote_node
10358 self.delay_iallocator = delay_iallocator
10359 self.early_release = early_release
10360 self.ignore_ipolicy = ignore_ipolicy
10363 self.instance = None
10364 self.new_node = None
10365 self.target_node = None
10366 self.other_node = None
10367 self.remote_node_info = None
10368 self.node_secondary_ip = None
10371 def CheckArguments(mode, remote_node, iallocator):
10372 """Helper function for users of this class.
10375 # check for valid parameter combination
10376 if mode == constants.REPLACE_DISK_CHG:
10377 if remote_node is None and iallocator is None:
10378 raise errors.OpPrereqError("When changing the secondary either an"
10379 " iallocator script must be used or the"
10380 " new node given", errors.ECODE_INVAL)
10382 if remote_node is not None and iallocator is not None:
10383 raise errors.OpPrereqError("Give either the iallocator or the new"
10384 " secondary, not both", errors.ECODE_INVAL)
10386 elif remote_node is not None or iallocator is not None:
10387 # Not replacing the secondary
10388 raise errors.OpPrereqError("The iallocator and new node options can"
10389 " only be used when changing the"
10390 " secondary node", errors.ECODE_INVAL)
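# Accepted combinations, as enforced above:
#   REPLACE_DISK_CHG:  exactly one of remote_node / iallocator is required
#   all other modes:   neither remote_node nor iallocator may be given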
10393 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10394 """Compute a new secondary node using an IAllocator.
10397 ial = IAllocator(lu.cfg, lu.rpc,
10398 mode=constants.IALLOCATOR_MODE_RELOC,
10399 name=instance_name,
10400 relocate_from=list(relocate_from))
10402 ial.Run(iallocator_name)
10404 if not ial.success:
10405 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10406 " %s" % (iallocator_name, ial.info),
10407 errors.ECODE_NORES)
10409 if len(ial.result) != ial.required_nodes:
10410 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10411 " of nodes (%s), required %s" %
10413 len(ial.result), ial.required_nodes),
10414 errors.ECODE_FAULT)
10416 remote_node_name = ial.result[0]
10418 lu.LogInfo("Selected new secondary for instance '%s': %s",
10419 instance_name, remote_node_name)
10421 return remote_node_name
10423 def _FindFaultyDisks(self, node_name):
10424 """Wrapper for L{_FindFaultyInstanceDisks}.
10427 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10430 def _CheckDisksActivated(self, instance):
10431 """Checks if the instance disks are activated.
10433 @param instance: The instance to check disks
10434 @return: True if they are activated, False otherwise
10437 nodes = instance.all_nodes
10439 for idx, dev in enumerate(instance.disks):
10441 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10442 self.cfg.SetDiskID(dev, node)
10444 result = self.rpc.call_blockdev_find(node, dev)
10448 elif result.fail_msg or not result.payload:
10453 def CheckPrereq(self):
10454 """Check prerequisites.
10456 This checks that the instance is in the cluster.
10459 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10460 assert instance is not None, \
10461 "Cannot retrieve locked instance %s" % self.instance_name
10463 if instance.disk_template != constants.DT_DRBD8:
10464 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10465 " instances", errors.ECODE_INVAL)
10467 if len(instance.secondary_nodes) != 1:
10468 raise errors.OpPrereqError("The instance has a strange layout,"
10469 " expected one secondary but found %d" %
10470 len(instance.secondary_nodes),
10471 errors.ECODE_FAULT)
10473 if not self.delay_iallocator:
10474 self._CheckPrereq2()
10476 def _CheckPrereq2(self):
10477 """Check prerequisites, second part.
10479 This function should always be part of CheckPrereq. It was separated and is
10480 now called from Exec because during node evacuation iallocator was only
10481 called with an unmodified cluster model, not taking planned changes into
10485 instance = self.instance
10486 secondary_node = instance.secondary_nodes[0]
10488 if self.iallocator_name is None:
10489 remote_node = self.remote_node
10491 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10492 instance.name, instance.secondary_nodes)
10494 if remote_node is None:
10495 self.remote_node_info = None
10497 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10498 "Remote node '%s' is not locked" % remote_node
10500 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10501 assert self.remote_node_info is not None, \
10502 "Cannot retrieve locked node %s" % remote_node
10504 if remote_node == self.instance.primary_node:
10505 raise errors.OpPrereqError("The specified node is the primary node of"
10506 " the instance", errors.ECODE_INVAL)
10508 if remote_node == secondary_node:
10509 raise errors.OpPrereqError("The specified node is already the"
10510 " secondary node of the instance",
10511 errors.ECODE_INVAL)
10513 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10514 constants.REPLACE_DISK_CHG):
10515 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10516 errors.ECODE_INVAL)
10518 if self.mode == constants.REPLACE_DISK_AUTO:
10519 if not self._CheckDisksActivated(instance):
10520 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10521 " first" % self.instance_name,
10522 errors.ECODE_STATE)
10523 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10524 faulty_secondary = self._FindFaultyDisks(secondary_node)
10526 if faulty_primary and faulty_secondary:
10527 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10528 " one node and can not be repaired"
10529 " automatically" % self.instance_name,
10530 errors.ECODE_STATE)
10533 self.disks = faulty_primary
10534 self.target_node = instance.primary_node
10535 self.other_node = secondary_node
10536 check_nodes = [self.target_node, self.other_node]
10537 elif faulty_secondary:
10538 self.disks = faulty_secondary
10539 self.target_node = secondary_node
10540 self.other_node = instance.primary_node
10541 check_nodes = [self.target_node, self.other_node]
10547 # Non-automatic modes
10548 if self.mode == constants.REPLACE_DISK_PRI:
10549 self.target_node = instance.primary_node
10550 self.other_node = secondary_node
10551 check_nodes = [self.target_node, self.other_node]
10553 elif self.mode == constants.REPLACE_DISK_SEC:
10554 self.target_node = secondary_node
10555 self.other_node = instance.primary_node
10556 check_nodes = [self.target_node, self.other_node]
10558 elif self.mode == constants.REPLACE_DISK_CHG:
10559 self.new_node = remote_node
10560 self.other_node = instance.primary_node
10561 self.target_node = secondary_node
10562 check_nodes = [self.new_node, self.other_node]
10564 _CheckNodeNotDrained(self.lu, remote_node)
10565 _CheckNodeVmCapable(self.lu, remote_node)
10567 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10568 assert old_node_info is not None
10569 if old_node_info.offline and not self.early_release:
10570 # doesn't make sense to delay the release
10571 self.early_release = True
10572 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10573 " early-release mode", secondary_node)
10576 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10579 # If not specified all disks should be replaced
10581 self.disks = range(len(self.instance.disks))
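# (an unset/empty disk list therefore means "replace every disk of the
# instance")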
10583 # TODO: This is ugly, but right now we can't distinguish between an
10584 # internally submitted opcode and an external one. We should fix that.
10585 if self.remote_node_info:
10586 # We change the node, lets verify it still meets instance policy
10587 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10588 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10590 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10591 ignore=self.ignore_ipolicy)
10593 # TODO: compute disk parameters
10594 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10595 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10596 if primary_node_info.group != secondary_node_info.group:
10597 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10598 " different node groups; the disk parameters of the"
10599 " primary node's group will be applied.")
10601 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10603 for node in check_nodes:
10604 _CheckNodeOnline(self.lu, node)
10606 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10609 if node_name is not None)
10611 # Release unneeded node and node resource locks
10612 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10613 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10615 # Release any owned node group
10616 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10617 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10619 # Check whether disks are valid
10620 for disk_idx in self.disks:
10621 instance.FindDisk(disk_idx)
10623 # Get secondary node IP addresses
10624 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10625 in self.cfg.GetMultiNodeInfo(touched_nodes))
10627 def Exec(self, feedback_fn):
10628 """Execute disk replacement.
10630 This dispatches the disk replacement to the appropriate handler.
10633 if self.delay_iallocator:
10634 self._CheckPrereq2()
10637 # Verify owned locks before starting operation
10638 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10639 assert set(owned_nodes) == set(self.node_secondary_ip), \
10640 ("Incorrect node locks, owning %s, expected %s" %
10641 (owned_nodes, self.node_secondary_ip.keys()))
10642 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10643 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10645 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10646 assert list(owned_instances) == [self.instance_name], \
10647 "Instance '%s' not locked" % self.instance_name
10649 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10650 "Should not own any node group lock at this point"
10653 feedback_fn("No disks need replacement")
10656 feedback_fn("Replacing disk(s) %s for %s" %
10657 (utils.CommaJoin(self.disks), self.instance.name))
10659 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10661 # Activate the instance disks if we're replacing them on a down instance
10663 _StartInstanceDisks(self.lu, self.instance, True)
10666 # Should we replace the secondary node?
10667 if self.new_node is not None:
10668 fn = self._ExecDrbd8Secondary
10670 fn = self._ExecDrbd8DiskOnly
10672 result = fn(feedback_fn)
10674 # Deactivate the instance disks if we're replacing them on a
10677 _SafeShutdownInstanceDisks(self.lu, self.instance)
10679 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10682 # Verify owned locks
10683 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10684 nodes = frozenset(self.node_secondary_ip)
10685 assert ((self.early_release and not owned_nodes) or
10686 (not self.early_release and not (set(owned_nodes) - nodes))), \
10687 ("Not owning the correct locks, early_release=%s, owned=%r,"
10688 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10692 def _CheckVolumeGroup(self, nodes):
10693 self.lu.LogInfo("Checking volume groups")
10695 vgname = self.cfg.GetVGName()
10697 # Make sure volume group exists on all involved nodes
10698 results = self.rpc.call_vg_list(nodes)
10700 raise errors.OpExecError("Can't list volume groups on the nodes")
10703 res = results[node]
10704 res.Raise("Error checking node %s" % node)
10705 if vgname not in res.payload:
10706 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10709 def _CheckDisksExistence(self, nodes):
10710 # Check disk existence
10711 for idx, dev in enumerate(self.instance.disks):
10712 if idx not in self.disks:
10716 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10717 self.cfg.SetDiskID(dev, node)
10719 result = self.rpc.call_blockdev_find(node, dev)
10721 msg = result.fail_msg
10722 if msg or not result.payload:
10724 msg = "disk not found"
10725 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10728 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10729 for idx, dev in enumerate(self.instance.disks):
10730 if idx not in self.disks:
10733 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10736 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10738 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10739 " replace disks for instance %s" %
10740 (node_name, self.instance.name))
10742 def _CreateNewStorage(self, node_name):
10743 """Create new storage on the primary or secondary node.
10745 This is only used for same-node replaces, not for changing the
10746 secondary node, hence we don't want to modify the existing disk.
10751 for idx, dev in enumerate(self.instance.disks):
10752 if idx not in self.disks:
10755 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10757 self.cfg.SetDiskID(dev, node_name)
10759 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10760 names = _GenerateUniqueNames(self.lu, lv_names)
10762 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10764 vg_data = dev.children[0].logical_id[0]
10765 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10766 logical_id=(vg_data, names[0]), params=data_p)
10767 vg_meta = dev.children[1].logical_id[0]
10768 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10769 logical_id=(vg_meta, names[1]), params=meta_p)
10771 new_lvs = [lv_data, lv_meta]
10772 old_lvs = [child.Copy() for child in dev.children]
10773 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10775 # we pass force_create=True to force the LVM creation
10776 for new_lv in new_lvs:
10777 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10778 _GetInstanceInfoText(self.instance), False)
10782 def _CheckDevices(self, node_name, iv_names):
10783 for name, (dev, _, _) in iv_names.iteritems():
10784 self.cfg.SetDiskID(dev, node_name)
10786 result = self.rpc.call_blockdev_find(node_name, dev)
10788 msg = result.fail_msg
10789 if msg or not result.payload:
10791 msg = "disk not found"
10792 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10795 if result.payload.is_degraded:
10796 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10798 def _RemoveOldStorage(self, node_name, iv_names):
10799 for name, (_, old_lvs, _) in iv_names.iteritems():
10800 self.lu.LogInfo("Remove logical volumes for %s" % name)
10803 self.cfg.SetDiskID(lv, node_name)
10805 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10807 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10808 hint="remove unused LVs manually")
10810 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10811 """Replace a disk on the primary or secondary for DRBD 8.
10813 The algorithm for replace is quite complicated:
10815 1. for each disk to be replaced:
10817 1. create new LVs on the target node with unique names
10818 1. detach old LVs from the drbd device
10819 1. rename old LVs to name_replaced.<time_t>
10820 1. rename new LVs to old LVs
10821 1. attach the new LVs (with the old names now) to the drbd device
10823 1. wait for sync across all devices
10825 1. for each modified disk:
10827 1. remove old LVs (which have the name name_replaced.<time_t>)
10829 Failures are not very well handled.
10834 # Step: check device activation
10835 self.lu.LogStep(1, steps_total, "Check device existence")
10836 self._CheckDisksExistence([self.other_node, self.target_node])
10837 self._CheckVolumeGroup([self.target_node, self.other_node])
10839 # Step: check other node consistency
10840 self.lu.LogStep(2, steps_total, "Check peer consistency")
10841 self._CheckDisksConsistency(self.other_node,
10842 self.other_node == self.instance.primary_node,
10845 # Step: create new storage
10846 self.lu.LogStep(3, steps_total, "Allocate new storage")
10847 iv_names = self._CreateNewStorage(self.target_node)
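# iv_names maps each disk's iv_name to a (drbd device, old LVs, new LVs)
# tuple, as built by _CreateNewStorage above.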
10849 # Step: for each lv, detach+rename*2+attach
10850 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10851 for dev, old_lvs, new_lvs in iv_names.itervalues():
10852 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10854 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10856 result.Raise("Can't detach drbd from local storage on node"
10857 " %s for device %s" % (self.target_node, dev.iv_name))
10859 #cfg.Update(instance)
10861 # ok, we created the new LVs, so now we know we have the needed
10862 # storage; as such, we proceed on the target node to rename
10863 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10864 # using the assumption that logical_id == physical_id (which in
10865 # turn is the unique_id on that node)
10867 # FIXME(iustin): use a better name for the replaced LVs
10868 temp_suffix = int(time.time())
10869 ren_fn = lambda d, suff: (d.physical_id[0],
10870 d.physical_id[1] + "_replaced-%s" % suff)
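# e.g. ("xenvg", "<uuid>.disk0_data") would be renamed to
# ("xenvg", "<uuid>.disk0_data_replaced-<time_t>"); only the LV name gets
# the suffix, the volume group part stays (names here are illustrative).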
10872 # Build the rename list based on what LVs exist on the node
10873 rename_old_to_new = []
10874 for to_ren in old_lvs:
10875 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10876 if not result.fail_msg and result.payload:
10878 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10880 self.lu.LogInfo("Renaming the old LVs on the target node")
10881 result = self.rpc.call_blockdev_rename(self.target_node,
10883 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10885 # Now we rename the new LVs to the old LVs
10886 self.lu.LogInfo("Renaming the new LVs on the target node")
10887 rename_new_to_old = [(new, old.physical_id)
10888 for old, new in zip(old_lvs, new_lvs)]
10889 result = self.rpc.call_blockdev_rename(self.target_node,
10891 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10893 # Intermediate steps of in-memory modifications
10894 for old, new in zip(old_lvs, new_lvs):
10895 new.logical_id = old.logical_id
10896 self.cfg.SetDiskID(new, self.target_node)
10898 # We need to modify old_lvs so that removal later removes the
10899 # right LVs, not the newly added ones; note that old_lvs is a
10901 for disk in old_lvs:
10902 disk.logical_id = ren_fn(disk, temp_suffix)
10903 self.cfg.SetDiskID(disk, self.target_node)
10905 # Now that the new lvs have the old name, we can add them to the device
10906 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10907 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10909 msg = result.fail_msg
10911 for new_lv in new_lvs:
10912 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10915 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10916 hint=("cleanup manually the unused logical"
10918 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10920 cstep = itertools.count(5)
10922 if self.early_release:
10923 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10924 self._RemoveOldStorage(self.target_node, iv_names)
10925 # TODO: Check if releasing locks early still makes sense
10926 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10928 # Release all resource locks except those used by the instance
10929 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10930 keep=self.node_secondary_ip.keys())
10932 # Release all node locks while waiting for sync
10933 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10935 # TODO: Can the instance lock be downgraded here? Take the optional disk
10936 # shutdown in the caller into consideration.
10939 # This can fail as the old devices are degraded and _WaitForSync
10941 # returns a combined result over all disks, so we don't check its return value
10941 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10942 _WaitForSync(self.lu, self.instance)
10944 # Check all devices manually
10945 self._CheckDevices(self.instance.primary_node, iv_names)
10947 # Step: remove old storage
10948 if not self.early_release:
10949 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10950 self._RemoveOldStorage(self.target_node, iv_names)
10952 def _ExecDrbd8Secondary(self, feedback_fn):
10953 """Replace the secondary node for DRBD 8.
10955 The algorithm for replace is quite complicated:
10956 - for all disks of the instance:
10957 - create new LVs on the new node with same names
10958 - shutdown the drbd device on the old secondary
10959 - disconnect the drbd network on the primary
10960 - create the drbd device on the new secondary
10961 - network attach the drbd on the primary, using an artifice:
10962 the drbd code for Attach() will connect to the network if it
10963 finds a device which is connected to the good local disks but
10964 not network enabled
10965 - wait for sync across all devices
10966 - remove all disks from the old secondary
10968 Failures are not very well handled.
10973 pnode = self.instance.primary_node
10975 # Step: check device activation
10976 self.lu.LogStep(1, steps_total, "Check device existence")
10977 self._CheckDisksExistence([self.instance.primary_node])
10978 self._CheckVolumeGroup([self.instance.primary_node])
10980 # Step: check other node consistency
10981 self.lu.LogStep(2, steps_total, "Check peer consistency")
10982 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10984 # Step: create new storage
10985 self.lu.LogStep(3, steps_total, "Allocate new storage")
10986 for idx, dev in enumerate(self.instance.disks):
10987 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10988 (self.new_node, idx))
10989 # we pass force_create=True to force LVM creation
10990 for new_lv in dev.children:
10991 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10992 _GetInstanceInfoText(self.instance), False)
10994 # Step 4: drbd minors and drbd setup changes
10995 # after this, we must manually remove the drbd minors on both the
10996 # error and the success paths
10997 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10998 minors = self.cfg.AllocateDRBDMinor([self.new_node
10999 for dev in self.instance.disks],
11000 self.instance.name)
11001 logging.debug("Allocated minors %r", minors)
11004 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11005 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11006 (self.new_node, idx))
11007 # create new devices on new_node; note that we create two IDs:
11008 # one without port, so the drbd will be activated without
11009 # networking information on the new node at this stage, and one
11010 # with network, for the later activation in step 4
11011 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11012 if self.instance.primary_node == o_node1:
11015 assert self.instance.primary_node == o_node2, "Three-node instance?"
11018 new_alone_id = (self.instance.primary_node, self.new_node, None,
11019 p_minor, new_minor, o_secret)
11020 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11021 p_minor, new_minor, o_secret)
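# For illustration, a DRBD logical_id is the 6-tuple unpacked above:
# (node_A, node_B, port, minor_A, minor_B, secret). new_alone_id deliberately
# carries None instead of the port so the device is brought up without
# networking first; new_net_id keeps the original port for the attach later.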
11023 iv_names[idx] = (dev, dev.children, new_net_id)
11024 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11026 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
11027 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11028 logical_id=new_alone_id,
11029 children=dev.children,
11031 params=drbd_params)
11033 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
11034 _GetInstanceInfoText(self.instance), False)
11035 except errors.GenericError:
11036 self.cfg.ReleaseDRBDMinors(self.instance.name)
11039 # We have new devices, shutdown the drbd on the old secondary
11040 for idx, dev in enumerate(self.instance.disks):
11041 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11042 self.cfg.SetDiskID(dev, self.target_node)
11043 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
11045 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11046 "node: %s" % (idx, msg),
11047 hint=("Please cleanup this device manually as"
11048 " soon as possible"))
11050 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11051 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11052 self.instance.disks)[pnode]
11054 msg = result.fail_msg
11056 # detaches didn't succeed (unlikely)
11057 self.cfg.ReleaseDRBDMinors(self.instance.name)
11058 raise errors.OpExecError("Can't detach the disks from the network on"
11059 " old node: %s" % (msg,))
11061 # if we managed to detach at least one, we update all the disks of
11062 # the instance to point to the new secondary
11063 self.lu.LogInfo("Updating instance configuration")
11064 for dev, _, new_logical_id in iv_names.itervalues():
11065 dev.logical_id = new_logical_id
11066 self.cfg.SetDiskID(dev, self.instance.primary_node)
11068 self.cfg.Update(self.instance, feedback_fn)
11070 # Release all node locks (the configuration has been updated)
11071 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11073 # and now perform the drbd attach
11074 self.lu.LogInfo("Attaching primary drbds to new secondary"
11075 " (standalone => connected)")
11076 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11078 self.node_secondary_ip,
11079 self.instance.disks,
11080 self.instance.name,
11082 for to_node, to_result in result.items():
11083 msg = to_result.fail_msg
11085 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11087 hint=("please do a gnt-instance info to see the"
11088 " status of disks"))
11090 cstep = itertools.count(5)
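# Steps 1-4 above were logged with literal step numbers; this counter simply
# continues the numbering at 5 for the remaining, order-dependent steps
# (cstep.next() is the Python 2 iterator protocol for itertools.count).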
11092 if self.early_release:
11093 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11094 self._RemoveOldStorage(self.target_node, iv_names)
11095 # TODO: Check if releasing locks early still makes sense
11096 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11098 # Release all resource locks except those used by the instance
11099 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11100 keep=self.node_secondary_ip.keys())
11102 # TODO: Can the instance lock be downgraded here? Take the optional disk
11103 # shutdown in the caller into consideration.
11106 # This can fail as the old devices are degraded and _WaitForSync
11107 # returns a combined result over all disks, so we don't check its return value
11108 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11109 _WaitForSync(self.lu, self.instance)
11111 # Check all devices manually
11112 self._CheckDevices(self.instance.primary_node, iv_names)
11114 # Step: remove old storage
11115 if not self.early_release:
11116 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11117 self._RemoveOldStorage(self.target_node, iv_names)
11120 class LURepairNodeStorage(NoHooksLU):
11121 """Repairs the volume group on a node.
11126 def CheckArguments(self):
11127 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11129 storage_type = self.op.storage_type
11131 if (constants.SO_FIX_CONSISTENCY not in
11132 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11133 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11134 " repaired" % storage_type,
11135 errors.ECODE_INVAL)
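# The lookup above means only storage types that list SO_FIX_CONSISTENCY in
# constants.VALID_STORAGE_OPERATIONS can be repaired (in practice this is
# expected to be the LVM volume-group storage type, though the exact set is
# defined by the constants module, not here).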
11137 def ExpandNames(self):
11138 self.needed_locks = {
11139 locking.LEVEL_NODE: [self.op.node_name],
11142 def _CheckFaultyDisks(self, instance, node_name):
11143 """Ensure faulty disks abort the opcode or at least warn."""
11145 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11147 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11148 " node '%s'" % (instance.name, node_name),
11149 errors.ECODE_STATE)
11150 except errors.OpPrereqError, err:
11151 if self.op.ignore_consistency:
11152 self.proc.LogWarning(str(err.args[0]))
11156 def CheckPrereq(self):
11157 """Check prerequisites.
11160 # Check whether any instance on this node has faulty disks
11161 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11162 if inst.admin_state != constants.ADMINST_UP:
11164 check_nodes = set(inst.all_nodes)
11165 check_nodes.discard(self.op.node_name)
11166 for inst_node_name in check_nodes:
11167 self._CheckFaultyDisks(inst, inst_node_name)
11169 def Exec(self, feedback_fn):
11170 feedback_fn("Repairing storage unit '%s' on %s ..." %
11171 (self.op.name, self.op.node_name))
11173 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11174 result = self.rpc.call_storage_execute(self.op.node_name,
11175 self.op.storage_type, st_args,
11177 constants.SO_FIX_CONSISTENCY)
11178 result.Raise("Failed to repair storage unit '%s' on %s" %
11179 (self.op.name, self.op.node_name))
11182 class LUNodeEvacuate(NoHooksLU):
11183 """Evacuates instances off a list of nodes.
11188 _MODE2IALLOCATOR = {
11189 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11190 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11191 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11193 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11194 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11195 constants.IALLOCATOR_NEVAC_MODES)
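# _MODE2IALLOCATOR translates the opcode-level evacuation mode into the
# corresponding iallocator request mode; the two asserts above guarantee the
# mapping stays exhaustive on both sides whenever either constant set grows.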
11197 def CheckArguments(self):
11198 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11200 def ExpandNames(self):
11201 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11203 if self.op.remote_node is not None:
11204 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11205 assert self.op.remote_node
11207 if self.op.remote_node == self.op.node_name:
11208 raise errors.OpPrereqError("Can not use evacuated node as a new"
11209 " secondary node", errors.ECODE_INVAL)
11211 if self.op.mode != constants.NODE_EVAC_SEC:
11212 raise errors.OpPrereqError("Without the use of an iallocator only"
11213 " secondary instances can be evacuated",
11214 errors.ECODE_INVAL)
11217 self.share_locks = _ShareAll()
11218 self.needed_locks = {
11219 locking.LEVEL_INSTANCE: [],
11220 locking.LEVEL_NODEGROUP: [],
11221 locking.LEVEL_NODE: [],
11224 # Determine nodes (via group) optimistically, needs verification once locks
11225 # have been acquired
11226 self.lock_nodes = self._DetermineNodes()
11228 def _DetermineNodes(self):
11229 """Gets the list of nodes to operate on.
11232 if self.op.remote_node is None:
11233 # Iallocator will choose any node(s) in the same group
11234 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11236 group_nodes = frozenset([self.op.remote_node])
11238 # Determine nodes to be locked
11239 return set([self.op.node_name]) | group_nodes
11241 def _DetermineInstances(self):
11242 """Builds list of instances to operate on.
11245 assert self.op.mode in constants.NODE_EVAC_MODES
11247 if self.op.mode == constants.NODE_EVAC_PRI:
11248 # Primary instances only
11249 inst_fn = _GetNodePrimaryInstances
11250 assert self.op.remote_node is None, \
11251 "Evacuating primary instances requires iallocator"
11252 elif self.op.mode == constants.NODE_EVAC_SEC:
11253 # Secondary instances only
11254 inst_fn = _GetNodeSecondaryInstances
11257 assert self.op.mode == constants.NODE_EVAC_ALL
11258 inst_fn = _GetNodeInstances
11259 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11261 raise errors.OpPrereqError("Due to an issue with the iallocator"
11262 " interface it is not possible to evacuate"
11263 " all instances at once; specify explicitly"
11264 " whether to evacuate primary or secondary"
11266 errors.ECODE_INVAL)
11268 return inst_fn(self.cfg, self.op.node_name)
11270 def DeclareLocks(self, level):
11271 if level == locking.LEVEL_INSTANCE:
11272 # Lock instances optimistically, needs verification once node and group
11273 # locks have been acquired
11274 self.needed_locks[locking.LEVEL_INSTANCE] = \
11275 set(i.name for i in self._DetermineInstances())
11277 elif level == locking.LEVEL_NODEGROUP:
11278 # Lock node groups for all potential target nodes optimistically, needs
11279 # verification once nodes have been acquired
11280 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11281 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11283 elif level == locking.LEVEL_NODE:
11284 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11286 def CheckPrereq(self):
11288 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11289 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11290 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11292 need_nodes = self._DetermineNodes()
11294 if not owned_nodes.issuperset(need_nodes):
11295 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11296 " locks were acquired, current nodes are"
11297 " are '%s', used to be '%s'; retry the"
11299 (self.op.node_name,
11300 utils.CommaJoin(need_nodes),
11301 utils.CommaJoin(owned_nodes)),
11302 errors.ECODE_STATE)
11304 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11305 if owned_groups != wanted_groups:
11306 raise errors.OpExecError("Node groups changed since locks were acquired,"
11307 " current groups are '%s', used to be '%s';"
11308 " retry the operation" %
11309 (utils.CommaJoin(wanted_groups),
11310 utils.CommaJoin(owned_groups)))
11312 # Determine affected instances
11313 self.instances = self._DetermineInstances()
11314 self.instance_names = [i.name for i in self.instances]
11316 if set(self.instance_names) != owned_instances:
11317 raise errors.OpExecError("Instances on node '%s' changed since locks"
11318 " were acquired, current instances are '%s',"
11319 " used to be '%s'; retry the operation" %
11320 (self.op.node_name,
11321 utils.CommaJoin(self.instance_names),
11322 utils.CommaJoin(owned_instances)))
11324 if self.instance_names:
11325 self.LogInfo("Evacuating instances from node '%s': %s",
11327 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11329 self.LogInfo("No instances to evacuate from node '%s'",
11332 if self.op.remote_node is not None:
11333 for i in self.instances:
11334 if i.primary_node == self.op.remote_node:
11335 raise errors.OpPrereqError("Node %s is the primary node of"
11336 " instance %s, cannot use it as"
11338 (self.op.remote_node, i.name),
11339 errors.ECODE_INVAL)
11341 def Exec(self, feedback_fn):
11342 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11344 if not self.instance_names:
11345 # No instances to evacuate
11348 elif self.op.iallocator is not None:
11349 # TODO: Implement relocation to other group
11350 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11351 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11352 instances=list(self.instance_names))
11354 ial.Run(self.op.iallocator)
11356 if not ial.success:
11357 raise errors.OpPrereqError("Can't compute node evacuation using"
11358 " iallocator '%s': %s" %
11359 (self.op.iallocator, ial.info),
11360 errors.ECODE_NORES)
11362 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11364 elif self.op.remote_node is not None:
11365 assert self.op.mode == constants.NODE_EVAC_SEC
11367 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11368 remote_node=self.op.remote_node,
11370 mode=constants.REPLACE_DISK_CHG,
11371 early_release=self.op.early_release)]
11372 for instance_name in self.instance_names
11376 raise errors.ProgrammerError("No iallocator or remote node")
11378 return ResultWithJobs(jobs)
11381 def _SetOpEarlyRelease(early_release, op):
11382 """Sets C{early_release} flag on opcodes if available.
11386 op.early_release = early_release
11387 except AttributeError:
11388 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
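# The try/except AttributeError pattern above silently skips opcodes that do
# not define an early_release slot; the assert documents the expectation that
# OpInstanceReplaceDisks always has one and therefore never reaches this path.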
11393 def _NodeEvacDest(use_nodes, group, nodes):
11394 """Returns group or nodes depending on caller's choice.
11398 return utils.CommaJoin(nodes)
11403 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11404 """Unpacks the result of change-group and node-evacuate iallocator requests.
11406 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11407 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11409 @type lu: L{LogicalUnit}
11410 @param lu: Logical unit instance
11411 @type alloc_result: tuple/list
11412 @param alloc_result: Result from iallocator
11413 @type early_release: bool
11414 @param early_release: Whether to release locks early if possible
11415 @type use_nodes: bool
11416 @param use_nodes: Whether to display node names instead of groups
11419 (moved, failed, jobs) = alloc_result
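# Rough shape of the tuple unpacked above, with hypothetical values:
#   moved  = [("inst1", "<group uuid>", ["node2", "node3"]), ...]
#   failed = [("inst9", "insufficient memory"), ...]
#   jobs   = [[<serialized opcode>, ...], ...]   # one inner list per job
# Only the element structure is taken from the code below; the values are
# purely illustrative.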
11422 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11423 for (name, reason) in failed)
11424 lu.LogWarning("Unable to evacuate instances %s", failreason)
11425 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11428 lu.LogInfo("Instances to be moved: %s",
11429 utils.CommaJoin("%s (to %s)" %
11430 (name, _NodeEvacDest(use_nodes, group, nodes))
11431 for (name, group, nodes) in moved))
11433 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11434 map(opcodes.OpCode.LoadOpCode, ops))
11438 class LUInstanceGrowDisk(LogicalUnit):
11439 """Grow a disk of an instance.
11442 HPATH = "disk-grow"
11443 HTYPE = constants.HTYPE_INSTANCE
11446 def ExpandNames(self):
11447 self._ExpandAndLockInstance()
11448 self.needed_locks[locking.LEVEL_NODE] = []
11449 self.needed_locks[locking.LEVEL_NODE_RES] = []
11450 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11451 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11453 def DeclareLocks(self, level):
11454 if level == locking.LEVEL_NODE:
11455 self._LockInstancesNodes()
11456 elif level == locking.LEVEL_NODE_RES:
11458 self.needed_locks[locking.LEVEL_NODE_RES] = \
11459 self.needed_locks[locking.LEVEL_NODE][:]
11461 def BuildHooksEnv(self):
11462 """Build hooks env.
11464 This runs on the master, the primary and all the secondaries.
11468 "DISK": self.op.disk,
11469 "AMOUNT": self.op.amount,
11471 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11474 def BuildHooksNodes(self):
11475 """Build hooks nodes.
11478 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11481 def CheckPrereq(self):
11482 """Check prerequisites.
11484 This checks that the instance is in the cluster.
11487 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11488 assert instance is not None, \
11489 "Cannot retrieve locked instance %s" % self.op.instance_name
11490 nodenames = list(instance.all_nodes)
11491 for node in nodenames:
11492 _CheckNodeOnline(self, node)
11494 self.instance = instance
11496 if instance.disk_template not in constants.DTS_GROWABLE:
11497 raise errors.OpPrereqError("Instance's disk layout does not support"
11498 " growing", errors.ECODE_INVAL)
11500 self.disk = instance.FindDisk(self.op.disk)
11502 if instance.disk_template not in (constants.DT_FILE,
11503 constants.DT_SHARED_FILE,
11505 # TODO: check the free disk space for file disks, once that feature is implemented
11507 _CheckNodesFreeDiskPerVG(self, nodenames,
11508 self.disk.ComputeGrowth(self.op.amount))
11510 def Exec(self, feedback_fn):
11511 """Execute disk grow.
11514 instance = self.instance
11517 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11518 assert (self.owned_locks(locking.LEVEL_NODE) ==
11519 self.owned_locks(locking.LEVEL_NODE_RES))
11521 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11523 raise errors.OpExecError("Cannot activate block device to grow")
11525 feedback_fn("Growing disk %s of instance '%s' by %s" %
11526 (self.op.disk, instance.name,
11527 utils.FormatUnit(self.op.amount, "h")))
11529 # First run all grow ops in dry-run mode
11530 for node in instance.all_nodes:
11531 self.cfg.SetDiskID(disk, node)
11532 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11533 result.Raise("Grow request failed to node %s" % node)
11535 # We know that (as far as we can test) operations across different
11536 # nodes will succeed; time to run it for real
11537 for node in instance.all_nodes:
11538 self.cfg.SetDiskID(disk, node)
11539 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11540 result.Raise("Grow request failed to node %s" % node)
11542 # TODO: Rewrite code to work properly
11543 # DRBD goes into sync mode for a short amount of time after executing the
11544 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11545 # calling "resize" in sync mode fails. Sleeping for a short amount of
11546 # time is a work-around.
11549 disk.RecordGrow(self.op.amount)
11550 self.cfg.Update(instance, feedback_fn)
11552 # Changes have been recorded, release node lock
11553 _ReleaseLocks(self, locking.LEVEL_NODE)
11555 # Downgrade lock while waiting for sync
11556 self.glm.downgrade(locking.LEVEL_INSTANCE)
11558 if self.op.wait_for_sync:
11559 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11561 self.proc.LogWarning("Disk sync-ing has not returned a good"
11562 " status; please check the instance")
11563 if instance.admin_state != constants.ADMINST_UP:
11564 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11565 elif instance.admin_state != constants.ADMINST_UP:
11566 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11567 " not supposed to be running because no wait for"
11568 " sync mode was requested")
11570 assert self.owned_locks(locking.LEVEL_NODE_RES)
11571 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11574 class LUInstanceQueryData(NoHooksLU):
11575 """Query runtime instance data.
11580 def ExpandNames(self):
11581 self.needed_locks = {}
11583 # Use locking if requested or when non-static information is wanted
11584 if not (self.op.static or self.op.use_locking):
11585 self.LogWarning("Non-static data requested, locks need to be acquired")
11586 self.op.use_locking = True
11588 if self.op.instances or not self.op.use_locking:
11589 # Expand instance names right here
11590 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11592 # Will use acquired locks
11593 self.wanted_names = None
11595 if self.op.use_locking:
11596 self.share_locks = _ShareAll()
11598 if self.wanted_names is None:
11599 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11601 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11603 self.needed_locks[locking.LEVEL_NODE] = []
11604 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11606 def DeclareLocks(self, level):
11607 if self.op.use_locking and level == locking.LEVEL_NODE:
11608 self._LockInstancesNodes()
11610 def CheckPrereq(self):
11611 """Check prerequisites.
11613 This only checks the optional instance list against the existing names.
11616 if self.wanted_names is None:
11617 assert self.op.use_locking, "Locking was not used"
11618 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11620 self.wanted_instances = \
11621 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
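# GetMultiInstanceInfo is expected to return (name, instance) pairs here;
# compat.snd keeps just the instance objects, since the names are already
# available in self.wanted_names.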
11623 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11624 """Returns the status of a block device
11627 if self.op.static or not node:
11630 self.cfg.SetDiskID(dev, node)
11632 result = self.rpc.call_blockdev_find(node, dev)
11636 result.Raise("Can't compute disk status for %s" % instance_name)
11638 status = result.payload
11642 return (status.dev_path, status.major, status.minor,
11643 status.sync_percent, status.estimated_time,
11644 status.is_degraded, status.ldisk_status)
11646 def _ComputeDiskStatus(self, instance, snode, dev):
11647 """Compute block device status.
11650 if dev.dev_type in constants.LDS_DRBD:
11651 # we change the snode then (otherwise we use the one passed in)
11652 if dev.logical_id[0] == instance.primary_node:
11653 snode = dev.logical_id[1]
11655 snode = dev.logical_id[0]
11657 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11658 instance.name, dev)
11659 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11662 dev_children = map(compat.partial(self._ComputeDiskStatus,
11669 "iv_name": dev.iv_name,
11670 "dev_type": dev.dev_type,
11671 "logical_id": dev.logical_id,
11672 "physical_id": dev.physical_id,
11673 "pstatus": dev_pstatus,
11674 "sstatus": dev_sstatus,
11675 "children": dev_children,
11680 def Exec(self, feedback_fn):
11681 """Gather and return data"""
11684 cluster = self.cfg.GetClusterInfo()
11686 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11687 for i in self.wanted_instances)
11688 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11689 if self.op.static or pnode.offline:
11690 remote_state = None
11692 self.LogWarning("Primary node %s is marked offline, returning static"
11693 " information only for instance %s" %
11694 (pnode.name, instance.name))
11696 remote_info = self.rpc.call_instance_info(instance.primary_node,
11698 instance.hypervisor)
11699 remote_info.Raise("Error checking node %s" % instance.primary_node)
11700 remote_info = remote_info.payload
11701 if remote_info and "state" in remote_info:
11702 remote_state = "up"
11704 if instance.admin_state == constants.ADMINST_UP:
11705 remote_state = "down"
11707 remote_state = instance.admin_state
11709 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11712 result[instance.name] = {
11713 "name": instance.name,
11714 "config_state": instance.admin_state,
11715 "run_state": remote_state,
11716 "pnode": instance.primary_node,
11717 "snodes": instance.secondary_nodes,
11719 # this happens to be the same format used for hooks
11720 "nics": _NICListToTuple(self, instance.nics),
11721 "disk_template": instance.disk_template,
11723 "hypervisor": instance.hypervisor,
11724 "network_port": instance.network_port,
11725 "hv_instance": instance.hvparams,
11726 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11727 "be_instance": instance.beparams,
11728 "be_actual": cluster.FillBE(instance),
11729 "os_instance": instance.osparams,
11730 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11731 "serial_no": instance.serial_no,
11732 "mtime": instance.mtime,
11733 "ctime": instance.ctime,
11734 "uuid": instance.uuid,
11740 class LUInstanceSetParams(LogicalUnit):
11741 """Modifies an instances's parameters.
11744 HPATH = "instance-modify"
11745 HTYPE = constants.HTYPE_INSTANCE
11748 def CheckArguments(self):
11749 if not (self.op.nics or self.op.disks or self.op.disk_template or
11750 self.op.hvparams or self.op.beparams or self.op.os_name or
11751 self.op.online_inst or self.op.offline_inst or
11752 self.op.runtime_mem):
11753 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11755 if self.op.hvparams:
11756 _CheckGlobalHvParams(self.op.hvparams)
11760 for disk_op, disk_dict in self.op.disks:
11761 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11762 if disk_op == constants.DDM_REMOVE:
11763 disk_addremove += 1
11765 elif disk_op == constants.DDM_ADD:
11766 disk_addremove += 1
11768 if not isinstance(disk_op, int):
11769 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11770 if not isinstance(disk_dict, dict):
11771 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11772 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11774 if disk_op == constants.DDM_ADD:
11775 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11776 if mode not in constants.DISK_ACCESS_SET:
11777 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11778 errors.ECODE_INVAL)
11779 size = disk_dict.get(constants.IDISK_SIZE, None)
11781 raise errors.OpPrereqError("Required disk parameter size missing",
11782 errors.ECODE_INVAL)
11785 except (TypeError, ValueError), err:
11786 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11787 str(err), errors.ECODE_INVAL)
11788 disk_dict[constants.IDISK_SIZE] = size
11790 # modification of disk
11791 if constants.IDISK_SIZE in disk_dict:
11792 raise errors.OpPrereqError("Disk size change not possible, use"
11793 " grow-disk", errors.ECODE_INVAL)
11795 if disk_addremove > 1:
11796 raise errors.OpPrereqError("Only one disk add or remove operation"
11797 " supported at a time", errors.ECODE_INVAL)
11799 if self.op.disks and self.op.disk_template is not None:
11800 raise errors.OpPrereqError("Disk template conversion and other disk"
11801 " changes not supported at the same time",
11802 errors.ECODE_INVAL)
11804 if (self.op.disk_template and
11805 self.op.disk_template in constants.DTS_INT_MIRROR and
11806 self.op.remote_node is None):
11807 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11808 " one requires specifying a secondary node",
11809 errors.ECODE_INVAL)
11813 for nic_op, nic_dict in self.op.nics:
11814 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11815 if nic_op == constants.DDM_REMOVE:
11818 elif nic_op == constants.DDM_ADD:
11821 if not isinstance(nic_op, int):
11822 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11823 if not isinstance(nic_dict, dict):
11824 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11825 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11827 # nic_dict should be a dict
11828 nic_ip = nic_dict.get(constants.INIC_IP, None)
11829 if nic_ip is not None:
11830 if nic_ip.lower() == constants.VALUE_NONE:
11831 nic_dict[constants.INIC_IP] = None
11833 if not netutils.IPAddress.IsValid(nic_ip):
11834 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11835 errors.ECODE_INVAL)
11837 nic_bridge = nic_dict.get("bridge", None)
11838 nic_link = nic_dict.get(constants.INIC_LINK, None)
11839 if nic_bridge and nic_link:
11840 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11841 " at the same time", errors.ECODE_INVAL)
11842 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11843 nic_dict["bridge"] = None
11844 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11845 nic_dict[constants.INIC_LINK] = None
11847 if nic_op == constants.DDM_ADD:
11848 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11849 if nic_mac is None:
11850 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11852 if constants.INIC_MAC in nic_dict:
11853 nic_mac = nic_dict[constants.INIC_MAC]
11854 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11855 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11857 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11858 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11859 " modifying an existing nic",
11860 errors.ECODE_INVAL)
11862 if nic_addremove > 1:
11863 raise errors.OpPrereqError("Only one NIC add or remove operation"
11864 " supported at a time", errors.ECODE_INVAL)
11866 def ExpandNames(self):
11867 self._ExpandAndLockInstance()
11868 # Can't even acquire node locks in shared mode as upcoming changes in
11869 # Ganeti 2.6 will start to modify the node object on disk conversion
11870 self.needed_locks[locking.LEVEL_NODE] = []
11871 self.needed_locks[locking.LEVEL_NODE_RES] = []
11872 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11874 def DeclareLocks(self, level):
11875 if level == locking.LEVEL_NODE:
11876 self._LockInstancesNodes()
11877 if self.op.disk_template and self.op.remote_node:
11878 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11879 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11880 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11882 self.needed_locks[locking.LEVEL_NODE_RES] = \
11883 self.needed_locks[locking.LEVEL_NODE][:]
11885 def BuildHooksEnv(self):
11886 """Build hooks env.
11888 This runs on the master, primary and secondaries.
11892 if constants.BE_MINMEM in self.be_new:
11893 args["minmem"] = self.be_new[constants.BE_MINMEM]
11894 if constants.BE_MAXMEM in self.be_new:
11895 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11896 if constants.BE_VCPUS in self.be_new:
11897 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11898 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11899 # information at all.
11902 nic_override = dict(self.op.nics)
11903 for idx, nic in enumerate(self.instance.nics):
11904 if idx in nic_override:
11905 this_nic_override = nic_override[idx]
11907 this_nic_override = {}
11908 if constants.INIC_IP in this_nic_override:
11909 ip = this_nic_override[constants.INIC_IP]
11912 if constants.INIC_MAC in this_nic_override:
11913 mac = this_nic_override[constants.INIC_MAC]
11916 if idx in self.nic_pnew:
11917 nicparams = self.nic_pnew[idx]
11919 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11920 mode = nicparams[constants.NIC_MODE]
11921 link = nicparams[constants.NIC_LINK]
11922 args["nics"].append((ip, mac, mode, link))
11923 if constants.DDM_ADD in nic_override:
11924 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11925 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11926 nicparams = self.nic_pnew[constants.DDM_ADD]
11927 mode = nicparams[constants.NIC_MODE]
11928 link = nicparams[constants.NIC_LINK]
11929 args["nics"].append((ip, mac, mode, link))
11930 elif constants.DDM_REMOVE in nic_override:
11931 del args["nics"][-1]
11933 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11934 if self.op.disk_template:
11935 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11936 if self.op.runtime_mem:
11937 env["RUNTIME_MEMORY"] = self.op.runtime_mem
11941 def BuildHooksNodes(self):
11942 """Build hooks nodes.
11945 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11948 def CheckPrereq(self):
11949 """Check prerequisites.
11951 This only checks the instance list against the existing names.
11954 # checking the new params on the primary/secondary nodes
11956 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11957 cluster = self.cluster = self.cfg.GetClusterInfo()
11958 assert self.instance is not None, \
11959 "Cannot retrieve locked instance %s" % self.op.instance_name
11960 pnode = instance.primary_node
11961 nodelist = list(instance.all_nodes)
11962 pnode_info = self.cfg.GetNodeInfo(pnode)
11963 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11966 if self.op.os_name and not self.op.force:
11967 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11968 self.op.force_variant)
11969 instance_os = self.op.os_name
11971 instance_os = instance.os
11973 if self.op.disk_template:
11974 if instance.disk_template == self.op.disk_template:
11975 raise errors.OpPrereqError("Instance already has disk template %s" %
11976 instance.disk_template, errors.ECODE_INVAL)
11978 if (instance.disk_template,
11979 self.op.disk_template) not in self._DISK_CONVERSIONS:
11980 raise errors.OpPrereqError("Unsupported disk template conversion from"
11981 " %s to %s" % (instance.disk_template,
11982 self.op.disk_template),
11983 errors.ECODE_INVAL)
11984 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11985 msg="cannot change disk template")
11986 if self.op.disk_template in constants.DTS_INT_MIRROR:
11987 if self.op.remote_node == pnode:
11988 raise errors.OpPrereqError("Given new secondary node %s is the same"
11989 " as the primary node of the instance" %
11990 self.op.remote_node, errors.ECODE_STATE)
11991 _CheckNodeOnline(self, self.op.remote_node)
11992 _CheckNodeNotDrained(self, self.op.remote_node)
11993 # FIXME: here we assume that the old disk template is DT_PLAIN
11994 assert instance.disk_template == constants.DT_PLAIN
11995 disks = [{constants.IDISK_SIZE: d.size,
11996 constants.IDISK_VG: d.logical_id[0]}
11997 for d in instance.disks]
11998 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11999 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12001 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12002 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12003 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12004 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12005 ignore=self.op.ignore_ipolicy)
12006 if pnode_info.group != snode_info.group:
12007 self.LogWarning("The primary and secondary nodes are in two"
12008 " different node groups; the disk parameters"
12009 " from the first disk's node group will be"
12012 # hvparams processing
12013 if self.op.hvparams:
12014 hv_type = instance.hypervisor
12015 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12016 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12017 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12020 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12021 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12022 self.hv_proposed = self.hv_new = hv_new # the new actual values
12023 self.hv_inst = i_hvdict # the new dict (without defaults)
12025 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12027 self.hv_new = self.hv_inst = {}
12029 # beparams processing
12030 if self.op.beparams:
12031 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12033 objects.UpgradeBeParams(i_bedict)
12034 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12035 be_new = cluster.SimpleFillBE(i_bedict)
12036 self.be_proposed = self.be_new = be_new # the new actual values
12037 self.be_inst = i_bedict # the new dict (without defaults)
12039 self.be_new = self.be_inst = {}
12040 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12041 be_old = cluster.FillBE(instance)
12043 # CPU param validation -- checking every time a parameter is
12044 # changed to cover all cases where either CPU mask or vcpus have changed
12046 if (constants.BE_VCPUS in self.be_proposed and
12047 constants.HV_CPU_MASK in self.hv_proposed):
12049 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12050 # Verify mask is consistent with number of vCPUs. Can skip this
12051 # test if only 1 entry in the CPU mask, which means same mask
12052 # is applied to all vCPUs.
12053 if (len(cpu_list) > 1 and
12054 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12055 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12057 (self.be_proposed[constants.BE_VCPUS],
12058 self.hv_proposed[constants.HV_CPU_MASK]),
12059 errors.ECODE_INVAL)
12061 # Only perform this test if a new CPU mask is given
12062 if constants.HV_CPU_MASK in self.hv_new:
12063 # Calculate the largest CPU number requested
12064 max_requested_cpu = max(map(max, cpu_list))
12065 # Check that all of the instance's nodes have enough physical CPUs to
12066 # satisfy the requested CPU mask
12067 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12068 max_requested_cpu + 1, instance.hypervisor)
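# A hedged example of the consistency check above: with BE_VCPUS set to 2, a
# mask that parses into three per-vCPU entries is rejected, while a mask with
# a single entry is accepted and applied to every vCPU; the physical-CPU check
# then ensures every node can satisfy the highest CPU number requested.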
12070 # osparams processing
12071 if self.op.osparams:
12072 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12073 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12074 self.os_inst = i_osdict # the new dict (without defaults)
12080 #TODO(dynmem): do the appropriate check involving MINMEM
12081 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12082 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12083 mem_check_list = [pnode]
12084 if be_new[constants.BE_AUTO_BALANCE]:
12085 # either we changed auto_balance to yes or it was already enabled
12086 mem_check_list.extend(instance.secondary_nodes)
12087 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12088 instance.hypervisor)
12089 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12090 [instance.hypervisor])
12091 pninfo = nodeinfo[pnode]
12092 msg = pninfo.fail_msg
12094 # Assume the primary node is unreachable and go ahead
12095 self.warn.append("Can't get info from primary node %s: %s" %
12098 (_, _, (pnhvinfo, )) = pninfo.payload
12099 if not isinstance(pnhvinfo.get("memory_free", None), int):
12100 self.warn.append("Node data from primary node %s doesn't contain"
12101 " free memory information" % pnode)
12102 elif instance_info.fail_msg:
12103 self.warn.append("Can't get instance runtime information: %s" %
12104 instance_info.fail_msg)
12106 if instance_info.payload:
12107 current_mem = int(instance_info.payload["memory"])
12109 # Assume instance not running
12110 # (there is a slight race condition here, but it's not very
12111 # probable, and we have no other way to check)
12112 # TODO: Describe race condition
12114 #TODO(dynmem): do the appropriate check involving MINMEM
12115 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12116 pnhvinfo["memory_free"])
12118 raise errors.OpPrereqError("This change will prevent the instance"
12119 " from starting, due to %d MB of memory"
12120 " missing on its primary node" %
12122 errors.ECODE_NORES)
12124 if be_new[constants.BE_AUTO_BALANCE]:
12125 for node, nres in nodeinfo.items():
12126 if node not in instance.secondary_nodes:
12128 nres.Raise("Can't get info from secondary node %s" % node,
12129 prereq=True, ecode=errors.ECODE_STATE)
12130 (_, _, (nhvinfo, )) = nres.payload
12131 if not isinstance(nhvinfo.get("memory_free", None), int):
12132 raise errors.OpPrereqError("Secondary node %s didn't return free"
12133 " memory information" % node,
12134 errors.ECODE_STATE)
12135 #TODO(dynmem): do the appropriate check involving MINMEM
12136 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12137 raise errors.OpPrereqError("This change will prevent the instance"
12138 " from failover to its secondary node"
12139 " %s, due to not enough memory" % node,
12140 errors.ECODE_STATE)
12142 if self.op.runtime_mem:
12143 remote_info = self.rpc.call_instance_info(instance.primary_node,
12145 instance.hypervisor)
12146 remote_info.Raise("Error checking node %s" % instance.primary_node)
12147 if not remote_info.payload: # not running already
12148 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12149 errors.ECODE_STATE)
12151 current_memory = remote_info.payload["memory"]
12152 if (not self.op.force and
12153 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12154 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12155 raise errors.OpPrereqError("Instance %s must have memory between %d"
12156 " and %d MB of memory unless --force is"
12157 " given" % (instance.name,
12158 self.be_proposed[constants.BE_MINMEM],
12159 self.be_proposed[constants.BE_MAXMEM]),
12160 errors.ECODE_INVAL)
12162 if self.op.runtime_mem > current_memory:
12163 _CheckNodeFreeMemory(self, instance.primary_node,
12164 "ballooning memory for instance %s" %
12166 self.op.runtime_mem - current_memory,
12167 instance.hypervisor)
12171 self.nic_pinst = {}
12172 for nic_op, nic_dict in self.op.nics:
12173 if nic_op == constants.DDM_REMOVE:
12174 if not instance.nics:
12175 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
12176 errors.ECODE_INVAL)
12178 if nic_op != constants.DDM_ADD:
12180 if not instance.nics:
12181 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
12182 " no NICs" % nic_op,
12183 errors.ECODE_INVAL)
12184 if nic_op < 0 or nic_op >= len(instance.nics):
12185 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
12187 (nic_op, len(instance.nics) - 1),
12188 errors.ECODE_INVAL)
12189 old_nic_params = instance.nics[nic_op].nicparams
12190 old_nic_ip = instance.nics[nic_op].ip
12192 old_nic_params = {}
12195 update_params_dict = dict([(key, nic_dict[key])
12196 for key in constants.NICS_PARAMETERS
12197 if key in nic_dict])
12199 if "bridge" in nic_dict:
12200 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
12202 new_nic_params = _GetUpdatedParams(old_nic_params,
12203 update_params_dict)
12204 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
12205 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
12206 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
12207 self.nic_pinst[nic_op] = new_nic_params
12208 self.nic_pnew[nic_op] = new_filled_nic_params
12209 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
12211 if new_nic_mode == constants.NIC_MODE_BRIDGED:
12212 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
12213 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
12215 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
12217 self.warn.append(msg)
12219 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12220 if new_nic_mode == constants.NIC_MODE_ROUTED:
12221 if constants.INIC_IP in nic_dict:
12222 nic_ip = nic_dict[constants.INIC_IP]
12224 nic_ip = old_nic_ip
12226 raise errors.OpPrereqError("Cannot set the nic ip to None"
12227 " on a routed nic", errors.ECODE_INVAL)
12228 if constants.INIC_MAC in nic_dict:
12229 nic_mac = nic_dict[constants.INIC_MAC]
12230 if nic_mac is None:
12231 raise errors.OpPrereqError("Cannot set the nic mac to None",
12232 errors.ECODE_INVAL)
12233 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12234 # otherwise generate the mac
12235 nic_dict[constants.INIC_MAC] = \
12236 self.cfg.GenerateMAC(self.proc.GetECId())
12238 # or validate/reserve the current one
12240 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
12241 except errors.ReservationError:
12242 raise errors.OpPrereqError("MAC address %s already in use"
12243 " in cluster" % nic_mac,
12244 errors.ECODE_NOTUNIQUE)
12247 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12248 raise errors.OpPrereqError("Disk operations not supported for"
12249 " diskless instances",
12250 errors.ECODE_INVAL)
12251 for disk_op, _ in self.op.disks:
12252 if disk_op == constants.DDM_REMOVE:
12253 if len(instance.disks) == 1:
12254 raise errors.OpPrereqError("Cannot remove the last disk of"
12255 " an instance", errors.ECODE_INVAL)
12256 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12257 msg="cannot remove disks")
12259 if (disk_op == constants.DDM_ADD and
12260 len(instance.disks) >= constants.MAX_DISKS):
12261 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
12262 " add more" % constants.MAX_DISKS,
12263 errors.ECODE_STATE)
12264 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
12266 if disk_op < 0 or disk_op >= len(instance.disks):
12267 raise errors.OpPrereqError("Invalid disk index %s, valid values"
12269 (disk_op, len(instance.disks)),
12270 errors.ECODE_INVAL)
12272 # disabling the instance
12273 if self.op.offline_inst:
12274 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12275 msg="cannot change instance state to offline")
12277 # enabling the instance
12278 if self.op.online_inst:
12279 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
12280 msg="cannot make instance go online")
12282 def _ConvertPlainToDrbd(self, feedback_fn):
12283 """Converts an instance from plain to drbd.
12286 feedback_fn("Converting template to drbd")
12287 instance = self.instance
12288 pnode = instance.primary_node
12289 snode = self.op.remote_node
12291 assert instance.disk_template == constants.DT_PLAIN
12293 # create a fake disk info for _GenerateDiskTemplate
12294 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12295 constants.IDISK_VG: d.logical_id[0]}
12296 for d in instance.disks]
12297 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12298 instance.name, pnode, [snode],
12299 disk_info, None, None, 0, feedback_fn,
12301 info = _GetInstanceInfoText(instance)
12302 feedback_fn("Creating aditional volumes...")
12303 # first, create the missing data and meta devices
12304 for disk in new_disks:
12305 # unfortunately this is... not too nice
12306 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12308 for child in disk.children:
12309 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12310 # at this stage, all new LVs have been created; we can rename the old ones
12312 feedback_fn("Renaming original volumes...")
12313 rename_list = [(o, n.children[0].logical_id)
12314 for (o, n) in zip(instance.disks, new_disks)]
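# Renaming the original data LVs to the names recorded as children of the new
# DRBD disks is what turns the existing volumes into the DRBD data devices;
# the meta and secondary-node LVs were created from scratch above.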
12315 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12316 result.Raise("Failed to rename original LVs")
12318 feedback_fn("Initializing DRBD devices...")
12319 # all child devices are in place, we can now create the DRBD devices
12320 for disk in new_disks:
12321 for node in [pnode, snode]:
12322 f_create = node == pnode
12323 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12325 # at this point, the instance has been modified
12326 instance.disk_template = constants.DT_DRBD8
12327 instance.disks = new_disks
12328 self.cfg.Update(instance, feedback_fn)
12330 # Release node locks while waiting for sync
12331 _ReleaseLocks(self, locking.LEVEL_NODE)
12333 # disks are created, waiting for sync
12334 disk_abort = not _WaitForSync(self, instance,
12335 oneshot=not self.op.wait_for_sync)
12337 raise errors.OpExecError("There are some degraded disks for"
12338 " this instance, please cleanup manually")
12340 # Node resource locks will be released by caller
12342 def _ConvertDrbdToPlain(self, feedback_fn):
12343 """Converts an instance from drbd to plain.
12346 instance = self.instance
12348 assert len(instance.secondary_nodes) == 1
12349 assert instance.disk_template == constants.DT_DRBD8
12351 pnode = instance.primary_node
12352 snode = instance.secondary_nodes[0]
12353 feedback_fn("Converting template to plain")
12355 old_disks = instance.disks
12356 new_disks = [d.children[0] for d in old_disks]
12358 # copy over size and mode
12359 for parent, child in zip(old_disks, new_disks):
12360 child.size = parent.size
12361 child.mode = parent.mode
12363 # update instance structure
12364 instance.disks = new_disks
12365 instance.disk_template = constants.DT_PLAIN
12366 self.cfg.Update(instance, feedback_fn)
12368 # Release locks in case removing disks takes a while
12369 _ReleaseLocks(self, locking.LEVEL_NODE)
12371 feedback_fn("Removing volumes on the secondary node...")
12372 for disk in old_disks:
12373 self.cfg.SetDiskID(disk, snode)
12374 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12376 self.LogWarning("Could not remove block device %s on node %s,"
12377 " continuing anyway: %s", disk.iv_name, snode, msg)
12379 feedback_fn("Removing unneeded volumes on the primary node...")
12380 for idx, disk in enumerate(old_disks):
12381 meta = disk.children[1]
12382 self.cfg.SetDiskID(meta, pnode)
12383 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12385 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12386 " continuing anyway: %s", idx, pnode, msg)
12388 # this is a DRBD disk, return its port to the pool
12389 for disk in old_disks:
12390 tcp_port = disk.logical_id[2]
12391 self.cfg.AddTcpUdpPort(tcp_port)
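# logical_id[2] is the TCP port slot of the DRBD 6-tuple
# (node_A, node_B, port, minor_A, minor_B, secret), so each removed disk's
# port goes back into the cluster's TCP/UDP port pool here.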
12393 # Node resource locks will be released by caller
12395 def Exec(self, feedback_fn):
12396 """Modifies an instance.
12398 All parameters take effect only at the next restart of the instance.
12401 # Process here the warnings from CheckPrereq, as we don't have a
12402 # feedback_fn there.
12403 for warn in self.warn:
12404 feedback_fn("WARNING: %s" % warn)
12406 assert ((self.op.disk_template is None) ^
12407 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12408 "Not owning any node resource locks"
12411 instance = self.instance
12414 if self.op.runtime_mem:
12415 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12417 self.op.runtime_mem)
12418 rpcres.Raise("Cannot modify instance runtime memory")
12419 result.append(("runtime_memory", self.op.runtime_mem))
12422 for disk_op, disk_dict in self.op.disks:
12423 if disk_op == constants.DDM_REMOVE:
12424 # remove the last disk
12425 device = instance.disks.pop()
12426 device_idx = len(instance.disks)
12427 for node, disk in device.ComputeNodeTree(instance.primary_node):
12428 self.cfg.SetDiskID(disk, node)
12429 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12431 self.LogWarning("Could not remove disk/%d on node %s: %s,"
12432 " continuing anyway", device_idx, node, msg)
12433 result.append(("disk/%d" % device_idx, "remove"))
12435 # if this is a DRBD disk, return its port to the pool
12436 if device.dev_type in constants.LDS_DRBD:
12437 tcp_port = device.logical_id[2]
12438 self.cfg.AddTcpUdpPort(tcp_port)
12439 elif disk_op == constants.DDM_ADD:
12441 if instance.disk_template in (constants.DT_FILE,
12442 constants.DT_SHARED_FILE):
12443 file_driver, file_path = instance.disks[0].logical_id
12444 file_path = os.path.dirname(file_path)
12446 file_driver = file_path = None
12447 disk_idx_base = len(instance.disks)
12448 new_disk = _GenerateDiskTemplate(self,
12449 instance.disk_template,
12450 instance.name, instance.primary_node,
12451 instance.secondary_nodes,
12457 self.diskparams)[0]
12458 instance.disks.append(new_disk)
12459 info = _GetInstanceInfoText(instance)
12461 logging.info("Creating volume %s for instance %s",
12462 new_disk.iv_name, instance.name)
12463 # Note: this needs to be kept in sync with _CreateDisks
12465 for node in instance.all_nodes:
12466 f_create = node == instance.primary_node
12468 _CreateBlockDev(self, node, instance, new_disk,
12469 f_create, info, f_create)
12470 except errors.OpExecError, err:
12471 self.LogWarning("Failed to create volume %s (%s) on"
12473 new_disk.iv_name, new_disk, node, err)
12474 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
12475 (new_disk.size, new_disk.mode)))
12477 # change a given disk
12478 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
12479 result.append(("disk.mode/%d" % disk_op,
12480 disk_dict[constants.IDISK_MODE]))
12482 if self.op.disk_template:
12484 check_nodes = set(instance.all_nodes)
12485 if self.op.remote_node:
12486 check_nodes.add(self.op.remote_node)
12487 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12488 owned = self.owned_locks(level)
12489 assert not (check_nodes - owned), \
12490 ("Not owning the correct locks, owning %r, expected at least %r" %
12491 (owned, check_nodes))
12493 r_shut = _ShutdownInstanceDisks(self, instance)
12495 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12496 " proceed with disk template conversion")
12497 mode = (instance.disk_template, self.op.disk_template)
12499 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12501 self.cfg.ReleaseDRBDMinors(instance.name)
12503 result.append(("disk_template", self.op.disk_template))
12505 assert instance.disk_template == self.op.disk_template, \
12506 ("Expected disk template '%s', found '%s'" %
12507 (self.op.disk_template, instance.disk_template))
12509 # Release node and resource locks if there are any (they might already have
12510 # been released during disk conversion)
12511 _ReleaseLocks(self, locking.LEVEL_NODE)
12512 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12515 for nic_op, nic_dict in self.op.nics:
12516 if nic_op == constants.DDM_REMOVE:
12517 # remove the last nic
12518 del instance.nics[-1]
12519 result.append(("nic.%d" % len(instance.nics), "remove"))
12520 elif nic_op == constants.DDM_ADD:
12521 # mac and bridge should be set by now
12522 mac = nic_dict[constants.INIC_MAC]
12523 ip = nic_dict.get(constants.INIC_IP, None)
12524 nicparams = self.nic_pinst[constants.DDM_ADD]
12525 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12526 instance.nics.append(new_nic)
12527 result.append(("nic.%d" % (len(instance.nics) - 1),
12528 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12529 (new_nic.mac, new_nic.ip,
12530 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12531 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12534 for key in (constants.INIC_MAC, constants.INIC_IP):
12535 if key in nic_dict:
12536 setattr(instance.nics[nic_op], key, nic_dict[key])
12537 if nic_op in self.nic_pinst:
12538 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12539 for key, val in nic_dict.iteritems():
12540 result.append(("nic.%s/%d" % (key, nic_op), val))
12543 if self.op.hvparams:
12544 instance.hvparams = self.hv_inst
12545 for key, val in self.op.hvparams.iteritems():
12546 result.append(("hv/%s" % key, val))
12549 if self.op.beparams:
12550 instance.beparams = self.be_inst
12551 for key, val in self.op.beparams.iteritems():
12552 result.append(("be/%s" % key, val))
12555 if self.op.os_name:
12556 instance.os = self.op.os_name
12559 if self.op.osparams:
12560 instance.osparams = self.os_inst
12561 for key, val in self.op.osparams.iteritems():
12562 result.append(("os/%s" % key, val))
12564 # online/offline instance
12565 if self.op.online_inst:
12566 self.cfg.MarkInstanceDown(instance.name)
12567 result.append(("admin_state", constants.ADMINST_DOWN))
12568 if self.op.offline_inst:
12569 self.cfg.MarkInstanceOffline(instance.name)
12570 result.append(("admin_state", constants.ADMINST_OFFLINE))
12572 self.cfg.Update(instance, feedback_fn)
12574 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12575 self.owned_locks(locking.LEVEL_NODE)), \
12576 "All node locks should have been released by now"
12580 _DISK_CONVERSIONS = {
12581 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12582 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12586 class LUInstanceChangeGroup(LogicalUnit):
12587 HPATH = "instance-change-group"
12588 HTYPE = constants.HTYPE_INSTANCE
12591 def ExpandNames(self):
12592 self.share_locks = _ShareAll()
12593 self.needed_locks = {
12594 locking.LEVEL_NODEGROUP: [],
12595 locking.LEVEL_NODE: [],
12598 self._ExpandAndLockInstance()
12600 if self.op.target_groups:
12601 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12602 self.op.target_groups)
12604 self.req_target_uuids = None
12606 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12608 def DeclareLocks(self, level):
12609 if level == locking.LEVEL_NODEGROUP:
12610 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12612 if self.req_target_uuids:
12613 lock_groups = set(self.req_target_uuids)
12615 # Lock all groups used by instance optimistically; this requires going
12616 # via the node before it's locked, requiring verification later on
12617 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12618 lock_groups.update(instance_groups)
12620 # No target groups, need to lock all of them
12621 lock_groups = locking.ALL_SET
12623 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12625 elif level == locking.LEVEL_NODE:
12626 if self.req_target_uuids:
12627 # Lock all nodes used by instances
12628 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12629 self._LockInstancesNodes()
12631 # Lock all nodes in all potential target groups
12632 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12633 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12634 member_nodes = [node_name
12635 for group in lock_groups
12636 for node_name in self.cfg.GetNodeGroup(group).members]
12637 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12639 # Lock all nodes as all groups are potential targets
12640 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12642 def CheckPrereq(self):
12643 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12644 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12645 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12647 assert (self.req_target_uuids is None or
12648 owned_groups.issuperset(self.req_target_uuids))
12649 assert owned_instances == set([self.op.instance_name])
12651 # Get instance information
12652 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12654 # Check if node groups for locked instance are still correct
12655 assert owned_nodes.issuperset(self.instance.all_nodes), \
12656 ("Instance %s's nodes changed while we kept the lock" %
12657 self.op.instance_name)
12659 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12662 if self.req_target_uuids:
12663 # User requested specific target groups
12664 self.target_uuids = self.req_target_uuids
12666 # All groups except those used by the instance are potential targets
12667 self.target_uuids = owned_groups - inst_groups
12669 conflicting_groups = self.target_uuids & inst_groups
12670 if conflicting_groups:
12671 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12672 " used by the instance '%s'" %
12673 (utils.CommaJoin(conflicting_groups),
12674 self.op.instance_name),
12675 errors.ECODE_INVAL)
12677 if not self.target_uuids:
12678 raise errors.OpPrereqError("There are no possible target groups",
12679 errors.ECODE_INVAL)
12681 def BuildHooksEnv(self):
12682 """Build hooks env.
12685 assert self.target_uuids
12688 "TARGET_GROUPS": " ".join(self.target_uuids),
12691 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12695 def BuildHooksNodes(self):
12696 """Build hooks nodes.
12699 mn = self.cfg.GetMasterNode()
12700 return ([mn], [mn])
12702 def Exec(self, feedback_fn):
12703 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12705 assert instances == [self.op.instance_name], "Instance not locked"
12707 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12708 instances=instances, target_groups=list(self.target_uuids))
12710 ial.Run(self.op.iallocator)
12712 if not ial.success:
12713 raise errors.OpPrereqError("Can't compute solution for changing group of"
12714 " instance '%s' using iallocator '%s': %s" %
12715 (self.op.instance_name, self.op.iallocator,
12717 errors.ECODE_NORES)
12719 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12721 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12722 " instance '%s'", len(jobs), self.op.instance_name)
12724 return ResultWithJobs(jobs)
12727 class LUBackupQuery(NoHooksLU):
12728 """Query the exports list
12733 def ExpandNames(self):
12734 self.needed_locks = {}
12735 self.share_locks[locking.LEVEL_NODE] = 1
12736 if not self.op.nodes:
12737 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12739 self.needed_locks[locking.LEVEL_NODE] = \
12740 _GetWantedNodes(self, self.op.nodes)
12742 def Exec(self, feedback_fn):
12743 """Compute the list of all the exported system images.
12746 @return: a dictionary with the structure node->(export-list)
12747 where export-list is a list of the instances exported on
12751 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12752 rpcresult = self.rpc.call_export_list(self.nodes)
12754 for node in rpcresult:
12755 if rpcresult[node].fail_msg:
12756 result[node] = False
12758 result[node] = rpcresult[node].payload
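# Hedged example (added for illustration, not in the original module): the
# dictionary built above maps each queried node name either to its export
# list or to False when the RPC to that node failed, e.g.
#
#   {"node1.example.com": ["instance1.example.com", "instance2.example.com"],
#    "node2.example.com": False}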
12763 class LUBackupPrepare(NoHooksLU):
12764 """Prepares an instance for an export and returns useful information.
12769 def ExpandNames(self):
12770 self._ExpandAndLockInstance()
12772 def CheckPrereq(self):
12773 """Check prerequisites.
12776 instance_name = self.op.instance_name
12778 self.instance = self.cfg.GetInstanceInfo(instance_name)
12779 assert self.instance is not None, \
12780 "Cannot retrieve locked instance %s" % self.op.instance_name
12781 _CheckNodeOnline(self, self.instance.primary_node)
12783 self._cds = _GetClusterDomainSecret()
12785 def Exec(self, feedback_fn):
12786 """Prepares an instance for an export.
12789 instance = self.instance
12791 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12792 salt = utils.GenerateSecret(8)
12794 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12795 result = self.rpc.call_x509_cert_create(instance.primary_node,
12796 constants.RIE_CERT_VALIDITY)
12797 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12799 (name, cert_pem) = result.payload
12801 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12805 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12806 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12808 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12814 class LUBackupExport(LogicalUnit):
12815 """Export an instance to an image in the cluster.
12818 HPATH = "instance-export"
12819 HTYPE = constants.HTYPE_INSTANCE
12822 def CheckArguments(self):
12823 """Check the arguments.
12826 self.x509_key_name = self.op.x509_key_name
12827 self.dest_x509_ca_pem = self.op.destination_x509_ca
12829 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12830 if not self.x509_key_name:
12831 raise errors.OpPrereqError("Missing X509 key name for encryption",
12832 errors.ECODE_INVAL)
12834 if not self.dest_x509_ca_pem:
12835 raise errors.OpPrereqError("Missing destination X509 CA",
12836 errors.ECODE_INVAL)
12838 def ExpandNames(self):
12839 self._ExpandAndLockInstance()
12841 # Lock all nodes for local exports
12842 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12843 # FIXME: lock only instance primary and destination node
12845 # Sad but true: for now we have to lock all nodes, as we don't know where
12846 # the previous export might be, and in this LU we search for it and
12847 # remove it from its current node. In the future we could fix this by:
12848 # - making a tasklet to search (share-lock all), then create the
12849 # new one, then one to remove, after
12850 # - removing the removal operation altogether
12851 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12853 def DeclareLocks(self, level):
12854 """Last minute lock declaration."""
12855 # All nodes are locked anyway, so nothing to do here.
12857 def BuildHooksEnv(self):
12858 """Build hooks env.
12860 This will run on the master, primary node and target node.
12864 "EXPORT_MODE": self.op.mode,
12865 "EXPORT_NODE": self.op.target_node,
12866 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12867 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12868 # TODO: Generic function for boolean env variables
12869 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12872 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12876 def BuildHooksNodes(self):
12877 """Build hooks nodes.
12880 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12882 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12883 nl.append(self.op.target_node)
12887 def CheckPrereq(self):
12888 """Check prerequisites.
12890 This checks that the instance and node names are valid.
12893 instance_name = self.op.instance_name
12895 self.instance = self.cfg.GetInstanceInfo(instance_name)
12896 assert self.instance is not None, \
12897 "Cannot retrieve locked instance %s" % self.op.instance_name
12898 _CheckNodeOnline(self, self.instance.primary_node)
12900 if (self.op.remove_instance and
12901 self.instance.admin_state == constants.ADMINST_UP and
12902 not self.op.shutdown):
12903 raise errors.OpPrereqError("Can not remove instance without shutting it"
12906 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12907 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12908 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12909 assert self.dst_node is not None
12911 _CheckNodeOnline(self, self.dst_node.name)
12912 _CheckNodeNotDrained(self, self.dst_node.name)
12915 self.dest_disk_info = None
12916 self.dest_x509_ca = None
12918 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12919 self.dst_node = None
12921 if len(self.op.target_node) != len(self.instance.disks):
12922 raise errors.OpPrereqError(("Received destination information for %s"
12923 " disks, but instance %s has %s disks") %
12924 (len(self.op.target_node), instance_name,
12925 len(self.instance.disks)),
12926 errors.ECODE_INVAL)
12928 cds = _GetClusterDomainSecret()
12930 # Check X509 key name
12932 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12933 except (TypeError, ValueError), err:
12934 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12936 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12937 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12938 errors.ECODE_INVAL)
12940 # Load and verify CA
12942 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12943 except OpenSSL.crypto.Error, err:
12944 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12945 (err, ), errors.ECODE_INVAL)
12947 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12948 if errcode is not None:
12949 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12950 (msg, ), errors.ECODE_INVAL)
12952 self.dest_x509_ca = cert
12954 # Verify target information
12956 for idx, disk_data in enumerate(self.op.target_node):
12958 (host, port, magic) = \
12959 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12960 except errors.GenericError, err:
12961 raise errors.OpPrereqError("Target info for disk %s: %s" %
12962 (idx, err), errors.ECODE_INVAL)
12964 disk_info.append((host, port, magic))
12966 assert len(disk_info) == len(self.op.target_node)
12967 self.dest_disk_info = disk_info
12970 raise errors.ProgrammerError("Unhandled export mode %r" %
12973 # instance disk type verification
12974 # TODO: Implement export support for file-based disks
12975 for disk in self.instance.disks:
12976 if disk.dev_type == constants.LD_FILE:
12977 raise errors.OpPrereqError("Export not supported for instances with"
12978 " file-based disks", errors.ECODE_INVAL)
12980 def _CleanupExports(self, feedback_fn):
12981 """Removes exports of current instance from all other nodes.
12983 If an instance in a cluster with nodes A..D was exported to node C, its
12984 exports will be removed from the nodes A, B and D.
12987 assert self.op.mode != constants.EXPORT_MODE_REMOTE
12989 nodelist = self.cfg.GetNodeList()
12990 nodelist.remove(self.dst_node.name)
12992 # on one-node clusters nodelist will be empty after the removal
12993 # if we proceed the backup would be removed because OpBackupQuery
12994 # substitutes an empty list with the full cluster node list.
12995 iname = self.instance.name
12997 feedback_fn("Removing old exports for instance %s" % iname)
12998 exportlist = self.rpc.call_export_list(nodelist)
12999 for node in exportlist:
13000 if exportlist[node].fail_msg:
13002 if iname in exportlist[node].payload:
13003 msg = self.rpc.call_export_remove(node, iname).fail_msg
13005 self.LogWarning("Could not remove older export for instance %s"
13006 " on node %s: %s", iname, node, msg)
13008 def Exec(self, feedback_fn):
13009 """Export an instance to an image in the cluster.
13012 assert self.op.mode in constants.EXPORT_MODES
13014 instance = self.instance
13015 src_node = instance.primary_node
13017 if self.op.shutdown:
13018 # shutdown the instance, but not the disks
13019 feedback_fn("Shutting down instance %s" % instance.name)
13020 result = self.rpc.call_instance_shutdown(src_node, instance,
13021 self.op.shutdown_timeout)
13022 # TODO: Maybe ignore failures if ignore_remove_failures is set
13023 result.Raise("Could not shutdown instance %s on"
13024 " node %s" % (instance.name, src_node))
13026 # set the disks ID correctly since call_instance_start needs the
13027 # correct drbd minor to create the symlinks
13028 for disk in instance.disks:
13029 self.cfg.SetDiskID(disk, src_node)
13031 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13034 # Activate the instance disks if we're exporting a stopped instance
13035 feedback_fn("Activating disks for %s" % instance.name)
13036 _StartInstanceDisks(self, instance, None)
13039 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13042 helper.CreateSnapshots()
13044 if (self.op.shutdown and
13045 instance.admin_state == constants.ADMINST_UP and
13046 not self.op.remove_instance):
13047 assert not activate_disks
13048 feedback_fn("Starting instance %s" % instance.name)
13049 result = self.rpc.call_instance_start(src_node,
13050 (instance, None, None), False)
13051 msg = result.fail_msg
13053 feedback_fn("Failed to start instance: %s" % msg)
13054 _ShutdownInstanceDisks(self, instance)
13055 raise errors.OpExecError("Could not start instance: %s" % msg)
13057 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13058 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13059 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13060 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13061 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13063 (key_name, _, _) = self.x509_key_name
13066 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13069 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13070 key_name, dest_ca_pem,
13075 # Check for backwards compatibility
13076 assert len(dresults) == len(instance.disks)
13077 assert compat.all(isinstance(i, bool) for i in dresults), \
13078 "Not all results are boolean: %r" % dresults
13082 feedback_fn("Deactivating disks for %s" % instance.name)
13083 _ShutdownInstanceDisks(self, instance)
13085 if not (compat.all(dresults) and fin_resu):
13088 failures.append("export finalization")
13089 if not compat.all(dresults):
13090 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13092 failures.append("disk export: disk(s) %s" % fdsk)
13094 raise errors.OpExecError("Export failed, errors in %s" %
13095 utils.CommaJoin(failures))
13097 # At this point, the export was successful, we can cleanup/finish
13099 # Remove instance if requested
13100 if self.op.remove_instance:
13101 feedback_fn("Removing instance %s" % instance.name)
13102 _RemoveInstance(self, feedback_fn, instance,
13103 self.op.ignore_remove_failures)
13105 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13106 self._CleanupExports(feedback_fn)
13108 return fin_resu, dresults
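# Hedged note with example (not in the original module): the export helpers
# return a pair (finalization_ok, per_disk_results), e.g. (True, [True, True])
# for a two-disk instance.  Any False disk entry, or a failed finalization,
# makes the code above raise OpExecError, so the tuple only reaches the
# caller when the whole export succeeded.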
13111 class LUBackupRemove(NoHooksLU):
13112 """Remove exports related to the named instance.
13117 def ExpandNames(self):
13118 self.needed_locks = {}
13119 # We need all nodes to be locked in order for RemoveExport to work, but we
13120 # don't need to lock the instance itself, as nothing will happen to it (and
13121 # we can also remove exports for an already-removed instance)
13122 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13124 def Exec(self, feedback_fn):
13125 """Remove any export.
13128 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13129 # If the instance was not found we'll try with the name that was passed in.
13130 # This will only work if it was an FQDN, though.
13132 if not instance_name:
13134 instance_name = self.op.instance_name
13136 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13137 exportlist = self.rpc.call_export_list(locked_nodes)
13139 for node in exportlist:
13140 msg = exportlist[node].fail_msg
13142 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13144 if instance_name in exportlist[node].payload:
13146 result = self.rpc.call_export_remove(node, instance_name)
13147 msg = result.fail_msg
13149 logging.error("Could not remove export for instance %s"
13150 " on node %s: %s", instance_name, node, msg)
13152 if fqdn_warn and not found:
13153 feedback_fn("Export not found. If trying to remove an export belonging"
13154 " to a deleted instance please use its Fully Qualified"
13158 class LUGroupAdd(LogicalUnit):
13159 """Logical unit for creating node groups.
13162 HPATH = "group-add"
13163 HTYPE = constants.HTYPE_GROUP
13166 def ExpandNames(self):
13167 # We need the new group's UUID here so that we can create and acquire the
13168 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13169 # that it should not check whether the UUID exists in the configuration.
13170 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13171 self.needed_locks = {}
13172 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13174 def CheckPrereq(self):
13175 """Check prerequisites.
13177 This checks that the given group name is not an existing node group
13182 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13183 except errors.OpPrereqError:
13186 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13187 " node group (UUID: %s)" %
13188 (self.op.group_name, existing_uuid),
13189 errors.ECODE_EXISTS)
13191 if self.op.ndparams:
13192 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13194 if self.op.hv_state:
13195 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13197 self.new_hv_state = None
13199 if self.op.disk_state:
13200 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13202 self.new_disk_state = None
13204 if self.op.diskparams:
13205 for templ in constants.DISK_TEMPLATES:
13206 if templ not in self.op.diskparams:
13207 self.op.diskparams[templ] = {}
13208 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13210 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13212 if self.op.ipolicy:
13213 cluster = self.cfg.GetClusterInfo()
13214 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13216 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13217 except errors.ConfigurationError, err:
13218 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13219 errors.ECODE_INVAL)
13221 def BuildHooksEnv(self):
13222 """Build hooks env.
13226 "GROUP_NAME": self.op.group_name,
13229 def BuildHooksNodes(self):
13230 """Build hooks nodes.
13233 mn = self.cfg.GetMasterNode()
13234 return ([mn], [mn])
13236 def Exec(self, feedback_fn):
13237 """Add the node group to the cluster.
13240 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13241 uuid=self.group_uuid,
13242 alloc_policy=self.op.alloc_policy,
13243 ndparams=self.op.ndparams,
13244 diskparams=self.op.diskparams,
13245 ipolicy=self.op.ipolicy,
13246 hv_state_static=self.new_hv_state,
13247 disk_state_static=self.new_disk_state)
13249 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13250 del self.remove_locks[locking.LEVEL_NODEGROUP]
13253 class LUGroupAssignNodes(NoHooksLU):
13254 """Logical unit for assigning nodes to groups.
13259 def ExpandNames(self):
13260 # These raise errors.OpPrereqError on their own:
13261 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13262 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13264 # We want to lock all the affected nodes and groups. We have readily
13265 # available the list of nodes, and the *destination* group. To gather the
13266 # list of "source" groups, we need to fetch node information later on.
13267 self.needed_locks = {
13268 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13269 locking.LEVEL_NODE: self.op.nodes,
13272 def DeclareLocks(self, level):
13273 if level == locking.LEVEL_NODEGROUP:
13274 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13276 # Try to get all affected nodes' groups without having the group or node
13277 # lock yet. Needs verification later in the code flow.
13278 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13280 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13282 def CheckPrereq(self):
13283 """Check prerequisites.
13286 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13287 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13288 frozenset(self.op.nodes))
13290 expected_locks = (set([self.group_uuid]) |
13291 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13292 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13293 if actual_locks != expected_locks:
13294 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13295 " current groups are '%s', used to be '%s'" %
13296 (utils.CommaJoin(expected_locks),
13297 utils.CommaJoin(actual_locks)))
13299 self.node_data = self.cfg.GetAllNodesInfo()
13300 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13301 instance_data = self.cfg.GetAllInstancesInfo()
13303 if self.group is None:
13304 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13305 (self.op.group_name, self.group_uuid))
13307 (new_splits, previous_splits) = \
13308 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13309 for node in self.op.nodes],
13310 self.node_data, instance_data)
13313 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13315 if not self.op.force:
13316 raise errors.OpExecError("The following instances get split by this"
13317 " change and --force was not given: %s" %
13320 self.LogWarning("This operation will split the following instances: %s",
13323 if previous_splits:
13324 self.LogWarning("In addition, these already-split instances continue"
13325 " to be split across groups: %s",
13326 utils.CommaJoin(utils.NiceSort(previous_splits)))
13328 def Exec(self, feedback_fn):
13329 """Assign nodes to a new group.
13332 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13334 self.cfg.AssignGroupNodes(mods)
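# Hedged example (illustration only): "mods" is a list of
# (node_name, target_group_uuid) pairs, e.g.
#
#   [("node3.example.com", "<dest group uuid>"),
#    ("node4.example.com", "<dest group uuid>")]
#
# all pointing at the destination group's UUID; the configuration applies
# them in a single update.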
13337 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13338 """Check for split instances after a node assignment.
13340 This method considers a series of node assignments as an atomic operation,
13341 and returns information about split instances after applying the set of
13344 In particular, it returns information about newly split instances, and
13345 instances that were already split, and remain so after the change.
13347 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13350 @type changes: list of (node_name, new_group_uuid) pairs.
13351 @param changes: list of node assignments to consider.
13352 @param node_data: a dict with data for all nodes
13353 @param instance_data: a dict with all instances to consider
13354 @rtype: a two-tuple
13355 @return: a list of instances that were previously okay but end up split as a
13356 consequence of this change, and a list of instances that were previously
13357 split and that this change does not fix.
13360 changed_nodes = dict((node, group) for node, group in changes
13361 if node_data[node].group != group)
13363 all_split_instances = set()
13364 previously_split_instances = set()
13366 def InstanceNodes(instance):
13367 return [instance.primary_node] + list(instance.secondary_nodes)
13369 for inst in instance_data.values():
13370 if inst.disk_template not in constants.DTS_INT_MIRROR:
13373 instance_nodes = InstanceNodes(inst)
13375 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13376 previously_split_instances.add(inst.name)
13378 if len(set(changed_nodes.get(node, node_data[node].group)
13379 for node in instance_nodes)) > 1:
13380 all_split_instances.add(inst.name)
13382 return (list(all_split_instances - previously_split_instances),
13383 list(previously_split_instances & all_split_instances))
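# Hedged worked example (added for illustration, not part of the original
# code): consider a DRBD instance with primary node A and secondary node B,
# both currently in group G1.  Moving only A to G2, i.e.
#
#   changes = [("A", "G2")]
#
# leaves the instance spanning two groups, so it appears in the first
# returned list (newly split).  An instance whose nodes were already in
# different groups before the change, and still are afterwards, appears in
# the second list instead.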
13386 class _GroupQuery(_QueryBase):
13387 FIELDS = query.GROUP_FIELDS
13389 def ExpandNames(self, lu):
13390 lu.needed_locks = {}
13392 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13393 self._cluster = lu.cfg.GetClusterInfo()
13394 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13397 self.wanted = [name_to_uuid[name]
13398 for name in utils.NiceSort(name_to_uuid.keys())]
13400 # Accept names to be either names or UUIDs.
13403 all_uuid = frozenset(self._all_groups.keys())
13405 for name in self.names:
13406 if name in all_uuid:
13407 self.wanted.append(name)
13408 elif name in name_to_uuid:
13409 self.wanted.append(name_to_uuid[name])
13411 missing.append(name)
13414 raise errors.OpPrereqError("Some groups do not exist: %s" %
13415 utils.CommaJoin(missing),
13416 errors.ECODE_NOENT)
13418 def DeclareLocks(self, lu, level):
13421 def _GetQueryData(self, lu):
13422 """Computes the list of node groups and their attributes.
13425 do_nodes = query.GQ_NODE in self.requested_data
13426 do_instances = query.GQ_INST in self.requested_data
13428 group_to_nodes = None
13429 group_to_instances = None
13431 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13432 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13433 # latter GetAllInstancesInfo() is not enough, for we have to go through
13434 # instance->node. Hence, we will need to process nodes even if we only need
13435 # instance information.
13436 if do_nodes or do_instances:
13437 all_nodes = lu.cfg.GetAllNodesInfo()
13438 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13441 for node in all_nodes.values():
13442 if node.group in group_to_nodes:
13443 group_to_nodes[node.group].append(node.name)
13444 node_to_group[node.name] = node.group
13447 all_instances = lu.cfg.GetAllInstancesInfo()
13448 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13450 for instance in all_instances.values():
13451 node = instance.primary_node
13452 if node in node_to_group:
13453 group_to_instances[node_to_group[node]].append(instance.name)
13456 # Do not pass on node information if it was not requested.
13457 group_to_nodes = None
13459 return query.GroupQueryData(self._cluster,
13460 [self._all_groups[uuid]
13461 for uuid in self.wanted],
13462 group_to_nodes, group_to_instances)
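# Hedged example (illustration only): when node/instance data is requested,
# the helper maps built above are keyed by group UUID, roughly
#
#   group_to_nodes     = {"<group uuid>": ["node1.example.com",
#                                          "node2.example.com"]}
#   group_to_instances = {"<group uuid>": ["instance1.example.com"]}
#
# where each instance is attributed to the group of its primary node.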
13465 class LUGroupQuery(NoHooksLU):
13466 """Logical unit for querying node groups.
13471 def CheckArguments(self):
13472 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13473 self.op.output_fields, False)
13475 def ExpandNames(self):
13476 self.gq.ExpandNames(self)
13478 def DeclareLocks(self, level):
13479 self.gq.DeclareLocks(self, level)
13481 def Exec(self, feedback_fn):
13482 return self.gq.OldStyleQuery(self)
13485 class LUGroupSetParams(LogicalUnit):
13486 """Modifies the parameters of a node group.
13489 HPATH = "group-modify"
13490 HTYPE = constants.HTYPE_GROUP
13493 def CheckArguments(self):
13496 self.op.diskparams,
13497 self.op.alloc_policy,
13499 self.op.disk_state,
13503 if all_changes.count(None) == len(all_changes):
13504 raise errors.OpPrereqError("Please pass at least one modification",
13505 errors.ECODE_INVAL)
13507 def ExpandNames(self):
13508 # This raises errors.OpPrereqError on its own:
13509 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13511 self.needed_locks = {
13512 locking.LEVEL_INSTANCE: [],
13513 locking.LEVEL_NODEGROUP: [self.group_uuid],
13516 self.share_locks[locking.LEVEL_INSTANCE] = 1
13518 def DeclareLocks(self, level):
13519 if level == locking.LEVEL_INSTANCE:
13520 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13522 # Lock instances optimistically, needs verification once group lock has
13524 self.needed_locks[locking.LEVEL_INSTANCE] = \
13525 self.cfg.GetNodeGroupInstances(self.group_uuid)
13527 def CheckPrereq(self):
13528 """Check prerequisites.
13531 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13533 # Check if locked instances are still correct
13534 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13536 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13537 cluster = self.cfg.GetClusterInfo()
13539 if self.group is None:
13540 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13541 (self.op.group_name, self.group_uuid))
13543 if self.op.ndparams:
13544 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13545 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13546 self.new_ndparams = new_ndparams
13548 if self.op.diskparams:
13549 self.new_diskparams = dict()
13550 for templ in constants.DISK_TEMPLATES:
13551 if templ not in self.op.diskparams:
13552 self.op.diskparams[templ] = {}
13553 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13554 self.op.diskparams[templ])
13555 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13556 self.new_diskparams[templ] = new_templ_params
13558 if self.op.hv_state:
13559 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13560 self.group.hv_state_static)
13562 if self.op.disk_state:
13563 self.new_disk_state = \
13564 _MergeAndVerifyDiskState(self.op.disk_state,
13565 self.group.disk_state_static)
13567 if self.op.ipolicy:
13568 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13572 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13573 inst_filter = lambda inst: inst.name in owned_instances
13574 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13576 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13578 new_ipolicy, instances)
13581 self.LogWarning("After the ipolicy change the following instances"
13582 " violate them: %s",
13583 utils.CommaJoin(violations))
13585 def BuildHooksEnv(self):
13586 """Build hooks env.
13590 "GROUP_NAME": self.op.group_name,
13591 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13594 def BuildHooksNodes(self):
13595 """Build hooks nodes.
13598 mn = self.cfg.GetMasterNode()
13599 return ([mn], [mn])
13601 def Exec(self, feedback_fn):
13602 """Modifies the node group.
13607 if self.op.ndparams:
13608 self.group.ndparams = self.new_ndparams
13609 result.append(("ndparams", str(self.group.ndparams)))
13611 if self.op.diskparams:
13612 self.group.diskparams = self.new_diskparams
13613 result.append(("diskparams", str(self.group.diskparams)))
13615 if self.op.alloc_policy:
13616 self.group.alloc_policy = self.op.alloc_policy
13618 if self.op.hv_state:
13619 self.group.hv_state_static = self.new_hv_state
13621 if self.op.disk_state:
13622 self.group.disk_state_static = self.new_disk_state
13624 if self.op.ipolicy:
13625 self.group.ipolicy = self.new_ipolicy
13627 self.cfg.Update(self.group, feedback_fn)
13631 class LUGroupRemove(LogicalUnit):
13632 HPATH = "group-remove"
13633 HTYPE = constants.HTYPE_GROUP
13636 def ExpandNames(self):
13637 # This raises errors.OpPrereqError on its own:
13638 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13639 self.needed_locks = {
13640 locking.LEVEL_NODEGROUP: [self.group_uuid],
13643 def CheckPrereq(self):
13644 """Check prerequisites.
13646 This checks that the given group name exists as a node group, that it is
13647 empty (i.e., contains no nodes), and that it is not the last group of the
13651 # Verify that the group is empty.
13652 group_nodes = [node.name
13653 for node in self.cfg.GetAllNodesInfo().values()
13654 if node.group == self.group_uuid]
13657 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13659 (self.op.group_name,
13660 utils.CommaJoin(utils.NiceSort(group_nodes))),
13661 errors.ECODE_STATE)
13663 # Verify the cluster would not be left group-less.
13664 if len(self.cfg.GetNodeGroupList()) == 1:
13665 raise errors.OpPrereqError("Group '%s' is the only group,"
13666 " cannot be removed" %
13667 self.op.group_name,
13668 errors.ECODE_STATE)
13670 def BuildHooksEnv(self):
13671 """Build hooks env.
13675 "GROUP_NAME": self.op.group_name,
13678 def BuildHooksNodes(self):
13679 """Build hooks nodes.
13682 mn = self.cfg.GetMasterNode()
13683 return ([mn], [mn])
13685 def Exec(self, feedback_fn):
13686 """Remove the node group.
13690 self.cfg.RemoveNodeGroup(self.group_uuid)
13691 except errors.ConfigurationError:
13692 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13693 (self.op.group_name, self.group_uuid))
13695 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13698 class LUGroupRename(LogicalUnit):
13699 HPATH = "group-rename"
13700 HTYPE = constants.HTYPE_GROUP
13703 def ExpandNames(self):
13704 # This raises errors.OpPrereqError on its own:
13705 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13707 self.needed_locks = {
13708 locking.LEVEL_NODEGROUP: [self.group_uuid],
13711 def CheckPrereq(self):
13712 """Check prerequisites.
13714 Ensures requested new name is not yet used.
13718 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13719 except errors.OpPrereqError:
13722 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13723 " node group (UUID: %s)" %
13724 (self.op.new_name, new_name_uuid),
13725 errors.ECODE_EXISTS)
13727 def BuildHooksEnv(self):
13728 """Build hooks env.
13732 "OLD_NAME": self.op.group_name,
13733 "NEW_NAME": self.op.new_name,
13736 def BuildHooksNodes(self):
13737 """Build hooks nodes.
13740 mn = self.cfg.GetMasterNode()
13742 all_nodes = self.cfg.GetAllNodesInfo()
13743 all_nodes.pop(mn, None)
13746 run_nodes.extend(node.name for node in all_nodes.values()
13747 if node.group == self.group_uuid)
13749 return (run_nodes, run_nodes)
13751 def Exec(self, feedback_fn):
13752 """Rename the node group.
13755 group = self.cfg.GetNodeGroup(self.group_uuid)
13758 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13759 (self.op.group_name, self.group_uuid))
13761 group.name = self.op.new_name
13762 self.cfg.Update(group, feedback_fn)
13764 return self.op.new_name
13767 class LUGroupEvacuate(LogicalUnit):
13768 HPATH = "group-evacuate"
13769 HTYPE = constants.HTYPE_GROUP
13772 def ExpandNames(self):
13773 # This raises errors.OpPrereqError on its own:
13774 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13776 if self.op.target_groups:
13777 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13778 self.op.target_groups)
13780 self.req_target_uuids = []
13782 if self.group_uuid in self.req_target_uuids:
13783 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13784 " as a target group (targets are %s)" %
13786 utils.CommaJoin(self.req_target_uuids)),
13787 errors.ECODE_INVAL)
13789 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13791 self.share_locks = _ShareAll()
13792 self.needed_locks = {
13793 locking.LEVEL_INSTANCE: [],
13794 locking.LEVEL_NODEGROUP: [],
13795 locking.LEVEL_NODE: [],
13798 def DeclareLocks(self, level):
13799 if level == locking.LEVEL_INSTANCE:
13800 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13802 # Lock instances optimistically, needs verification once node and group
13803 # locks have been acquired
13804 self.needed_locks[locking.LEVEL_INSTANCE] = \
13805 self.cfg.GetNodeGroupInstances(self.group_uuid)
13807 elif level == locking.LEVEL_NODEGROUP:
13808 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13810 if self.req_target_uuids:
13811 lock_groups = set([self.group_uuid] + self.req_target_uuids)
13813 # Lock all groups used by instances optimistically; this requires going
13814 # via the node before it's locked, requiring verification later on
13815 lock_groups.update(group_uuid
13816 for instance_name in
13817 self.owned_locks(locking.LEVEL_INSTANCE)
13819 self.cfg.GetInstanceNodeGroups(instance_name))
13821 # No target groups, need to lock all of them
13822 lock_groups = locking.ALL_SET
13824 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13826 elif level == locking.LEVEL_NODE:
13827 # This will only lock the nodes in the group to be evacuated which
13828 # contain actual instances
13829 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13830 self._LockInstancesNodes()
13832 # Lock all nodes in group to be evacuated and target groups
13833 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13834 assert self.group_uuid in owned_groups
13835 member_nodes = [node_name
13836 for group in owned_groups
13837 for node_name in self.cfg.GetNodeGroup(group).members]
13838 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13840 def CheckPrereq(self):
13841 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13842 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13843 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13845 assert owned_groups.issuperset(self.req_target_uuids)
13846 assert self.group_uuid in owned_groups
13848 # Check if locked instances are still correct
13849 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13851 # Get instance information
13852 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13854 # Check if node groups for locked instances are still correct
13855 for instance_name in owned_instances:
13856 inst = self.instances[instance_name]
13857 assert owned_nodes.issuperset(inst.all_nodes), \
13858 "Instance %s's nodes changed while we kept the lock" % instance_name
13860 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13863 assert self.group_uuid in inst_groups, \
13864 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13866 if self.req_target_uuids:
13867 # User requested specific target groups
13868 self.target_uuids = self.req_target_uuids
13870 # All groups except the one to be evacuated are potential targets
13871 self.target_uuids = [group_uuid for group_uuid in owned_groups
13872 if group_uuid != self.group_uuid]
13874 if not self.target_uuids:
13875 raise errors.OpPrereqError("There are no possible target groups",
13876 errors.ECODE_INVAL)
13878 def BuildHooksEnv(self):
13879 """Build hooks env.
13883 "GROUP_NAME": self.op.group_name,
13884 "TARGET_GROUPS": " ".join(self.target_uuids),
13887 def BuildHooksNodes(self):
13888 """Build hooks nodes.
13891 mn = self.cfg.GetMasterNode()
13893 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13895 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13897 return (run_nodes, run_nodes)
13899 def Exec(self, feedback_fn):
13900 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13902 assert self.group_uuid not in self.target_uuids
13904 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13905 instances=instances, target_groups=self.target_uuids)
13907 ial.Run(self.op.iallocator)
13909 if not ial.success:
13910 raise errors.OpPrereqError("Can't compute group evacuation using"
13911 " iallocator '%s': %s" %
13912 (self.op.iallocator, ial.info),
13913 errors.ECODE_NORES)
13915 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13917 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13918 len(jobs), self.op.group_name)
13920 return ResultWithJobs(jobs)
13923 class TagsLU(NoHooksLU): # pylint: disable=W0223
13924 """Generic tags LU.
13926 This is an abstract class which is the parent of all the other tags LUs.
13929 def ExpandNames(self):
13930 self.group_uuid = None
13931 self.needed_locks = {}
13932 if self.op.kind == constants.TAG_NODE:
13933 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13934 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13935 elif self.op.kind == constants.TAG_INSTANCE:
13936 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13937 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13938 elif self.op.kind == constants.TAG_NODEGROUP:
13939 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13941 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13942 # not possible to acquire the BGL based on opcode parameters)
13944 def CheckPrereq(self):
13945 """Check prerequisites.
13948 if self.op.kind == constants.TAG_CLUSTER:
13949 self.target = self.cfg.GetClusterInfo()
13950 elif self.op.kind == constants.TAG_NODE:
13951 self.target = self.cfg.GetNodeInfo(self.op.name)
13952 elif self.op.kind == constants.TAG_INSTANCE:
13953 self.target = self.cfg.GetInstanceInfo(self.op.name)
13954 elif self.op.kind == constants.TAG_NODEGROUP:
13955 self.target = self.cfg.GetNodeGroup(self.group_uuid)
13957 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13958 str(self.op.kind), errors.ECODE_INVAL)
13961 class LUTagsGet(TagsLU):
13962 """Returns the tags of a given object.
13967 def ExpandNames(self):
13968 TagsLU.ExpandNames(self)
13970 # Share locks as this is only a read operation
13971 self.share_locks = _ShareAll()
13973 def Exec(self, feedback_fn):
13974 """Returns the tag list.
13977 return list(self.target.GetTags())
13980 class LUTagsSearch(NoHooksLU):
13981 """Searches the tags for a given pattern.
13986 def ExpandNames(self):
13987 self.needed_locks = {}
13989 def CheckPrereq(self):
13990 """Check prerequisites.
13992 This checks the pattern passed for validity by compiling it.
13996 self.re = re.compile(self.op.pattern)
13997 except re.error, err:
13998 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13999 (self.op.pattern, err), errors.ECODE_INVAL)
14001 def Exec(self, feedback_fn):
14002 """Returns the tag list.
14006 tgts = [("/cluster", cfg.GetClusterInfo())]
14007 ilist = cfg.GetAllInstancesInfo().values()
14008 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14009 nlist = cfg.GetAllNodesInfo().values()
14010 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14011 tgts.extend(("/nodegroup/%s" % n.name, n)
14012 for n in cfg.GetAllNodeGroupsInfo().values())
14014 for path, target in tgts:
14015 for tag in target.GetTags():
14016 if self.re.search(tag):
14017 results.append((path, tag))
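# Hedged example (illustration only): the search returns (path, tag) pairs
# across all tag-carrying objects, e.g. for the pattern "prod"
#
#   [("/cluster", "env:prod"),
#    ("/instances/web1.example.com", "env:prod")]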
14021 class LUTagsSet(TagsLU):
14022 """Sets a tag on a given object.
14027 def CheckPrereq(self):
14028 """Check prerequisites.
14030 This checks the type and length of the tag name and value.
14033 TagsLU.CheckPrereq(self)
14034 for tag in self.op.tags:
14035 objects.TaggableObject.ValidateTag(tag)
14037 def Exec(self, feedback_fn):
14042 for tag in self.op.tags:
14043 self.target.AddTag(tag)
14044 except errors.TagError, err:
14045 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14046 self.cfg.Update(self.target, feedback_fn)
14049 class LUTagsDel(TagsLU):
14050 """Delete a list of tags from a given object.
14055 def CheckPrereq(self):
14056 """Check prerequisites.
14058 This checks that we have the given tag.
14061 TagsLU.CheckPrereq(self)
14062 for tag in self.op.tags:
14063 objects.TaggableObject.ValidateTag(tag)
14064 del_tags = frozenset(self.op.tags)
14065 cur_tags = self.target.GetTags()
14067 diff_tags = del_tags - cur_tags
14069 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14070 raise errors.OpPrereqError("Tag(s) %s not found" %
14071 (utils.CommaJoin(diff_names), ),
14072 errors.ECODE_NOENT)
14074 def Exec(self, feedback_fn):
14075 """Remove the tag from the object.
14078 for tag in self.op.tags:
14079 self.target.RemoveTag(tag)
14080 self.cfg.Update(self.target, feedback_fn)
14083 class LUTestDelay(NoHooksLU):
14084 """Sleep for a specified amount of time.
14086 This LU sleeps on the master and/or nodes for a specified amount of
14092 def ExpandNames(self):
14093 """Expand names and set required locks.
14095 This expands the node list, if any.
14098 self.needed_locks = {}
14099 if self.op.on_nodes:
14100 # _GetWantedNodes can be used here, but is not always appropriate to use
14101 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14102 # more information.
14103 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14104 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14106 def _TestDelay(self):
14107 """Do the actual sleep.
14110 if self.op.on_master:
14111 if not utils.TestDelay(self.op.duration):
14112 raise errors.OpExecError("Error during master delay test")
14113 if self.op.on_nodes:
14114 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14115 for node, node_result in result.items():
14116 node_result.Raise("Failure during rpc call to node %s" % node)
14118 def Exec(self, feedback_fn):
14119 """Execute the test delay opcode, with the wanted repetitions.
14122 if self.op.repeat == 0:
14125 top_value = self.op.repeat - 1
14126 for i in range(self.op.repeat):
14127 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14131 class LUTestJqueue(NoHooksLU):
14132 """Utility LU to test some aspects of the job queue.
14137 # Must be lower than default timeout for WaitForJobChange to see whether it
14138 # notices changed jobs
14139 _CLIENT_CONNECT_TIMEOUT = 20.0
14140 _CLIENT_CONFIRM_TIMEOUT = 60.0
14143 def _NotifyUsingSocket(cls, cb, errcls):
14144 """Opens a Unix socket and waits for another program to connect.
14147 @param cb: Callback to send socket name to client
14148 @type errcls: class
14149 @param errcls: Exception class to use for errors
14152 # Using a temporary directory as there's no easy way to create temporary
14153 # sockets without writing a custom loop around tempfile.mktemp and
14155 tmpdir = tempfile.mkdtemp()
14157 tmpsock = utils.PathJoin(tmpdir, "sock")
14159 logging.debug("Creating temporary socket at %s", tmpsock)
14160 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14165 # Send details to client
14168 # Wait for client to connect before continuing
14169 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14171 (conn, _) = sock.accept()
14172 except socket.error, err:
14173 raise errcls("Client didn't connect in time (%s)" % err)
14177 # Remove as soon as client is connected
14178 shutil.rmtree(tmpdir)
14180 # Wait for client to close
14183 # pylint: disable=E1101
14184 # Instance of '_socketobject' has no ... member
14185 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14187 except socket.error, err:
14188 raise errcls("Client failed to confirm notification (%s)" % err)
14192 def _SendNotification(self, test, arg, sockname):
14193 """Sends a notification to the client.
14196 @param test: Test name
14197 @param arg: Test argument (depends on test)
14198 @type sockname: string
14199 @param sockname: Socket path
14202 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14204 def _Notify(self, prereq, test, arg):
14205 """Notifies the client of a test.
14208 @param prereq: Whether this is a prereq-phase test
14210 @param test: Test name
14211 @param arg: Test argument (depends on test)
14215 errcls = errors.OpPrereqError
14217 errcls = errors.OpExecError
14219 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14223 def CheckArguments(self):
14224 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14225 self.expandnames_calls = 0
14227 def ExpandNames(self):
14228 checkargs_calls = getattr(self, "checkargs_calls", 0)
14229 if checkargs_calls < 1:
14230 raise errors.ProgrammerError("CheckArguments was not called")
14232 self.expandnames_calls += 1
14234 if self.op.notify_waitlock:
14235 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14237 self.LogInfo("Expanding names")
14239 # Get lock on master node (just to get a lock, not for a particular reason)
14240 self.needed_locks = {
14241 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14244 def Exec(self, feedback_fn):
14245 if self.expandnames_calls < 1:
14246 raise errors.ProgrammerError("ExpandNames was not called")
14248 if self.op.notify_exec:
14249 self._Notify(False, constants.JQT_EXEC, None)
14251 self.LogInfo("Executing")
14253 if self.op.log_messages:
14254 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14255 for idx, msg in enumerate(self.op.log_messages):
14256 self.LogInfo("Sending log message %s", idx + 1)
14257 feedback_fn(constants.JQT_MSGPREFIX + msg)
14258 # Report how many test messages have been sent
14259 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14262 raise errors.OpExecError("Opcode failure was requested")
14267 class IAllocator(object):
14268 """IAllocator framework.
14270 An IAllocator instance has four sets of attributes:
14271 - cfg that is needed to query the cluster
14272 - input data (all members of the _KEYS class attribute are required)
14273 - four buffer attributes (in|out_data|text), that represent the
14274 input (to the external script) in text and data structure format,
14275 and the output from it, again in two formats
14276 - the result variables from the script (success, info, result) for
14280 # pylint: disable=R0902
14281 # lots of instance attributes
14283 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14285 self.rpc = rpc_runner
14286 # init buffer variables
14287 self.in_text = self.out_text = self.in_data = self.out_data = None
14288 # init all input fields so that pylint is happy
14290 self.memory = self.disks = self.disk_template = None
14291 self.os = self.tags = self.nics = self.vcpus = None
14292 self.hypervisor = None
14293 self.relocate_from = None
14295 self.instances = None
14296 self.evac_mode = None
14297 self.target_groups = []
14299 self.required_nodes = None
14300 # init result fields
14301 self.success = self.info = self.result = None
14304 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14306 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14307 " IAllocator" % self.mode)
14309 keyset = [n for (n, _) in keydata]
14312 if key not in keyset:
14313 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14314 " IAllocator" % key)
14315 setattr(self, key, kwargs[key])
14318 if key not in kwargs:
14319 raise errors.ProgrammerError("Missing input parameter '%s' to"
14320 " IAllocator" % key)
14321 self._BuildInputData(compat.partial(fn, self), keydata)
14323 def _ComputeClusterData(self):
14324 """Compute the generic allocator input data.
14326 This is the data that is independent of the actual operation.
14330 cluster_info = cfg.GetClusterInfo()
14333 "version": constants.IALLOCATOR_VERSION,
14334 "cluster_name": cfg.GetClusterName(),
14335 "cluster_tags": list(cluster_info.GetTags()),
14336 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14337 # we don't have job IDs
14339 ninfo = cfg.GetAllNodesInfo()
14340 iinfo = cfg.GetAllInstancesInfo().values()
14341 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14344 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14346 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14347 hypervisor_name = self.hypervisor
14348 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14349 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14351 hypervisor_name = cluster_info.primary_hypervisor
14353 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14356 self.rpc.call_all_instances_info(node_list,
14357 cluster_info.enabled_hypervisors)
14359 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14361 config_ndata = self._ComputeBasicNodeData(ninfo)
14362 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14363 i_list, config_ndata)
14364 assert len(data["nodes"]) == len(ninfo), \
14365 "Incomplete node data computed"
14367 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14369 self.in_data = data
14372 def _ComputeNodeGroupData(cfg):
14373 """Compute node groups data.
14376 cluster = cfg.GetClusterInfo()
14377 ng = dict((guuid, {
14378 "name": gdata.name,
14379 "alloc_policy": gdata.alloc_policy,
14380 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14382 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14387 def _ComputeBasicNodeData(node_cfg):
14388 """Compute global node data.
14391 @returns: a dict of name: (node dict, node config)
14394 # fill in static (config-based) values
14395 node_results = dict((ninfo.name, {
14396 "tags": list(ninfo.GetTags()),
14397 "primary_ip": ninfo.primary_ip,
14398 "secondary_ip": ninfo.secondary_ip,
14399 "offline": ninfo.offline,
14400 "drained": ninfo.drained,
14401 "master_candidate": ninfo.master_candidate,
14402 "group": ninfo.group,
14403 "master_capable": ninfo.master_capable,
14404 "vm_capable": ninfo.vm_capable,
14406 for ninfo in node_cfg.values())
14408 return node_results
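# Hedged example (illustration only): a single entry of the static node map
# produced above looks roughly like
#
#   "node1.example.com": {"tags": [], "primary_ip": "192.0.2.10",
#                         "secondary_ip": "192.0.2.10", "offline": False,
#                         "drained": False, "master_candidate": True,
#                         "group": "<group uuid>", "master_capable": True,
#                         "vm_capable": True}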
14411 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14413 """Compute global node data.
14415 @param node_results: the basic node structures as filled from the config
14418 # TODO(dynmem): compute the right data on MAX and MIN memory
14419 # make a copy of the current dict
14420 node_results = dict(node_results)
14421 for nname, nresult in node_data.items():
14422 assert nname in node_results, "Missing basic data for node %s" % nname
14423 ninfo = node_cfg[nname]
14425 if not (ninfo.offline or ninfo.drained):
14426 nresult.Raise("Can't get data for node %s" % nname)
14427 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14429 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14431 for attr in ["memory_total", "memory_free", "memory_dom0",
14432 "vg_size", "vg_free", "cpu_total"]:
14433 if attr not in remote_info:
14434 raise errors.OpExecError("Node '%s' didn't return attribute"
14435 " '%s'" % (nname, attr))
14436 if not isinstance(remote_info[attr], int):
14437 raise errors.OpExecError("Node '%s' returned invalid value"
14439 (nname, attr, remote_info[attr]))
14440 # compute memory used by primary instances
14441 i_p_mem = i_p_up_mem = 0
14442 for iinfo, beinfo in i_list:
14443 if iinfo.primary_node == nname:
14444 i_p_mem += beinfo[constants.BE_MAXMEM]
14445 if iinfo.name not in node_iinfo[nname].payload:
14448 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14449 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14450 remote_info["memory_free"] -= max(0, i_mem_diff)
14452 if iinfo.admin_state == constants.ADMINST_UP:
14453 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14455 # compute memory used by instances
14457 "total_memory": remote_info["memory_total"],
14458 "reserved_memory": remote_info["memory_dom0"],
14459 "free_memory": remote_info["memory_free"],
14460 "total_disk": remote_info["vg_size"],
14461 "free_disk": remote_info["vg_free"],
14462 "total_cpus": remote_info["cpu_total"],
14463 "i_pri_memory": i_p_mem,
14464 "i_pri_up_memory": i_p_up_mem,
14466 pnr_dyn.update(node_results[nname])
14467 node_results[nname] = pnr_dyn
14469 return node_results
14472 def _ComputeInstanceData(cluster_info, i_list):
14473 """Compute global instance data.
14477 for iinfo, beinfo in i_list:
14479 for nic in iinfo.nics:
14480 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14484 "mode": filled_params[constants.NIC_MODE],
14485 "link": filled_params[constants.NIC_LINK],
14487 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14488 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14489 nic_data.append(nic_dict)
14491 "tags": list(iinfo.GetTags()),
14492 "admin_state": iinfo.admin_state,
14493 "vcpus": beinfo[constants.BE_VCPUS],
14494 "memory": beinfo[constants.BE_MAXMEM],
14496 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14498 "disks": [{constants.IDISK_SIZE: dsk.size,
14499 constants.IDISK_MODE: dsk.mode}
14500 for dsk in iinfo.disks],
14501 "disk_template": iinfo.disk_template,
14502 "hypervisor": iinfo.hypervisor,
14504 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14506 instance_data[iinfo.name] = pir
14508 return instance_data
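# Hedged example (illustration only): one entry of the instance map built
# above, for a hypothetical single-disk DRBD instance, looks roughly like
#
#   "instance1.example.com": {
#     "tags": [], "admin_state": "up", "vcpus": 2, "memory": 1024,
#     "nics": [{"mac": "aa:00:00:12:34:56", "mode": "bridged",
#               "link": "xen-br0", "bridge": "xen-br0"}],
#     "nodes": ["node1.example.com", "node2.example.com"],
#     "disks": [{"size": 10240, "mode": "rw"}],
#     "disk_template": "drbd", "hypervisor": "xen-pvm",
#     "disk_space_total": <disk sizes plus DRBD metadata overhead>}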
14510 def _AddNewInstance(self):
14511 """Add new instance data to allocator structure.
14513 This in combination with _ComputeClusterData will create the
14514 correct structure needed as input for the allocator.
14516 The checks for the completeness of the opcode must have already been
14520 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14522 if self.disk_template in constants.DTS_INT_MIRROR:
14523 self.required_nodes = 2
14525 self.required_nodes = 1
14529 "disk_template": self.disk_template,
14532 "vcpus": self.vcpus,
14533 "memory": self.memory,
14534 "disks": self.disks,
14535 "disk_space_total": disk_space,
14537 "required_nodes": self.required_nodes,
14538 "hypervisor": self.hypervisor,
14543 def _AddRelocateInstance(self):
14544 """Add relocate instance data to allocator structure.
14546 This in combination with _ComputeClusterData will create the
14547 correct structure needed as input for the allocator.
14549 The checks for the completeness of the opcode must have already been
14553 instance = self.cfg.GetInstanceInfo(self.name)
14554 if instance is None:
14555 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14556 " IAllocator" % self.name)
14558 if instance.disk_template not in constants.DTS_MIRRORED:
14559 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14560 errors.ECODE_INVAL)
14562 if instance.disk_template in constants.DTS_INT_MIRROR and \
14563 len(instance.secondary_nodes) != 1:
14564 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14565 errors.ECODE_STATE)
14567 self.required_nodes = 1
14568 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14569 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14573 "disk_space_total": disk_space,
14574 "required_nodes": self.required_nodes,
14575 "relocate_from": self.relocate_from,

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
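
  # Illustrative sketch only, not part of the original code: after
  # _BuildInputData has run, self.in_text is the serialized form of a
  # structure roughly shaped like the following, where the cluster-wide keys
  # come from _ComputeClusterData and "request" from the mode-specific fn()
  # (keys abridged, sample values invented):
  #
  #   {
  #     "cluster_tags": [...],
  #     "nodegroups": {...},   # per-group data
  #     "nodes": {...},        # per-node data as computed above
  #     "instances": {...},    # per-instance data, see _ComputeInstanceData
  #     "request": {"type": "allocate", ...},
  #   }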

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
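
  # Illustrative sketch only, not part of the original code: a value passing
  # _NEVAC_RESULT is a three-element list of (moved, failed, jobs), e.g.
  # (sample values invented):
  #
  #   [
  #     [["inst1.example.com", "group2", ["node3.example.com"]]],   # moved
  #     [["inst2.example.com", "disk template not mirrored"]],      # failed
  #     [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]],                  # job list
  #   ]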

  # For each mode: the function that builds the request, the keydata used to
  # validate it, and the check applied to the allocator's result
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
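
  # Illustrative usage sketch only, not part of the original code. Assuming an
  # already prepared IAllocator instance "ial" and an allocator script named
  # "hail" installed on the master node:
  #
  #   ial.Run("hail")        # runs the script and validates its output
  #   if not ial.success:
  #     raise errors.OpExecError("Allocation failed: %s" % ial.info)
  #   chosen = ial.result    # mode-specific result, e.g. the selected nodes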

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
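
  # Illustrative sketch only, not part of the original code: a well-formed
  # allocator reply for an allocation request could parse to (sample values
  # invented):
  #
  #   {"success": True,
  #    "info": "allocation successful",
  #    "result": ["node1.example.com", "node2.example.com"]}
  #
  # i.e. exactly the three keys checked for above, with "result" in the
  # mode-specific shape verified by self._result_check.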

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
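
  # Illustrative sketch only, not part of the original code: given
  # node2group = {"node1": "uuid-a", "node2": "uuid-b"} and
  # groups = {"uuid-a": {"name": "group1"}}, then
  # _NodesToGroups(node2group, groups, ["node1", "node2", "node9"]) returns
  # ["group1", "uuid-b"]: unknown nodes are skipped and groups without data
  # fall back to their UUID.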


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
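
# Illustrative usage sketch only, not part of the original code: callers look
# up the query implementation for a resource and then instantiate it, e.g.
#
#   impl_cls = _GetQueryImplementation(constants.QR_INSTANCE)
#   # impl_cls is _InstanceQuery; the calling LU then constructs it with the
#   # query filter and the requested fields.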