# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar

# C0302: since we have waaaay too many lines in this module
import copy
import itertools
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

from ganeti import rpc

import ganeti.masterd.instance # pylint: disable=W0611
#: Size of DRBD meta block device

INSTANCE_UP = [constants.ADMINST_UP]
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcode.OpCode}
93 @param jobs: A list of lists of opcode objects
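
# Illustrative sketch (an assumption, not code from the original module):
# a LU's Exec can hand follow-up work to the processor by returning a
# ResultWithJobs; opcodes.OpTestDelay is used here purely as a placeholder
# opcode.
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpTestDelay(duration=0)]]  # one job with one opcode
#     return ResultWithJobs(jobs)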
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()
  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing them separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods need no longer worry about missing parameters.

    """
  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError
  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """
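    # Illustrative sketch (assumed, not from the original module): a typical
    # override that computes node locks once the instance locks are held,
    # delegating to the _LockInstancesNodes helper defined below.
    #
    #   def DeclareLocks(self, level):
    #     if level == locking.LEVEL_NODE:
    #       self._LockInstancesNodes()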
  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError
  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
      L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None

    @return: the new Exec result, based on the previous result

    """
    # API must be kept, thus we ignore the unused-argument and
    # "could be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result
  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklets.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or
477 raise NotImplementedError
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted
  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)
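
# For example, a read-only LU can mark every level shared in ExpandNames
# with "self.share_locks = _ShareAll()" (as LUClusterVerifyGroup does
# below), instead of setting each level individually.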
def _MakeLegacyNodeInfo(data):
  """Formats the data returned by L{rpc.RpcRunner.call_node_info}.

  Converts the data into a single dictionary. This is fine for most use cases,
  but some require information from more than one volume group or hypervisor.

  """
  (bootid, (vg_info, ), (hv_info, )) = data

  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
    "bootid": bootid,
    })
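
# Illustrative sketch (assumed keys and values, not real RPC output): the
# RPC result is a nested tuple which this helper flattens into one dict.
#
#   data = ("12345-bootid", ({"vg_size": 102400, "vg_free": 51200}, ),
#           ({"memory_free": 2048}, ))
#   _MakeLegacyNodeInfo(data)
#   => {"vg_size": 102400, "vg_free": 51200, "memory_free": 2048,
#       "bootid": "12345-bootid"}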
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances
def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted
def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
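
# Worked example (hypothetical values): VALUE_DEFAULT removes a key so the
# cluster default shows through again, while other keys are overwritten.
#
#   old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/vda1"}
#   upd = {"kernel_path": constants.VALUE_DEFAULT, "root_path": "/dev/vda2"}
#   _GetUpdatedParams(old, upd)
#   => {"root_path": "/dev/vda2"}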
def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
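
# Illustrative sketch (assumed context, not from the original module): after
# narrowing down which nodes an operation really touches, a LU can drop the
# rest while keeping the ones it still needs.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[instance.primary_node] +
#                      list(instance.secondary_nodes))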
def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name
      as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
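
# Illustrative result shape (hypothetical names): every logical volume of
# every instance is keyed by the node carrying it.
#
#   _MapInstanceDisksToNodes([inst1, inst2])
#   => {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#       ("node2.example.com", "xenvg/disk0"): "inst2.example.com"}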
def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
                               (instance, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

    if instance.name in ins_l.payload:
      raise errors.OpPrereqError("Instance %s is running, %s" %
                                 (instance.name, msg), errors.ECODE_STATE)
def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))
def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
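
# Worked example (assumed numbers): with candidate_pool_size = 10, eight
# current candidates and GetMasterCandidateStats reporting nine wanted,
# mc_should becomes min(9 + 1, 10) = 10, and since 8 < 10 the new node
# promotes itself.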
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)
def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
    }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
    }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)

    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     ems)
    result.Raise("Could not disable the master role")

    return master_params.name
def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
      apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + str(item)
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors: # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
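
# Illustrative sketch (assumed, not from the original module): verification
# code typically calls _ErrorIf with a condition, an error code from
# constants, the offended item and a format string, e.g.:
#
#   self._ErrorIf(test, constants.CV_ENODERPC, node,
#                 "unable to verify node: no data returned")
#
# The mix-in demotes the error to a warning when its code is listed in
# self.op.ignore_errors, and records overall failure in self.bad.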
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              ignore_errors=self.op.ignore_errors,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = False

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}
  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
    }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes
  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
      group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
      group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
      extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)
  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
        reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  constants.CV_ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, constants.CV_ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, constants.CV_ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True
  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, constants.CV_ENODELVM, node,
                 "Invalid character ':' in PV '%s' of VG '%s'",
                 pvname, owner_vg)
1960 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1961 """Check the node bridges.
1963 @type ninfo: L{objects.Node}
1964 @param ninfo: the node to check
1965 @param nresult: the remote results for the node
1966 @param bridges: the expected list of bridges
1968 if not bridges:
1969 return
1971 node = ninfo.name
1973 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1975 missing = nresult.get(constants.NV_BRIDGES, None)
1976 test = not isinstance(missing, list)
1977 _ErrorIf(test, constants.CV_ENODENET, node,
1978 "did not return valid bridge information")
1979 if not test:
1980 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1981 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1983 def _VerifyNodeUserScripts(self, ninfo, nresult):
1984 """Check the results of user scripts presence and executability on the node
1986 @type ninfo: L{objects.Node}
1987 @param ninfo: the node to check
1988 @param nresult: the remote results for the node
1991 node = ninfo.name
1993 test = constants.NV_USERSCRIPTS not in nresult
1994 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
1995 "did not return user scripts information")
1997 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
1998 if not test:
1999 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2000 "user scripts not present or not executable: %s" %
2001 utils.CommaJoin(sorted(broken_scripts)))
2003 def _VerifyNodeNetwork(self, ninfo, nresult):
2004 """Check the node network connectivity results.
2006 @type ninfo: L{objects.Node}
2007 @param ninfo: the node to check
2008 @param nresult: the remote results for the node
2011 node = ninfo.name
2012 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2014 test = constants.NV_NODELIST not in nresult
2015 _ErrorIf(test, constants.CV_ENODESSH, node,
2016 "node hasn't returned node ssh connectivity data")
2017 if not test:
2018 if nresult[constants.NV_NODELIST]:
2019 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2020 _ErrorIf(True, constants.CV_ENODESSH, node,
2021 "ssh communication with node '%s': %s", a_node, a_msg)
2023 test = constants.NV_NODENETTEST not in nresult
2024 _ErrorIf(test, constants.CV_ENODENET, node,
2025 "node hasn't returned node tcp connectivity data")
2026 if not test:
2027 if nresult[constants.NV_NODENETTEST]:
2028 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2029 for anode in nlist:
2030 _ErrorIf(True, constants.CV_ENODENET, node,
2031 "tcp communication with node '%s': %s",
2032 anode, nresult[constants.NV_NODENETTEST][anode])
2034 test = constants.NV_MASTERIP not in nresult
2035 _ErrorIf(test, constants.CV_ENODENET, node,
2036 "node hasn't returned node master IP reachability data")
2037 if not test:
2038 if not nresult[constants.NV_MASTERIP]:
2039 if node == self.master_node:
2040 msg = "the master node cannot reach the master IP (not configured?)"
2041 else:
2042 msg = "cannot reach the master IP"
2043 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2045 def _VerifyInstance(self, instance, instanceconfig, node_image,
2046 diskstatus):
2047 """Verify an instance.
2049 This function checks to see if the required block devices are
2050 available on the instance's node.
2053 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2054 node_current = instanceconfig.primary_node
2056 node_vol_should = {}
2057 instanceconfig.MapLVsByNode(node_vol_should)
2059 for node in node_vol_should:
2060 n_img = node_image[node]
2061 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2062 # ignore missing volumes on offline or broken nodes
2063 continue
2064 for volume in node_vol_should[node]:
2065 test = volume not in n_img.volumes
2066 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2067 "volume %s missing on node %s", volume, node)
2069 if instanceconfig.admin_state == constants.ADMINST_UP:
2070 pri_img = node_image[node_current]
2071 test = instance not in pri_img.instances and not pri_img.offline
2072 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2073 "instance not running on its primary node %s",
2074 node_current)
2076 diskdata = [(nname, success, status, idx)
2077 for (nname, disks) in diskstatus.items()
2078 for idx, (success, status) in enumerate(disks)]
2080 for nname, success, bdev_status, idx in diskdata:
2081 # the 'ghost node' construction in Exec() ensures that we have a
2082 # node_image entry for every node the instance uses, even if it is
2083 # outside this node group
2083 snode = node_image[nname]
2084 bad_snode = snode.ghost or snode.offline
2085 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2086 not success and not bad_snode,
2087 constants.CV_EINSTANCEFAULTYDISK, instance,
2088 "couldn't retrieve status for disk/%s on %s: %s",
2089 idx, nname, bdev_status)
2090 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2091 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2092 constants.CV_EINSTANCEFAULTYDISK, instance,
2093 "disk/%s on %s is faulty", idx, nname)
2095 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2096 """Verify if there are any unknown volumes in the cluster.
2098 The .os, .swap and backup volumes are ignored. All other volumes are
2099 reported as unknown.
2101 @type reserved: L{ganeti.utils.FieldSet}
2102 @param reserved: a FieldSet of reserved volume names
2105 for node, n_img in node_image.items():
2106 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2107 # skip non-healthy nodes
2108 continue
2109 for volume in n_img.volumes:
2110 test = ((node not in node_vol_should or
2111 volume not in node_vol_should[node]) and
2112 not reserved.Matches(volume))
2113 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2114 "volume %s is unknown", volume)
2116 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2117 """Verify N+1 Memory Resilience.
2119 Check that if one single node dies we can still start all the
2120 instances it was primary for.
2123 cluster_info = self.cfg.GetClusterInfo()
2124 for node, n_img in node_image.items():
2125 # This code checks that every node which is now listed as
2126 # secondary has enough memory to host all instances it is
2127 # supposed to should a single other node in the cluster fail.
2128 # FIXME: not ready for failover to an arbitrary node
2129 # FIXME: does not support file-backed instances
2130 # WARNING: we currently take into account down instances as well
2131 # as up ones, considering that even if they're down someone
2132 # might want to start them even in the event of a node failure.
2133 if n_img.offline:
2134 # we're skipping offline nodes from the N+1 warning, since
2135 # most likely we don't have good memory information from them;
2136 # we already list instances living on such nodes, and that's
2137 # enough warning
2138 continue
2139 #TODO(dynmem): use MINMEM for checking
2140 #TODO(dynmem): also consider ballooning out other instances
2141 for prinode, instances in n_img.sbp.items():
2142 needed_mem = 0
2143 for instance in instances:
2144 bep = cluster_info.FillBE(instance_cfg[instance])
2145 if bep[constants.BE_AUTO_BALANCE]:
2146 needed_mem += bep[constants.BE_MAXMEM]
2147 test = n_img.mfree < needed_mem
2148 self._ErrorIf(test, constants.CV_ENODEN1, node,
2149 "not enough memory to accommodate instance failovers"
2150 " should node %s fail (%dMiB needed, %dMiB available)",
2151 prinode, needed_mem, n_img.mfree)
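# --- Illustrative sketch (editor's addition, not part of the original module) ---
# The N+1 test above is plain accounting: for each primary node whose
# instances would land on this node, sum the MAXMEM of the auto-balanced
# instances and compare against the node's free memory. Roughly:
#
#   def n_plus_one_ok(mfree, instance_maxmems):
#       # instance_maxmems: MiB values for one prinode's auto-balanced instances
#       return mfree >= sum(instance_maxmems)
#
# e.g. mfree=4096 with instances of 2048+1024 MiB passes; adding another
# 2048 MiB instance would trip CV_ENODEN1 for that (node, prinode) pair.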
2153 @classmethod
2154 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2155 (files_all, files_opt, files_mc, files_vm)):
2156 """Verifies file checksums collected from all nodes.
2158 @param errorif: Callback for reporting errors
2159 @param nodeinfo: List of L{objects.Node} objects
2160 @param master_node: Name of master node
2161 @param all_nvinfo: RPC results
2164 # Define functions determining which nodes to consider for a file
2165 files2nodefn = [
2166 (files_all, None),
2167 (files_mc, lambda node: (node.master_candidate or
2168 node.name == master_node)),
2169 (files_vm, lambda node: node.vm_capable),
2170 ]
2172 # Build mapping from filename to list of nodes which should have the file
2173 nodefiles = {}
2174 for (files, fn) in files2nodefn:
2175 if fn is None:
2176 filenodes = nodeinfo
2177 else:
2178 filenodes = filter(fn, nodeinfo)
2179 nodefiles.update((filename,
2180 frozenset(map(operator.attrgetter("name"), filenodes)))
2181 for filename in files)
2183 assert set(nodefiles) == (files_all | files_mc | files_vm)
2185 fileinfo = dict((filename, {}) for filename in nodefiles)
2186 ignore_nodes = set()
2188 for node in nodeinfo:
2189 if node.offline:
2190 ignore_nodes.add(node.name)
2191 continue
2193 nresult = all_nvinfo[node.name]
2195 if nresult.fail_msg or not nresult.payload:
2196 node_files = None
2197 else:
2198 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2200 test = not (node_files and isinstance(node_files, dict))
2201 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2202 "Node did not return file checksum data")
2203 if test:
2204 ignore_nodes.add(node.name)
2205 continue
2207 # Build per-checksum mapping from filename to nodes having it
2208 for (filename, checksum) in node_files.items():
2209 assert filename in nodefiles
2210 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2212 for (filename, checksums) in fileinfo.items():
2213 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2215 # Nodes having the file
2216 with_file = frozenset(node_name
2217 for nodes in fileinfo[filename].values()
2218 for node_name in nodes) - ignore_nodes
2220 expected_nodes = nodefiles[filename] - ignore_nodes
2222 # Nodes missing file
2223 missing_file = expected_nodes - with_file
2225 if filename in files_opt:
2226 # All or no nodes
2227 errorif(missing_file and missing_file != expected_nodes,
2228 constants.CV_ECLUSTERFILECHECK, None,
2229 "File %s is optional, but it must exist on all or no"
2230 " nodes (not found on %s)",
2231 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2232 else:
2233 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2234 "File %s is missing from node(s) %s", filename,
2235 utils.CommaJoin(utils.NiceSort(missing_file)))
2237 # Warn if a node has a file it shouldn't
2238 unexpected = with_file - expected_nodes
2239 errorif(unexpected,
2240 constants.CV_ECLUSTERFILECHECK, None,
2241 "File %s should not exist on node(s) %s",
2242 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2244 # See if there are multiple versions of the file
2245 test = len(checksums) > 1
2246 if test:
2247 variants = ["variant %s on %s" %
2248 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2249 for (idx, (checksum, nodes)) in
2250 enumerate(sorted(checksums.items()))]
2251 else:
2252 variants = []
2254 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2255 "File %s found with %s different checksums (%s)",
2256 filename, len(checksums), "; ".join(variants))
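# --- Illustrative sketch (editor's addition, not part of the original module) ---
# fileinfo maps filename -> {checksum: set(node names)}; more than one
# checksum key means the file diverged across nodes. A toy input (invented
# values) and the resulting complaint:
#
#   fileinfo["/var/lib/ganeti/config.data"] = {
#       "abc123": set(["node1", "node2"]),
#       "def456": set(["node3"]),
#   }
#   # -> "File ... found with 2 different checksums
#   #     (variant 1 on node1, node2; variant 2 on node3)"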
2258 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2259 drbd_map):
2260 """Verifies the node DRBD status.
2262 @type ninfo: L{objects.Node}
2263 @param ninfo: the node to check
2264 @param nresult: the remote results for the node
2265 @param instanceinfo: the dict of instances
2266 @param drbd_helper: the configured DRBD usermode helper
2267 @param drbd_map: the DRBD map as returned by
2268 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2271 node = ninfo.name
2272 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2274 if drbd_helper:
2275 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2276 test = (helper_result is None)
2277 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2278 "no drbd usermode helper returned")
2279 if helper_result:
2280 status, payload = helper_result
2281 test = not status
2282 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2283 "drbd usermode helper check unsuccessful: %s", payload)
2284 test = status and (payload != drbd_helper)
2285 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2286 "wrong drbd usermode helper: %s", payload)
2288 # compute the DRBD minors
2289 node_drbd = {}
2290 for minor, instance in drbd_map[node].items():
2291 test = instance not in instanceinfo
2292 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2293 "ghost instance '%s' in temporary DRBD map", instance)
2294 # ghost instance should not be running, but otherwise we
2295 # don't give double warnings (both ghost instance and
2296 # unallocated minor in use)
2297 if test:
2298 node_drbd[minor] = (instance, False)
2299 else:
2300 instance = instanceinfo[instance]
2301 node_drbd[minor] = (instance.name,
2302 instance.admin_state == constants.ADMINST_UP)
2304 # and now check them
2305 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2306 test = not isinstance(used_minors, (tuple, list))
2307 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2308 "cannot parse drbd status file: %s", str(used_minors))
2309 if test:
2310 # we cannot check drbd status
2311 return
2313 for minor, (iname, must_exist) in node_drbd.items():
2314 test = minor not in used_minors and must_exist
2315 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2316 "drbd minor %d of instance %s is not active", minor, iname)
2317 for minor in used_minors:
2318 test = minor not in node_drbd
2319 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2320 "unallocated drbd minor %d is in use", minor)
2322 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2323 """Builds the node OS structures.
2325 @type ninfo: L{objects.Node}
2326 @param ninfo: the node to check
2327 @param nresult: the remote results for the node
2328 @param nimg: the node image object
2331 node = ninfo.name
2332 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2334 remote_os = nresult.get(constants.NV_OSLIST, None)
2335 test = (not isinstance(remote_os, list) or
2336 not compat.all(isinstance(v, list) and len(v) == 7
2337 for v in remote_os))
2339 _ErrorIf(test, constants.CV_ENODEOS, node,
2340 "node hasn't returned valid OS data")
2342 nimg.os_fail = test
2343 if test:
2344 return
2346 os_dict = {}
2349 for (name, os_path, status, diagnose,
2350 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2352 if name not in os_dict:
2353 os_dict[name] = []
2355 # parameters is a list of lists instead of list of tuples due to
2356 # JSON lacking a real tuple type, fix it:
2357 parameters = [tuple(v) for v in parameters]
2358 os_dict[name].append((os_path, status, diagnose,
2359 set(variants), set(parameters), set(api_ver)))
2361 nimg.oslist = os_dict
2363 def _VerifyNodeOS(self, ninfo, nimg, base):
2364 """Verifies the node OS list.
2366 @type ninfo: L{objects.Node}
2367 @param ninfo: the node to check
2368 @param nimg: the node image object
2369 @param base: the 'template' node we match against (e.g. from the master)
2372 node = ninfo.name
2373 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2375 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2377 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2378 for os_name, os_data in nimg.oslist.items():
2379 assert os_data, "Empty OS status for OS %s?!" % os_name
2380 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2381 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2382 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2383 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2384 "OS '%s' has multiple entries (first one shadows the rest): %s",
2385 os_name, utils.CommaJoin([v[0] for v in os_data]))
2386 # comparisons with the 'base' image
2387 test = os_name not in base.oslist
2388 _ErrorIf(test, constants.CV_ENODEOS, node,
2389 "Extra OS %s not present on reference node (%s)",
2390 os_name, base.name)
2391 if test:
2392 continue
2393 assert base.oslist[os_name], "Base node has empty OS status?"
2394 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2395 if not b_status:
2396 # base OS is invalid, skipping
2397 continue
2398 for kind, a, b in [("API version", f_api, b_api),
2399 ("variants list", f_var, b_var),
2400 ("parameters", beautify_params(f_param),
2401 beautify_params(b_param))]:
2402 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2403 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2404 kind, os_name, base.name,
2405 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2407 # check any missing OSes
2408 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2409 _ErrorIf(missing, constants.CV_ENODEOS, node,
2410 "OSes present on reference node %s but missing on this node: %s",
2411 base.name, utils.CommaJoin(missing))
2413 def _VerifyOob(self, ninfo, nresult):
2414 """Verifies out of band functionality of a node.
2416 @type ninfo: L{objects.Node}
2417 @param ninfo: the node to check
2418 @param nresult: the remote results for the node
2421 node = ninfo.name
2422 # We just have to verify the paths on master and/or master candidates
2423 # as the oob helper is invoked on the master
2424 if ((ninfo.master_candidate or ninfo.master_capable) and
2425 constants.NV_OOB_PATHS in nresult):
2426 for path_result in nresult[constants.NV_OOB_PATHS]:
2427 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2429 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2430 """Verifies and updates the node volume data.
2432 This function will update a L{NodeImage}'s internal structures
2433 with data from the remote call.
2435 @type ninfo: L{objects.Node}
2436 @param ninfo: the node to check
2437 @param nresult: the remote results for the node
2438 @param nimg: the node image object
2439 @param vg_name: the configured VG name
2442 node = ninfo.name
2443 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2445 nimg.lvm_fail = True
2446 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2447 if vg_name is None:
2448 pass
2449 elif isinstance(lvdata, basestring):
2450 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2451 utils.SafeEncode(lvdata))
2452 elif not isinstance(lvdata, dict):
2453 _ErrorIf(True, constants.CV_ENODELVM, node,
2454 "rpc call to node failed (lvlist)")
2455 else:
2456 nimg.volumes = lvdata
2457 nimg.lvm_fail = False
2459 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2460 """Verifies and updates the node instance list.
2462 If the listing was successful, then updates this node's instance
2463 list. Otherwise, it marks the RPC call as failed for the instance
2464 list.
2466 @type ninfo: L{objects.Node}
2467 @param ninfo: the node to check
2468 @param nresult: the remote results for the node
2469 @param nimg: the node image object
2472 idata = nresult.get(constants.NV_INSTANCELIST, None)
2473 test = not isinstance(idata, list)
2474 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2475 "rpc call to node failed (instancelist): %s",
2476 utils.SafeEncode(str(idata)))
2477 if test:
2478 nimg.hyp_fail = True
2479 else:
2480 nimg.instances = idata
2482 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2483 """Verifies and computes a node information map
2485 @type ninfo: L{objects.Node}
2486 @param ninfo: the node to check
2487 @param nresult: the remote results for the node
2488 @param nimg: the node image object
2489 @param vg_name: the configured VG name
2492 node = ninfo.name
2493 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2495 # try to read free memory (from the hypervisor)
2496 hv_info = nresult.get(constants.NV_HVINFO, None)
2497 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2498 _ErrorIf(test, constants.CV_ENODEHV, node,
2499 "rpc call to node failed (hvinfo)")
2500 if not test:
2501 try:
2502 nimg.mfree = int(hv_info["memory_free"])
2503 except (ValueError, TypeError):
2504 _ErrorIf(True, constants.CV_ENODERPC, node,
2505 "node returned invalid nodeinfo, check hypervisor")
2507 # FIXME: devise a free space model for file based instances as well
2508 if vg_name is not None:
2509 test = (constants.NV_VGLIST not in nresult or
2510 vg_name not in nresult[constants.NV_VGLIST])
2511 _ErrorIf(test, constants.CV_ENODELVM, node,
2512 "node didn't return data for the volume group '%s'"
2513 " - it is either missing or broken", vg_name)
2514 if not test:
2515 try:
2516 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2517 except (ValueError, TypeError):
2518 _ErrorIf(True, constants.CV_ENODERPC, node,
2519 "node returned invalid LVM info, check LVM status")
2521 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2522 """Gets per-disk status information for all instances.
2524 @type nodelist: list of strings
2525 @param nodelist: Node names
2526 @type node_image: dict of (name, L{objects.Node})
2527 @param node_image: Node objects
2528 @type instanceinfo: dict of (name, L{objects.Instance})
2529 @param instanceinfo: Instance objects
2530 @rtype: {instance: {node: [(success, payload)]}}
2531 @return: a dictionary of per-instance dictionaries with nodes as
2532 keys and disk information as values; the disk information is a
2533 list of tuples (success, payload)
2536 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2538 node_disks = {}
2539 node_disks_devonly = {}
2540 diskless_instances = set()
2541 diskless = constants.DT_DISKLESS
2543 for nname in nodelist:
2544 node_instances = list(itertools.chain(node_image[nname].pinst,
2545 node_image[nname].sinst))
2546 diskless_instances.update(inst for inst in node_instances
2547 if instanceinfo[inst].disk_template == diskless)
2548 disks = [(inst, disk)
2549 for inst in node_instances
2550 for disk in instanceinfo[inst].disks]
2552 if not disks:
2553 # No need to collect data
2554 continue
2556 node_disks[nname] = disks
2558 # Creating copies as SetDiskID below will modify the objects and that can
2559 # lead to incorrect data returned from nodes
2560 devonly = [dev.Copy() for (_, dev) in disks]
2562 for dev in devonly:
2563 self.cfg.SetDiskID(dev, nname)
2565 node_disks_devonly[nname] = devonly
2567 assert len(node_disks) == len(node_disks_devonly)
2569 # Collect data from all nodes with disks
2570 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2571 node_disks_devonly)
2573 assert len(result) == len(node_disks)
2575 instdisk = {}
2577 for (nname, nres) in result.items():
2578 disks = node_disks[nname]
2580 if nres.offline:
2581 # No data from this node
2582 data = len(disks) * [(False, "node offline")]
2583 else:
2584 msg = nres.fail_msg
2585 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2586 "while getting disk information: %s", msg)
2587 if msg:
2588 # No data from this node
2589 data = len(disks) * [(False, msg)]
2590 else:
2591 data = []
2592 for idx, i in enumerate(nres.payload):
2593 if isinstance(i, (tuple, list)) and len(i) == 2:
2594 data.append(i)
2595 else:
2596 logging.warning("Invalid result from node %s, entry %d: %s",
2597 nname, idx, i)
2598 data.append((False, "Invalid result from the remote node"))
2600 for ((inst, _), status) in zip(disks, data):
2601 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2603 # Add empty entries for diskless instances.
2604 for inst in diskless_instances:
2605 assert inst not in instdisk
2606 instdisk[inst] = {}
2608 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2609 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2610 compat.all(isinstance(s, (tuple, list)) and
2611 len(s) == 2 for s in statuses)
2612 for inst, nnames in instdisk.items()
2613 for nname, statuses in nnames.items())
2614 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2616 return instdisk
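# --- Illustrative sketch (editor's addition, not part of the original module) ---
# The asserts above document the invariant of the returned structure: one
# (success, payload) pair per configured disk, per reachable node, and an
# empty dict for diskless instances (names invented for illustration):
#
#   instdisk = {
#       "inst1": {"node1": [(True, status0), (True, status1)]},
#       "diskless-inst": {},
#   }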
2618 @staticmethod
2619 def _SshNodeSelector(group_uuid, all_nodes):
2620 """Create endless iterators for all potential SSH check hosts.
2623 nodes = [node for node in all_nodes
2624 if (node.group != group_uuid and
2625 not node.offline)]
2626 keyfunc = operator.attrgetter("group")
2628 return map(itertools.cycle,
2629 [sorted(map(operator.attrgetter("name"), names))
2630 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2631 keyfunc)])
2633 @classmethod
2634 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2635 """Choose which nodes should talk to which other nodes.
2637 We will make nodes contact all nodes in their group, and one node from
2638 every other node group.
2640 @warning: This algorithm has a known issue if one node group is much
2641 smaller than others (e.g. just one node). In such a case all other
2642 nodes will talk to the single node.
2645 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2646 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2648 return (online_nodes,
2649 dict((name, sorted([i.next() for i in sel]))
2650 for name in online_nodes))
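# --- Illustrative sketch (editor's addition, not part of the original module) ---
# _SshNodeSelector builds one endless iterator per foreign group, so taking
# one element from each gives every online node one SSH target per other
# group, round-robin. With groups {A: [a1, a2], B: [b1]} and a node in A:
#
#   sel = [itertools.cycle(["b1"])]            # iterators for all groups but A
#   targets = sorted(i.next() for i in sel)    # -> ["b1"]
#
# The caveat in the docstring follows directly: a one-node group becomes the
# single SSH target for every node in the other groups.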
2652 def BuildHooksEnv(self):
2653 """Build hooks env.
2655 Cluster-Verify hooks are run in the post phase only; their failure is
2656 logged in the verify output and makes the verification fail.
2658 """
2659 env = {
2660 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2661 }
2663 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2664 for node in self.my_node_info.values())
2666 return env
2668 def BuildHooksNodes(self):
2669 """Build hooks nodes.
2672 return ([], self.my_node_names)
2674 def Exec(self, feedback_fn):
2675 """Verify integrity of the node group, performing various tests on nodes.
2677 """
2678 # This method has too many local variables. pylint: disable=R0914
2679 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2681 if not self.my_node_names:
2682 # empty node group
2683 feedback_fn("* Empty node group, skipping verification")
2684 return True
2687 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2688 verbose = self.op.verbose
2689 self._feedback_fn = feedback_fn
2691 vg_name = self.cfg.GetVGName()
2692 drbd_helper = self.cfg.GetDRBDHelper()
2693 cluster = self.cfg.GetClusterInfo()
2694 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2695 hypervisors = cluster.enabled_hypervisors
2696 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2698 i_non_redundant = [] # Non redundant instances
2699 i_non_a_balanced = [] # Non auto-balanced instances
2700 i_offline = 0 # Count of offline instances
2701 n_offline = 0 # Count of offline nodes
2702 n_drained = 0 # Count of nodes being drained
2703 node_vol_should = {}
2705 # FIXME: verify OS list
2708 filemap = _ComputeAncillaryFiles(cluster, False)
2710 # do local checksums
2711 master_node = self.master_node = self.cfg.GetMasterNode()
2712 master_ip = self.cfg.GetMasterIP()
2714 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2716 user_scripts = []
2717 if self.cfg.GetUseExternalMipScript():
2718 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2720 node_verify_param = {
2721 constants.NV_FILELIST:
2722 utils.UniqueSequence(filename
2723 for files in filemap
2724 for filename in files),
2725 constants.NV_NODELIST:
2726 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2727 self.all_node_info.values()),
2728 constants.NV_HYPERVISOR: hypervisors,
2729 constants.NV_HVPARAMS:
2730 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2731 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2732 for node in node_data_list
2733 if not node.offline],
2734 constants.NV_INSTANCELIST: hypervisors,
2735 constants.NV_VERSION: None,
2736 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2737 constants.NV_NODESETUP: None,
2738 constants.NV_TIME: None,
2739 constants.NV_MASTERIP: (master_node, master_ip),
2740 constants.NV_OSLIST: None,
2741 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2742 constants.NV_USERSCRIPTS: user_scripts,
2743 }
2745 if vg_name is not None:
2746 node_verify_param[constants.NV_VGLIST] = None
2747 node_verify_param[constants.NV_LVLIST] = vg_name
2748 node_verify_param[constants.NV_PVLIST] = [vg_name]
2749 node_verify_param[constants.NV_DRBDLIST] = None
2751 if drbd_helper:
2752 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2755 # FIXME: this needs to be changed per node-group, not cluster-wide
2756 bridges = set()
2757 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2758 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2759 bridges.add(default_nicpp[constants.NIC_LINK])
2760 for instance in self.my_inst_info.values():
2761 for nic in instance.nics:
2762 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2763 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2764 bridges.add(full_nic[constants.NIC_LINK])
2766 if bridges:
2767 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2769 # Build our expected cluster state
2770 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2771 name=node.name,
2772 vm_capable=node.vm_capable))
2773 for node in node_data_list)
2776 oob_paths = []
2777 for node in self.all_node_info.values():
2778 path = _SupportsOob(self.cfg, node)
2779 if path and path not in oob_paths:
2780 oob_paths.append(path)
2782 if oob_paths:
2783 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2785 for instance in self.my_inst_names:
2786 inst_config = self.my_inst_info[instance]
2788 for nname in inst_config.all_nodes:
2789 if nname not in node_image:
2790 gnode = self.NodeImage(name=nname)
2791 gnode.ghost = (nname not in self.all_node_info)
2792 node_image[nname] = gnode
2794 inst_config.MapLVsByNode(node_vol_should)
2796 pnode = inst_config.primary_node
2797 node_image[pnode].pinst.append(instance)
2799 for snode in inst_config.secondary_nodes:
2800 nimg = node_image[snode]
2801 nimg.sinst.append(instance)
2802 if pnode not in nimg.sbp:
2803 nimg.sbp[pnode] = []
2804 nimg.sbp[pnode].append(instance)
2806 # At this point, we have the in-memory data structures complete,
2807 # except for the runtime information, which we'll gather next
2809 # Due to the way our RPC system works, exact response times cannot be
2810 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2811 # time before and after executing the request, we can at least have a time
2813 nvinfo_starttime = time.time()
2814 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2815 node_verify_param,
2816 self.cfg.GetClusterName())
2817 nvinfo_endtime = time.time()
2819 if self.extra_lv_nodes and vg_name is not None:
2820 extra_lv_nvinfo = \
2821 self.rpc.call_node_verify(self.extra_lv_nodes,
2822 {constants.NV_LVLIST: vg_name},
2823 self.cfg.GetClusterName())
2824 else:
2825 extra_lv_nvinfo = {}
2827 all_drbd_map = self.cfg.ComputeDRBDMap()
2829 feedback_fn("* Gathering disk information (%s nodes)" %
2830 len(self.my_node_names))
2831 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2832 self.my_inst_info)
2834 feedback_fn("* Verifying configuration file consistency")
2836 # If not all nodes are being checked, we need to make sure the master node
2837 # and a non-checked vm_capable node are in the list.
2838 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2839 if absent_nodes:
2840 vf_nvinfo = all_nvinfo.copy()
2841 vf_node_info = list(self.my_node_info.values())
2842 additional_nodes = []
2843 if master_node not in self.my_node_info:
2844 additional_nodes.append(master_node)
2845 vf_node_info.append(self.all_node_info[master_node])
2846 # Add the first vm_capable node we find which is not included
2847 for node in absent_nodes:
2848 nodeinfo = self.all_node_info[node]
2849 if nodeinfo.vm_capable and not nodeinfo.offline:
2850 additional_nodes.append(node)
2851 vf_node_info.append(self.all_node_info[node])
2852 break
2853 key = constants.NV_FILELIST
2854 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2855 {key: node_verify_param[key]},
2856 self.cfg.GetClusterName()))
2857 else:
2858 vf_nvinfo = all_nvinfo
2859 vf_node_info = self.my_node_info.values()
2861 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2863 feedback_fn("* Verifying node status")
2865 refos_img = None
2867 for node_i in node_data_list:
2868 node = node_i.name
2869 nimg = node_image[node]
2871 if node_i.offline:
2872 if verbose:
2873 feedback_fn("* Skipping offline node %s" % (node,))
2874 n_offline += 1
2875 continue
2877 if node == master_node:
2878 ntype = "master"
2879 elif node_i.master_candidate:
2880 ntype = "master candidate"
2881 elif node_i.drained:
2882 ntype = "drained"
2883 n_drained += 1
2884 else:
2885 ntype = "regular"
2886 if verbose:
2887 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2889 msg = all_nvinfo[node].fail_msg
2890 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2891 msg)
2892 if msg:
2893 nimg.rpc_fail = True
2894 continue
2896 nresult = all_nvinfo[node].payload
2898 nimg.call_ok = self._VerifyNode(node_i, nresult)
2899 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2900 self._VerifyNodeNetwork(node_i, nresult)
2901 self._VerifyNodeUserScripts(node_i, nresult)
2902 self._VerifyOob(node_i, nresult)
2904 if nimg.vm_capable:
2905 self._VerifyNodeLVM(node_i, nresult, vg_name)
2906 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2907 all_drbd_map)
2909 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2910 self._UpdateNodeInstances(node_i, nresult, nimg)
2911 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2912 self._UpdateNodeOS(node_i, nresult, nimg)
2914 if not nimg.os_fail:
2915 if refos_img is None:
2916 refos_img = nimg
2917 self._VerifyNodeOS(node_i, nimg, refos_img)
2918 self._VerifyNodeBridges(node_i, nresult, bridges)
2920 # Check whether all running instances are primary for the node. (This
2921 # can no longer be done from _VerifyInstance below, since some of the
2922 # wrong instances could be from other node groups.)
2923 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2925 for inst in non_primary_inst:
2926 # FIXME: investigate best way to handle offline insts
2927 if inst.admin_state == constants.ADMINST_OFFLINE:
2928 if verbose:
2929 feedback_fn("* Skipping offline instance %s" % inst.name)
2930 i_offline += 1
2931 continue
2932 test = inst in self.all_inst_info
2933 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2934 "instance should not run on node %s", node_i.name)
2935 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2936 "node is running unknown instance %s", inst)
2938 for node, result in extra_lv_nvinfo.items():
2939 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2940 node_image[node], vg_name)
2942 feedback_fn("* Verifying instance status")
2943 for instance in self.my_inst_names:
2944 if verbose:
2945 feedback_fn("* Verifying instance %s" % instance)
2946 inst_config = self.my_inst_info[instance]
2947 self._VerifyInstance(instance, inst_config, node_image,
2948 instdisk[instance])
2949 inst_nodes_offline = []
2951 pnode = inst_config.primary_node
2952 pnode_img = node_image[pnode]
2953 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2954 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2955 " primary node failed", instance)
2957 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2958 pnode_img.offline,
2959 constants.CV_EINSTANCEBADNODE, instance,
2960 "instance is marked as running and lives on offline node %s",
2961 inst_config.primary_node)
2963 # If the instance is non-redundant we cannot survive losing its primary
2964 # node, so we are not N+1 compliant. On the other hand we have no disk
2965 # templates with more than one secondary so that situation is not well
2966 # supported either.
2967 # FIXME: does not support file-backed instances
2968 if not inst_config.secondary_nodes:
2969 i_non_redundant.append(instance)
2971 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2972 constants.CV_EINSTANCELAYOUT,
2973 instance, "instance has multiple secondary nodes: %s",
2974 utils.CommaJoin(inst_config.secondary_nodes),
2975 code=self.ETYPE_WARNING)
2977 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2978 pnode = inst_config.primary_node
2979 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2980 instance_groups = {}
2982 for node in instance_nodes:
2983 instance_groups.setdefault(self.all_node_info[node].group,
2984 []).append(node)
2986 pretty_list = [
2987 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2988 # Sort so that we always list the primary node first.
2989 for group, nodes in sorted(instance_groups.items(),
2990 key=lambda (_, nodes): pnode in nodes,
2991 reverse=True)]
2993 self._ErrorIf(len(instance_groups) > 1,
2994 constants.CV_EINSTANCESPLITGROUPS,
2995 instance, "instance has primary and secondary nodes in"
2996 " different groups: %s", utils.CommaJoin(pretty_list),
2997 code=self.ETYPE_WARNING)
2999 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3000 i_non_a_balanced.append(instance)
3002 for snode in inst_config.secondary_nodes:
3003 s_img = node_image[snode]
3004 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3005 snode, "instance %s, connection to secondary node failed",
3006 instance)
3008 if s_img.offline:
3009 inst_nodes_offline.append(snode)
3011 # warn that the instance lives on offline nodes
3012 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3013 "instance has offline secondary node(s) %s",
3014 utils.CommaJoin(inst_nodes_offline))
3015 # ... or ghost/non-vm_capable nodes
3016 for node in inst_config.all_nodes:
3017 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3018 instance, "instance lives on ghost node %s", node)
3019 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3020 instance, "instance lives on non-vm_capable node %s", node)
3022 feedback_fn("* Verifying orphan volumes")
3023 reserved = utils.FieldSet(*cluster.reserved_lvs)
3025 # We will get spurious "unknown volume" warnings if any node of this group
3026 # is secondary for an instance whose primary is in another group. To avoid
3027 # them, we find these instances and add their volumes to node_vol_should.
3028 for inst in self.all_inst_info.values():
3029 for secondary in inst.secondary_nodes:
3030 if (secondary in self.my_node_info
3031 and inst.name not in self.my_inst_info):
3032 inst.MapLVsByNode(node_vol_should)
3033 break
3035 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3037 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3038 feedback_fn("* Verifying N+1 Memory redundancy")
3039 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3041 feedback_fn("* Other Notes")
3042 if i_non_redundant:
3043 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3044 % len(i_non_redundant))
3046 if i_non_a_balanced:
3047 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3048 % len(i_non_a_balanced))
3050 if i_offline:
3051 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3053 if n_offline:
3054 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3056 if n_drained:
3057 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3059 return not self.bad
3061 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3062 """Analyze the post-hooks' result
3064 This method analyses the hook result, handles it, and sends some
3065 nicely-formatted feedback back to the user.
3067 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3068 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3069 @param hooks_results: the results of the multi-node hooks rpc call
3070 @param feedback_fn: function used send feedback back to the caller
3071 @param lu_result: previous Exec result
3072 @return: the new Exec result, based on the previous result
3076 # We only really run POST phase hooks, only for non-empty groups,
3077 # and are only interested in their results
3078 if not self.my_node_names:
3079 # empty node group
3080 return lu_result
3081 elif phase == constants.HOOKS_PHASE_POST:
3082 # Used to change hooks' output to proper indentation
3083 feedback_fn("* Hooks Results")
3084 assert hooks_results, "invalid result from hooks"
3086 for node_name in hooks_results:
3087 res = hooks_results[node_name]
3088 msg = res.fail_msg
3089 test = msg and not res.offline
3090 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3091 "Communication failure in hooks execution: %s", msg)
3092 if res.offline or msg:
3093 # No need to investigate payload if node is offline or gave
3094 # an error.
3095 continue
3096 for script, hkr, output in res.payload:
3097 test = hkr == constants.HKR_FAIL
3098 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3099 "Script %s failed, output:", script)
3100 if test:
3101 output = self._HOOKS_INDENT_RE.sub(" ", output)
3102 feedback_fn("%s" % output)
3103 lu_result = False
3105 return lu_result
3108 class LUClusterVerifyDisks(NoHooksLU):
3109 """Verifies the cluster disks status.
3114 def ExpandNames(self):
3115 self.share_locks = _ShareAll()
3116 self.needed_locks = {
3117 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3118 }
3120 def Exec(self, feedback_fn):
3121 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3123 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3124 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3125 for group in group_names])
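# --- Illustrative sketch (editor's addition, not part of the original module) ---
# Fanning the cluster-wide check out as one opcode per node group keeps
# locking narrow; the processor submits each inner list as a separate job.
# With groups "default" and "storage", the Exec above returns the equivalent
# of:
#
#   ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name="default")],
#                   [opcodes.OpGroupVerifyDisks(group_name="storage")]])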
3128 class LUGroupVerifyDisks(NoHooksLU):
3129 """Verifies the status of all disks in a node group.
3134 def ExpandNames(self):
3135 # Raises errors.OpPrereqError on its own if group can't be found
3136 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3138 self.share_locks = _ShareAll()
3139 self.needed_locks = {
3140 locking.LEVEL_INSTANCE: [],
3141 locking.LEVEL_NODEGROUP: [],
3142 locking.LEVEL_NODE: [],
3143 }
3145 def DeclareLocks(self, level):
3146 if level == locking.LEVEL_INSTANCE:
3147 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3149 # Lock instances optimistically, needs verification once node and group
3150 # locks have been acquired
3151 self.needed_locks[locking.LEVEL_INSTANCE] = \
3152 self.cfg.GetNodeGroupInstances(self.group_uuid)
3154 elif level == locking.LEVEL_NODEGROUP:
3155 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3157 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3158 set([self.group_uuid] +
3159 # Lock all groups used by instances optimistically; this requires
3160 # going via the node before it's locked, requiring verification
3161 # later on
3162 [group_uuid
3163 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3164 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3166 elif level == locking.LEVEL_NODE:
3167 # This will only lock the nodes in the group to be verified which contain
3168 # actual instances
3169 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3170 self._LockInstancesNodes()
3172 # Lock all nodes in group to be verified
3173 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3174 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3175 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3177 def CheckPrereq(self):
3178 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3179 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3180 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3182 assert self.group_uuid in owned_groups
3184 # Check if locked instances are still correct
3185 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3187 # Get instance information
3188 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3190 # Check if node groups for locked instances are still correct
3191 for (instance_name, inst) in self.instances.items():
3192 assert owned_nodes.issuperset(inst.all_nodes), \
3193 "Instance %s's nodes changed while we kept the lock" % instance_name
3195 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3196 owned_groups)
3198 assert self.group_uuid in inst_groups, \
3199 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3201 def Exec(self, feedback_fn):
3202 """Verify integrity of cluster disks.
3204 @rtype: tuple of three items
3205 @return: a tuple of (dict of node-to-node_error, list of instances
3206 which need activate-disks, dict of instance: (node, volume) for
3207 missing volumes)
3209 """
3210 res_nodes = {}
3211 res_instances = set()
3212 res_missing = {}
3214 nv_dict = _MapInstanceDisksToNodes([inst
3215 for inst in self.instances.values()
3216 if inst.admin_state == constants.ADMINST_UP])
3218 if nv_dict:
3219 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3220 set(self.cfg.GetVmCapableNodeList()))
3222 node_lvs = self.rpc.call_lv_list(nodes, [])
3224 for (node, node_res) in node_lvs.items():
3225 if node_res.offline:
3226 continue
3228 msg = node_res.fail_msg
3229 if msg:
3230 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3231 res_nodes[node] = msg
3232 continue
3234 for lv_name, (_, _, lv_online) in node_res.payload.items():
3235 inst = nv_dict.pop((node, lv_name), None)
3236 if not (lv_online or inst is None):
3237 res_instances.add(inst)
3239 # any leftover items in nv_dict are missing LVs, let's arrange the data
3240 # better
3241 for key, inst in nv_dict.iteritems():
3242 res_missing.setdefault(inst, []).append(list(key))
3244 return (res_nodes, list(res_instances), res_missing)
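# --- Illustrative sketch (editor's addition, not part of the original module) ---
# A hypothetical return value matching the (node errors, instances needing
# activate-disks, missing LVs) contract documented above (all names invented):
#
#   ({"node3": "Error 111: connection refused"},
#    ["inst-web"],
#    {"inst-db": [["node1", "xenvg/disk0"]]})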
3247 class LUClusterRepairDiskSizes(NoHooksLU):
3248 """Verifies the cluster disks sizes.
3253 def ExpandNames(self):
3254 if self.op.instances:
3255 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3256 self.needed_locks = {
3257 locking.LEVEL_NODE_RES: [],
3258 locking.LEVEL_INSTANCE: self.wanted_names,
3259 }
3260 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3261 else:
3262 self.wanted_names = None
3263 self.needed_locks = {
3264 locking.LEVEL_NODE_RES: locking.ALL_SET,
3265 locking.LEVEL_INSTANCE: locking.ALL_SET,
3266 }
3267 self.share_locks = {
3268 locking.LEVEL_NODE_RES: 1,
3269 locking.LEVEL_INSTANCE: 0,
3270 }
3272 def DeclareLocks(self, level):
3273 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3274 self._LockInstancesNodes(primary_only=True, level=level)
3276 def CheckPrereq(self):
3277 """Check prerequisites.
3279 This only checks the optional instance list against the existing names.
3282 if self.wanted_names is None:
3283 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3285 self.wanted_instances = \
3286 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3288 def _EnsureChildSizes(self, disk):
3289 """Ensure children of the disk have the needed disk size.
3291 This is valid mainly for DRBD8 and fixes an issue where the
3292 children have smaller disk size.
3294 @param disk: an L{ganeti.objects.Disk} object
3297 if disk.dev_type == constants.LD_DRBD8:
3298 assert disk.children, "Empty children for DRBD8?"
3299 fchild = disk.children[0]
3300 mismatch = fchild.size < disk.size
3301 if mismatch:
3302 self.LogInfo("Child disk has size %d, parent %d, fixing",
3303 fchild.size, disk.size)
3304 fchild.size = disk.size
3306 # and we recurse on this child only, not on the metadev
3307 return self._EnsureChildSizes(fchild) or mismatch
3308 else:
3309 return False
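# --- Illustrative sketch (editor's addition, not part of the original module) ---
# For a DRBD8 disk the recursion walks down the data child only: with
# disk.size == 10240 and fchild.size == 10176, the child is grown to 10240,
# the same check is applied to the child's own first child, and the call
# reports True so the caller knows the configuration changed:
#
#   fixed = self._EnsureChildSizes(disk)   # True if any child was resized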
3311 def Exec(self, feedback_fn):
3312 """Verify the size of cluster disks.
3315 # TODO: check child disks too
3316 # TODO: check differences in size between primary/secondary nodes
3317 per_node_disks = {}
3318 for instance in self.wanted_instances:
3319 pnode = instance.primary_node
3320 if pnode not in per_node_disks:
3321 per_node_disks[pnode] = []
3322 for idx, disk in enumerate(instance.disks):
3323 per_node_disks[pnode].append((instance, idx, disk))
3325 assert not (frozenset(per_node_disks.keys()) -
3326 self.owned_locks(locking.LEVEL_NODE_RES)), \
3327 "Not owning correct locks"
3328 assert not self.owned_locks(locking.LEVEL_NODE)
3330 changed = []
3331 for node, dskl in per_node_disks.items():
3332 newl = [v[2].Copy() for v in dskl]
3333 for dsk in newl:
3334 self.cfg.SetDiskID(dsk, node)
3335 result = self.rpc.call_blockdev_getsize(node, newl)
3336 if result.fail_msg:
3337 self.LogWarning("Failure in blockdev_getsize call to node"
3338 " %s, ignoring", node)
3339 continue
3340 if len(result.payload) != len(dskl):
3341 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3342 " result.payload=%s", node, len(dskl), result.payload)
3343 self.LogWarning("Invalid result from node %s, ignoring node results",
3344 node)
3345 continue
3346 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3347 if size is None:
3348 self.LogWarning("Disk %d of instance %s did not return size"
3349 " information, ignoring", idx, instance.name)
3350 continue
3351 if not isinstance(size, (int, long)):
3352 self.LogWarning("Disk %d of instance %s did not return valid"
3353 " size information, ignoring", idx, instance.name)
3354 continue
3355 size = size >> 20
3356 if size != disk.size:
3357 self.LogInfo("Disk %d of instance %s has mismatched size,"
3358 " correcting: recorded %d, actual %d", idx,
3359 instance.name, disk.size, size)
3360 disk.size = size
3361 self.cfg.Update(instance, feedback_fn)
3362 changed.append((instance.name, idx, size))
3363 if self._EnsureChildSizes(disk):
3364 self.cfg.Update(instance, feedback_fn)
3365 changed.append((instance.name, idx, disk.size))
3367 return changed
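# --- Illustrative sketch (editor's addition, not part of the original module) ---
# The "size >> 20" above converts the byte count reported by the node into
# the MiB unit used by disk.size in the configuration (2**20 bytes per MiB),
# e.g.:
#
#   10737418240 >> 20  ==  10240   # a 10 GiB volume recorded as 10240 MiB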
3369 class LUClusterRename(LogicalUnit):
3370 """Rename the cluster.
3373 HPATH = "cluster-rename"
3374 HTYPE = constants.HTYPE_CLUSTER
3376 def BuildHooksEnv(self):
3377 """Build hooks env.
3379 """
3380 return {
3381 "OP_TARGET": self.cfg.GetClusterName(),
3382 "NEW_NAME": self.op.name,
3383 }
3385 def BuildHooksNodes(self):
3386 """Build hooks nodes.
3389 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3391 def CheckPrereq(self):
3392 """Verify that the passed name is a valid one.
3395 hostname = netutils.GetHostname(name=self.op.name,
3396 family=self.cfg.GetPrimaryIPFamily())
3398 new_name = hostname.name
3399 self.ip = new_ip = hostname.ip
3400 old_name = self.cfg.GetClusterName()
3401 old_ip = self.cfg.GetMasterIP()
3402 if new_name == old_name and new_ip == old_ip:
3403 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3404 " cluster has changed",
3405 errors.ECODE_INVAL)
3406 if new_ip != old_ip:
3407 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3408 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3409 " reachable on the network" %
3410 new_ip, errors.ECODE_NOTUNIQUE)
3412 self.op.name = new_name
3414 def Exec(self, feedback_fn):
3415 """Rename the cluster.
3418 clustername = self.op.name
3419 new_ip = self.ip
3421 # shutdown the master IP
3422 master_params = self.cfg.GetMasterNetworkParameters()
3423 ems = self.cfg.GetUseExternalMipScript()
3424 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3425 master_params, ems)
3426 result.Raise("Could not disable the master role")
3428 try:
3429 cluster = self.cfg.GetClusterInfo()
3430 cluster.cluster_name = clustername
3431 cluster.master_ip = new_ip
3432 self.cfg.Update(cluster, feedback_fn)
3434 # update the known hosts file
3435 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3436 node_list = self.cfg.GetOnlineNodeList()
3437 try:
3438 node_list.remove(master_params.name)
3439 except ValueError:
3440 pass
3441 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3442 finally:
3443 master_params.ip = new_ip
3444 result = self.rpc.call_node_activate_master_ip(master_params.name,
3445 master_params, ems)
3446 msg = result.fail_msg
3447 if msg:
3448 self.LogWarning("Could not re-enable the master role on"
3449 " the master, please restart manually: %s", msg)
3451 return clustername
3454 def _ValidateNetmask(cfg, netmask):
3455 """Checks if a netmask is valid.
3457 @type cfg: L{config.ConfigWriter}
3458 @param cfg: The cluster configuration
3459 @type netmask: int
3460 @param netmask: the netmask to be verified
3461 @raise errors.OpPrereqError: if the validation fails
3463 """
3464 ip_family = cfg.GetPrimaryIPFamily()
3465 try:
3466 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3467 except errors.ProgrammerError:
3468 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3469 ip_family, errors.ECODE_INVAL)
3470 if not ipcls.ValidateNetmask(netmask):
3471 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3472 (netmask,), errors.ECODE_INVAL)
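# --- Illustrative sketch (editor's addition, not part of the original module) ---
# The netmask here is a CIDR prefix length, validated against the class for
# the cluster's primary IP family. Assuming the IPv4 class resolved by
# GetClassFromIpFamily behaves as the code above implies:
#
#   ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
#   ipcls.ValidateNetmask(24)   # acceptable IPv4 prefix -> no error raised
#   ipcls.ValidateNetmask(64)   # out of range for IPv4  -> OpPrereqError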
3475 class LUClusterSetParams(LogicalUnit):
3476 """Change the parameters of the cluster.
3479 HPATH = "cluster-modify"
3480 HTYPE = constants.HTYPE_CLUSTER
3481 REQ_BGL = False
3483 def CheckArguments(self):
3484 """Check parameters
3486 """
3487 if self.op.uid_pool:
3488 uidpool.CheckUidPool(self.op.uid_pool)
3490 if self.op.add_uids:
3491 uidpool.CheckUidPool(self.op.add_uids)
3493 if self.op.remove_uids:
3494 uidpool.CheckUidPool(self.op.remove_uids)
3496 if self.op.master_netmask is not None:
3497 _ValidateNetmask(self.cfg, self.op.master_netmask)
3499 if self.op.diskparams:
3500 for dt_params in self.op.diskparams.values():
3501 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3503 def ExpandNames(self):
3504 # FIXME: in the future maybe other cluster params won't require checking on
3505 # all nodes to be modified.
3506 self.needed_locks = {
3507 locking.LEVEL_NODE: locking.ALL_SET,
3509 self.share_locks[locking.LEVEL_NODE] = 1
3511 def BuildHooksEnv(self):
3512 """Build hooks env.
3514 """
3515 return {
3516 "OP_TARGET": self.cfg.GetClusterName(),
3517 "NEW_VG_NAME": self.op.vg_name,
3518 }
3520 def BuildHooksNodes(self):
3521 """Build hooks nodes.
3524 mn = self.cfg.GetMasterNode()
3527 def CheckPrereq(self):
3528 """Check prerequisites.
3530 This checks whether the given params don't conflict and
3531 if the given volume group is valid.
3534 if self.op.vg_name is not None and not self.op.vg_name:
3535 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3536 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3537 " instances exist", errors.ECODE_INVAL)
3539 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3540 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3541 raise errors.OpPrereqError("Cannot disable drbd helper while"
3542 " drbd-based instances exist",
3543 errors.ECODE_INVAL)
3545 node_list = self.owned_locks(locking.LEVEL_NODE)
3547 # if vg_name not None, checks given volume group on all nodes
3548 if self.op.vg_name:
3549 vglist = self.rpc.call_vg_list(node_list)
3550 for node in node_list:
3551 msg = vglist[node].fail_msg
3552 if msg:
3553 # ignoring down node
3554 self.LogWarning("Error while gathering data on node %s"
3555 " (ignoring node): %s", node, msg)
3556 continue
3557 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3558 self.op.vg_name,
3559 constants.MIN_VG_SIZE)
3560 if vgstatus:
3561 raise errors.OpPrereqError("Error on node '%s': %s" %
3562 (node, vgstatus), errors.ECODE_ENVIRON)
3564 if self.op.drbd_helper:
3565 # checks given drbd helper on all nodes
3566 helpers = self.rpc.call_drbd_helper(node_list)
3567 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3568 if ninfo.offline:
3569 self.LogInfo("Not checking drbd helper on offline node %s", node)
3570 continue
3571 msg = helpers[node].fail_msg
3572 if msg:
3573 raise errors.OpPrereqError("Error checking drbd helper on node"
3574 " '%s': %s" % (node, msg),
3575 errors.ECODE_ENVIRON)
3576 node_helper = helpers[node].payload
3577 if node_helper != self.op.drbd_helper:
3578 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3579 (node, node_helper), errors.ECODE_ENVIRON)
3581 self.cluster = cluster = self.cfg.GetClusterInfo()
3582 # validate params changes
3583 if self.op.beparams:
3584 objects.UpgradeBeParams(self.op.beparams)
3585 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3586 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3588 if self.op.ndparams:
3589 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3590 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3592 # TODO: we need a more general way to handle resetting
3593 # cluster-level parameters to default values
3594 if self.new_ndparams["oob_program"] == "":
3595 self.new_ndparams["oob_program"] = \
3596 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3598 if self.op.nicparams:
3599 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3600 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3601 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3602 nic_errors = []
3604 # check all instances for consistency
3605 for instance in self.cfg.GetAllInstancesInfo().values():
3606 for nic_idx, nic in enumerate(instance.nics):
3607 params_copy = copy.deepcopy(nic.nicparams)
3608 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3610 # check parameter syntax
3611 try:
3612 objects.NIC.CheckParameterSyntax(params_filled)
3613 except errors.ConfigurationError, err:
3614 nic_errors.append("Instance %s, nic/%d: %s" %
3615 (instance.name, nic_idx, err))
3617 # if we're moving instances to routed, check that they have an ip
3618 target_mode = params_filled[constants.NIC_MODE]
3619 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3620 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3621 " address" % (instance.name, nic_idx))
3623 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3624 "\n".join(nic_errors))
3626 # hypervisor list/parameters
3627 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3628 if self.op.hvparams:
3629 for hv_name, hv_dict in self.op.hvparams.items():
3630 if hv_name not in self.new_hvparams:
3631 self.new_hvparams[hv_name] = hv_dict
3633 self.new_hvparams[hv_name].update(hv_dict)
3635 # disk template parameters
3636 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3637 if self.op.diskparams:
3638 for dt_name, dt_params in self.op.diskparams.items():
3639 if dt_name not in self.new_diskparams:
3640 self.new_diskparams[dt_name] = dt_params
3642 self.new_diskparams[dt_name].update(dt_params)
3644 # os hypervisor parameters
3645 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3646 if self.op.os_hvp:
3647 for os_name, hvs in self.op.os_hvp.items():
3648 if os_name not in self.new_os_hvp:
3649 self.new_os_hvp[os_name] = hvs
3650 else:
3651 for hv_name, hv_dict in hvs.items():
3652 if hv_name not in self.new_os_hvp[os_name]:
3653 self.new_os_hvp[os_name][hv_name] = hv_dict
3654 else:
3655 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3658 self.new_osp = objects.FillDict(cluster.osparams, {})
3659 if self.op.osparams:
3660 for os_name, osp in self.op.osparams.items():
3661 if os_name not in self.new_osp:
3662 self.new_osp[os_name] = {}
3664 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3665 use_none=True)
3667 if not self.new_osp[os_name]:
3668 # we removed all parameters
3669 del self.new_osp[os_name]
3670 else:
3671 # check the parameter validity (remote check)
3672 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3673 os_name, self.new_osp[os_name])
3675 # changes to the hypervisor list
3676 if self.op.enabled_hypervisors is not None:
3677 self.hv_list = self.op.enabled_hypervisors
3678 for hv in self.hv_list:
3679 # if the hypervisor doesn't already exist in the cluster
3680 # hvparams, we initialize it to empty, and then (in both
3681 # cases) we make sure to fill the defaults, as we might not
3682 # have a complete defaults list if the hypervisor wasn't
3683 # enabled before
3684 if hv not in new_hvp:
3685 new_hvp[hv] = {}
3686 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3687 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3688 else:
3689 self.hv_list = cluster.enabled_hypervisors
3691 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3692 # either the enabled list has changed, or the parameters have, validate
3693 for hv_name, hv_params in self.new_hvparams.items():
3694 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3695 (self.op.enabled_hypervisors and
3696 hv_name in self.op.enabled_hypervisors)):
3697 # either this is a new hypervisor, or its parameters have changed
3698 hv_class = hypervisor.GetHypervisor(hv_name)
3699 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3700 hv_class.CheckParameterSyntax(hv_params)
3701 _CheckHVParams(self, node_list, hv_name, hv_params)
3703 if self.op.os_hvp:
3704 # no need to check any newly-enabled hypervisors, since the
3705 # defaults have already been checked in the above code-block
3706 for os_name, os_hvp in self.new_os_hvp.items():
3707 for hv_name, hv_params in os_hvp.items():
3708 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3709 # we need to fill in the new os_hvp on top of the actual hv_p
3710 cluster_defaults = self.new_hvparams.get(hv_name, {})
3711 new_osp = objects.FillDict(cluster_defaults, hv_params)
3712 hv_class = hypervisor.GetHypervisor(hv_name)
3713 hv_class.CheckParameterSyntax(new_osp)
3714 _CheckHVParams(self, node_list, hv_name, new_osp)
3716 if self.op.default_iallocator:
3717 alloc_script = utils.FindFile(self.op.default_iallocator,
3718 constants.IALLOCATOR_SEARCH_PATH,
3719 os.path.isfile)
3720 if alloc_script is None:
3721 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3722 " specified" % self.op.default_iallocator,
3723 errors.ECODE_INVAL)
3725 def Exec(self, feedback_fn):
3726 """Change the parameters of the cluster.
3729 if self.op.vg_name is not None:
3730 new_volume = self.op.vg_name
3731 if not new_volume:
3732 new_volume = None
3733 if new_volume != self.cfg.GetVGName():
3734 self.cfg.SetVGName(new_volume)
3735 else:
3736 feedback_fn("Cluster LVM configuration already in desired"
3737 " state, not changing")
3738 if self.op.drbd_helper is not None:
3739 new_helper = self.op.drbd_helper
3740 if not new_helper:
3741 new_helper = None
3742 if new_helper != self.cfg.GetDRBDHelper():
3743 self.cfg.SetDRBDHelper(new_helper)
3744 else:
3745 feedback_fn("Cluster DRBD helper already in desired state,"
3746 " not changing")
3747 if self.op.hvparams:
3748 self.cluster.hvparams = self.new_hvparams
3749 if self.op.os_hvp:
3750 self.cluster.os_hvp = self.new_os_hvp
3751 if self.op.enabled_hypervisors is not None:
3752 self.cluster.hvparams = self.new_hvparams
3753 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3754 if self.op.beparams:
3755 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3756 if self.op.nicparams:
3757 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3758 if self.op.osparams:
3759 self.cluster.osparams = self.new_osp
3760 if self.op.ndparams:
3761 self.cluster.ndparams = self.new_ndparams
3762 if self.op.diskparams:
3763 self.cluster.diskparams = self.new_diskparams
3765 if self.op.candidate_pool_size is not None:
3766 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3767 # we need to update the pool size here, otherwise the save will fail
3768 _AdjustCandidatePool(self, [])
3770 if self.op.maintain_node_health is not None:
3771 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
3772 feedback_fn("Note: CONFD was disabled at build time, node health"
3773 " maintenance is not useful (still enabling it)")
3774 self.cluster.maintain_node_health = self.op.maintain_node_health
3776 if self.op.prealloc_wipe_disks is not None:
3777 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3779 if self.op.add_uids is not None:
3780 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3782 if self.op.remove_uids is not None:
3783 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3785 if self.op.uid_pool is not None:
3786 self.cluster.uid_pool = self.op.uid_pool
3788 if self.op.default_iallocator is not None:
3789 self.cluster.default_iallocator = self.op.default_iallocator
3791 if self.op.reserved_lvs is not None:
3792 self.cluster.reserved_lvs = self.op.reserved_lvs
3794 if self.op.use_external_mip_script is not None:
3795 self.cluster.use_external_mip_script = self.op.use_external_mip_script
3797 def helper_os(aname, mods, desc):
3798 desc += " OS list"
3799 lst = getattr(self.cluster, aname)
3800 for key, val in mods:
3801 if key == constants.DDM_ADD:
3802 if val in lst:
3803 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3804 else:
3805 lst.append(val)
3806 elif key == constants.DDM_REMOVE:
3807 if val in lst:
3808 lst.remove(val)
3809 else:
3810 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3811 else:
3812 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3814 if self.op.hidden_os:
3815 helper_os("hidden_os", self.op.hidden_os, "hidden")
3817 if self.op.blacklisted_os:
3818 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
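# For reference, each modification list holds (action, os_name) pairs; a
# hypothetical [(constants.DDM_ADD, "debian-image"),
# (constants.DDM_REMOVE, "lenny-image")] would add the first OS to the
# list and drop the second from it (the OS names are illustrative only).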
3820 if self.op.master_netdev:
3821 master_params = self.cfg.GetMasterNetworkParameters()
3822 ems = self.cfg.GetUseExternalMipScript()
3823 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3824 self.cluster.master_netdev)
3825 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3826 master_params, ems)
3827 result.Raise("Could not disable the master ip")
3828 feedback_fn("Changing master_netdev from %s to %s" %
3829 (master_params.netdev, self.op.master_netdev))
3830 self.cluster.master_netdev = self.op.master_netdev
3832 if self.op.master_netmask:
3833 master_params = self.cfg.GetMasterNetworkParameters()
3834 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3835 result = self.rpc.call_node_change_master_netmask(master_params.name,
3836 master_params.netmask,
3837 self.op.master_netmask,
3838 master_params.ip,
3839 master_params.netdev)
3840 if result.fail_msg:
3841 msg = "Could not change the master IP netmask: %s" % result.fail_msg
3842 feedback_fn(msg)
3844 self.cluster.master_netmask = self.op.master_netmask
3846 self.cfg.Update(self.cluster, feedback_fn)
3848 if self.op.master_netdev:
3849 master_params = self.cfg.GetMasterNetworkParameters()
3850 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3851 self.op.master_netdev)
3852 ems = self.cfg.GetUseExternalMipScript()
3853 result = self.rpc.call_node_activate_master_ip(master_params.name,
3854 master_params, ems)
3855 if result.fail_msg:
3856 self.LogWarning("Could not re-enable the master ip on"
3857 " the master, please restart manually: %s",
3858 result.fail_msg)
3861 def _UploadHelper(lu, nodes, fname):
3862 """Helper for uploading a file and showing warnings.
3865 if os.path.exists(fname):
3866 result = lu.rpc.call_upload_file(nodes, fname)
3867 for to_node, to_result in result.items():
3868 msg = to_result.fail_msg
3869 if msg:
3870 msg = ("Copy of file %s to node %s failed: %s" %
3871 (fname, to_node, msg))
3872 lu.proc.LogWarning(msg)
3875 def _ComputeAncillaryFiles(cluster, redist):
3876 """Compute files external to Ganeti which need to be consistent.
3878 @type redist: boolean
3879 @param redist: Whether to include files which need to be redistributed
3882 # Compute files for all nodes
3883 files_all = set([
3884 constants.SSH_KNOWN_HOSTS_FILE,
3885 constants.CONFD_HMAC_KEY,
3886 constants.CLUSTER_DOMAIN_SECRET_FILE,
3887 constants.SPICE_CERT_FILE,
3888 constants.SPICE_CACERT_FILE,
3889 constants.RAPI_USERS_FILE,
3890 ])
3892 if not redist:
3893 files_all.update(constants.ALL_CERT_FILES)
3894 files_all.update(ssconf.SimpleStore().GetFileList())
3895 else:
3896 # we need to ship at least the RAPI certificate
3897 files_all.add(constants.RAPI_CERT_FILE)
3899 if cluster.modify_etc_hosts:
3900 files_all.add(constants.ETC_HOSTS)
3902 # Files which are optional, these must:
3903 # - be present in one other category as well
3904 # - either exist or not exist on all nodes of that category (mc, vm all)
3905 files_opt = set([
3906 constants.RAPI_USERS_FILE,
3907 ])
3909 # Files which should only be on master candidates
3910 files_mc = set()
3912 if not redist:
3913 files_mc.add(constants.CLUSTER_CONF_FILE)
3915 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
3916 # replication
3917 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
3919 # Files which should only be on VM-capable nodes
3920 files_vm = set(filename
3921 for hv_name in cluster.enabled_hypervisors
3922 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3924 files_opt |= set(filename
3925 for hv_name in cluster.enabled_hypervisors
3926 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3928 # Filenames in each category must be unique
3929 all_files_set = files_all | files_mc | files_vm
3930 assert (len(all_files_set) ==
3931 sum(map(len, [files_all, files_mc, files_vm]))), \
3932 "Found file listed in more than one file list"
3934 # Optional files must be present in one other category
3935 assert all_files_set.issuperset(files_opt), \
3936 "Optional file not in a different required list"
3938 return (files_all, files_opt, files_mc, files_vm)
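# Callers unpack the returned tuple positionally, as in
# (files_all, _, files_mc, files_vm) = _ComputeAncillaryFiles(cluster, True)
# done by _RedistributeAncillaryFiles below.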
3941 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3942 """Distribute additional files which are part of the cluster configuration.
3944 ConfigWriter takes care of distributing the config and ssconf files, but
3945 there are more files which should be distributed to all nodes. This function
3946 makes sure those are copied.
3948 @param lu: calling logical unit
3949 @param additional_nodes: list of nodes not in the config to distribute to
3950 @type additional_vm: boolean
3951 @param additional_vm: whether the additional nodes are vm-capable or not
3954 # Gather target nodes
3955 cluster = lu.cfg.GetClusterInfo()
3956 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3958 online_nodes = lu.cfg.GetOnlineNodeList()
3959 vm_nodes = lu.cfg.GetVmCapableNodeList()
3961 if additional_nodes is not None:
3962 online_nodes.extend(additional_nodes)
3963 if additional_vm:
3964 vm_nodes.extend(additional_nodes)
3966 # Never distribute to master node
3967 for nodelist in [online_nodes, vm_nodes]:
3968 if master_info.name in nodelist:
3969 nodelist.remove(master_info.name)
3972 (files_all, _, files_mc, files_vm) = \
3973 _ComputeAncillaryFiles(cluster, True)
3975 # Never re-distribute configuration file from here
3976 assert not (constants.CLUSTER_CONF_FILE in files_all or
3977 constants.CLUSTER_CONF_FILE in files_vm)
3978 assert not files_mc, "Master candidates not handled in this function"
3980 filemap = [
3981 (online_nodes, files_all),
3982 (vm_nodes, files_vm),
3983 ]
3985 # Upload the files
3986 for (node_list, files) in filemap:
3987 for fname in files:
3988 _UploadHelper(lu, node_list, fname)
3991 class LUClusterRedistConf(NoHooksLU):
3992 """Force the redistribution of cluster configuration.
3994 This is a very simple LU.
3997 REQ_BGL = False
3999 def ExpandNames(self):
4000 self.needed_locks = {
4001 locking.LEVEL_NODE: locking.ALL_SET,
4002 }
4003 self.share_locks[locking.LEVEL_NODE] = 1
4005 def Exec(self, feedback_fn):
4006 """Redistribute the configuration.
4009 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4010 _RedistributeAncillaryFiles(self)
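# The cfg.Update() call above distributes config.data and the ssconf
# files; _RedistributeAncillaryFiles() then copies the remaining
# cluster-wide files (certificates, known_hosts, etc.) computed by
# _ComputeAncillaryFiles.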
4013 class LUClusterActivateMasterIp(NoHooksLU):
4014 """Activate the master IP on the master node.
4017 def Exec(self, feedback_fn):
4018 """Activate the master IP.
4021 master_params = self.cfg.GetMasterNetworkParameters()
4022 ems = self.cfg.GetUseExternalMipScript()
4023 result = self.rpc.call_node_activate_master_ip(master_params.name,
4024 master_params, ems)
4025 result.Raise("Could not activate the master IP")
4028 class LUClusterDeactivateMasterIp(NoHooksLU):
4029 """Deactivate the master IP on the master node.
4032 def Exec(self, feedback_fn):
4033 """Deactivate the master IP.
4036 master_params = self.cfg.GetMasterNetworkParameters()
4037 ems = self.cfg.GetUseExternalMipScript()
4038 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4039 master_params, ems)
4040 result.Raise("Could not deactivate the master IP")
4043 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4044 """Sleep and poll for an instance's disk to sync.
4047 if not instance.disks or disks is not None and not disks:
4048 return True
4050 disks = _ExpandCheckDisks(instance, disks)
4052 if not oneshot:
4053 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4055 node = instance.primary_node
4057 for dev in disks:
4058 lu.cfg.SetDiskID(dev, node)
4060 # TODO: Convert to utils.Retry
4062 retries = 0
4063 degr_retries = 10 # in seconds, as we sleep 1 second each time
4064 while True:
4065 max_time = 0
4066 done = True
4067 cumul_degraded = False
4068 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4069 msg = rstats.fail_msg
4070 if msg:
4071 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4072 retries += 1
4073 if retries >= 10:
4074 raise errors.RemoteError("Can't contact node %s for mirror data,"
4075 " aborting." % node)
4076 time.sleep(6)
4077 continue
4078 rstats = rstats.payload
4079 retries = 0
4080 for i, mstat in enumerate(rstats):
4081 if mstat is None:
4082 lu.LogWarning("Can't compute data for node %s/%s",
4083 node, disks[i].iv_name)
4084 continue
4086 cumul_degraded = (cumul_degraded or
4087 (mstat.is_degraded and mstat.sync_percent is None))
4088 if mstat.sync_percent is not None:
4089 done = False
4090 if mstat.estimated_time is not None:
4091 rem_time = ("%s remaining (estimated)" %
4092 utils.FormatSeconds(mstat.estimated_time))
4093 max_time = mstat.estimated_time
4094 else:
4095 rem_time = "no time estimate"
4096 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4097 (disks[i].iv_name, mstat.sync_percent, rem_time))
4099 # if we're done but degraded, let's do a few small retries, to
4100 # make sure we see a stable and not transient situation; therefore
4101 # we force restart of the loop
4102 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4103 logging.info("Degraded disks found, %d retries left", degr_retries)
4104 degr_retries -= 1
4105 time.sleep(1)
4106 continue
4108 if done or oneshot:
4109 break
4111 time.sleep(min(60, max_time))
4113 if done:
4114 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4115 return not cumul_degraded
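# Return semantics: True when all mirrors finished fully in sync, False
# when the wait ended while some device was still flagged as degraded.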
4118 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4119 """Check that mirrors are not degraded.
4121 The ldisk parameter, if True, will change the test from the
4122 is_degraded attribute (which represents overall non-ok status for
4123 the device(s)) to the ldisk (representing the local storage status).
4126 lu.cfg.SetDiskID(dev, node)
4128 result = True
4130 if on_primary or dev.AssembleOnSecondary():
4131 rstats = lu.rpc.call_blockdev_find(node, dev)
4132 msg = rstats.fail_msg
4133 if msg:
4134 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4135 result = False
4136 elif not rstats.payload:
4137 lu.LogWarning("Can't find disk on node %s", node)
4138 result = False
4139 else:
4140 if ldisk:
4141 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4142 else:
4143 result = result and not rstats.payload.is_degraded
4145 if dev.children:
4146 for child in dev.children:
4147 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4149 return result
4152 class LUOobCommand(NoHooksLU):
4153 """Logical unit for OOB handling.
4157 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4159 def ExpandNames(self):
4160 """Gather locks we need.
4163 if self.op.node_names:
4164 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4165 lock_names = self.op.node_names
4166 else:
4167 lock_names = locking.ALL_SET
4169 self.needed_locks = {
4170 locking.LEVEL_NODE: lock_names,
4171 }
4173 def CheckPrereq(self):
4174 """Check prerequisites.
4177 - the node exists in the configuration
4180 Any errors are signaled by raising errors.OpPrereqError.
4183 self.nodes = []
4184 self.master_node = self.cfg.GetMasterNode()
4186 assert self.op.power_delay >= 0.0
4188 if self.op.node_names:
4189 if (self.op.command in self._SKIP_MASTER and
4190 self.master_node in self.op.node_names):
4191 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4192 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4194 if master_oob_handler:
4195 additional_text = ("run '%s %s %s' if you want to operate on the"
4196 " master regardless") % (master_oob_handler,
4200 additional_text = "it does not support out-of-band operations"
4202 raise errors.OpPrereqError(("Operating on the master node %s is not"
4203 " allowed for %s; %s") %
4204 (self.master_node, self.op.command,
4205 additional_text), errors.ECODE_INVAL)
4206 else:
4207 self.op.node_names = self.cfg.GetNodeList()
4208 if self.op.command in self._SKIP_MASTER:
4209 self.op.node_names.remove(self.master_node)
4211 if self.op.command in self._SKIP_MASTER:
4212 assert self.master_node not in self.op.node_names
4214 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4215 if node is None:
4216 raise errors.OpPrereqError("Node %s not found" % node_name,
4219 self.nodes.append(node)
4221 if (not self.op.ignore_status and
4222 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4223 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4224 " not marked offline") % node_name,
4227 def Exec(self, feedback_fn):
4228 """Execute OOB and return result if we expect any.
4231 master_node = self.master_node
4232 ret = []
4234 for idx, node in enumerate(utils.NiceSort(self.nodes,
4235 key=lambda node: node.name)):
4236 node_entry = [(constants.RS_NORMAL, node.name)]
4237 ret.append(node_entry)
4239 oob_program = _SupportsOob(self.cfg, node)
4241 if not oob_program:
4242 node_entry.append((constants.RS_UNAVAIL, None))
4243 continue
4245 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4246 self.op.command, oob_program, node.name)
4247 result = self.rpc.call_run_oob(master_node, oob_program,
4248 self.op.command, node.name,
4249 self.op.timeout)
4251 if result.fail_msg:
4252 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4253 node.name, result.fail_msg)
4254 node_entry.append((constants.RS_NODATA, None))
4255 else:
4256 try:
4257 self._CheckPayload(result)
4258 except errors.OpExecError, err:
4259 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4261 node_entry.append((constants.RS_NODATA, None))
4263 if self.op.command == constants.OOB_HEALTH:
4264 # For health we should log important events
4265 for item, status in result.payload:
4266 if status in [constants.OOB_STATUS_WARNING,
4267 constants.OOB_STATUS_CRITICAL]:
4268 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4269 item, node.name, status)
4271 if self.op.command == constants.OOB_POWER_ON:
4272 node.powered = True
4273 elif self.op.command == constants.OOB_POWER_OFF:
4274 node.powered = False
4275 elif self.op.command == constants.OOB_POWER_STATUS:
4276 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4277 if powered != node.powered:
4278 logging.warning(("Recorded power state (%s) of node '%s' does not"
4279 " match actual power state (%s)"), node.powered,
4282 # For configuration changing commands we should update the node
4283 if self.op.command in (constants.OOB_POWER_ON,
4284 constants.OOB_POWER_OFF):
4285 self.cfg.Update(node, feedback_fn)
4287 node_entry.append((constants.RS_NORMAL, result.payload))
4289 if (self.op.command == constants.OOB_POWER_ON and
4290 idx < len(self.nodes) - 1):
4291 time.sleep(self.op.power_delay)
4293 return ret
4295 def _CheckPayload(self, result):
4296 """Checks if the payload is valid.
4298 @param result: RPC result
4299 @raises errors.OpExecError: If payload is not valid
4302 errs = []
4303 if self.op.command == constants.OOB_HEALTH:
4304 if not isinstance(result.payload, list):
4305 errs.append("command 'health' is expected to return a list but got %s" %
4306 type(result.payload))
4307 else:
4308 for item, status in result.payload:
4309 if status not in constants.OOB_STATUSES:
4310 errs.append("health item '%s' has invalid status '%s'" %
4313 if self.op.command == constants.OOB_POWER_STATUS:
4314 if not isinstance(result.payload, dict):
4315 errs.append("power-status is expected to return a dict but got %s" %
4316 type(result.payload))
4318 if self.op.command in [
4319 constants.OOB_POWER_ON,
4320 constants.OOB_POWER_OFF,
4321 constants.OOB_POWER_CYCLE,
4322 ]:
4323 if result.payload is not None:
4324 errs.append("%s is expected to not return payload but got '%s'" %
4325 (self.op.command, result.payload))
4327 if errs:
4328 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4329 utils.CommaJoin(errs))
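# The Exec result is one entry per node, each a list of (status, data)
# tuples; a successful power-off could look like (hypothetical node name)
# [[(constants.RS_NORMAL, "node2.example.com"),
# (constants.RS_NORMAL, None)]], since power commands return no payload.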
4332 class _OsQuery(_QueryBase):
4333 FIELDS = query.OS_FIELDS
4335 def ExpandNames(self, lu):
4336 # Lock all nodes in shared mode
4337 # Temporary removal of locks, should be reverted later
4338 # TODO: reintroduce locks when they are lighter-weight
4339 lu.needed_locks = {}
4340 #self.share_locks[locking.LEVEL_NODE] = 1
4341 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4343 # The following variables interact with _QueryBase._GetNames
4344 if self.names:
4345 self.wanted = self.names
4346 else:
4347 self.wanted = locking.ALL_SET
4349 self.do_locking = self.use_locking
4351 def DeclareLocks(self, lu, level):
4352 pass
4354 @staticmethod
4355 def _DiagnoseByOS(rlist):
4356 """Remaps a per-node return list into an a per-os per-node dictionary
4358 @param rlist: a map with node names as keys and OS objects as values
4361 @return: a dictionary with osnames as keys and as value another
4362 map, with nodes as keys and tuples of (path, status, diagnose,
4363 variants, parameters, api_versions) as values, eg::
4365 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4366 (/srv/..., False, "invalid api")],
4367 "node2": [(/srv/..., True, "", [], [])]}
4372 # we build here the list of nodes that didn't fail the RPC (at RPC
4373 # level), so that nodes with a non-responding node daemon don't
4374 # make all OSes invalid
4375 good_nodes = [node_name for node_name in rlist
4376 if not rlist[node_name].fail_msg]
4377 for node_name, nr in rlist.items():
4378 if nr.fail_msg or not nr.payload:
4379 continue
4380 for (name, path, status, diagnose, variants,
4381 params, api_versions) in nr.payload:
4382 if name not in all_os:
4383 # build a list of nodes for this os containing empty lists
4384 # for each node in node_list
4385 all_os[name] = {}
4386 for nname in good_nodes:
4387 all_os[name][nname] = []
4388 # convert params from [name, help] to (name, help)
4389 params = [tuple(v) for v in params]
4390 all_os[name][node_name].append((path, status, diagnose,
4391 variants, params, api_versions))
4392 return all_os
4394 def _GetQueryData(self, lu):
4395 """Computes the list of nodes and their attributes.
4398 # Locking is not used
4399 assert not (compat.any(lu.glm.is_owned(level)
4400 for level in locking.LEVELS
4401 if level != locking.LEVEL_CLUSTER) or
4402 self.do_locking or self.use_locking)
4404 valid_nodes = [node.name
4405 for node in lu.cfg.GetAllNodesInfo().values()
4406 if not node.offline and node.vm_capable]
4407 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4408 cluster = lu.cfg.GetClusterInfo()
4410 data = {}
4412 for (os_name, os_data) in pol.items():
4413 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4414 hidden=(os_name in cluster.hidden_os),
4415 blacklisted=(os_name in cluster.blacklisted_os))
4417 variants = set()
4418 parameters = set()
4419 api_versions = set()
4421 for idx, osl in enumerate(os_data.values()):
4422 info.valid = bool(info.valid and osl and osl[0][1])
4423 if not info.valid:
4424 break
4426 (node_variants, node_params, node_api) = osl[0][3:6]
4427 if idx == 0:
4428 # First entry
4429 variants.update(node_variants)
4430 parameters.update(node_params)
4431 api_versions.update(node_api)
4432 else:
4433 # Filter out inconsistent values
4434 variants.intersection_update(node_variants)
4435 parameters.intersection_update(node_params)
4436 api_versions.intersection_update(node_api)
4438 info.variants = list(variants)
4439 info.parameters = list(parameters)
4440 info.api_versions = list(api_versions)
4442 data[os_name] = info
4444 # Prepare data in requested order
4445 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4446 if name in data]
4449 class LUOsDiagnose(NoHooksLU):
4450 """Logical unit for OS diagnose/query.
4456 def _BuildFilter(fields, names):
4457 """Builds a filter for querying OSes.
4460 name_filter = qlang.MakeSimpleFilter("name", names)
4462 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4463 # respective field is not requested
4464 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4465 for fname in ["hidden", "blacklisted"]
4466 if fname not in fields]
4467 if "valid" not in fields:
4468 status_filter.append([qlang.OP_TRUE, "valid"])
4470 if status_filter:
4471 status_filter.insert(0, qlang.OP_AND)
4472 else:
4473 status_filter = None
4475 if name_filter and status_filter:
4476 return [qlang.OP_AND, name_filter, status_filter]
4477 elif name_filter:
4478 return name_filter
4479 else:
4480 return status_filter
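# As a sketch of the legacy behaviour: with no name filter and only
# "name" requested, the method returns roughly
# [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
# [OP_NOT, [OP_TRUE, "blacklisted"]], [OP_TRUE, "valid"]],
# i.e. hidden, blacklisted and invalid OSes stay out of the listing.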
4482 def CheckArguments(self):
4483 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4484 self.op.output_fields, False)
4486 def ExpandNames(self):
4487 self.oq.ExpandNames(self)
4489 def Exec(self, feedback_fn):
4490 return self.oq.OldStyleQuery(self)
4493 class LUNodeRemove(LogicalUnit):
4494 """Logical unit for removing a node.
4497 HPATH = "node-remove"
4498 HTYPE = constants.HTYPE_NODE
4500 def BuildHooksEnv(self):
4501 """Build hooks env.
4503 This doesn't run on the target node in the pre phase as a failed
4504 node would then be impossible to remove.
4507 return {
4508 "OP_TARGET": self.op.node_name,
4509 "NODE_NAME": self.op.node_name,
4512 def BuildHooksNodes(self):
4513 """Build hooks nodes.
4516 all_nodes = self.cfg.GetNodeList()
4517 try:
4518 all_nodes.remove(self.op.node_name)
4519 except ValueError:
4520 logging.warning("Node '%s', which is about to be removed, was not found"
4521 " in the list of all nodes", self.op.node_name)
4522 return (all_nodes, all_nodes)
4524 def CheckPrereq(self):
4525 """Check prerequisites.
4528 - the node exists in the configuration
4529 - it does not have primary or secondary instances
4530 - it's not the master
4532 Any errors are signaled by raising errors.OpPrereqError.
4535 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4536 node = self.cfg.GetNodeInfo(self.op.node_name)
4537 assert node is not None
4539 masternode = self.cfg.GetMasterNode()
4540 if node.name == masternode:
4541 raise errors.OpPrereqError("Node is the master node, failover to another"
4542 " node is required", errors.ECODE_INVAL)
4544 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4545 if node.name in instance.all_nodes:
4546 raise errors.OpPrereqError("Instance %s is still running on the node,"
4547 " please remove first" % instance_name,
4549 self.op.node_name = node.name
4550 self.node = node
4552 def Exec(self, feedback_fn):
4553 """Removes the node from the cluster.
4557 logging.info("Stopping the node daemon and removing configs from node %s",
4560 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4562 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4563 "Not owning BGL"
4565 # Promote nodes to master candidate as needed
4566 _AdjustCandidatePool(self, exceptions=[node.name])
4567 self.context.RemoveNode(node.name)
4569 # Run post hooks on the node before it's removed
4570 _RunPostHook(self, node.name)
4572 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4573 msg = result.fail_msg
4574 if msg:
4575 self.LogWarning("Errors encountered on the remote node while leaving"
4576 " the cluster: %s", msg)
4578 # Remove node from our /etc/hosts
4579 if self.cfg.GetClusterInfo().modify_etc_hosts:
4580 master_node = self.cfg.GetMasterNode()
4581 result = self.rpc.call_etc_hosts_modify(master_node,
4582 constants.ETC_HOSTS_REMOVE,
4583 node.name, None)
4584 result.Raise("Can't update hosts file with new host data")
4585 _RedistributeAncillaryFiles(self)
4588 class _NodeQuery(_QueryBase):
4589 FIELDS = query.NODE_FIELDS
4591 def ExpandNames(self, lu):
4592 lu.needed_locks = {}
4593 lu.share_locks = _ShareAll()
4595 if self.names:
4596 self.wanted = _GetWantedNodes(lu, self.names)
4597 else:
4598 self.wanted = locking.ALL_SET
4600 self.do_locking = (self.use_locking and
4601 query.NQ_LIVE in self.requested_data)
4603 if self.do_locking:
4604 # If any non-static field is requested we need to lock the nodes
4605 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4607 def DeclareLocks(self, lu, level):
4608 pass
4610 def _GetQueryData(self, lu):
4611 """Computes the list of nodes and their attributes.
4614 all_info = lu.cfg.GetAllNodesInfo()
4616 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4618 # Gather data as requested
4619 if query.NQ_LIVE in self.requested_data:
4620 # filter out non-vm_capable nodes
4621 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4623 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4624 [lu.cfg.GetHypervisorType()])
4625 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4626 for (name, nresult) in node_data.items()
4627 if not nresult.fail_msg and nresult.payload)
4628 else:
4629 live_data = None
4631 if query.NQ_INST in self.requested_data:
4632 node_to_primary = dict([(name, set()) for name in nodenames])
4633 node_to_secondary = dict([(name, set()) for name in nodenames])
4635 inst_data = lu.cfg.GetAllInstancesInfo()
4637 for inst in inst_data.values():
4638 if inst.primary_node in node_to_primary:
4639 node_to_primary[inst.primary_node].add(inst.name)
4640 for secnode in inst.secondary_nodes:
4641 if secnode in node_to_secondary:
4642 node_to_secondary[secnode].add(inst.name)
4643 else:
4644 node_to_primary = None
4645 node_to_secondary = None
4647 if query.NQ_OOB in self.requested_data:
4648 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4649 for name, node in all_info.iteritems())
4650 else:
4651 oob_support = None
4653 if query.NQ_GROUP in self.requested_data:
4654 groups = lu.cfg.GetAllNodeGroupsInfo()
4655 else:
4656 groups = None
4658 return query.NodeQueryData([all_info[name] for name in nodenames],
4659 live_data, lu.cfg.GetMasterNode(),
4660 node_to_primary, node_to_secondary, groups,
4661 oob_support, lu.cfg.GetClusterInfo())
4664 class LUNodeQuery(NoHooksLU):
4665 """Logical unit for querying nodes.
4668 # pylint: disable=W0142
4669 REQ_BGL = False
4671 def CheckArguments(self):
4672 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4673 self.op.output_fields, self.op.use_locking)
4675 def ExpandNames(self):
4676 self.nq.ExpandNames(self)
4678 def DeclareLocks(self, level):
4679 self.nq.DeclareLocks(self, level)
4681 def Exec(self, feedback_fn):
4682 return self.nq.OldStyleQuery(self)
4685 class LUNodeQueryvols(NoHooksLU):
4686 """Logical unit for getting volumes on node(s).
4690 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4691 _FIELDS_STATIC = utils.FieldSet("node")
4693 def CheckArguments(self):
4694 _CheckOutputFields(static=self._FIELDS_STATIC,
4695 dynamic=self._FIELDS_DYNAMIC,
4696 selected=self.op.output_fields)
4698 def ExpandNames(self):
4699 self.share_locks = _ShareAll()
4700 self.needed_locks = {}
4702 if not self.op.nodes:
4703 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4705 self.needed_locks[locking.LEVEL_NODE] = \
4706 _GetWantedNodes(self, self.op.nodes)
4708 def Exec(self, feedback_fn):
4709 """Computes the list of nodes and their attributes.
4712 nodenames = self.owned_locks(locking.LEVEL_NODE)
4713 volumes = self.rpc.call_node_volumes(nodenames)
4715 ilist = self.cfg.GetAllInstancesInfo()
4716 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4718 output = []
4719 for node in nodenames:
4720 nresult = volumes[node]
4721 if nresult.offline:
4722 continue
4723 msg = nresult.fail_msg
4724 if msg:
4725 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4726 continue
4728 node_vols = sorted(nresult.payload,
4729 key=operator.itemgetter("dev"))
4731 for vol in node_vols:
4732 node_output = []
4733 for field in self.op.output_fields:
4734 if field == "node":
4735 val = node
4736 elif field == "phys":
4737 val = vol["dev"]
4738 elif field == "vg":
4739 val = vol["vg"]
4740 elif field == "name":
4741 val = vol["name"]
4742 elif field == "size":
4743 val = int(float(vol["size"]))
4744 elif field == "instance":
4745 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4746 else:
4747 raise errors.ParameterError(field)
4748 node_output.append(str(val))
4750 output.append(node_output)
4752 return output
4755 class LUNodeQueryStorage(NoHooksLU):
4756 """Logical unit for getting information on storage units on node(s).
4759 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4760 REQ_BGL = False
4762 def CheckArguments(self):
4763 _CheckOutputFields(static=self._FIELDS_STATIC,
4764 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4765 selected=self.op.output_fields)
4767 def ExpandNames(self):
4768 self.share_locks = _ShareAll()
4769 self.needed_locks = {}
4771 if self.op.nodes:
4772 self.needed_locks[locking.LEVEL_NODE] = \
4773 _GetWantedNodes(self, self.op.nodes)
4774 else:
4775 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4777 def Exec(self, feedback_fn):
4778 """Computes the list of nodes and their attributes.
4781 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4783 # Always get name to sort by
4784 if constants.SF_NAME in self.op.output_fields:
4785 fields = self.op.output_fields[:]
4786 else:
4787 fields = [constants.SF_NAME] + self.op.output_fields
4789 # Never ask for node or type as it's only known to the LU
4790 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4791 while extra in fields:
4792 fields.remove(extra)
4794 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4795 name_idx = field_idx[constants.SF_NAME]
4797 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4798 data = self.rpc.call_storage_list(self.nodes,
4799 self.op.storage_type, st_args,
4800 self.op.name, fields)
4802 result = []
4804 for node in utils.NiceSort(self.nodes):
4805 nresult = data[node]
4806 if nresult.offline:
4807 continue
4809 msg = nresult.fail_msg
4810 if msg:
4811 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4814 rows = dict([(row[name_idx], row) for row in nresult.payload])
4816 for name in utils.NiceSort(rows.keys()):
4817 row = rows[name]
4819 out = []
4821 for field in self.op.output_fields:
4822 if field == constants.SF_NODE:
4823 val = node
4824 elif field == constants.SF_TYPE:
4825 val = self.op.storage_type
4826 elif field in field_idx:
4827 val = row[field_idx[field]]
4828 else:
4829 raise errors.ParameterError(field)
4831 out.append(val)
4833 result.append(out)
4836 return result
4838 class _InstanceQuery(_QueryBase):
4839 FIELDS = query.INSTANCE_FIELDS
4841 def ExpandNames(self, lu):
4842 lu.needed_locks = {}
4843 lu.share_locks = _ShareAll()
4845 if self.names:
4846 self.wanted = _GetWantedInstances(lu, self.names)
4847 else:
4848 self.wanted = locking.ALL_SET
4850 self.do_locking = (self.use_locking and
4851 query.IQ_LIVE in self.requested_data)
4852 if self.do_locking:
4853 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4854 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4855 lu.needed_locks[locking.LEVEL_NODE] = []
4856 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4858 self.do_grouplocks = (self.do_locking and
4859 query.IQ_NODES in self.requested_data)
4861 def DeclareLocks(self, lu, level):
4862 if self.do_locking:
4863 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4864 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4866 # Lock all groups used by instances optimistically; this requires going
4867 # via the node before it's locked, requiring verification later on
4868 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4869 set(group_uuid
4870 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4871 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4872 elif level == locking.LEVEL_NODE:
4873 lu._LockInstancesNodes() # pylint: disable=W0212
4875 @staticmethod
4876 def _CheckGroupLocks(lu):
4877 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4878 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4880 # Check if node groups for locked instances are still correct
4881 for instance_name in owned_instances:
4882 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
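# This is the second half of the optimistic locking started in
# DeclareLocks: the group set was computed from nodes that were not yet
# locked, so after acquiring the locks we verify that every instance
# still maps to the groups we actually locked.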
4884 def _GetQueryData(self, lu):
4885 """Computes the list of instances and their attributes.
4888 if self.do_grouplocks:
4889 self._CheckGroupLocks(lu)
4891 cluster = lu.cfg.GetClusterInfo()
4892 all_info = lu.cfg.GetAllInstancesInfo()
4894 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4896 instance_list = [all_info[name] for name in instance_names]
4897 nodes = frozenset(itertools.chain(*(inst.all_nodes
4898 for inst in instance_list)))
4899 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4900 bad_nodes = []
4901 offline_nodes = []
4902 wrongnode_inst = set()
4904 # Gather data as requested
4905 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4906 live_data = {}
4907 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4908 for name in nodes:
4909 result = node_data[name]
4910 if result.offline:
4911 # offline nodes will be in both lists
4912 assert result.fail_msg
4913 offline_nodes.append(name)
4914 if result.fail_msg:
4915 bad_nodes.append(name)
4916 elif result.payload:
4917 for inst in result.payload:
4918 if inst in all_info:
4919 if all_info[inst].primary_node == name:
4920 live_data.update(result.payload)
4921 else:
4922 wrongnode_inst.add(inst)
4924 # orphan instance; we don't list it here as we don't
4925 # handle this case yet in the output of instance listing
4926 logging.warning("Orphan instance '%s' found on node %s",
4928 # else no instance is alive
4929 else:
4930 live_data = None
4932 if query.IQ_DISKUSAGE in self.requested_data:
4933 disk_usage = dict((inst.name,
4934 _ComputeDiskSize(inst.disk_template,
4935 [{constants.IDISK_SIZE: disk.size}
4936 for disk in inst.disks]))
4937 for inst in instance_list)
4938 else:
4939 disk_usage = None
4941 if query.IQ_CONSOLE in self.requested_data:
4942 consinfo = {}
4943 for inst in instance_list:
4944 if inst.name in live_data:
4945 # Instance is running
4946 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4947 else:
4948 consinfo[inst.name] = None
4949 assert set(consinfo.keys()) == set(instance_names)
4950 else:
4951 consinfo = None
4953 if query.IQ_NODES in self.requested_data:
4954 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4955 instance_list)))
4956 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4957 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4958 for uuid in set(map(operator.attrgetter("group"),
4959 nodes.values())))
4960 else:
4961 nodes = None
4962 groups = None
4964 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4965 disk_usage, offline_nodes, bad_nodes,
4966 live_data, wrongnode_inst, consinfo,
4967 nodes, groups)
4970 class LUQuery(NoHooksLU):
4971 """Query for resources/items of a certain kind.
4974 # pylint: disable=W0142
4975 REQ_BGL = False
4977 def CheckArguments(self):
4978 qcls = _GetQueryImplementation(self.op.what)
4980 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
4982 def ExpandNames(self):
4983 self.impl.ExpandNames(self)
4985 def DeclareLocks(self, level):
4986 self.impl.DeclareLocks(self, level)
4988 def Exec(self, feedback_fn):
4989 return self.impl.NewStyleQuery(self)
4992 class LUQueryFields(NoHooksLU):
4993 """Query for resources/items of a certain kind.
4996 # pylint: disable=W0142
4997 REQ_BGL = False
4999 def CheckArguments(self):
5000 self.qcls = _GetQueryImplementation(self.op.what)
5002 def ExpandNames(self):
5003 self.needed_locks = {}
5005 def Exec(self, feedback_fn):
5006 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5009 class LUNodeModifyStorage(NoHooksLU):
5010 """Logical unit for modifying a storage volume on a node.
5015 def CheckArguments(self):
5016 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5018 storage_type = self.op.storage_type
5020 try:
5021 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5022 except KeyError:
5023 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5024 " modified" % storage_type,
5027 diff = set(self.op.changes.keys()) - modifiable
5028 if diff:
5029 raise errors.OpPrereqError("The following fields can not be modified for"
5030 " storage units of type '%s': %r" %
5031 (storage_type, list(diff)),
5032 errors.ECODE_INVAL)
5034 def ExpandNames(self):
5035 self.needed_locks = {
5036 locking.LEVEL_NODE: self.op.node_name,
5037 }
5039 def Exec(self, feedback_fn):
5040 """Computes the list of nodes and their attributes.
5043 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5044 result = self.rpc.call_storage_modify(self.op.node_name,
5045 self.op.storage_type, st_args,
5046 self.op.name, self.op.changes)
5047 result.Raise("Failed to modify storage unit '%s' on %s" %
5048 (self.op.name, self.op.node_name))
5051 class LUNodeAdd(LogicalUnit):
5052 """Logical unit for adding node to the cluster.
5056 HTYPE = constants.HTYPE_NODE
5057 _NFLAGS = ["master_capable", "vm_capable"]
5059 def CheckArguments(self):
5060 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5061 # validate/normalize the node name
5062 self.hostname = netutils.GetHostname(name=self.op.node_name,
5063 family=self.primary_ip_family)
5064 self.op.node_name = self.hostname.name
5066 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5067 raise errors.OpPrereqError("Cannot readd the master node",
5070 if self.op.readd and self.op.group:
5071 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5072 " being readded", errors.ECODE_INVAL)
5074 def BuildHooksEnv(self):
5075 """Build hooks env.
5077 This will run on all nodes before, and on all nodes + the new node after.
5080 return {
5081 "OP_TARGET": self.op.node_name,
5082 "NODE_NAME": self.op.node_name,
5083 "NODE_PIP": self.op.primary_ip,
5084 "NODE_SIP": self.op.secondary_ip,
5085 "MASTER_CAPABLE": str(self.op.master_capable),
5086 "VM_CAPABLE": str(self.op.vm_capable),
5089 def BuildHooksNodes(self):
5090 """Build hooks nodes.
5093 # Exclude added node
5094 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5095 post_nodes = pre_nodes + [self.op.node_name, ]
5097 return (pre_nodes, post_nodes)
5099 def CheckPrereq(self):
5100 """Check prerequisites.
5103 - the new node is not already in the config
5105 - its parameters (single/dual homed) matches the cluster
5107 Any errors are signaled by raising errors.OpPrereqError.
5110 cfg = self.cfg
5111 hostname = self.hostname
5112 node = hostname.name
5113 primary_ip = self.op.primary_ip = hostname.ip
5114 if self.op.secondary_ip is None:
5115 if self.primary_ip_family == netutils.IP6Address.family:
5116 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5117 " IPv4 address must be given as secondary",
5119 self.op.secondary_ip = primary_ip
5121 secondary_ip = self.op.secondary_ip
5122 if not netutils.IP4Address.IsValid(secondary_ip):
5123 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5124 " address" % secondary_ip, errors.ECODE_INVAL)
5126 node_list = cfg.GetNodeList()
5127 if not self.op.readd and node in node_list:
5128 raise errors.OpPrereqError("Node %s is already in the configuration" %
5129 node, errors.ECODE_EXISTS)
5130 elif self.op.readd and node not in node_list:
5131 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5134 self.changed_primary_ip = False
5136 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5137 if self.op.readd and node == existing_node_name:
5138 if existing_node.secondary_ip != secondary_ip:
5139 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5140 " address configuration as before",
5142 if existing_node.primary_ip != primary_ip:
5143 self.changed_primary_ip = True
5145 continue
5147 if (existing_node.primary_ip == primary_ip or
5148 existing_node.secondary_ip == primary_ip or
5149 existing_node.primary_ip == secondary_ip or
5150 existing_node.secondary_ip == secondary_ip):
5151 raise errors.OpPrereqError("New node ip address(es) conflict with"
5152 " existing node %s" % existing_node.name,
5153 errors.ECODE_NOTUNIQUE)
5155 # After this 'if' block, None is no longer a valid value for the
5156 # _capable op attributes
5157 if self.op.readd:
5158 old_node = self.cfg.GetNodeInfo(node)
5159 assert old_node is not None, "Can't retrieve locked node %s" % node
5160 for attr in self._NFLAGS:
5161 if getattr(self.op, attr) is None:
5162 setattr(self.op, attr, getattr(old_node, attr))
5163 else:
5164 for attr in self._NFLAGS:
5165 if getattr(self.op, attr) is None:
5166 setattr(self.op, attr, True)
5168 if self.op.readd and not self.op.vm_capable:
5169 pri, sec = cfg.GetNodeInstances(node)
5170 if pri or sec:
5171 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5172 " flag set to false, but it already holds"
5173 " instances" % node,
5176 # check that the type of the node (single versus dual homed) is the
5177 # same as for the master
5178 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5179 master_singlehomed = myself.secondary_ip == myself.primary_ip
5180 newbie_singlehomed = secondary_ip == primary_ip
5181 if master_singlehomed != newbie_singlehomed:
5182 if master_singlehomed:
5183 raise errors.OpPrereqError("The master has no secondary ip but the"
5184 " new node has one",
5187 raise errors.OpPrereqError("The master has a secondary ip but the"
5188 " new node doesn't have one",
5191 # checks reachability
5192 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5193 raise errors.OpPrereqError("Node not reachable by ping",
5194 errors.ECODE_ENVIRON)
5196 if not newbie_singlehomed:
5197 # check reachability from my secondary ip to newbie's secondary ip
5198 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5199 source=myself.secondary_ip):
5200 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5201 " based ping to node daemon port",
5202 errors.ECODE_ENVIRON)
5204 if self.op.readd:
5205 exceptions = [node]
5206 else:
5207 exceptions = []
5209 if self.op.master_capable:
5210 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5211 else:
5212 self.master_candidate = False
5214 if self.op.readd:
5215 self.new_node = old_node
5216 else:
5217 node_group = cfg.LookupNodeGroup(self.op.group)
5218 self.new_node = objects.Node(name=node,
5219 primary_ip=primary_ip,
5220 secondary_ip=secondary_ip,
5221 master_candidate=self.master_candidate,
5222 offline=False, drained=False,
5223 group=node_group)
5225 if self.op.ndparams:
5226 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5228 def Exec(self, feedback_fn):
5229 """Adds the new node to the cluster.
5232 new_node = self.new_node
5233 node = new_node.name
5235 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5236 "Not owning BGL"
5238 # We are adding a new node, so we assume it's powered
5239 new_node.powered = True
5241 # for re-adds, reset the offline/drained/master-candidate flags;
5242 # we need to reset here, otherwise offline would prevent RPC calls
5243 # later in the procedure; this also means that if the re-add
5244 # fails, we are left with a non-offlined, broken node
5245 if self.op.readd:
5246 new_node.drained = new_node.offline = False # pylint: disable=W0201
5247 self.LogInfo("Readding a node, the offline/drained flags were reset")
5248 # if we demote the node, we do cleanup later in the procedure
5249 new_node.master_candidate = self.master_candidate
5250 if self.changed_primary_ip:
5251 new_node.primary_ip = self.op.primary_ip
5253 # copy the master/vm_capable flags
5254 for attr in self._NFLAGS:
5255 setattr(new_node, attr, getattr(self.op, attr))
5257 # notify the user about any possible mc promotion
5258 if new_node.master_candidate:
5259 self.LogInfo("Node will be a master candidate")
5261 if self.op.ndparams:
5262 new_node.ndparams = self.op.ndparams
5264 new_node.ndparams = {}
5266 # check connectivity
5267 result = self.rpc.call_version([node])[node]
5268 result.Raise("Can't get version information from node %s" % node)
5269 if constants.PROTOCOL_VERSION == result.payload:
5270 logging.info("Communication to node %s fine, sw version %s match",
5271 node, result.payload)
5273 raise errors.OpExecError("Version mismatch master version %s,"
5274 " node version %s" %
5275 (constants.PROTOCOL_VERSION, result.payload))
5277 # Add node to our /etc/hosts, and add key to known_hosts
5278 if self.cfg.GetClusterInfo().modify_etc_hosts:
5279 master_node = self.cfg.GetMasterNode()
5280 result = self.rpc.call_etc_hosts_modify(master_node,
5281 constants.ETC_HOSTS_ADD,
5282 self.hostname.name,
5283 self.hostname.ip)
5284 result.Raise("Can't update hosts file with new host data")
5286 if new_node.secondary_ip != new_node.primary_ip:
5287 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5288 False)
5290 node_verify_list = [self.cfg.GetMasterNode()]
5291 node_verify_param = {
5292 constants.NV_NODELIST: ([node], {}),
5293 # TODO: do a node-net-test as well?
5294 }
5296 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5297 self.cfg.GetClusterName())
5298 for verifier in node_verify_list:
5299 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5300 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5301 if nl_payload:
5302 for failed in nl_payload:
5303 feedback_fn("ssh/hostname verification failed"
5304 " (checking from %s): %s" %
5305 (verifier, nl_payload[failed]))
5306 raise errors.OpExecError("ssh/hostname verification failed")
5309 _RedistributeAncillaryFiles(self)
5310 self.context.ReaddNode(new_node)
5311 # make sure we redistribute the config
5312 self.cfg.Update(new_node, feedback_fn)
5313 # and make sure the new node will not have old files around
5314 if not new_node.master_candidate:
5315 result = self.rpc.call_node_demote_from_mc(new_node.name)
5316 msg = result.fail_msg
5317 if msg:
5318 self.LogWarning("Node failed to demote itself from master"
5319 " candidate status: %s" % msg)
5321 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5322 additional_vm=self.op.vm_capable)
5323 self.context.AddNode(new_node, self.proc.GetECId())
5326 class LUNodeSetParams(LogicalUnit):
5327 """Modifies the parameters of a node.
5329 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5330 to the node role (as _ROLE_*)
5331 @cvar _R2F: a dictionary from node role to tuples of flags
5332 @cvar _FLAGS: a list of attribute names corresponding to the flags
5335 HPATH = "node-modify"
5336 HTYPE = constants.HTYPE_NODE
5338 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5339 _F2R = {
5340 (True, False, False): _ROLE_CANDIDATE,
5341 (False, True, False): _ROLE_DRAINED,
5342 (False, False, True): _ROLE_OFFLINE,
5343 (False, False, False): _ROLE_REGULAR,
5344 }
5345 _R2F = dict((v, k) for k, v in _F2R.items())
5346 _FLAGS = ["master_candidate", "drained", "offline"]
5348 def CheckArguments(self):
5349 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5350 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5351 self.op.master_capable, self.op.vm_capable,
5352 self.op.secondary_ip, self.op.ndparams]
5353 if all_mods.count(None) == len(all_mods):
5354 raise errors.OpPrereqError("Please pass at least one modification",
5356 if all_mods.count(True) > 1:
5357 raise errors.OpPrereqError("Can't set the node into more than one"
5358 " state at the same time",
5361 # Boolean value that tells us whether we might be demoting from MC
5362 self.might_demote = (self.op.master_candidate == False or
5363 self.op.offline == True or
5364 self.op.drained == True or
5365 self.op.master_capable == False)
5367 if self.op.secondary_ip:
5368 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5369 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5370 " address" % self.op.secondary_ip,
5373 self.lock_all = self.op.auto_promote and self.might_demote
5374 self.lock_instances = self.op.secondary_ip is not None
5376 def _InstanceFilter(self, instance):
5377 """Filter for getting affected instances.
5380 return (instance.disk_template in constants.DTS_INT_MIRROR and
5381 self.op.node_name in instance.all_nodes)
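# Only instances using internally mirrored disk templates (DRBD) are
# affected by a secondary IP change, as their replication traffic runs
# over the secondary network; hence the filter above.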
5383 def ExpandNames(self):
5384 if self.lock_all:
5385 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5386 else:
5387 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5389 # Since modifying a node can have severe effects on currently running
5390 # operations the resource lock is at least acquired in shared mode
5391 self.needed_locks[locking.LEVEL_NODE_RES] = \
5392 self.needed_locks[locking.LEVEL_NODE]
5394 # Get node resource and instance locks in shared mode; they are not used
5395 # for anything but read-only access
5396 self.share_locks[locking.LEVEL_NODE_RES] = 1
5397 self.share_locks[locking.LEVEL_INSTANCE] = 1
5399 if self.lock_instances:
5400 self.needed_locks[locking.LEVEL_INSTANCE] = \
5401 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5403 def BuildHooksEnv(self):
5404 """Build hooks env.
5406 This runs on the master node.
5409 return {
5410 "OP_TARGET": self.op.node_name,
5411 "MASTER_CANDIDATE": str(self.op.master_candidate),
5412 "OFFLINE": str(self.op.offline),
5413 "DRAINED": str(self.op.drained),
5414 "MASTER_CAPABLE": str(self.op.master_capable),
5415 "VM_CAPABLE": str(self.op.vm_capable),
5418 def BuildHooksNodes(self):
5419 """Build hooks nodes.
5422 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5423 return (nl, nl)
5425 def CheckPrereq(self):
5426 """Check prerequisites.
5428 This only checks the instance list against the existing names.
5431 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5433 if self.lock_instances:
5434 affected_instances = \
5435 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5437 # Verify instance locks
5438 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5439 wanted_instances = frozenset(affected_instances.keys())
5440 if wanted_instances - owned_instances:
5441 raise errors.OpPrereqError("Instances affected by changing node %s's"
5442 " secondary IP address have changed since"
5443 " locks were acquired, wanted '%s', have"
5444 " '%s'; retry the operation" %
5446 utils.CommaJoin(wanted_instances),
5447 utils.CommaJoin(owned_instances)),
5448 errors.ECODE_STATE)
5449 else:
5450 affected_instances = None
5452 if (self.op.master_candidate is not None or
5453 self.op.drained is not None or
5454 self.op.offline is not None):
5455 # we can't change the master's node flags
5456 if self.op.node_name == self.cfg.GetMasterNode():
5457 raise errors.OpPrereqError("The master role can be changed"
5458 " only via master-failover",
5461 if self.op.master_candidate and not node.master_capable:
5462 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5463 " it a master candidate" % node.name,
5466 if self.op.vm_capable == False:
5467 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5468 if ipri or isec:
5469 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5470 " the vm_capable flag" % node.name,
5473 if node.master_candidate and self.might_demote and not self.lock_all:
5474 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5475 # check if after removing the current node, we're missing master
5476 # candidates
5477 (mc_remaining, mc_should, _) = \
5478 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5479 if mc_remaining < mc_should:
5480 raise errors.OpPrereqError("Not enough master candidates, please"
5481 " pass auto promote option to allow"
5482 " promotion", errors.ECODE_STATE)
5484 self.old_flags = old_flags = (node.master_candidate,
5485 node.drained, node.offline)
5486 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5487 self.old_role = old_role = self._F2R[old_flags]
5489 # Check for ineffective changes
5490 for attr in self._FLAGS:
5491 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5492 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5493 setattr(self.op, attr, None)
5495 # Past this point, any flag change to False means a transition
5496 # away from the respective state, as only real changes are kept
5498 # TODO: We might query the real power state if it supports OOB
5499 if _SupportsOob(self.cfg, node):
5500 if self.op.offline is False and not (node.powered or
5501 self.op.powered == True):
5502 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5503 " offline status can be reset") %
5505 elif self.op.powered is not None:
5506 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5507 " as it does not support out-of-band"
5508 " handling") % self.op.node_name)
5510 # If we're being deofflined/drained, we'll MC ourself if needed
5511 if (self.op.drained == False or self.op.offline == False or
5512 (self.op.master_capable and not node.master_capable)):
5513 if _DecideSelfPromotion(self):
5514 self.op.master_candidate = True
5515 self.LogInfo("Auto-promoting node to master candidate")
5517 # If we're no longer master capable, we'll demote ourselves from MC
5518 if self.op.master_capable == False and node.master_candidate:
5519 self.LogInfo("Demoting from master candidate")
5520 self.op.master_candidate = False
5523 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5524 if self.op.master_candidate:
5525 new_role = self._ROLE_CANDIDATE
5526 elif self.op.drained:
5527 new_role = self._ROLE_DRAINED
5528 elif self.op.offline:
5529 new_role = self._ROLE_OFFLINE
5530 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5531 # False is still in new flags, which means we're un-setting (the
5532 # only) True flag
5533 new_role = self._ROLE_REGULAR
5534 else: # no new flags, nothing, keep old role
5535 new_role = old_role
5537 self.new_role = new_role
5539 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5540 # Trying to transition out of offline status
5541 # TODO: Use standard RPC runner, but make sure it works when the node is
5542 # still marked offline
5543 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5544 if result.fail_msg:
5545 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5546 " to report its version: %s" %
5547 (node.name, result.fail_msg),
5548 errors.ECODE_ENVIRON)
5549 else:
5550 self.LogWarning("Transitioning node from offline to online state"
5551 " without using re-add. Please make sure the node"
5554 if self.op.secondary_ip:
5555 # Ok even without locking, because this can't be changed by any LU
5556 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5557 master_singlehomed = master.secondary_ip == master.primary_ip
5558 if master_singlehomed and self.op.secondary_ip:
5559 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5560 " homed cluster", errors.ECODE_INVAL)
5562 assert not (frozenset(affected_instances) -
5563 self.owned_locks(locking.LEVEL_INSTANCE))
5565 if node.offline:
5566 if affected_instances:
5567 raise errors.OpPrereqError("Cannot change secondary IP address:"
5568 " offline node has instances (%s)"
5569 " configured to use it" %
5570 utils.CommaJoin(affected_instances.keys()))
5571 else:
5572 # On online nodes, check that no instances are running, and that
5573 # the node has the new ip and we can reach it.
5574 for instance in affected_instances.values():
5575 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5576 msg="cannot change secondary ip")
5578 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5579 if master.name != node.name:
5580 # check reachability from master secondary ip to new secondary ip
5581 if not netutils.TcpPing(self.op.secondary_ip,
5582 constants.DEFAULT_NODED_PORT,
5583 source=master.secondary_ip):
5584 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5585 " based ping to node daemon port",
5586 errors.ECODE_ENVIRON)
5588 if self.op.ndparams:
5589 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5590 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5591 self.new_ndparams = new_ndparams
5593 def Exec(self, feedback_fn):
5594 """Modifies a node.
5597 node = self.node
5598 old_role = self.old_role
5599 new_role = self.new_role
5601 result = []
5603 if self.op.ndparams:
5604 node.ndparams = self.new_ndparams
5606 if self.op.powered is not None:
5607 node.powered = self.op.powered
5609 for attr in ["master_capable", "vm_capable"]:
5610 val = getattr(self.op, attr)
5611 if val is not None:
5612 setattr(node, attr, val)
5613 result.append((attr, str(val)))
5615 if new_role != old_role:
5616 # Tell the node to demote itself, if no longer MC and not offline
5617 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5618 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5619 if msg:
5620 self.LogWarning("Node failed to demote itself: %s", msg)
5622 new_flags = self._R2F[new_role]
5623 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5624 if of != nf:
5625 result.append((desc, str(nf)))
5626 (node.master_candidate, node.drained, node.offline) = new_flags
5628 # we locked all nodes, we adjust the CP before updating this node
5629 if self.lock_all:
5630 _AdjustCandidatePool(self, [node.name])
5632 if self.op.secondary_ip:
5633 node.secondary_ip = self.op.secondary_ip
5634 result.append(("secondary_ip", self.op.secondary_ip))
5636 # this will trigger configuration file update, if needed
5637 self.cfg.Update(node, feedback_fn)
5639 # this will trigger job queue propagation or cleanup if the mc
5640 # flag changed
5641 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5642 self.context.ReaddNode(node)
5647 class LUNodePowercycle(NoHooksLU):
5648 """Powercycles a node.
5653 def CheckArguments(self):
5654 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5655 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5656 raise errors.OpPrereqError("The node is the master and the force"
5657 " parameter was not set",
5660 def ExpandNames(self):
5661 """Locking for PowercycleNode.
5663 This is a last-resort option and shouldn't block on other
5664 jobs. Therefore, we grab no locks.
5667 self.needed_locks = {}
5669 def Exec(self, feedback_fn):
5673 result = self.rpc.call_node_powercycle(self.op.node_name,
5674 self.cfg.GetHypervisorType())
5675 result.Raise("Failed to schedule the reboot")
5676 return result.payload
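# Illustrative sketch (not part of the original code): this LU is normally
# reached through the command-line client, roughly as
#   gnt-node powercycle node1.example.com
# and powercycling the master node additionally requires the force flag, as
# enforced by CheckArguments above. The exact CLI spelling is an assumption.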
5679 class LUClusterQuery(NoHooksLU):
5680 """Query cluster configuration.
5685 def ExpandNames(self):
5686 self.needed_locks = {}
5688 def Exec(self, feedback_fn):
5689 """Return cluster config.
5692 cluster = self.cfg.GetClusterInfo()
5695 # Filter just for enabled hypervisors
5696 for os_name, hv_dict in cluster.os_hvp.items():
5697 os_hvp[os_name] = {}
5698 for hv_name, hv_params in hv_dict.items():
5699 if hv_name in cluster.enabled_hypervisors:
5700 os_hvp[os_name][hv_name] = hv_params
5702 # Convert ip_family to ip_version
5703 primary_ip_version = constants.IP4_VERSION
5704 if cluster.primary_ip_family == netutils.IP6Address.family:
5705 primary_ip_version = constants.IP6_VERSION
5708 "software_version": constants.RELEASE_VERSION,
5709 "protocol_version": constants.PROTOCOL_VERSION,
5710 "config_version": constants.CONFIG_VERSION,
5711 "os_api_version": max(constants.OS_API_VERSIONS),
5712 "export_version": constants.EXPORT_VERSION,
5713 "architecture": (platform.architecture()[0], platform.machine()),
5714 "name": cluster.cluster_name,
5715 "master": cluster.master_node,
5716 "default_hypervisor": cluster.enabled_hypervisors[0],
5717 "enabled_hypervisors": cluster.enabled_hypervisors,
5718 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5719 for hypervisor_name in cluster.enabled_hypervisors]),
5721 "beparams": cluster.beparams,
5722 "osparams": cluster.osparams,
5723 "nicparams": cluster.nicparams,
5724 "ndparams": cluster.ndparams,
5725 "candidate_pool_size": cluster.candidate_pool_size,
5726 "master_netdev": cluster.master_netdev,
5727 "master_netmask": cluster.master_netmask,
5728 "use_external_mip_script": cluster.use_external_mip_script,
5729 "volume_group_name": cluster.volume_group_name,
5730 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5731 "file_storage_dir": cluster.file_storage_dir,
5732 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5733 "maintain_node_health": cluster.maintain_node_health,
5734 "ctime": cluster.ctime,
5735 "mtime": cluster.mtime,
5736 "uuid": cluster.uuid,
5737 "tags": list(cluster.GetTags()),
5738 "uid_pool": cluster.uid_pool,
5739 "default_iallocator": cluster.default_iallocator,
5740 "reserved_lvs": cluster.reserved_lvs,
5741 "primary_ip_version": primary_ip_version,
5742 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5743 "hidden_os": cluster.hidden_os,
5744 "blacklisted_os": cluster.blacklisted_os,
5750 class LUClusterConfigQuery(NoHooksLU):
5751 """Return configuration values.
5755 _FIELDS_DYNAMIC = utils.FieldSet()
5756 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5757 "watcher_pause", "volume_group_name")
5759 def CheckArguments(self):
5760 _CheckOutputFields(static=self._FIELDS_STATIC,
5761 dynamic=self._FIELDS_DYNAMIC,
5762 selected=self.op.output_fields)
5764 def ExpandNames(self):
5765 self.needed_locks = {}
5767 def Exec(self, feedback_fn):
5768 """Dump a representation of the cluster config to the standard output.
5772 for field in self.op.output_fields:
5773 if field == "cluster_name":
5774 entry = self.cfg.GetClusterName()
5775 elif field == "master_node":
5776 entry = self.cfg.GetMasterNode()
5777 elif field == "drain_flag":
5778 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5779 elif field == "watcher_pause":
5780 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5781 elif field == "volume_group_name":
5782 entry = self.cfg.GetVGName()
5783 else:
5784 raise errors.ParameterError(field)
5785 values.append(entry)
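# Illustrative sketch (an assumption about the opcode-level caller, not part
# of the original code): a query for two of the static fields would look like
#   op = opcodes.OpClusterConfigQuery(output_fields=["cluster_name",
#                                                    "master_node"])
# and Exec returns the entries in the same order as the requested fields.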
5789 class LUInstanceActivateDisks(NoHooksLU):
5790 """Bring up an instance's disks.
5795 def ExpandNames(self):
5796 self._ExpandAndLockInstance()
5797 self.needed_locks[locking.LEVEL_NODE] = []
5798 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5800 def DeclareLocks(self, level):
5801 if level == locking.LEVEL_NODE:
5802 self._LockInstancesNodes()
5804 def CheckPrereq(self):
5805 """Check prerequisites.
5807 This checks that the instance is in the cluster.
5810 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5811 assert self.instance is not None, \
5812 "Cannot retrieve locked instance %s" % self.op.instance_name
5813 _CheckNodeOnline(self, self.instance.primary_node)
5815 def Exec(self, feedback_fn):
5816 """Activate the disks.
5819 disks_ok, disks_info = \
5820 _AssembleInstanceDisks(self, self.instance,
5821 ignore_size=self.op.ignore_size)
5823 raise errors.OpExecError("Cannot activate block devices")
5828 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5830 """Prepare the block devices for an instance.
5832 This sets up the block devices on all nodes.
5834 @type lu: L{LogicalUnit}
5835 @param lu: the logical unit on whose behalf we execute
5836 @type instance: L{objects.Instance}
5837 @param instance: the instance for whose disks we assemble
5838 @type disks: list of L{objects.Disk} or None
5839 @param disks: which disks to assemble (or all, if None)
5840 @type ignore_secondaries: boolean
5841 @param ignore_secondaries: if true, errors on secondary nodes
5842 won't result in an error return from the function
5843 @type ignore_size: boolean
5844 @param ignore_size: if true, the current known size of the disk
5845 will not be used during the disk activation, useful for cases
5846 when the size is wrong
5847 @return: a tuple of (disks_ok, device_info); disks_ok is False if the
5848 operation failed, and device_info is a list of
5849 (host, instance_visible_name, node_visible_name) tuples with the mapping from node devices to instance devices
5854 iname = instance.name
5855 disks = _ExpandCheckDisks(instance, disks)
5857 # With the two passes mechanism we try to reduce the window of
5858 # opportunity for the race condition of switching DRBD to primary
5859 # before handshaking occurred, but we do not eliminate it
5861 # The proper fix would be to wait (with some limits) until the
5862 # connection has been made and drbd transitions from WFConnection
5863 # into any other network-connected state (Connected, SyncTarget,
5864 # SyncSource, etc.)
5866 # 1st pass, assemble on all nodes in secondary mode
5867 for idx, inst_disk in enumerate(disks):
5868 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5870 node_disk = node_disk.Copy()
5871 node_disk.UnsetSize()
5872 lu.cfg.SetDiskID(node_disk, node)
5873 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5874 msg = result.fail_msg
5876 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5877 " (is_primary=False, pass=1): %s",
5878 inst_disk.iv_name, node, msg)
5879 if not ignore_secondaries:
5882 # FIXME: race condition on drbd migration to primary
5884 # 2nd pass, do only the primary node
5885 for idx, inst_disk in enumerate(disks):
5888 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5889 if node != instance.primary_node:
5892 node_disk = node_disk.Copy()
5893 node_disk.UnsetSize()
5894 lu.cfg.SetDiskID(node_disk, node)
5895 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5896 msg = result.fail_msg
5898 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5899 " (is_primary=True, pass=2): %s",
5900 inst_disk.iv_name, node, msg)
5903 dev_path = result.payload
5905 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5907 # leave the disks configured for the primary node
5908 # this is a workaround that would be fixed better by
5909 # improving the logical/physical id handling
5911 lu.cfg.SetDiskID(disk, instance.primary_node)
5913 return disks_ok, device_info
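# Example usage, mirroring LUInstanceActivateDisks.Exec above; callers must
# check disks_ok themselves:
#   disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance,
#                                                 ignore_size=self.op.ignore_size)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")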
5916 def _StartInstanceDisks(lu, instance, force):
5917 """Start the disks of an instance.
5920 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5921 ignore_secondaries=force)
5923 _ShutdownInstanceDisks(lu, instance)
5924 if force is not None and not force:
5925 lu.proc.LogWarning("", hint="If the message above refers to a"
5927 " you can retry the operation using '--force'.")
5928 raise errors.OpExecError("Disk consistency error")
5931 class LUInstanceDeactivateDisks(NoHooksLU):
5932 """Shutdown an instance's disks.
5937 def ExpandNames(self):
5938 self._ExpandAndLockInstance()
5939 self.needed_locks[locking.LEVEL_NODE] = []
5940 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5942 def DeclareLocks(self, level):
5943 if level == locking.LEVEL_NODE:
5944 self._LockInstancesNodes()
5946 def CheckPrereq(self):
5947 """Check prerequisites.
5949 This checks that the instance is in the cluster.
5952 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5953 assert self.instance is not None, \
5954 "Cannot retrieve locked instance %s" % self.op.instance_name
5956 def Exec(self, feedback_fn):
5957 """Deactivate the disks
5960 instance = self.instance
5962 _ShutdownInstanceDisks(self, instance)
5964 _SafeShutdownInstanceDisks(self, instance)
5967 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5968 """Shutdown block devices of an instance.
5970 This function checks if an instance is running, before calling
5971 _ShutdownInstanceDisks.
5974 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
5975 _ShutdownInstanceDisks(lu, instance, disks=disks)
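# Minimal usage sketch: LUInstanceDeactivateDisks.Exec above picks between
# the two variants, calling _ShutdownInstanceDisks directly when the force
# flag is set and _SafeShutdownInstanceDisks (state check included)
# otherwise.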
5978 def _ExpandCheckDisks(instance, disks):
5979 """Return the instance disks selected by the disks list
5981 @type disks: list of L{objects.Disk} or None
5982 @param disks: selected disks
5983 @rtype: list of L{objects.Disk}
5984 @return: selected instance disks to act on
5988 return instance.disks
5990 if not set(disks).issubset(instance.disks):
5991 raise errors.ProgrammerError("Can only act on disks belonging to the"
5996 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5997 """Shutdown block devices of an instance.
5999 This does the shutdown on all nodes of the instance.
6001 If ignore_primary is false, errors on the primary node make the
6002 function return failure; otherwise they are only logged as warnings.
6006 disks = _ExpandCheckDisks(instance, disks)
6009 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6010 lu.cfg.SetDiskID(top_disk, node)
6011 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6012 msg = result.fail_msg
6014 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6015 disk.iv_name, node, msg)
6016 if ((node == instance.primary_node and not ignore_primary) or
6017 (node != instance.primary_node and not result.offline)):
6022 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6023 """Checks if a node has enough free memory.
6025 This function checks whether a given node has the needed amount of free
6026 memory. If the node has less memory, or if we cannot get the
6027 information from the node, this function raises an OpPrereqError.
6030 @type lu: C{LogicalUnit}
6031 @param lu: a logical unit from which we get configuration data
6033 @param node: the node to check
6034 @type reason: C{str}
6035 @param reason: string to use in the error message
6036 @type requested: C{int}
6037 @param requested: the amount of memory in MiB to check for
6038 @type hypervisor_name: C{str}
6039 @param hypervisor_name: the hypervisor to ask for memory stats
6040 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6041 we cannot check the node
6044 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6045 nodeinfo[node].Raise("Can't get data from node %s" % node,
6046 prereq=True, ecode=errors.ECODE_ENVIRON)
6047 (_, _, (hv_info, )) = nodeinfo[node].payload
6049 free_mem = hv_info.get("memory_free", None)
6050 if not isinstance(free_mem, int):
6051 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6052 " was '%s'" % (node, free_mem),
6053 errors.ECODE_ENVIRON)
6054 if requested > free_mem:
6055 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6056 " needed %s MiB, available %s MiB" %
6057 (node, reason, requested, free_mem),
6061 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6062 """Checks if nodes have enough free disk space in the all VGs.
6064 This function check if all given nodes have the needed amount of
6065 free disk. In case any node has less disk or we cannot get the
6066 information from the node, this function raise an OpPrereqError
6069 @type lu: C{LogicalUnit}
6070 @param lu: a logical unit from which we get configuration data
6071 @type nodenames: C{list}
6072 @param nodenames: the list of node names to check
6073 @type req_sizes: C{dict}
6074 @param req_sizes: the hash of vg and corresponding amount of disk in MiB
6076 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6077 or we cannot check the node
6080 for vg, req_size in req_sizes.items():
6081 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
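# Illustrative req_sizes value (an assumption about a typical caller; the
# volume group name is made up): requesting two 10 GiB LVs in one group
# would look like
#   _CheckNodesFreeDiskPerVG(lu, nodenames, {"xenvg": 2 * 10240})
# which expands into one _CheckNodesFreeDiskOnVG call per volume group.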
6084 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6085 """Checks if nodes have enough free disk space in the specified VG.
6087 This function checks whether all given nodes have the needed amount of
6088 free disk space. If any node has less disk space, or if we cannot get
6089 the information from the node, this function raises an OpPrereqError.
6092 @type lu: C{LogicalUnit}
6093 @param lu: a logical unit from which we get configuration data
6094 @type nodenames: C{list}
6095 @param nodenames: the list of node names to check
6097 @param vg: the volume group to check
6098 @type requested: C{int}
6099 @param requested: the amount of disk in MiB to check for
6100 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6101 or we cannot check the node
6104 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6105 for node in nodenames:
6106 info = nodeinfo[node]
6107 info.Raise("Cannot get current information from node %s" % node,
6108 prereq=True, ecode=errors.ECODE_ENVIRON)
6109 (_, (vg_info, ), _) = info.payload
6110 vg_free = vg_info.get("vg_free", None)
6111 if not isinstance(vg_free, int):
6112 raise errors.OpPrereqError("Can't compute free disk space on node"
6113 " %s for vg %s, result was '%s'" %
6114 (node, vg, vg_free), errors.ECODE_ENVIRON)
6115 if requested > vg_free:
6116 raise errors.OpPrereqError("Not enough disk space on target node %s"
6117 " vg %s: required %d MiB, available %d MiB" %
6118 (node, vg, requested, vg_free),
6122 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6123 """Checks if nodes have enough physical CPUs
6125 This function checks if all given nodes have the needed number of
6126 physical CPUs. In case any node has less CPUs or we cannot get the
6127 information from the node, this function raises an OpPrereqError
6130 @type lu: C{LogicalUnit}
6131 @param lu: a logical unit from which we get configuration data
6132 @type nodenames: C{list}
6133 @param nodenames: the list of node names to check
6134 @type requested: C{int}
6135 @param requested: the minimum acceptable number of physical CPUs
6136 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6137 or we cannot check the node
6140 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6141 for node in nodenames:
6142 info = nodeinfo[node]
6143 info.Raise("Cannot get current information from node %s" % node,
6144 prereq=True, ecode=errors.ECODE_ENVIRON)
6145 (_, _, (hv_info, )) = info.payload
6146 num_cpus = hv_info.get("cpu_total", None)
6147 if not isinstance(num_cpus, int):
6148 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6149 " on node %s, result was '%s'" %
6150 (node, num_cpus), errors.ECODE_ENVIRON)
6151 if requested > num_cpus:
6152 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6153 "required" % (node, num_cpus, requested),
6157 class LUInstanceStartup(LogicalUnit):
6158 """Starts an instance.
6161 HPATH = "instance-start"
6162 HTYPE = constants.HTYPE_INSTANCE
6165 def CheckArguments(self):
6167 if self.op.beparams:
6168 # fill the beparams dict
6169 objects.UpgradeBeParams(self.op.beparams)
6170 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6172 def ExpandNames(self):
6173 self._ExpandAndLockInstance()
6175 def BuildHooksEnv(self):
6178 This runs on master, primary and secondary nodes of the instance.
6182 "FORCE": self.op.force,
6185 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6189 def BuildHooksNodes(self):
6190 """Build hooks nodes.
6193 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6196 def CheckPrereq(self):
6197 """Check prerequisites.
6199 This checks that the instance is in the cluster.
6202 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6203 assert self.instance is not None, \
6204 "Cannot retrieve locked instance %s" % self.op.instance_name
6207 if self.op.hvparams:
6208 # check hypervisor parameter syntax (locally)
6209 cluster = self.cfg.GetClusterInfo()
6210 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6211 filled_hvp = cluster.FillHV(instance)
6212 filled_hvp.update(self.op.hvparams)
6213 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6214 hv_type.CheckParameterSyntax(filled_hvp)
6215 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6217 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6219 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6221 if self.primary_offline and self.op.ignore_offline_nodes:
6222 self.proc.LogWarning("Ignoring offline primary node")
6224 if self.op.hvparams or self.op.beparams:
6225 self.proc.LogWarning("Overridden parameters are ignored")
6227 _CheckNodeOnline(self, instance.primary_node)
6229 bep = self.cfg.GetClusterInfo().FillBE(instance)
6231 # check bridges existence
6232 _CheckInstanceBridgesExist(self, instance)
6234 remote_info = self.rpc.call_instance_info(instance.primary_node,
6236 instance.hypervisor)
6237 remote_info.Raise("Error checking node %s" % instance.primary_node,
6238 prereq=True, ecode=errors.ECODE_ENVIRON)
6239 if not remote_info.payload: # not running already
6240 _CheckNodeFreeMemory(self, instance.primary_node,
6241 "starting instance %s" % instance.name,
6242 bep[constants.BE_MAXMEM], instance.hypervisor)
6244 def Exec(self, feedback_fn):
6245 """Start the instance.
6248 instance = self.instance
6249 force = self.op.force
6251 if not self.op.no_remember:
6252 self.cfg.MarkInstanceUp(instance.name)
6254 if self.primary_offline:
6255 assert self.op.ignore_offline_nodes
6256 self.proc.LogInfo("Primary node offline, marked instance as started")
6258 node_current = instance.primary_node
6260 _StartInstanceDisks(self, instance, force)
6263 self.rpc.call_instance_start(node_current,
6264 (instance, self.op.hvparams,
6266 self.op.startup_paused)
6267 msg = result.fail_msg
6269 _ShutdownInstanceDisks(self, instance)
6270 raise errors.OpExecError("Could not start instance: %s" % msg)
6273 class LUInstanceReboot(LogicalUnit):
6274 """Reboot an instance.
6277 HPATH = "instance-reboot"
6278 HTYPE = constants.HTYPE_INSTANCE
6281 def ExpandNames(self):
6282 self._ExpandAndLockInstance()
6284 def BuildHooksEnv(self):
6287 This runs on master, primary and secondary nodes of the instance.
6291 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6292 "REBOOT_TYPE": self.op.reboot_type,
6293 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6296 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6300 def BuildHooksNodes(self):
6301 """Build hooks nodes.
6304 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6307 def CheckPrereq(self):
6308 """Check prerequisites.
6310 This checks that the instance is in the cluster.
6313 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6314 assert self.instance is not None, \
6315 "Cannot retrieve locked instance %s" % self.op.instance_name
6316 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6317 _CheckNodeOnline(self, instance.primary_node)
6319 # check bridges existence
6320 _CheckInstanceBridgesExist(self, instance)
6322 def Exec(self, feedback_fn):
6323 """Reboot the instance.
6326 instance = self.instance
6327 ignore_secondaries = self.op.ignore_secondaries
6328 reboot_type = self.op.reboot_type
6330 remote_info = self.rpc.call_instance_info(instance.primary_node,
6332 instance.hypervisor)
6333 remote_info.Raise("Error checking node %s" % instance.primary_node)
6334 instance_running = bool(remote_info.payload)
6336 node_current = instance.primary_node
6338 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6339 constants.INSTANCE_REBOOT_HARD]:
6340 for disk in instance.disks:
6341 self.cfg.SetDiskID(disk, node_current)
6342 result = self.rpc.call_instance_reboot(node_current, instance,
6344 self.op.shutdown_timeout)
6345 result.Raise("Could not reboot instance")
6347 if instance_running:
6348 result = self.rpc.call_instance_shutdown(node_current, instance,
6349 self.op.shutdown_timeout)
6350 result.Raise("Could not shutdown instance for full reboot")
6351 _ShutdownInstanceDisks(self, instance)
6353 self.LogInfo("Instance %s was already stopped, starting now",
6355 _StartInstanceDisks(self, instance, ignore_secondaries)
6356 result = self.rpc.call_instance_start(node_current,
6357 (instance, None, None), False)
6358 msg = result.fail_msg
6360 _ShutdownInstanceDisks(self, instance)
6361 raise errors.OpExecError("Could not start instance for"
6362 " full reboot: %s" % msg)
6364 self.cfg.MarkInstanceUp(instance.name)
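# Reboot dispatch in short: soft and hard reboots are delegated to the node
# daemon in a single instance_reboot RPC, while a full reboot is emulated
# here as shutdown + disk deactivation + fresh start; in all cases the
# instance is marked up again at the end.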
6367 class LUInstanceShutdown(LogicalUnit):
6368 """Shutdown an instance.
6371 HPATH = "instance-stop"
6372 HTYPE = constants.HTYPE_INSTANCE
6375 def ExpandNames(self):
6376 self._ExpandAndLockInstance()
6378 def BuildHooksEnv(self):
6381 This runs on master, primary and secondary nodes of the instance.
6384 env = _BuildInstanceHookEnvByObject(self, self.instance)
6385 env["TIMEOUT"] = self.op.timeout
6388 def BuildHooksNodes(self):
6389 """Build hooks nodes.
6392 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6395 def CheckPrereq(self):
6396 """Check prerequisites.
6398 This checks that the instance is in the cluster.
6401 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6402 assert self.instance is not None, \
6403 "Cannot retrieve locked instance %s" % self.op.instance_name
6405 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6407 self.primary_offline = \
6408 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6410 if self.primary_offline and self.op.ignore_offline_nodes:
6411 self.proc.LogWarning("Ignoring offline primary node")
6413 _CheckNodeOnline(self, self.instance.primary_node)
6415 def Exec(self, feedback_fn):
6416 """Shutdown the instance.
6419 instance = self.instance
6420 node_current = instance.primary_node
6421 timeout = self.op.timeout
6423 if not self.op.no_remember:
6424 self.cfg.MarkInstanceDown(instance.name)
6426 if self.primary_offline:
6427 assert self.op.ignore_offline_nodes
6428 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6430 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6431 msg = result.fail_msg
6433 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6435 _ShutdownInstanceDisks(self, instance)
6438 class LUInstanceReinstall(LogicalUnit):
6439 """Reinstall an instance.
6442 HPATH = "instance-reinstall"
6443 HTYPE = constants.HTYPE_INSTANCE
6446 def ExpandNames(self):
6447 self._ExpandAndLockInstance()
6449 def BuildHooksEnv(self):
6452 This runs on master, primary and secondary nodes of the instance.
6455 return _BuildInstanceHookEnvByObject(self, self.instance)
6457 def BuildHooksNodes(self):
6458 """Build hooks nodes.
6461 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6464 def CheckPrereq(self):
6465 """Check prerequisites.
6467 This checks that the instance is in the cluster and is not running.
6470 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6471 assert instance is not None, \
6472 "Cannot retrieve locked instance %s" % self.op.instance_name
6473 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6474 " offline, cannot reinstall")
6475 for node in instance.secondary_nodes:
6476 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6477 " cannot reinstall")
6479 if instance.disk_template == constants.DT_DISKLESS:
6480 raise errors.OpPrereqError("Instance '%s' has no disks" %
6481 self.op.instance_name,
6483 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6485 if self.op.os_type is not None:
6487 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6488 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6489 instance_os = self.op.os_type
6491 instance_os = instance.os
6493 nodelist = list(instance.all_nodes)
6495 if self.op.osparams:
6496 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6497 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6498 self.os_inst = i_osdict # the new dict (without defaults)
6502 self.instance = instance
6504 def Exec(self, feedback_fn):
6505 """Reinstall the instance.
6508 inst = self.instance
6510 if self.op.os_type is not None:
6511 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6512 inst.os = self.op.os_type
6513 # Write to configuration
6514 self.cfg.Update(inst, feedback_fn)
6516 _StartInstanceDisks(self, inst, None)
6518 feedback_fn("Running the instance OS create scripts...")
6519 # FIXME: pass debug option from opcode to backend
6520 result = self.rpc.call_instance_os_add(inst.primary_node,
6521 (inst, self.os_inst), True,
6522 self.op.debug_level)
6523 result.Raise("Could not install OS for instance %s on node %s" %
6524 (inst.name, inst.primary_node))
6526 _ShutdownInstanceDisks(self, inst)
6529 class LUInstanceRecreateDisks(LogicalUnit):
6530 """Recreate an instance's missing disks.
6533 HPATH = "instance-recreate-disks"
6534 HTYPE = constants.HTYPE_INSTANCE
6537 def CheckArguments(self):
6538 # normalise the disk list
6539 self.op.disks = sorted(frozenset(self.op.disks))
6541 def ExpandNames(self):
6542 self._ExpandAndLockInstance()
6543 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6545 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6546 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6548 self.needed_locks[locking.LEVEL_NODE] = []
6550 def DeclareLocks(self, level):
6551 if level == locking.LEVEL_NODE:
6552 # if we replace the nodes, we only need to lock the old primary,
6553 # otherwise we need to lock all nodes for disk re-creation
6554 primary_only = bool(self.op.nodes)
6555 self._LockInstancesNodes(primary_only=primary_only)
6556 elif level == locking.LEVEL_NODE_RES:
6558 self.needed_locks[locking.LEVEL_NODE_RES] = \
6559 self.needed_locks[locking.LEVEL_NODE][:]
6561 def BuildHooksEnv(self):
6564 This runs on master, primary and secondary nodes of the instance.
6567 return _BuildInstanceHookEnvByObject(self, self.instance)
6569 def BuildHooksNodes(self):
6570 """Build hooks nodes.
6573 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6576 def CheckPrereq(self):
6577 """Check prerequisites.
6579 This checks that the instance is in the cluster and is not running.
6582 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6583 assert instance is not None, \
6584 "Cannot retrieve locked instance %s" % self.op.instance_name
6586 if len(self.op.nodes) != len(instance.all_nodes):
6587 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6588 " %d replacement nodes were specified" %
6589 (instance.name, len(instance.all_nodes),
6590 len(self.op.nodes)),
6592 assert instance.disk_template != constants.DT_DRBD8 or \
6593 len(self.op.nodes) == 2
6594 assert instance.disk_template != constants.DT_PLAIN or \
6595 len(self.op.nodes) == 1
6596 primary_node = self.op.nodes[0]
6598 primary_node = instance.primary_node
6599 _CheckNodeOnline(self, primary_node)
6601 if instance.disk_template == constants.DT_DISKLESS:
6602 raise errors.OpPrereqError("Instance '%s' has no disks" %
6603 self.op.instance_name, errors.ECODE_INVAL)
6604 # if we replace nodes *and* the old primary is offline, we don't
6605 # check the instance state
6606 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6607 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6608 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6609 if not (self.op.nodes and old_pnode.offline):
6610 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6611 msg="cannot recreate disks")
6613 if not self.op.disks:
6614 self.op.disks = range(len(instance.disks))
6616 for idx in self.op.disks:
6617 if idx >= len(instance.disks):
6618 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6620 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6621 raise errors.OpPrereqError("Can't recreate disks partially and"
6622 " change the nodes at the same time",
6624 self.instance = instance
6626 def Exec(self, feedback_fn):
6627 """Recreate the disks.
6630 instance = self.instance
6632 assert (self.owned_locks(locking.LEVEL_NODE) ==
6633 self.owned_locks(locking.LEVEL_NODE_RES))
6636 mods = [] # keeps track of needed logical_id changes
6638 for idx, disk in enumerate(instance.disks):
6639 if idx not in self.op.disks: # disk idx has not been passed in
6642 # update secondaries for disks, if needed
6644 if disk.dev_type == constants.LD_DRBD8:
6645 # need to update the nodes and minors
6646 assert len(self.op.nodes) == 2
6647 assert len(disk.logical_id) == 6 # otherwise disk internals
6649 (_, _, old_port, _, _, old_secret) = disk.logical_id
6650 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6651 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6652 new_minors[0], new_minors[1], old_secret)
6653 assert len(disk.logical_id) == len(new_id)
6654 mods.append((idx, new_id))
6656 # now that we have passed all asserts above, we can apply the mods
6657 # in a single run (to avoid partial changes)
6658 for idx, new_id in mods:
6659 instance.disks[idx].logical_id = new_id
6661 # change primary node, if needed
6663 instance.primary_node = self.op.nodes[0]
6664 self.LogWarning("Changing the instance's nodes, you will have to"
6665 " remove any disks left on the older nodes manually")
6668 self.cfg.Update(instance, feedback_fn)
6670 _CreateDisks(self, instance, to_skip=to_skip)
6673 class LUInstanceRename(LogicalUnit):
6674 """Rename an instance.
6677 HPATH = "instance-rename"
6678 HTYPE = constants.HTYPE_INSTANCE
6680 def CheckArguments(self):
6684 if self.op.ip_check and not self.op.name_check:
6685 # TODO: make the ip check more flexible and not depend on the name check
6686 raise errors.OpPrereqError("IP address check requires a name check",
6689 def BuildHooksEnv(self):
6692 This runs on master, primary and secondary nodes of the instance.
6695 env = _BuildInstanceHookEnvByObject(self, self.instance)
6696 env["INSTANCE_NEW_NAME"] = self.op.new_name
6699 def BuildHooksNodes(self):
6700 """Build hooks nodes.
6703 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6706 def CheckPrereq(self):
6707 """Check prerequisites.
6709 This checks that the instance is in the cluster and is not running.
6712 self.op.instance_name = _ExpandInstanceName(self.cfg,
6713 self.op.instance_name)
6714 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6715 assert instance is not None
6716 _CheckNodeOnline(self, instance.primary_node)
6717 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6718 msg="cannot rename")
6719 self.instance = instance
6721 new_name = self.op.new_name
6722 if self.op.name_check:
6723 hostname = netutils.GetHostname(name=new_name)
6724 if hostname.name != new_name:
6725 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6727 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6728 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6729 " same as given hostname '%s'") %
6730 (hostname.name, self.op.new_name),
6732 new_name = self.op.new_name = hostname.name
6733 if (self.op.ip_check and
6734 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6735 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6736 (hostname.ip, new_name),
6737 errors.ECODE_NOTUNIQUE)
6739 instance_list = self.cfg.GetInstanceList()
6740 if new_name in instance_list and new_name != instance.name:
6741 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6742 new_name, errors.ECODE_EXISTS)
6744 def Exec(self, feedback_fn):
6745 """Rename the instance.
6748 inst = self.instance
6749 old_name = inst.name
6751 rename_file_storage = False
6752 if (inst.disk_template in constants.DTS_FILEBASED and
6753 self.op.new_name != inst.name):
6754 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6755 rename_file_storage = True
6757 self.cfg.RenameInstance(inst.name, self.op.new_name)
6758 # Change the instance lock. This is definitely safe while we hold the BGL.
6759 # Otherwise the new lock would have to be added in acquired mode.
6761 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6762 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6764 # re-read the instance from the configuration after rename
6765 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6767 if rename_file_storage:
6768 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6769 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6770 old_file_storage_dir,
6771 new_file_storage_dir)
6772 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6773 " (but the instance has been renamed in Ganeti)" %
6774 (inst.primary_node, old_file_storage_dir,
6775 new_file_storage_dir))
6777 _StartInstanceDisks(self, inst, None)
6779 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6780 old_name, self.op.debug_level)
6781 msg = result.fail_msg
6783 msg = ("Could not run OS rename script for instance %s on node %s"
6784 " (but the instance has been renamed in Ganeti): %s" %
6785 (inst.name, inst.primary_node, msg))
6786 self.proc.LogWarning(msg)
6788 _ShutdownInstanceDisks(self, inst)
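# Rename flow in short: the configuration rename and the instance-lock swap
# are committed first, then the OS rename script runs on the primary node;
# a script failure is deliberately only a warning, since the Ganeti-side
# rename has already been committed at that point.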
6793 class LUInstanceRemove(LogicalUnit):
6794 """Remove an instance.
6797 HPATH = "instance-remove"
6798 HTYPE = constants.HTYPE_INSTANCE
6801 def ExpandNames(self):
6802 self._ExpandAndLockInstance()
6803 self.needed_locks[locking.LEVEL_NODE] = []
6804 self.needed_locks[locking.LEVEL_NODE_RES] = []
6805 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6807 def DeclareLocks(self, level):
6808 if level == locking.LEVEL_NODE:
6809 self._LockInstancesNodes()
6810 elif level == locking.LEVEL_NODE_RES:
6812 self.needed_locks[locking.LEVEL_NODE_RES] = \
6813 self.needed_locks[locking.LEVEL_NODE][:]
6815 def BuildHooksEnv(self):
6818 This runs on master, primary and secondary nodes of the instance.
6821 env = _BuildInstanceHookEnvByObject(self, self.instance)
6822 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6825 def BuildHooksNodes(self):
6826 """Build hooks nodes.
6829 nl = [self.cfg.GetMasterNode()]
6830 nl_post = list(self.instance.all_nodes) + nl
6831 return (nl, nl_post)
6833 def CheckPrereq(self):
6834 """Check prerequisites.
6836 This checks that the instance is in the cluster.
6839 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6840 assert self.instance is not None, \
6841 "Cannot retrieve locked instance %s" % self.op.instance_name
6843 def Exec(self, feedback_fn):
6844 """Remove the instance.
6847 instance = self.instance
6848 logging.info("Shutting down instance %s on node %s",
6849 instance.name, instance.primary_node)
6851 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6852 self.op.shutdown_timeout)
6853 msg = result.fail_msg
6855 if self.op.ignore_failures:
6856 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6858 raise errors.OpExecError("Could not shutdown instance %s on"
6860 (instance.name, instance.primary_node, msg))
6862 assert (self.owned_locks(locking.LEVEL_NODE) ==
6863 self.owned_locks(locking.LEVEL_NODE_RES))
6864 assert not (set(instance.all_nodes) -
6865 self.owned_locks(locking.LEVEL_NODE)), \
6866 "Not owning correct locks"
6868 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6871 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6872 """Utility function to remove an instance.
6875 logging.info("Removing block devices for instance %s", instance.name)
6877 if not _RemoveDisks(lu, instance):
6878 if not ignore_failures:
6879 raise errors.OpExecError("Can't remove instance's disks")
6880 feedback_fn("Warning: can't remove instance's disks")
6882 logging.info("Removing instance %s out of cluster config", instance.name)
6884 lu.cfg.RemoveInstance(instance.name)
6886 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6887 "Instance lock removal conflict"
6889 # Remove lock for the instance
6890 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6893 class LUInstanceQuery(NoHooksLU):
6894 """Logical unit for querying instances.
6897 # pylint: disable=W0142
6900 def CheckArguments(self):
6901 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6902 self.op.output_fields, self.op.use_locking)
6904 def ExpandNames(self):
6905 self.iq.ExpandNames(self)
6907 def DeclareLocks(self, level):
6908 self.iq.DeclareLocks(self, level)
6910 def Exec(self, feedback_fn):
6911 return self.iq.OldStyleQuery(self)
6914 class LUInstanceFailover(LogicalUnit):
6915 """Failover an instance.
6918 HPATH = "instance-failover"
6919 HTYPE = constants.HTYPE_INSTANCE
6922 def CheckArguments(self):
6923 """Check the arguments.
6926 self.iallocator = getattr(self.op, "iallocator", None)
6927 self.target_node = getattr(self.op, "target_node", None)
6929 def ExpandNames(self):
6930 self._ExpandAndLockInstance()
6932 if self.op.target_node is not None:
6933 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6935 self.needed_locks[locking.LEVEL_NODE] = []
6936 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6938 ignore_consistency = self.op.ignore_consistency
6939 shutdown_timeout = self.op.shutdown_timeout
6940 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6943 ignore_consistency=ignore_consistency,
6944 shutdown_timeout=shutdown_timeout)
6945 self.tasklets = [self._migrater]
6947 def DeclareLocks(self, level):
6948 if level == locking.LEVEL_NODE:
6949 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6950 if instance.disk_template in constants.DTS_EXT_MIRROR:
6951 if self.op.target_node is None:
6952 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6954 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6955 self.op.target_node]
6956 del self.recalculate_locks[locking.LEVEL_NODE]
6958 self._LockInstancesNodes()
6960 def BuildHooksEnv(self):
6963 This runs on master, primary and secondary nodes of the instance.
6966 instance = self._migrater.instance
6967 source_node = instance.primary_node
6968 target_node = self.op.target_node
6970 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6971 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6972 "OLD_PRIMARY": source_node,
6973 "NEW_PRIMARY": target_node,
6976 if instance.disk_template in constants.DTS_INT_MIRROR:
6977 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6978 env["NEW_SECONDARY"] = source_node
6980 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6982 env.update(_BuildInstanceHookEnvByObject(self, instance))
6986 def BuildHooksNodes(self):
6987 """Build hooks nodes.
6990 instance = self._migrater.instance
6991 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6992 return (nl, nl + [instance.primary_node])
6995 class LUInstanceMigrate(LogicalUnit):
6996 """Migrate an instance.
6998 This is migration without shutting down, compared to the failover,
6999 which is done with shutdown.
7002 HPATH = "instance-migrate"
7003 HTYPE = constants.HTYPE_INSTANCE
7006 def ExpandNames(self):
7007 self._ExpandAndLockInstance()
7009 if self.op.target_node is not None:
7010 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7012 self.needed_locks[locking.LEVEL_NODE] = []
7013 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7015 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7016 cleanup=self.op.cleanup,
7018 fallback=self.op.allow_failover)
7019 self.tasklets = [self._migrater]
7021 def DeclareLocks(self, level):
7022 if level == locking.LEVEL_NODE:
7023 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7024 if instance.disk_template in constants.DTS_EXT_MIRROR:
7025 if self.op.target_node is None:
7026 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7028 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7029 self.op.target_node]
7030 del self.recalculate_locks[locking.LEVEL_NODE]
7032 self._LockInstancesNodes()
7034 def BuildHooksEnv(self):
7037 This runs on master, primary and secondary nodes of the instance.
7040 instance = self._migrater.instance
7041 source_node = instance.primary_node
7042 target_node = self.op.target_node
7043 env = _BuildInstanceHookEnvByObject(self, instance)
7045 "MIGRATE_LIVE": self._migrater.live,
7046 "MIGRATE_CLEANUP": self.op.cleanup,
7047 "OLD_PRIMARY": source_node,
7048 "NEW_PRIMARY": target_node,
7051 if instance.disk_template in constants.DTS_INT_MIRROR:
7052 env["OLD_SECONDARY"] = target_node
7053 env["NEW_SECONDARY"] = source_node
7055 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7059 def BuildHooksNodes(self):
7060 """Build hooks nodes.
7063 instance = self._migrater.instance
7064 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7065 return (nl, nl + [instance.primary_node])
7068 class LUInstanceMove(LogicalUnit):
7069 """Move an instance by data-copying.
7072 HPATH = "instance-move"
7073 HTYPE = constants.HTYPE_INSTANCE
7076 def ExpandNames(self):
7077 self._ExpandAndLockInstance()
7078 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7079 self.op.target_node = target_node
7080 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7081 self.needed_locks[locking.LEVEL_NODE_RES] = []
7082 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7084 def DeclareLocks(self, level):
7085 if level == locking.LEVEL_NODE:
7086 self._LockInstancesNodes(primary_only=True)
7087 elif level == locking.LEVEL_NODE_RES:
7089 self.needed_locks[locking.LEVEL_NODE_RES] = \
7090 self.needed_locks[locking.LEVEL_NODE][:]
7092 def BuildHooksEnv(self):
7095 This runs on master, primary and secondary nodes of the instance.
7099 "TARGET_NODE": self.op.target_node,
7100 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7102 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7105 def BuildHooksNodes(self):
7106 """Build hooks nodes.
7110 self.cfg.GetMasterNode(),
7111 self.instance.primary_node,
7112 self.op.target_node,
7116 def CheckPrereq(self):
7117 """Check prerequisites.
7119 This checks that the instance is in the cluster.
7122 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7123 assert self.instance is not None, \
7124 "Cannot retrieve locked instance %s" % self.op.instance_name
7126 node = self.cfg.GetNodeInfo(self.op.target_node)
7127 assert node is not None, \
7128 "Cannot retrieve locked node %s" % self.op.target_node
7130 self.target_node = target_node = node.name
7132 if target_node == instance.primary_node:
7133 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7134 (instance.name, target_node),
7137 bep = self.cfg.GetClusterInfo().FillBE(instance)
7139 for idx, dsk in enumerate(instance.disks):
7140 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7141 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7142 " cannot copy" % idx, errors.ECODE_STATE)
7144 _CheckNodeOnline(self, target_node)
7145 _CheckNodeNotDrained(self, target_node)
7146 _CheckNodeVmCapable(self, target_node)
7148 if instance.admin_state == constants.ADMINST_UP:
7149 # check memory requirements on the secondary node
7150 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7151 instance.name, bep[constants.BE_MAXMEM],
7152 instance.hypervisor)
7154 self.LogInfo("Not checking memory on the secondary node as"
7155 " instance will not be started")
7157 # check bridge existence
7158 _CheckInstanceBridgesExist(self, instance, node=target_node)
7160 def Exec(self, feedback_fn):
7161 """Move an instance.
7163 The move is done by shutting it down on its present node, copying
7164 the data over (slow) and starting it on the new node.
7167 instance = self.instance
7169 source_node = instance.primary_node
7170 target_node = self.target_node
7172 self.LogInfo("Shutting down instance %s on source node %s",
7173 instance.name, source_node)
7175 assert (self.owned_locks(locking.LEVEL_NODE) ==
7176 self.owned_locks(locking.LEVEL_NODE_RES))
7178 result = self.rpc.call_instance_shutdown(source_node, instance,
7179 self.op.shutdown_timeout)
7180 msg = result.fail_msg
7182 if self.op.ignore_consistency:
7183 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7184 " Proceeding anyway. Please make sure node"
7185 " %s is down. Error details: %s",
7186 instance.name, source_node, source_node, msg)
7188 raise errors.OpExecError("Could not shutdown instance %s on"
7190 (instance.name, source_node, msg))
7192 # create the target disks
7194 _CreateDisks(self, instance, target_node=target_node)
7195 except errors.OpExecError:
7196 self.LogWarning("Device creation failed, reverting...")
7198 _RemoveDisks(self, instance, target_node=target_node)
7200 self.cfg.ReleaseDRBDMinors(instance.name)
7203 cluster_name = self.cfg.GetClusterInfo().cluster_name
7206 # activate, get path, copy the data over
7207 for idx, disk in enumerate(instance.disks):
7208 self.LogInfo("Copying data for disk %d", idx)
7209 result = self.rpc.call_blockdev_assemble(target_node, disk,
7210 instance.name, True, idx)
7212 self.LogWarning("Can't assemble newly created disk %d: %s",
7213 idx, result.fail_msg)
7214 errs.append(result.fail_msg)
7216 dev_path = result.payload
7217 result = self.rpc.call_blockdev_export(source_node, disk,
7218 target_node, dev_path,
7221 self.LogWarning("Can't copy data over for disk %d: %s",
7222 idx, result.fail_msg)
7223 errs.append(result.fail_msg)
7227 self.LogWarning("Some disks failed to copy, aborting")
7229 _RemoveDisks(self, instance, target_node=target_node)
7231 self.cfg.ReleaseDRBDMinors(instance.name)
7232 raise errors.OpExecError("Errors during disk copy: %s" %
7235 instance.primary_node = target_node
7236 self.cfg.Update(instance, feedback_fn)
7238 self.LogInfo("Removing the disks on the original node")
7239 _RemoveDisks(self, instance, target_node=source_node)
7241 # Only start the instance if it's marked as up
7242 if instance.admin_state == constants.ADMINST_UP:
7243 self.LogInfo("Starting instance %s on node %s",
7244 instance.name, target_node)
7246 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7247 ignore_secondaries=True)
7249 _ShutdownInstanceDisks(self, instance)
7250 raise errors.OpExecError("Can't activate the instance's disks")
7252 result = self.rpc.call_instance_start(target_node,
7253 (instance, None, None), False)
7254 msg = result.fail_msg
7256 _ShutdownInstanceDisks(self, instance)
7257 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7258 (instance.name, target_node, msg))
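# Data path of the move above: each disk is assembled on the target node,
# then the blockdev_export RPC streams the source device onto the returned
# dev_path; per-disk failures are collected in errs so that every disk is
# attempted before the operation aborts and the target disks are removed.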
7261 class LUNodeMigrate(LogicalUnit):
7262 """Migrate all instances from a node.
7265 HPATH = "node-migrate"
7266 HTYPE = constants.HTYPE_NODE
7269 def CheckArguments(self):
7272 def ExpandNames(self):
7273 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7275 self.share_locks = _ShareAll()
7276 self.needed_locks = {
7277 locking.LEVEL_NODE: [self.op.node_name],
7280 def BuildHooksEnv(self):
7283 This runs on the master, the primary and all the secondaries.
7287 "NODE_NAME": self.op.node_name,
7290 def BuildHooksNodes(self):
7291 """Build hooks nodes.
7294 nl = [self.cfg.GetMasterNode()]
7297 def CheckPrereq(self):
7300 def Exec(self, feedback_fn):
7301 # Prepare jobs for migration instances
7303 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7306 iallocator=self.op.iallocator,
7307 target_node=self.op.target_node)]
7308 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7311 # TODO: Run iallocator in this opcode and pass correct placement options to
7312 # OpInstanceMigrate. Since other jobs can modify the cluster between
7313 # running the iallocator and the actual migration, a good consistency model
7314 # will have to be found.
7316 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7317 frozenset([self.op.node_name]))
7319 return ResultWithJobs(jobs)
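# Shape of the jobs value above (illustrative): one single-opcode job per
# primary instance on the evacuated node, e.g.
#   [[opcodes.OpInstanceMigrate(instance_name="inst1", ...)],
#    [opcodes.OpInstanceMigrate(instance_name="inst2", ...)]]
# with each inner list submitted as a separate job.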
7322 class TLMigrateInstance(Tasklet):
7323 """Tasklet class for instance migration.
7326 @ivar live: whether the migration will be done live or non-live;
7327 this variable is initialized only after CheckPrereq has run
7328 @type cleanup: boolean
7329 @ivar cleanup: Whether we clean up after a failed migration
7330 @type iallocator: string
7331 @ivar iallocator: The iallocator used to determine target_node
7332 @type target_node: string
7333 @ivar target_node: If given, the target_node to reallocate the instance to
7334 @type failover: boolean
7335 @ivar failover: Whether operation results in failover or migration
7336 @type fallback: boolean
7337 @ivar fallback: Whether fallback to failover is allowed if migration is
7338 not possible
7339 @type ignore_consistency: boolean
7340 @ivar ignore_consistency: Whether we should ignore consistency between the
7341 source and the target node
7342 @type shutdown_timeout: int
7343 @ivar shutdown_timeout: the timeout to use for the shutdown in case of failover
7348 _MIGRATION_POLL_INTERVAL = 1 # seconds
7349 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7351 def __init__(self, lu, instance_name, cleanup=False,
7352 failover=False, fallback=False,
7353 ignore_consistency=False,
7354 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7355 """Initializes this class.
7358 Tasklet.__init__(self, lu)
7361 self.instance_name = instance_name
7362 self.cleanup = cleanup
7363 self.live = False # will be overridden later
7364 self.failover = failover
7365 self.fallback = fallback
7366 self.ignore_consistency = ignore_consistency
7367 self.shutdown_timeout = shutdown_timeout
7369 def CheckPrereq(self):
7370 """Check prerequisites.
7372 This checks that the instance is in the cluster.
7375 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7376 instance = self.cfg.GetInstanceInfo(instance_name)
7377 assert instance is not None
7378 self.instance = instance
7380 if (not self.cleanup and
7381 not instance.admin_state == constants.ADMINST_UP and
7382 not self.failover and self.fallback):
7383 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7384 " switching to failover")
7385 self.failover = True
7387 if instance.disk_template not in constants.DTS_MIRRORED:
7392 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7393 " %s" % (instance.disk_template, text),
7396 if instance.disk_template in constants.DTS_EXT_MIRROR:
7397 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7399 if self.lu.op.iallocator:
7400 self._RunAllocator()
7402 # We set self.target_node as it is required by
7404 self.target_node = self.lu.op.target_node
7406 # self.target_node is already populated, either directly or by the
7408 target_node = self.target_node
7409 if self.target_node == instance.primary_node:
7410 raise errors.OpPrereqError("Cannot migrate instance %s"
7411 " to its primary (%s)" %
7412 (instance.name, instance.primary_node))
7414 if len(self.lu.tasklets) == 1:
7415 # It is safe to release locks only when we're the only tasklet
7416 # in the LU
7417 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7418 keep=[instance.primary_node, self.target_node])
7421 secondary_nodes = instance.secondary_nodes
7422 if not secondary_nodes:
7423 raise errors.ConfigurationError("No secondary node but using"
7424 " %s disk template" %
7425 instance.disk_template)
7426 target_node = secondary_nodes[0]
7427 if self.lu.op.iallocator or (self.lu.op.target_node and
7428 self.lu.op.target_node != target_node):
7430 text = "failed over"
7433 raise errors.OpPrereqError("Instances with disk template %s cannot"
7434 " be %s to arbitrary nodes"
7435 " (neither an iallocator nor a target"
7436 " node can be passed)" %
7437 (instance.disk_template, text),
7440 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7442 # check memory requirements on the secondary node
7443 if not self.failover or instance.admin_state == constants.ADMINST_UP:
7444 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7445 instance.name, i_be[constants.BE_MAXMEM],
7446 instance.hypervisor)
7448 self.lu.LogInfo("Not checking memory on the secondary node as"
7449 " instance will not be started")
7451 # check bridge existence
7452 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7454 if not self.cleanup:
7455 _CheckNodeNotDrained(self.lu, target_node)
7456 if not self.failover:
7457 result = self.rpc.call_instance_migratable(instance.primary_node,
7459 if result.fail_msg and self.fallback:
7460 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7462 self.failover = True
7464 result.Raise("Can't migrate, please use failover",
7465 prereq=True, ecode=errors.ECODE_STATE)
7467 assert not (self.failover and self.cleanup)
7469 if not self.failover:
7470 if self.lu.op.live is not None and self.lu.op.mode is not None:
7471 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7472 " parameters are accepted",
7474 if self.lu.op.live is not None:
7476 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7478 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7479 # reset the 'live' parameter to None so that repeated
7480 # invocations of CheckPrereq do not raise an exception
7481 self.lu.op.live = None
7482 elif self.lu.op.mode is None:
7483 # read the default value from the hypervisor
7484 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7486 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7488 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7490 # Failover is never live
7493 def _RunAllocator(self):
7494 """Run the allocator based on input opcode.
7497 ial = IAllocator(self.cfg, self.rpc,
7498 mode=constants.IALLOCATOR_MODE_RELOC,
7499 name=self.instance_name,
7500 # TODO See why hail breaks with a single node below
7501 relocate_from=[self.instance.primary_node,
7502 self.instance.primary_node],
7505 ial.Run(self.lu.op.iallocator)
7508 raise errors.OpPrereqError("Can't compute nodes using"
7509 " iallocator '%s': %s" %
7510 (self.lu.op.iallocator, ial.info),
7512 if len(ial.result) != ial.required_nodes:
7513 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7514 " of nodes (%s), required %s" %
7515 (self.lu.op.iallocator, len(ial.result),
7516 ial.required_nodes), errors.ECODE_FAULT)
7517 self.target_node = ial.result[0]
7518 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7519 self.instance_name, self.lu.op.iallocator,
7520 utils.CommaJoin(ial.result))
7522 def _WaitUntilSync(self):
7523 """Poll with custom rpc for disk sync.
7525 This uses our own step-based rpc call.
7528 self.feedback_fn("* wait until resync is done")
7532 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7534 self.instance.disks)
7536 for node, nres in result.items():
7537 nres.Raise("Cannot resync disks on node %s" % node)
7538 node_done, node_percent = nres.payload
7539 all_done = all_done and node_done
7540 if node_percent is not None:
7541 min_percent = min(min_percent, node_percent)
7543 if min_percent < 100:
7544 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7547 def _EnsureSecondary(self, node):
7548 """Demote a node to secondary.
7551 self.feedback_fn("* switching node %s to secondary mode" % node)
7553 for dev in self.instance.disks:
7554 self.cfg.SetDiskID(dev, node)
7556 result = self.rpc.call_blockdev_close(node, self.instance.name,
7557 self.instance.disks)
7558 result.Raise("Cannot change disk to secondary on node %s" % node)
7560 def _GoStandalone(self):
7561 """Disconnect from the network.
7564 self.feedback_fn("* changing into standalone mode")
7565 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7566 self.instance.disks)
7567 for node, nres in result.items():
7568 nres.Raise("Cannot disconnect disks node %s" % node)
7570 def _GoReconnect(self, multimaster):
7571 """Reconnect to the network.
7577 msg = "single-master"
7578 self.feedback_fn("* changing disks into %s mode" % msg)
7579 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7580 self.instance.disks,
7581 self.instance.name, multimaster)
7582 for node, nres in result.items():
7583 nres.Raise("Cannot change disks config on node %s" % node)
7585 def _ExecCleanup(self):
7586 """Try to cleanup after a failed migration.
7588 The cleanup is done by:
7589 - check that the instance is running only on one node
7590 (and update the config if needed)
7591 - change disks on its secondary node to secondary
7592 - wait until disks are fully synchronized
7593 - disconnect from the network
7594 - change disks into single-master mode
7595 - wait again until disks are fully synchronized
7598 instance = self.instance
7599 target_node = self.target_node
7600 source_node = self.source_node
7602 # check running on only one node
7603 self.feedback_fn("* checking where the instance actually runs"
7604 " (if this hangs, the hypervisor might be in"
7606 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7607 for node, result in ins_l.items():
7608 result.Raise("Can't contact node %s" % node)
7610 runningon_source = instance.name in ins_l[source_node].payload
7611 runningon_target = instance.name in ins_l[target_node].payload
7613 if runningon_source and runningon_target:
7614 raise errors.OpExecError("Instance seems to be running on two nodes,"
7615 " or the hypervisor is confused; you will have"
7616 " to ensure manually that it runs only on one"
7617 " and restart this operation")
7619 if not (runningon_source or runningon_target):
7620 raise errors.OpExecError("Instance does not seem to be running at all;"
7621 " in this case it's safer to repair by"
7622 " running 'gnt-instance stop' to ensure disk"
7623 " shutdown, and then restarting it")
7625 if runningon_target:
7626 # the migration has actually succeeded, we need to update the config
7627 self.feedback_fn("* instance running on secondary node (%s),"
7628 " updating config" % target_node)
7629 instance.primary_node = target_node
7630 self.cfg.Update(instance, self.feedback_fn)
7631 demoted_node = source_node
7633 self.feedback_fn("* instance confirmed to be running on its"
7634 " primary node (%s)" % source_node)
7635 demoted_node = target_node
7637 if instance.disk_template in constants.DTS_INT_MIRROR:
7638 self._EnsureSecondary(demoted_node)
7640 self._WaitUntilSync()
7641 except errors.OpExecError:
7642 # we ignore errors here, since if the device is standalone, it
7643 # won't be able to sync
7645 self._GoStandalone()
7646 self._GoReconnect(False)
7647 self._WaitUntilSync()
7649 self.feedback_fn("* done")
7651 def _RevertDiskStatus(self):
7652 """Try to revert the disk status after a failed migration.
7655 target_node = self.target_node
7656 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7660 self._EnsureSecondary(target_node)
7661 self._GoStandalone()
7662 self._GoReconnect(False)
7663 self._WaitUntilSync()
7664 except errors.OpExecError, err:
7665 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7666 " please try to recover the instance manually;"
7667 " error '%s'" % str(err))
7669 def _AbortMigration(self):
7670 """Call the hypervisor code to abort a started migration.
7673 instance = self.instance
7674 target_node = self.target_node
7675 source_node = self.source_node
7676 migration_info = self.migration_info
7678 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7682 abort_msg = abort_result.fail_msg
7684 logging.error("Aborting migration failed on target node %s: %s",
7685 target_node, abort_msg)
7686 # Don't raise an exception here, as we still have to try to revert the
7687 # disk status, even if this step failed.
7689 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7690 instance, False, self.live)
7691 abort_msg = abort_result.fail_msg
7693 logging.error("Aborting migration failed on source node %s: %s",
7694 source_node, abort_msg)
7696 def _ExecMigration(self):
7697 """Migrate an instance.
7699 The migration is done by:
7700 - change the disks into dual-master mode
7701 - wait until disks are fully synchronized again
7702 - migrate the instance
7703 - change disks on the new secondary node (the old primary) to secondary
7704 - wait until disks are fully synchronized
7705 - change disks into single-master mode
7708 instance = self.instance
7709 target_node = self.target_node
7710 source_node = self.source_node
7712 # Check for hypervisor version mismatch and warn the user.
7713 nodeinfo = self.rpc.call_node_info([source_node, target_node],
7714 None, [self.instance.hypervisor])
7715 for ninfo in nodeinfo.values():
7716 ninfo.Raise("Unable to retrieve node information from node '%s'" %
7718 (_, _, (src_info, )) = nodeinfo[source_node].payload
7719 (_, _, (dst_info, )) = nodeinfo[target_node].payload
7721 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
7722 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
7723 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
7724 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
7725 if src_version != dst_version:
7726 self.feedback_fn("* warning: hypervisor version mismatch between"
7727 " source (%s) and target (%s) node" %
7728 (src_version, dst_version))
7730 self.feedback_fn("* checking disk consistency between source and target")
7731 for dev in instance.disks:
7732 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7733 raise errors.OpExecError("Disk %s is degraded or not fully"
7734 " synchronized on target node,"
7735 " aborting migration" % dev.iv_name)
7737 # First get the migration information from the remote node
7738 result = self.rpc.call_migration_info(source_node, instance)
7739 msg = result.fail_msg
7741 log_err = ("Failed fetching source migration information from %s: %s" %
7743 logging.error(log_err)
7744 raise errors.OpExecError(log_err)
7746 self.migration_info = migration_info = result.payload
7748 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7749 # Then switch the disks to master/master mode
7750 self._EnsureSecondary(target_node)
7751 self._GoStandalone()
7752 self._GoReconnect(True)
7753 self._WaitUntilSync()
7755 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7756 result = self.rpc.call_accept_instance(target_node,
7759 self.nodes_ip[target_node])
7761 msg = result.fail_msg
7763 logging.error("Instance pre-migration failed, trying to revert"
7764 " disk status: %s", msg)
7765 self.feedback_fn("Pre-migration failed, aborting")
7766 self._AbortMigration()
7767 self._RevertDiskStatus()
7768 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7769 (instance.name, msg))
7771 self.feedback_fn("* migrating instance to %s" % target_node)
7772 result = self.rpc.call_instance_migrate(source_node, instance,
7773 self.nodes_ip[target_node],
7775 msg = result.fail_msg
7777 logging.error("Instance migration failed, trying to revert"
7778 " disk status: %s", msg)
7779 self.feedback_fn("Migration failed, aborting")
7780 self._AbortMigration()
7781 self._RevertDiskStatus()
7782 raise errors.OpExecError("Could not migrate instance %s: %s" %
7783 (instance.name, msg))
7785 self.feedback_fn("* starting memory transfer")
7786 last_feedback = time.time()
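# Illustrative note: the polling loop below repeatedly asks the source
# node for the migration status; an RPC failure or a status in
# HV_MIGRATION_FAILED_STATUSES aborts and reverts the migration, and any
# status other than HV_MIGRATION_ACTIVE ends the memory-transfer phase.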
7788 result = self.rpc.call_instance_get_migration_status(source_node,
7790 msg = result.fail_msg
7791 ms = result.payload # MigrationStatus instance
7792 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7793 logging.error("Instance migration failed, trying to revert"
7794 " disk status: %s", msg)
7795 self.feedback_fn("Migration failed, aborting")
7796 self._AbortMigration()
7797 self._RevertDiskStatus()
7798 raise errors.OpExecError("Could not migrate instance %s: %s" %
7799 (instance.name, msg))
7801 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7802 self.feedback_fn("* memory transfer complete")
7805 if (utils.TimeoutExpired(last_feedback,
7806 self._MIGRATION_FEEDBACK_INTERVAL) and
7807 ms.transferred_ram is not None):
7808 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7809 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7810 last_feedback = time.time()
7812 time.sleep(self._MIGRATION_POLL_INTERVAL)
7814 result = self.rpc.call_instance_finalize_migration_src(source_node,
7818 msg = result.fail_msg
7820 logging.error("Instance migration succeeded, but finalization failed"
7821 " on the source node: %s", msg)
7822 raise errors.OpExecError("Could not finalize instance migration: %s" %
7825 instance.primary_node = target_node
7827 # distribute new instance config to the other nodes
7828 self.cfg.Update(instance, self.feedback_fn)
7830 result = self.rpc.call_instance_finalize_migration_dst(target_node,
7834 msg = result.fail_msg
7836 logging.error("Instance migration succeeded, but finalization failed"
7837 " on the target node: %s", msg)
7838 raise errors.OpExecError("Could not finalize instance migration: %s" %
7841 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7842 self._EnsureSecondary(source_node)
7843 self._WaitUntilSync()
7844 self._GoStandalone()
7845 self._GoReconnect(False)
7846 self._WaitUntilSync()
7848 self.feedback_fn("* done")
7850 def _ExecFailover(self):
7851 """Failover an instance.
7853 The failover is done by shutting it down on its present node and
7854 starting it on the secondary.
7857 instance = self.instance
7858 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7860 source_node = instance.primary_node
7861 target_node = self.target_node
7863 if instance.admin_state == constants.ADMINST_UP:
7864 self.feedback_fn("* checking disk consistency between source and target")
7865 for dev in instance.disks:
7866 # for drbd, these are drbd over lvm
7867 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7868 if primary_node.offline:
7869 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7871 (primary_node.name, dev.iv_name, target_node))
7872 elif not self.ignore_consistency:
7873 raise errors.OpExecError("Disk %s is degraded on target node,"
7874 " aborting failover" % dev.iv_name)
7876 self.feedback_fn("* not checking disk consistency as instance is not"
7879 self.feedback_fn("* shutting down instance on source node")
7880 logging.info("Shutting down instance %s on node %s",
7881 instance.name, source_node)
7883 result = self.rpc.call_instance_shutdown(source_node, instance,
7884 self.shutdown_timeout)
7885 msg = result.fail_msg
7887 if self.ignore_consistency or primary_node.offline:
7888 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7889 " proceeding anyway; please make sure node"
7890 " %s is down; error details: %s",
7891 instance.name, source_node, source_node, msg)
7893 raise errors.OpExecError("Could not shutdown instance %s on"
7895 (instance.name, source_node, msg))
7897 self.feedback_fn("* deactivating the instance's disks on source node")
7898 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7899 raise errors.OpExecError("Can't shut down the instance's disks")
7901 instance.primary_node = target_node
7902 # distribute new instance config to the other nodes
7903 self.cfg.Update(instance, self.feedback_fn)
7905 # Only start the instance if it's marked as up
7906 if instance.admin_state == constants.ADMINST_UP:
7907 self.feedback_fn("* activating the instance's disks on target node %s" %
7909 logging.info("Starting instance %s on node %s",
7910 instance.name, target_node)
7912 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7913 ignore_secondaries=True)
7915 _ShutdownInstanceDisks(self.lu, instance)
7916 raise errors.OpExecError("Can't activate the instance's disks")
7918 self.feedback_fn("* starting the instance on the target node %s" %
7920 result = self.rpc.call_instance_start(target_node, (instance, None, None),
7922 msg = result.fail_msg
7924 _ShutdownInstanceDisks(self.lu, instance)
7925 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7926 (instance.name, target_node, msg))
7928 def Exec(self, feedback_fn):
7929 """Perform the migration.
7932 self.feedback_fn = feedback_fn
7933 self.source_node = self.instance.primary_node
7935 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7936 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7937 self.target_node = self.instance.secondary_nodes[0]
7938 # Otherwise self.target_node has been populated either
7939 # directly, or through an iallocator.
7941 self.all_nodes = [self.source_node, self.target_node]
7942 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7943 in self.cfg.GetMultiNodeInfo(self.all_nodes))
7946 feedback_fn("Failover instance %s" % self.instance.name)
7947 self._ExecFailover()
7949 feedback_fn("Migrating instance %s" % self.instance.name)
7952 return self._ExecCleanup()
7954 return self._ExecMigration()
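# Illustrative sketch (not part of the original code flow): for a
# DRBD8-based disk the device tree handled by _CreateBlockDev() below is
# roughly
#
#   LD_DRBD8
#     +- LD_LV (data volume)
#     +- LD_LV (metadata volume)
#
# so the function recurses into the LV children first and only then
# creates the DRBD device itself via _CreateSingleBlockDev().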
7957 def _CreateBlockDev(lu, node, instance, device, force_create,
7959 """Create a tree of block devices on a given node.
7961 If this device type has to be created on secondaries, create it and all its children.
7964 If not, just recurse to children keeping the same 'force' value.
7966 @param lu: the lu on whose behalf we execute
7967 @param node: the node on which to create the device
7968 @type instance: L{objects.Instance}
7969 @param instance: the instance which owns the device
7970 @type device: L{objects.Disk}
7971 @param device: the device to create
7972 @type force_create: boolean
7973 @param force_create: whether to force creation of this device; this
7974 will be changed to True whenever we find a device whose
7975 CreateOnSecondary() method returns True
7976 @param info: the extra 'metadata' we should attach to the device
7977 (this will be represented as a LVM tag)
7978 @type force_open: boolean
7979 @param force_open: this parameter will be passed to the
7980 L{backend.BlockdevCreate} function where it specifies
7981 whether we run on primary or not, and it affects both
7982 the child assembly and the device's own Open() execution
7985 if device.CreateOnSecondary():
7989 for child in device.children:
7990 _CreateBlockDev(lu, node, instance, child, force_create,
7993 if not force_create:
7996 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7999 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8000 """Create a single block device on a given node.
8002 This will not recurse over children of the device, so they must be created in advance.
8005 @param lu: the lu on whose behalf we execute
8006 @param node: the node on which to create the device
8007 @type instance: L{objects.Instance}
8008 @param instance: the instance which owns the device
8009 @type device: L{objects.Disk}
8010 @param device: the device to create
8011 @param info: the extra 'metadata' we should attach to the device
8012 (this will be represented as a LVM tag)
8013 @type force_open: boolean
8014 @param force_open: this parameter will be passed to the
8015 L{backend.BlockdevCreate} function where it specifies
8016 whether we run on primary or not, and it affects both
8017 the child assembly and the device's own Open() execution
8020 lu.cfg.SetDiskID(device, node)
8021 result = lu.rpc.call_blockdev_create(node, device, device.size,
8022 instance.name, force_open, info)
8023 result.Raise("Can't create block device %s on"
8024 " node %s for instance %s" % (device, node, instance.name))
8025 if device.physical_id is None:
8026 device.physical_id = result.payload
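# Hedged example for _GenerateUniqueNames() below: called with
# exts=[".disk0", ".disk1"] it returns something like
# ["<id0>.disk0", "<id1>.disk1"], where each <idN> is a fresh value from
# cfg.GenerateUniqueID() (the exact format of the generated id is not
# assumed here).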
8029 def _GenerateUniqueNames(lu, exts):
8030 """Generate a suitable LV name.
8032 This will generate a logical volume name for the given instance.
8037 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8038 results.append("%s%s" % (new_id, val))
8042 def _ComputeLDParams(disk_template, disk_params):
8043 """Computes Logical Disk parameters from Disk Template parameters.
8045 @type disk_template: string
8046 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8047 @type disk_params: dict
8048 @param disk_params: disk template parameters; dict(template_name -> parameters)
8050 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8051 contains the LD parameters of the node. The tree is flattened in-order.
8054 if disk_template not in constants.DISK_TEMPLATES:
8055 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8058 dt_params = disk_params[disk_template]
8059 if disk_template == constants.DT_DRBD8:
8061 constants.RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE]
8065 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8068 result.append(drbd_params)
8072 constants.STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8075 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8077 result.append(data_params)
8081 constants.STRIPES: dt_params[constants.DRBD_META_STRIPES],
8084 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8086 result.append(meta_params)
8088 elif (disk_template == constants.DT_FILE or
8089 disk_template == constants.DT_SHARED_FILE):
8090 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8092 elif disk_template == constants.DT_PLAIN:
8094 constants.STRIPES: dt_params[constants.LV_STRIPES],
8097 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8099 result.append(params)
8101 elif disk_template == constants.DT_BLOCK:
8102 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
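# Hedged note on the value returned by _ComputeLDParams() above: for
# DT_DRBD8 the flattened, in-order result is the three-element list
# [drbd_params, data_lv_params, meta_lv_params], which is exactly how
# _GenerateDiskTemplate() below unpacks it; single-level templates such
# as DT_PLAIN yield a single-element list.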
8107 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8108 iv_name, p_minor, s_minor, drbd_params, data_params,
8110 """Generate a drbd8 device complete with its children.
8113 assert len(vgnames) == len(names) == 2
8114 port = lu.cfg.AllocatePort()
8115 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8117 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8118 logical_id=(vgnames[0], names[0]),
8120 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8121 logical_id=(vgnames[1], names[1]),
8123 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8124 logical_id=(primary, secondary, port,
8127 children=[dev_data, dev_meta],
8128 iv_name=iv_name, params=drbd_params)
8132 def _GenerateDiskTemplate(lu, template_name,
8133 instance_name, primary_node,
8134 secondary_nodes, disk_info,
8135 file_storage_dir, file_driver,
8136 base_index, feedback_fn, disk_params):
8137 """Generate the entire disk layout for a given template type.
8140 #TODO: compute space requirements
8142 vgname = lu.cfg.GetVGName()
8143 disk_count = len(disk_info)
8145 ld_params = _ComputeLDParams(template_name, disk_params)
8146 if template_name == constants.DT_DISKLESS:
8148 elif template_name == constants.DT_PLAIN:
8149 if len(secondary_nodes) != 0:
8150 raise errors.ProgrammerError("Wrong template configuration")
8152 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8153 for i in range(disk_count)])
8154 for idx, disk in enumerate(disk_info):
8155 disk_index = idx + base_index
8156 vg = disk.get(constants.IDISK_VG, vgname)
8157 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8158 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8159 size=disk[constants.IDISK_SIZE],
8160 logical_id=(vg, names[idx]),
8161 iv_name="disk/%d" % disk_index,
8162 mode=disk[constants.IDISK_MODE],
8163 params=ld_params[0])
8164 disks.append(disk_dev)
8165 elif template_name == constants.DT_DRBD8:
8166 drbd_params, data_params, meta_params = ld_params
8167 if len(secondary_nodes) != 1:
8168 raise errors.ProgrammerError("Wrong template configuration")
8169 remote_node = secondary_nodes[0]
8170 minors = lu.cfg.AllocateDRBDMinor(
8171 [primary_node, remote_node] * len(disk_info), instance_name)
8174 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8175 for i in range(disk_count)]):
8176 names.append(lv_prefix + "_data")
8177 names.append(lv_prefix + "_meta")
8178 for idx, disk in enumerate(disk_info):
8179 disk_index = idx + base_index
8180 data_vg = disk.get(constants.IDISK_VG, vgname)
8181 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
8182 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8183 disk[constants.IDISK_SIZE],
8185 names[idx * 2:idx * 2 + 2],
8186 "disk/%d" % disk_index,
8187 minors[idx * 2], minors[idx * 2 + 1],
8188 drbd_params, data_params, meta_params)
8189 disk_dev.mode = disk[constants.IDISK_MODE]
8190 disks.append(disk_dev)
8191 elif template_name == constants.DT_FILE:
8192 if len(secondary_nodes) != 0:
8193 raise errors.ProgrammerError("Wrong template configuration")
8195 opcodes.RequireFileStorage()
8197 for idx, disk in enumerate(disk_info):
8198 disk_index = idx + base_index
8199 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8200 size=disk[constants.IDISK_SIZE],
8201 iv_name="disk/%d" % disk_index,
8202 logical_id=(file_driver,
8203 "%s/disk%d" % (file_storage_dir,
8205 mode=disk[constants.IDISK_MODE],
8206 params=ld_params[0])
8207 disks.append(disk_dev)
8208 elif template_name == constants.DT_SHARED_FILE:
8209 if len(secondary_nodes) != 0:
8210 raise errors.ProgrammerError("Wrong template configuration")
8212 opcodes.RequireSharedFileStorage()
8214 for idx, disk in enumerate(disk_info):
8215 disk_index = idx + base_index
8216 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8217 size=disk[constants.IDISK_SIZE],
8218 iv_name="disk/%d" % disk_index,
8219 logical_id=(file_driver,
8220 "%s/disk%d" % (file_storage_dir,
8222 mode=disk[constants.IDISK_MODE],
8223 params=ld_params[0])
8224 disks.append(disk_dev)
8225 elif template_name == constants.DT_BLOCK:
8226 if len(secondary_nodes) != 0:
8227 raise errors.ProgrammerError("Wrong template configuration")
8229 for idx, disk in enumerate(disk_info):
8230 disk_index = idx + base_index
8231 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8232 size=disk[constants.IDISK_SIZE],
8233 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8234 disk[constants.IDISK_ADOPT]),
8235 iv_name="disk/%d" % disk_index,
8236 mode=disk[constants.IDISK_MODE],
8237 params=ld_params[0])
8238 disks.append(disk_dev)
8241 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
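# Hedged example of the disk_info entries consumed above (all values are
# purely illustrative): each entry is a dict along the lines of
#   {constants.IDISK_SIZE: 10240,
#    constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: "xenvg",
#    constants.IDISK_METAVG: "xenvg"}
# as assembled by LUInstanceCreate.CheckPrereq() further down.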
8245 def _GetInstanceInfoText(instance):
8246 """Compute that text that should be added to the disk's metadata.
8249 return "originstname+%s" % instance.name
8252 def _CalcEta(time_taken, written, total_size):
8253 """Calculates the ETA based on size written and total size.
8255 @param time_taken: The time taken so far
8256 @param written: amount written so far
8257 @param total_size: The total size of data to be written
8258 @return: The remaining time in seconds
8261 avg_time = time_taken / float(written)
8262 return (total_size - written) * avg_time
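# Worked example for _CalcEta() (hypothetical numbers): if 1024 MiB out
# of 4096 MiB were written in 60 seconds, avg_time is 60/1024 s per MiB
# and the ETA is (4096 - 1024) * 60 / 1024 = 180 seconds.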
8265 def _WipeDisks(lu, instance):
8266 """Wipes instance disks.
8268 @type lu: L{LogicalUnit}
8269 @param lu: the logical unit on whose behalf we execute
8270 @type instance: L{objects.Instance}
8271 @param instance: the instance whose disks we should wipe
8272 @return: the success of the wipe
8275 node = instance.primary_node
8277 for device in instance.disks:
8278 lu.cfg.SetDiskID(device, node)
8280 logging.info("Pause sync of instance %s disks", instance.name)
8281 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8283 for idx, success in enumerate(result.payload):
8285 logging.warn("pause-sync of instance %s for disk %d failed",
8289 for idx, device in enumerate(instance.disks):
8290 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk size,
8291 # but at most MAX_WIPE_CHUNK
8292 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8293 constants.MIN_WIPE_CHUNK_PERCENT)
8294 # we _must_ make this an int, otherwise rounding errors will occur
8296 wipe_chunk_size = int(wipe_chunk_size)
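# Hedged example (assuming, say, MAX_WIPE_CHUNK of 1024 MiB and
# MIN_WIPE_CHUNK_PERCENT of 10): a 4096 MiB disk would be wiped in
# min(1024, 4096 / 100.0 * 10) chunks, i.e. 409 MiB per chunk after the
# int() truncation, while a sufficiently large disk would be capped at
# 1024 MiB per chunk.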
8298 lu.LogInfo("* Wiping disk %d", idx)
8299 logging.info("Wiping disk %d for instance %s, node %s using"
8300 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8305 start_time = time.time()
8307 while offset < size:
8308 wipe_size = min(wipe_chunk_size, size - offset)
8309 logging.debug("Wiping disk %d, offset %s, chunk %s",
8310 idx, offset, wipe_size)
8311 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8312 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8313 (idx, offset, wipe_size))
8316 if now - last_output >= 60:
8317 eta = _CalcEta(now - start_time, offset, size)
8318 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8319 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8322 logging.info("Resume sync of instance %s disks", instance.name)
8324 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8326 for idx, success in enumerate(result.payload):
8328 lu.LogWarning("Resume sync of disk %d failed, please have a"
8329 " look at the status and troubleshoot the issue", idx)
8330 logging.warn("resume-sync of instance %s for disk %d failed",
8334 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8335 """Create all disks for an instance.
8337 This abstracts away some work from AddInstance.
8339 @type lu: L{LogicalUnit}
8340 @param lu: the logical unit on whose behalf we execute
8341 @type instance: L{objects.Instance}
8342 @param instance: the instance whose disks we should create
8344 @param to_skip: list of indices to skip
8345 @type target_node: string
8346 @param target_node: if passed, overrides the target node for creation
8348 @return: the success of the creation
8351 info = _GetInstanceInfoText(instance)
8352 if target_node is None:
8353 pnode = instance.primary_node
8354 all_nodes = instance.all_nodes
8359 if instance.disk_template in constants.DTS_FILEBASED:
8360 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8361 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8363 result.Raise("Failed to create directory '%s' on"
8364 " node %s" % (file_storage_dir, pnode))
8366 # Note: this needs to be kept in sync with adding of disks in
8367 # LUInstanceSetParams
8368 for idx, device in enumerate(instance.disks):
8369 if to_skip and idx in to_skip:
8371 logging.info("Creating volume %s for instance %s",
8372 device.iv_name, instance.name)
8374 for node in all_nodes:
8375 f_create = node == pnode
8376 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8379 def _RemoveDisks(lu, instance, target_node=None):
8380 """Remove all disks for an instance.
8382 This abstracts away some work from `AddInstance()` and
8383 `RemoveInstance()`. Note that in case some of the devices couldn't
8384 be removed, the removal will continue with the other ones (compare
8385 with `_CreateDisks()`).
8387 @type lu: L{LogicalUnit}
8388 @param lu: the logical unit on whose behalf we execute
8389 @type instance: L{objects.Instance}
8390 @param instance: the instance whose disks we should remove
8391 @type target_node: string
8392 @param target_node: used to override the node on which to remove the disks
8394 @return: the success of the removal
8397 logging.info("Removing block devices for instance %s", instance.name)
8400 for device in instance.disks:
8402 edata = [(target_node, device)]
8404 edata = device.ComputeNodeTree(instance.primary_node)
8405 for node, disk in edata:
8406 lu.cfg.SetDiskID(disk, node)
8407 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8409 lu.LogWarning("Could not remove block device %s on node %s,"
8410 " continuing anyway: %s", device.iv_name, node, msg)
8413 # if this is a DRBD disk, return its port to the pool
8414 if device.dev_type in constants.LDS_DRBD:
8415 tcp_port = device.logical_id[2]
8416 lu.cfg.AddTcpUdpPort(tcp_port)
8418 if instance.disk_template == constants.DT_FILE:
8419 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8423 tgt = instance.primary_node
8424 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8426 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8427 file_storage_dir, instance.primary_node, result.fail_msg)
8433 def _ComputeDiskSizePerVG(disk_template, disks):
8434 """Compute disk size requirements in the volume group
8437 def _compute(disks, payload):
8438 """Universal algorithm.
8443 vgs[disk[constants.IDISK_VG]] = \
8444 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8448 # Required free disk space as a function of disk and swap space
8450 constants.DT_DISKLESS: {},
8451 constants.DT_PLAIN: _compute(disks, 0),
8452 # 128 MB are added for drbd metadata for each disk
8453 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8454 constants.DT_FILE: {},
8455 constants.DT_SHARED_FILE: {},
8458 if disk_template not in req_size_dict:
8459 raise errors.ProgrammerError("Disk template '%s' size requirement"
8460 " is unknown" % disk_template)
8462 return req_size_dict[disk_template]
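# Hedged example (illustrative values): two 10240 MiB DT_DRBD8 disks in
# volume group "xenvg" would yield {"xenvg": 2 * (10240 + DRBD_META_SIZE)},
# i.e. the per-VG free space that _CheckNodesFreeDiskPerVG() later has to
# find on the candidate nodes.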
8465 def _ComputeDiskSize(disk_template, disks):
8466 """Compute disk size requirements in the volume group
8469 # Required free disk space as a function of disk and swap space
8471 constants.DT_DISKLESS: None,
8472 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8473 # 128 MB are added for drbd metadata for each disk
8475 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8476 constants.DT_FILE: None,
8477 constants.DT_SHARED_FILE: 0,
8478 constants.DT_BLOCK: 0,
8481 if disk_template not in req_size_dict:
8482 raise errors.ProgrammerError("Disk template '%s' size requirement"
8483 " is unknown" % disk_template)
8485 return req_size_dict[disk_template]
8488 def _FilterVmNodes(lu, nodenames):
8489 """Filters out non-vm_capable nodes from a list.
8491 @type lu: L{LogicalUnit}
8492 @param lu: the logical unit for which we check
8493 @type nodenames: list
8494 @param nodenames: the list of nodes on which we should check
8496 @return: the list of vm-capable nodes
8499 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
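# note: despite its name, vm_nodes holds the *non*-vm_capable nodes, so
# the comprehension below keeps exactly the vm_capable ones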
8500 return [name for name in nodenames if name not in vm_nodes]
8503 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8504 """Hypervisor parameter validation.
8506 This function abstracts the hypervisor parameter validation to be
8507 used in both instance create and instance modify.
8509 @type lu: L{LogicalUnit}
8510 @param lu: the logical unit for which we check
8511 @type nodenames: list
8512 @param nodenames: the list of nodes on which we should check
8513 @type hvname: string
8514 @param hvname: the name of the hypervisor we should use
8515 @type hvparams: dict
8516 @param hvparams: the parameters which we need to check
8517 @raise errors.OpPrereqError: if the parameters are not valid
8520 nodenames = _FilterVmNodes(lu, nodenames)
8522 cluster = lu.cfg.GetClusterInfo()
8523 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8525 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8526 for node in nodenames:
8530 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8533 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8534 """OS parameters validation.
8536 @type lu: L{LogicalUnit}
8537 @param lu: the logical unit for which we check
8538 @type required: boolean
8539 @param required: whether the validation should fail if the OS is not
8541 @type nodenames: list
8542 @param nodenames: the list of nodes on which we should check
8543 @type osname: string
8544 @param osname: the name of the OS we should use
8545 @type osparams: dict
8546 @param osparams: the parameters which we need to check
8547 @raise errors.OpPrereqError: if the parameters are not valid
8550 nodenames = _FilterVmNodes(lu, nodenames)
8551 result = lu.rpc.call_os_validate(nodenames, required, osname,
8552 [constants.OS_VALIDATE_PARAMETERS],
8554 for node, nres in result.items():
8555 # we don't check for offline cases since this should be run only
8556 # against the master node and/or an instance's nodes
8557 nres.Raise("OS Parameters validation failed on node %s" % node)
8558 if not nres.payload:
8559 lu.LogInfo("OS %s not found on node %s, validation skipped",
8563 class LUInstanceCreate(LogicalUnit):
8564 """Create an instance.
8567 HPATH = "instance-add"
8568 HTYPE = constants.HTYPE_INSTANCE
8571 def CheckArguments(self):
8575 # do not require name_check to ease forward/backward compatibility
8577 if self.op.no_install and self.op.start:
8578 self.LogInfo("No-installation mode selected, disabling startup")
8579 self.op.start = False
8580 # validate/normalize the instance name
8581 self.op.instance_name = \
8582 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8584 if self.op.ip_check and not self.op.name_check:
8585 # TODO: make the ip check more flexible and not depend on the name check
8586 raise errors.OpPrereqError("Cannot do IP address check without a name"
8587 " check", errors.ECODE_INVAL)
8589 # check nics' parameter names
8590 for nic in self.op.nics:
8591 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8593 # check disks: parameter names and consistent adopt/no-adopt strategy
8594 has_adopt = has_no_adopt = False
8595 for disk in self.op.disks:
8596 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8597 if constants.IDISK_ADOPT in disk:
8601 if has_adopt and has_no_adopt:
8602 raise errors.OpPrereqError("Either all disks are adopted or none is",
8605 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8606 raise errors.OpPrereqError("Disk adoption is not supported for the"
8607 " '%s' disk template" %
8608 self.op.disk_template,
8610 if self.op.iallocator is not None:
8611 raise errors.OpPrereqError("Disk adoption not allowed with an"
8612 " iallocator script", errors.ECODE_INVAL)
8613 if self.op.mode == constants.INSTANCE_IMPORT:
8614 raise errors.OpPrereqError("Disk adoption not allowed for"
8615 " instance import", errors.ECODE_INVAL)
8617 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8618 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8619 " but no 'adopt' parameter given" %
8620 self.op.disk_template,
8623 self.adopt_disks = has_adopt
8625 # instance name verification
8626 if self.op.name_check:
8627 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8628 self.op.instance_name = self.hostname1.name
8629 # used in CheckPrereq for ip ping check
8630 self.check_ip = self.hostname1.ip
8632 self.check_ip = None
8634 # file storage checks
8635 if (self.op.file_driver and
8636 not self.op.file_driver in constants.FILE_DRIVER):
8637 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8638 self.op.file_driver, errors.ECODE_INVAL)
8640 if self.op.disk_template == constants.DT_FILE:
8641 opcodes.RequireFileStorage()
8642 elif self.op.disk_template == constants.DT_SHARED_FILE:
8643 opcodes.RequireSharedFileStorage()
8645 ### Node/iallocator related checks
8646 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8648 if self.op.pnode is not None:
8649 if self.op.disk_template in constants.DTS_INT_MIRROR:
8650 if self.op.snode is None:
8651 raise errors.OpPrereqError("The networked disk templates need"
8652 " a mirror node", errors.ECODE_INVAL)
8654 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8656 self.op.snode = None
8658 self._cds = _GetClusterDomainSecret()
8660 if self.op.mode == constants.INSTANCE_IMPORT:
8661 # On import force_variant must be True, because if we forced it at
8662 # initial install, our only chance when importing it back is that it still works
8664 self.op.force_variant = True
8666 if self.op.no_install:
8667 self.LogInfo("No-installation mode has no effect during import")
8669 elif self.op.mode == constants.INSTANCE_CREATE:
8670 if self.op.os_type is None:
8671 raise errors.OpPrereqError("No guest OS specified",
8673 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8674 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8675 " installation" % self.op.os_type,
8677 if self.op.disk_template is None:
8678 raise errors.OpPrereqError("No disk template specified",
8681 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8682 # Check handshake to ensure both clusters have the same domain secret
8683 src_handshake = self.op.source_handshake
8684 if not src_handshake:
8685 raise errors.OpPrereqError("Missing source handshake",
8688 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8691 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8694 # Load and check source CA
8695 self.source_x509_ca_pem = self.op.source_x509_ca
8696 if not self.source_x509_ca_pem:
8697 raise errors.OpPrereqError("Missing source X509 CA",
8701 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8703 except OpenSSL.crypto.Error, err:
8704 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8705 (err, ), errors.ECODE_INVAL)
8707 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8708 if errcode is not None:
8709 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8712 self.source_x509_ca = cert
8714 src_instance_name = self.op.source_instance_name
8715 if not src_instance_name:
8716 raise errors.OpPrereqError("Missing source instance name",
8719 self.source_instance_name = \
8720 netutils.GetHostname(name=src_instance_name).name
8723 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8724 self.op.mode, errors.ECODE_INVAL)
8726 def ExpandNames(self):
8727 """ExpandNames for CreateInstance.
8729 Figure out the right locks for instance creation.
8732 self.needed_locks = {}
8734 instance_name = self.op.instance_name
8735 # this is just a preventive check, but someone might still add this
8736 # instance in the meantime, and creation will fail at lock-add time
8737 if instance_name in self.cfg.GetInstanceList():
8738 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8739 instance_name, errors.ECODE_EXISTS)
8741 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8743 if self.op.iallocator:
8744 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8745 # specifying a group on instance creation and then selecting nodes from
8747 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8748 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8750 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8751 nodelist = [self.op.pnode]
8752 if self.op.snode is not None:
8753 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8754 nodelist.append(self.op.snode)
8755 self.needed_locks[locking.LEVEL_NODE] = nodelist
8756 # Lock resources of instance's primary and secondary nodes (copy to
8757 # prevent accidental modification)
8758 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8760 # in case of import lock the source node too
8761 if self.op.mode == constants.INSTANCE_IMPORT:
8762 src_node = self.op.src_node
8763 src_path = self.op.src_path
8765 if src_path is None:
8766 self.op.src_path = src_path = self.op.instance_name
8768 if src_node is None:
8769 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8770 self.op.src_node = None
8771 if os.path.isabs(src_path):
8772 raise errors.OpPrereqError("Importing an instance from a path"
8773 " requires a source node option",
8776 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8777 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8778 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8779 if not os.path.isabs(src_path):
8780 self.op.src_path = src_path = \
8781 utils.PathJoin(constants.EXPORT_DIR, src_path)
8783 def _RunAllocator(self):
8784 """Run the allocator based on input opcode.
8787 nics = [n.ToDict() for n in self.nics]
8788 ial = IAllocator(self.cfg, self.rpc,
8789 mode=constants.IALLOCATOR_MODE_ALLOC,
8790 name=self.op.instance_name,
8791 disk_template=self.op.disk_template,
8794 vcpus=self.be_full[constants.BE_VCPUS],
8795 memory=self.be_full[constants.BE_MAXMEM],
8798 hypervisor=self.op.hypervisor,
8801 ial.Run(self.op.iallocator)
8804 raise errors.OpPrereqError("Can't compute nodes using"
8805 " iallocator '%s': %s" %
8806 (self.op.iallocator, ial.info),
8808 if len(ial.result) != ial.required_nodes:
8809 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8810 " of nodes (%s), required %s" %
8811 (self.op.iallocator, len(ial.result),
8812 ial.required_nodes), errors.ECODE_FAULT)
8813 self.op.pnode = ial.result[0]
8814 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8815 self.op.instance_name, self.op.iallocator,
8816 utils.CommaJoin(ial.result))
8817 if ial.required_nodes == 2:
8818 self.op.snode = ial.result[1]
8820 def BuildHooksEnv(self):
8823 This runs on master, primary and secondary nodes of the instance.
8827 "ADD_MODE": self.op.mode,
8829 if self.op.mode == constants.INSTANCE_IMPORT:
8830 env["SRC_NODE"] = self.op.src_node
8831 env["SRC_PATH"] = self.op.src_path
8832 env["SRC_IMAGES"] = self.src_images
8834 env.update(_BuildInstanceHookEnv(
8835 name=self.op.instance_name,
8836 primary_node=self.op.pnode,
8837 secondary_nodes=self.secondaries,
8838 status=self.op.start,
8839 os_type=self.op.os_type,
8840 minmem=self.be_full[constants.BE_MINMEM],
8841 maxmem=self.be_full[constants.BE_MAXMEM],
8842 vcpus=self.be_full[constants.BE_VCPUS],
8843 nics=_NICListToTuple(self, self.nics),
8844 disk_template=self.op.disk_template,
8845 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8846 for d in self.disks],
8849 hypervisor_name=self.op.hypervisor,
8855 def BuildHooksNodes(self):
8856 """Build hooks nodes.
8859 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8862 def _ReadExportInfo(self):
8863 """Reads the export information from disk.
8865 It will override the opcode source node and path with the actual
8866 information, if these two were not specified before.
8868 @return: the export information
8871 assert self.op.mode == constants.INSTANCE_IMPORT
8873 src_node = self.op.src_node
8874 src_path = self.op.src_path
8876 if src_node is None:
8877 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8878 exp_list = self.rpc.call_export_list(locked_nodes)
8880 for node in exp_list:
8881 if exp_list[node].fail_msg:
8883 if src_path in exp_list[node].payload:
8885 self.op.src_node = src_node = node
8886 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8890 raise errors.OpPrereqError("No export found for relative path %s" %
8891 src_path, errors.ECODE_INVAL)
8893 _CheckNodeOnline(self, src_node)
8894 result = self.rpc.call_export_info(src_node, src_path)
8895 result.Raise("No export or invalid export found in dir %s" % src_path)
8897 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8898 if not export_info.has_section(constants.INISECT_EXP):
8899 raise errors.ProgrammerError("Corrupted export config",
8900 errors.ECODE_ENVIRON)
8902 ei_version = export_info.get(constants.INISECT_EXP, "version")
8903 if (int(ei_version) != constants.EXPORT_VERSION):
8904 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8905 (ei_version, constants.EXPORT_VERSION),
8906 errors.ECODE_ENVIRON)
8909 def _ReadExportParams(self, einfo):
8910 """Use export parameters as defaults.
8912 In case the opcode doesn't specify (as in override) some instance
8913 parameters, then try to use them from the export information, if
8917 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8919 if self.op.disk_template is None:
8920 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8921 self.op.disk_template = einfo.get(constants.INISECT_INS,
8923 if self.op.disk_template not in constants.DISK_TEMPLATES:
8924 raise errors.OpPrereqError("Disk template specified in configuration"
8925 " file is not one of the allowed values:"
8926 " %s" % " ".join(constants.DISK_TEMPLATES))
8928 raise errors.OpPrereqError("No disk template specified and the export"
8929 " is missing the disk_template information",
8932 if not self.op.disks:
8934 # TODO: import the disk iv_name too
8935 for idx in range(constants.MAX_DISKS):
8936 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8937 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8938 disks.append({constants.IDISK_SIZE: disk_sz})
8939 self.op.disks = disks
8940 if not disks and self.op.disk_template != constants.DT_DISKLESS:
8941 raise errors.OpPrereqError("No disk info specified and the export"
8942 " is missing the disk information",
8945 if not self.op.nics:
8947 for idx in range(constants.MAX_NICS):
8948 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8950 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8951 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8958 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8959 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8961 if (self.op.hypervisor is None and
8962 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8963 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8965 if einfo.has_section(constants.INISECT_HYP):
8966 # use the export parameters but do not override the ones
8967 # specified by the user
8968 for name, value in einfo.items(constants.INISECT_HYP):
8969 if name not in self.op.hvparams:
8970 self.op.hvparams[name] = value
8972 if einfo.has_section(constants.INISECT_BEP):
8973 # use the parameters, without overriding
8974 for name, value in einfo.items(constants.INISECT_BEP):
8975 if name not in self.op.beparams:
8976 self.op.beparams[name] = value
8977 # Compatibility for the old "memory" be param
8978 if name == constants.BE_MEMORY:
8979 if constants.BE_MAXMEM not in self.op.beparams:
8980 self.op.beparams[constants.BE_MAXMEM] = value
8981 if constants.BE_MINMEM not in self.op.beparams:
8982 self.op.beparams[constants.BE_MINMEM] = value
8984 # try to read the parameters old style, from the main section
8985 for name in constants.BES_PARAMETERS:
8986 if (name not in self.op.beparams and
8987 einfo.has_option(constants.INISECT_INS, name)):
8988 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8990 if einfo.has_section(constants.INISECT_OSP):
8991 # use the parameters, without overriding
8992 for name, value in einfo.items(constants.INISECT_OSP):
8993 if name not in self.op.osparams:
8994 self.op.osparams[name] = value
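# Illustrative summary of _ReadExportParams(): the export file only ever
# fills in values the opcode left unspecified; anything given explicitly
# in the opcode always takes precedence.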
8996 def _RevertToDefaults(self, cluster):
8997 """Revert the instance parameters to the default values.
9001 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9002 for name in self.op.hvparams.keys():
9003 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9004 del self.op.hvparams[name]
9006 be_defs = cluster.SimpleFillBE({})
9007 for name in self.op.beparams.keys():
9008 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9009 del self.op.beparams[name]
9011 nic_defs = cluster.SimpleFillNIC({})
9012 for nic in self.op.nics:
9013 for name in constants.NICS_PARAMETERS:
9014 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9017 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9018 for name in self.op.osparams.keys():
9019 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9020 del self.op.osparams[name]
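# Hedged example for _CalculateFileStorageDir() below (the path is purely
# illustrative): with a cluster file storage dir of
# /srv/ganeti/file-storage, no per-instance file_storage_dir override and
# an instance named inst1.example.com, the resulting directory would be
# /srv/ganeti/file-storage/inst1.example.com.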
9022 def _CalculateFileStorageDir(self):
9023 """Calculate final instance file storage dir.
9026 # file storage dir calculation/check
9027 self.instance_file_storage_dir = None
9028 if self.op.disk_template in constants.DTS_FILEBASED:
9029 # build the full file storage dir path
9032 if self.op.disk_template == constants.DT_SHARED_FILE:
9033 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9035 get_fsd_fn = self.cfg.GetFileStorageDir
9037 cfg_storagedir = get_fsd_fn()
9038 if not cfg_storagedir:
9039 raise errors.OpPrereqError("Cluster file storage dir not defined")
9040 joinargs.append(cfg_storagedir)
9042 if self.op.file_storage_dir is not None:
9043 joinargs.append(self.op.file_storage_dir)
9045 joinargs.append(self.op.instance_name)
9047 # pylint: disable=W0142
9048 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9050 def CheckPrereq(self):
9051 """Check prerequisites.
9054 self._CalculateFileStorageDir()
9056 if self.op.mode == constants.INSTANCE_IMPORT:
9057 export_info = self._ReadExportInfo()
9058 self._ReadExportParams(export_info)
9060 if (not self.cfg.GetVGName() and
9061 self.op.disk_template not in constants.DTS_NOT_LVM):
9062 raise errors.OpPrereqError("Cluster does not support lvm-based"
9063 " instances", errors.ECODE_STATE)
9065 if (self.op.hypervisor is None or
9066 self.op.hypervisor == constants.VALUE_AUTO):
9067 self.op.hypervisor = self.cfg.GetHypervisorType()
9069 cluster = self.cfg.GetClusterInfo()
9070 enabled_hvs = cluster.enabled_hypervisors
9071 if self.op.hypervisor not in enabled_hvs:
9072 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9073 " cluster (%s)" % (self.op.hypervisor,
9074 ",".join(enabled_hvs)),
9077 # Check tag validity
9078 for tag in self.op.tags:
9079 objects.TaggableObject.ValidateTag(tag)
9081 # check hypervisor parameter syntax (locally)
9082 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9083 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9085 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9086 hv_type.CheckParameterSyntax(filled_hvp)
9087 self.hv_full = filled_hvp
9088 # check that we don't specify global parameters on an instance
9089 _CheckGlobalHvParams(self.op.hvparams)
9091 # fill and remember the beparams dict
9092 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9093 for param, value in self.op.beparams.iteritems():
9094 if value == constants.VALUE_AUTO:
9095 self.op.beparams[param] = default_beparams[param]
9096 objects.UpgradeBeParams(self.op.beparams)
9097 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9098 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9100 # build os parameters
9101 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9103 # now that hvp/bep are in final format, let's reset to defaults,
9105 if self.op.identify_defaults:
9106 self._RevertToDefaults(cluster)
9110 for idx, nic in enumerate(self.op.nics):
9111 nic_mode_req = nic.get(constants.INIC_MODE, None)
9112 nic_mode = nic_mode_req
9113 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9114 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9116 # in routed mode, for the first nic, the default ip is 'auto'
9117 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9118 default_ip_mode = constants.VALUE_AUTO
9120 default_ip_mode = constants.VALUE_NONE
9122 # ip validity checks
9123 ip = nic.get(constants.INIC_IP, default_ip_mode)
9124 if ip is None or ip.lower() == constants.VALUE_NONE:
9126 elif ip.lower() == constants.VALUE_AUTO:
9127 if not self.op.name_check:
9128 raise errors.OpPrereqError("IP address set to auto but name checks"
9129 " have been skipped",
9131 nic_ip = self.hostname1.ip
9133 if not netutils.IPAddress.IsValid(ip):
9134 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9138 # TODO: check the ip address for uniqueness
9139 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9140 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9143 # MAC address verification
9144 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9145 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9146 mac = utils.NormalizeAndValidateMac(mac)
9149 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9150 except errors.ReservationError:
9151 raise errors.OpPrereqError("MAC address %s already in use"
9152 " in cluster" % mac,
9153 errors.ECODE_NOTUNIQUE)
9155 # Build nic parameters
9156 link = nic.get(constants.INIC_LINK, None)
9157 if link == constants.VALUE_AUTO:
9158 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9161 nicparams[constants.NIC_MODE] = nic_mode
9163 nicparams[constants.NIC_LINK] = link
9165 check_params = cluster.SimpleFillNIC(nicparams)
9166 objects.NIC.CheckParameterSyntax(check_params)
9167 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9169 # disk checks/pre-build
9170 default_vg = self.cfg.GetVGName()
9172 for disk in self.op.disks:
9173 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9174 if mode not in constants.DISK_ACCESS_SET:
9175 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9176 mode, errors.ECODE_INVAL)
9177 size = disk.get(constants.IDISK_SIZE, None)
9179 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9182 except (TypeError, ValueError):
9183 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9186 data_vg = disk.get(constants.IDISK_VG, default_vg)
9188 constants.IDISK_SIZE: size,
9189 constants.IDISK_MODE: mode,
9190 constants.IDISK_VG: data_vg,
9191 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
9193 if constants.IDISK_ADOPT in disk:
9194 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9195 self.disks.append(new_disk)
9197 if self.op.mode == constants.INSTANCE_IMPORT:
9199 for idx in range(len(self.disks)):
9200 option = "disk%d_dump" % idx
9201 if export_info.has_option(constants.INISECT_INS, option):
9202 # FIXME: are the old os-es, disk sizes, etc. useful?
9203 export_name = export_info.get(constants.INISECT_INS, option)
9204 image = utils.PathJoin(self.op.src_path, export_name)
9205 disk_images.append(image)
9207 disk_images.append(False)
9209 self.src_images = disk_images
9211 old_name = export_info.get(constants.INISECT_INS, "name")
9212 if self.op.instance_name == old_name:
9213 for idx, nic in enumerate(self.nics):
9214 if nic.mac == constants.VALUE_AUTO:
9215 nic_mac_ini = "nic%d_mac" % idx
9216 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9218 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9220 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9221 if self.op.ip_check:
9222 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9223 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9224 (self.check_ip, self.op.instance_name),
9225 errors.ECODE_NOTUNIQUE)
9227 #### mac address generation
9228 # By generating here the mac address both the allocator and the hooks get
9229 # the real final mac address rather than the 'auto' or 'generate' value.
9230 # There is a race condition between the generation and the instance object
9231 # creation, which means that we know the mac is valid now, but we're not
9232 # sure it will be when we actually add the instance. If things go bad
9233 # adding the instance will abort because of a duplicate mac, and the
9234 # creation job will fail.
9235 for nic in self.nics:
9236 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9237 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9241 if self.op.iallocator is not None:
9242 self._RunAllocator()
9244 # Release all unneeded node locks
9245 _ReleaseLocks(self, locking.LEVEL_NODE,
9246 keep=filter(None, [self.op.pnode, self.op.snode,
9249 #### node related checks
9251 # check primary node
9252 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9253 assert self.pnode is not None, \
9254 "Cannot retrieve locked node %s" % self.op.pnode
9256 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9257 pnode.name, errors.ECODE_STATE)
9259 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9260 pnode.name, errors.ECODE_STATE)
9261 if not pnode.vm_capable:
9262 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9263 " '%s'" % pnode.name, errors.ECODE_STATE)
9265 self.secondaries = []
9267 # mirror node verification
9268 if self.op.disk_template in constants.DTS_INT_MIRROR:
9269 if self.op.snode == pnode.name:
9270 raise errors.OpPrereqError("The secondary node cannot be the"
9271 " primary node", errors.ECODE_INVAL)
9272 _CheckNodeOnline(self, self.op.snode)
9273 _CheckNodeNotDrained(self, self.op.snode)
9274 _CheckNodeVmCapable(self, self.op.snode)
9275 self.secondaries.append(self.op.snode)
9277 snode = self.cfg.GetNodeInfo(self.op.snode)
9278 if pnode.group != snode.group:
9279 self.LogWarning("The primary and secondary nodes are in two"
9280 " different node groups; the disk parameters"
9281 " from the first disk's node group will be"
9284 nodenames = [pnode.name] + self.secondaries
9286 # disk parameters (not customizable at instance or node level)
9287 # just use the primary node parameters, ignoring the secondary.
9288 self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams
9290 if not self.adopt_disks:
9291 # Check lv size requirements, if not adopting
9292 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9293 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9295 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9296 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9297 disk[constants.IDISK_ADOPT])
9298 for disk in self.disks])
9299 if len(all_lvs) != len(self.disks):
9300 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9301 errors.ECODE_INVAL)
9302 for lv_name in all_lvs:
9303 try:
9304 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
9305 # to ReserveLV use the same syntax
9306 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9307 except errors.ReservationError:
9308 raise errors.OpPrereqError("LV named %s used by another instance" %
9309 lv_name, errors.ECODE_NOTUNIQUE)
9311 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9312 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9314 node_lvs = self.rpc.call_lv_list([pnode.name],
9315 vg_names.payload.keys())[pnode.name]
9316 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9317 node_lvs = node_lvs.payload
9319 delta = all_lvs.difference(node_lvs.keys())
9320 if delta:
9321 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9322 utils.CommaJoin(delta),
9323 errors.ECODE_INVAL)
9324 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9325 if online_lvs:
9326 raise errors.OpPrereqError("Online logical volumes found, cannot"
9327 " adopt: %s" % utils.CommaJoin(online_lvs),
9328 errors.ECODE_STATE)
9329 # update the size of disk based on what is found
9330 for dsk in self.disks:
9331 dsk[constants.IDISK_SIZE] = \
9332 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9333 dsk[constants.IDISK_ADOPT])][0]))
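# A sketch of the lv_list payload this code relies on; the exact shape is
# inferred from the lookups above ([0] is the size in MiB, [2] the online
# flag) and the "vg/lv" key syntax, values are hypothetical:
#
#   node_lvs = {
#     "xenvg/disk0": (10240.0, False, False),  # size, inactive?, online?
#   }
#   size = int(float(node_lvs["xenvg/disk0"][0]))  # -> 10240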
9335 elif self.op.disk_template == constants.DT_BLOCK:
9336 # Normalize and de-duplicate device paths
9337 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9338 for disk in self.disks])
9339 if len(all_disks) != len(self.disks):
9340 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9341 errors.ECODE_INVAL)
9342 baddisks = [d for d in all_disks
9343 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9344 if baddisks:
9345 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9346 " cannot be adopted" %
9347 (", ".join(baddisks),
9348 constants.ADOPTABLE_BLOCKDEV_ROOT),
9349 errors.ECODE_INVAL)
9351 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9352 list(all_disks))[pnode.name]
9353 node_disks.Raise("Cannot get block device information from node %s" %
9354 pnode.name)
9355 node_disks = node_disks.payload
9356 delta = all_disks.difference(node_disks.keys())
9357 if delta:
9358 raise errors.OpPrereqError("Missing block device(s): %s" %
9359 utils.CommaJoin(delta),
9360 errors.ECODE_INVAL)
9361 for dsk in self.disks:
9362 dsk[constants.IDISK_SIZE] = \
9363 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
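# Likewise, the bdev_sizes payload is assumed (from the lookup above) to map
# each queried device path to its size in MiB; a hypothetical example:
#
#   node_disks = {"/dev/disk/by-id/virtio-disk0": 20480.0}
#   dsk[constants.IDISK_SIZE] = \
#     int(float(node_disks["/dev/disk/by-id/virtio-disk0"]))  # -> 20480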
9365 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9367 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9368 # check OS parameters (remotely)
9369 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9371 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9373 # memory check on primary node
9374 # TODO(dynmem): use MINMEM for checking
9375 if self.op.start:
9376 _CheckNodeFreeMemory(self, self.pnode.name,
9377 "creating instance %s" % self.op.instance_name,
9378 self.be_full[constants.BE_MAXMEM],
9379 self.op.hypervisor)
9381 self.dry_run_result = list(nodenames)
9383 def Exec(self, feedback_fn):
9384 """Create and add the instance to the cluster.
9387 instance = self.op.instance_name
9388 pnode_name = self.pnode.name
9390 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9391 self.owned_locks(locking.LEVEL_NODE)), \
9392 "Node locks differ from node resource locks"
9394 ht_kind = self.op.hypervisor
9395 if ht_kind in constants.HTS_REQ_PORT:
9396 network_port = self.cfg.AllocatePort()
9397 else:
9398 network_port = None
9400 disks = _GenerateDiskTemplate(self,
9401 self.op.disk_template,
9402 instance, pnode_name,
9403 self.secondaries,
9404 self.disks,
9405 self.instance_file_storage_dir,
9406 self.op.file_driver,
9407 0,
9408 feedback_fn,
9409 self.diskparams)
9411 iobj = objects.Instance(name=instance, os=self.op.os_type,
9412 primary_node=pnode_name,
9413 nics=self.nics, disks=disks,
9414 disk_template=self.op.disk_template,
9415 admin_state=constants.ADMINST_DOWN,
9416 network_port=network_port,
9417 beparams=self.op.beparams,
9418 hvparams=self.op.hvparams,
9419 hypervisor=self.op.hypervisor,
9420 osparams=self.op.osparams,
9421 )
9423 if self.op.tags:
9424 for tag in self.op.tags:
9425 iobj.AddTag(tag)
9427 if self.adopt_disks:
9428 if self.op.disk_template == constants.DT_PLAIN:
9429 # rename LVs to the newly-generated names; we need to construct
9430 # 'fake' LV disks with the old data, plus the new unique_id
9431 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9432 rename_to = []
9433 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9434 rename_to.append(t_dsk.logical_id)
9435 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9436 self.cfg.SetDiskID(t_dsk, pnode_name)
9437 result = self.rpc.call_blockdev_rename(pnode_name,
9438 zip(tmp_disks, rename_to))
9439 result.Raise("Failed to rename adopted LVs")
9441 feedback_fn("* creating instance disks...")
9442 try:
9443 _CreateDisks(self, iobj)
9444 except errors.OpExecError:
9445 self.LogWarning("Device creation failed, reverting...")
9446 try:
9447 _RemoveDisks(self, iobj)
9448 finally:
9449 self.cfg.ReleaseDRBDMinors(instance)
9450 raise
9452 feedback_fn("adding instance %s to cluster config" % instance)
9454 self.cfg.AddInstance(iobj, self.proc.GetECId())
9456 # Declare that we don't want to remove the instance lock anymore, as we've
9457 # added the instance to the config
9458 del self.remove_locks[locking.LEVEL_INSTANCE]
9460 if self.op.mode == constants.INSTANCE_IMPORT:
9461 # Release unused nodes
9462 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9463 else:
9464 # Release all nodes
9465 _ReleaseLocks(self, locking.LEVEL_NODE)
9467 disk_abort = False
9468 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9469 feedback_fn("* wiping instance disks...")
9470 try:
9471 _WipeDisks(self, iobj)
9472 except errors.OpExecError, err:
9473 logging.exception("Wiping disks failed")
9474 self.LogWarning("Wiping instance disks failed (%s)", err)
9475 disk_abort = True
9477 if disk_abort:
9478 # Something is already wrong with the disks, don't do anything else
9479 pass
9480 elif self.op.wait_for_sync:
9481 disk_abort = not _WaitForSync(self, iobj)
9482 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9483 # make sure the disks are not degraded (still sync-ing is ok)
9484 feedback_fn("* checking mirrors status")
9485 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9486 else:
9487 disk_abort = False
9489 if disk_abort:
9490 _RemoveDisks(self, iobj)
9491 self.cfg.RemoveInstance(iobj.name)
9492 # Make sure the instance lock gets removed
9493 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9494 raise errors.OpExecError("There are some degraded disks for"
9495 " this instance")
9497 # Release all node resource locks
9498 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9500 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9501 if self.op.mode == constants.INSTANCE_CREATE:
9502 if not self.op.no_install:
9503 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9504 not self.op.wait_for_sync)
9505 if pause_sync:
9506 feedback_fn("* pausing disk sync to install instance OS")
9507 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9508 (iobj.disks, iobj), True)
9509 for idx, success in enumerate(result.payload):
9510 if not success:
9511 logging.warn("pause-sync of instance %s for disk %d failed",
9512 instance, idx)
9514 feedback_fn("* running the instance OS create scripts...")
9515 # FIXME: pass debug option from opcode to backend
9516 os_add_result = \
9517 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9518 self.op.debug_level)
9519 if pause_sync:
9520 feedback_fn("* resuming disk sync")
9521 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9522 (iobj.disks, iobj), False)
9523 for idx, success in enumerate(result.payload):
9524 if not success:
9525 logging.warn("resume-sync of instance %s for disk %d failed",
9526 instance, idx)
9528 os_add_result.Raise("Could not add os for instance %s"
9529 " on node %s" % (instance, pnode_name))
9531 elif self.op.mode == constants.INSTANCE_IMPORT:
9532 feedback_fn("* running the instance OS import scripts...")
9534 transfers = []
9536 for idx, image in enumerate(self.src_images):
9537 if not image:
9538 continue
9540 # FIXME: pass debug option from opcode to backend
9541 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9542 constants.IEIO_FILE, (image, ),
9543 constants.IEIO_SCRIPT,
9544 (iobj.disks[idx], idx),
9545 None)
9546 transfers.append(dt)
9548 import_result = \
9549 masterd.instance.TransferInstanceData(self, feedback_fn,
9550 self.op.src_node, pnode_name,
9551 self.pnode.secondary_ip,
9552 iobj, transfers)
9553 if not compat.all(import_result):
9554 self.LogWarning("Some disks for instance %s on node %s were not"
9555 " imported successfully" % (instance, pnode_name))
9557 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9558 feedback_fn("* preparing remote import...")
9559 # The source cluster will stop the instance before attempting to make a
9560 # connection. In some cases stopping an instance can take a long time,
9561 # hence the shutdown timeout is added to the connection timeout.
9562 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9563 self.op.source_shutdown_timeout)
9564 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9566 assert iobj.primary_node == self.pnode.name
9567 disk_results = \
9568 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9569 self.source_x509_ca,
9570 self._cds, timeouts)
9571 if not compat.all(disk_results):
9572 # TODO: Should the instance still be started, even if some disks
9573 # failed to import (valid for local imports, too)?
9574 self.LogWarning("Some disks for instance %s on node %s were not"
9575 " imported successfully" % (instance, pnode_name))
9577 # Run rename script on newly imported instance
9578 assert iobj.name == instance
9579 feedback_fn("Running rename script for %s" % instance)
9580 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9581 self.source_instance_name,
9582 self.op.debug_level)
9583 if result.fail_msg:
9584 self.LogWarning("Failed to run rename script for %s on node"
9585 " %s: %s" % (instance, pnode_name, result.fail_msg))
9587 else:
9588 # also checked in the prereq part
9589 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9590 % self.op.mode)
9592 assert not self.owned_locks(locking.LEVEL_NODE_RES)
9594 if self.op.start:
9595 iobj.admin_state = constants.ADMINST_UP
9596 self.cfg.Update(iobj, feedback_fn)
9597 logging.info("Starting instance %s on node %s", instance, pnode_name)
9598 feedback_fn("* starting instance...")
9599 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9600 False)
9601 result.Raise("Could not start instance")
9603 return list(iobj.all_nodes)
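# A minimal, illustrative opcode driving this LU (parameter set abbreviated,
# values hypothetical, not defaults):
#
#   op = opcodes.OpInstanceCreate(instance_name="inst1.example.com",
#                                 mode=constants.INSTANCE_CREATE,
#                                 disk_template=constants.DT_PLAIN,
#                                 disks=[{constants.IDISK_SIZE: 10240}],
#                                 nics=[{}], os_type="debian-image",
#                                 pnode="node1.example.com")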
9606 class LUInstanceConsole(NoHooksLU):
9607 """Connect to an instance's console.
9609 This is somewhat special in that it returns the command line that
9610 you need to run on the master node in order to connect to the
9611 console.
9613 """
9614 REQ_BGL = False
9616 def ExpandNames(self):
9617 self.share_locks = _ShareAll()
9618 self._ExpandAndLockInstance()
9620 def CheckPrereq(self):
9621 """Check prerequisites.
9623 This checks that the instance is in the cluster.
9625 """
9626 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9627 assert self.instance is not None, \
9628 "Cannot retrieve locked instance %s" % self.op.instance_name
9629 _CheckNodeOnline(self, self.instance.primary_node)
9631 def Exec(self, feedback_fn):
9632 """Connect to the console of an instance
9635 instance = self.instance
9636 node = instance.primary_node
9638 node_insts = self.rpc.call_instance_list([node],
9639 [instance.hypervisor])[node]
9640 node_insts.Raise("Can't get node information from %s" % node)
9642 if instance.name not in node_insts.payload:
9643 if instance.admin_state == constants.ADMINST_UP:
9644 state = constants.INSTST_ERRORDOWN
9645 elif instance.admin_state == constants.ADMINST_DOWN:
9646 state = constants.INSTST_ADMINDOWN
9647 else:
9648 state = constants.INSTST_ADMINOFFLINE
9649 raise errors.OpExecError("Instance %s is not running (state %s)" %
9650 (instance.name, state))
9652 logging.debug("Connecting to console of %s on %s", instance.name, node)
9654 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9657 def _GetInstanceConsole(cluster, instance):
9658 """Returns console information for an instance.
9660 @type cluster: L{objects.Cluster}
9661 @type instance: L{objects.Instance}
9663 """
9665 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9666 # beparams and hvparams are passed separately, to avoid editing the
9667 # instance and then saving the defaults in the instance itself.
9668 hvparams = cluster.FillHV(instance)
9669 beparams = cluster.FillBE(instance)
9670 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9672 assert console.instance == instance.name
9673 assert console.Validate()
9675 return console.ToDict()
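# The returned dict mirrors the console object's fields; a plausible
# (assumed, hypervisor-dependent) example for an SSH-based console:
#
#   {"instance": "inst1.example.com", "kind": constants.CONS_SSH,
#    "host": "node1.example.com", "user": "root",
#    "command": ["xm", "console", "inst1.example.com"]}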
9678 class LUInstanceReplaceDisks(LogicalUnit):
9679 """Replace the disks of an instance.
9682 HPATH = "mirrors-replace"
9683 HTYPE = constants.HTYPE_INSTANCE
9684 REQ_BGL = False
9686 def CheckArguments(self):
9687 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9688 self.op.iallocator)
9690 def ExpandNames(self):
9691 self._ExpandAndLockInstance()
9693 assert locking.LEVEL_NODE not in self.needed_locks
9694 assert locking.LEVEL_NODE_RES not in self.needed_locks
9695 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9697 assert self.op.iallocator is None or self.op.remote_node is None, \
9698 "Conflicting options"
9700 if self.op.remote_node is not None:
9701 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9703 # Warning: do not remove the locking of the new secondary here
9704 # unless DRBD8.AddChildren is changed to work in parallel;
9705 # currently it doesn't since parallel invocations of
9706 # FindUnusedMinor will conflict
9707 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9708 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9709 else:
9710 self.needed_locks[locking.LEVEL_NODE] = []
9711 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9713 if self.op.iallocator is not None:
9714 # iallocator will select a new node in the same group
9715 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9717 self.needed_locks[locking.LEVEL_NODE_RES] = []
9719 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9720 self.op.iallocator, self.op.remote_node,
9721 self.op.disks, False, self.op.early_release)
9723 self.tasklets = [self.replacer]
9725 def DeclareLocks(self, level):
9726 if level == locking.LEVEL_NODEGROUP:
9727 assert self.op.remote_node is None
9728 assert self.op.iallocator is not None
9729 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9731 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9732 # Lock all groups used by instance optimistically; this requires going
9733 # via the node before it's locked, requiring verification later on
9734 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9735 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9737 elif level == locking.LEVEL_NODE:
9738 if self.op.iallocator is not None:
9739 assert self.op.remote_node is None
9740 assert not self.needed_locks[locking.LEVEL_NODE]
9742 # Lock member nodes of all locked groups
9743 self.needed_locks[locking.LEVEL_NODE] = [node_name
9744 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9745 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9746 else:
9747 self._LockInstancesNodes()
9748 elif level == locking.LEVEL_NODE_RES:
9750 self.needed_locks[locking.LEVEL_NODE_RES] = \
9751 self.needed_locks[locking.LEVEL_NODE]
9753 def BuildHooksEnv(self):
9754 """Build hooks env.
9756 This runs on the master, the primary and all the secondaries.
9758 """
9759 instance = self.replacer.instance
9761 "MODE": self.op.mode,
9762 "NEW_SECONDARY": self.op.remote_node,
9763 "OLD_SECONDARY": instance.secondary_nodes[0],
9765 env.update(_BuildInstanceHookEnvByObject(self, instance))
9768 def BuildHooksNodes(self):
9769 """Build hooks nodes.
9772 instance = self.replacer.instance
9773 nl = [
9774 self.cfg.GetMasterNode(),
9775 instance.primary_node,
9776 ]
9777 if self.op.remote_node is not None:
9778 nl.append(self.op.remote_node)
9780 return nl, nl
9781 def CheckPrereq(self):
9782 """Check prerequisites.
9785 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9786 self.op.iallocator is None)
9788 # Verify if node group locks are still correct
9789 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9790 if owned_groups:
9791 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9793 return LogicalUnit.CheckPrereq(self)
9796 class TLReplaceDisks(Tasklet):
9797 """Replaces disks for an instance.
9799 Note: Locking is not within the scope of this class.
9801 """
9802 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9803 disks, delay_iallocator, early_release):
9804 """Initializes this class.
9807 Tasklet.__init__(self, lu)
9810 self.instance_name = instance_name
9811 self.mode = mode
9812 self.iallocator_name = iallocator_name
9813 self.remote_node = remote_node
9815 self.delay_iallocator = delay_iallocator
9816 self.early_release = early_release
9819 self.instance = None
9820 self.new_node = None
9821 self.target_node = None
9822 self.other_node = None
9823 self.remote_node_info = None
9824 self.node_secondary_ip = None
9826 @staticmethod
9827 def CheckArguments(mode, remote_node, iallocator):
9828 """Helper function for users of this class.
9830 """
9831 # check for valid parameter combination
9832 if mode == constants.REPLACE_DISK_CHG:
9833 if remote_node is None and iallocator is None:
9834 raise errors.OpPrereqError("When changing the secondary either an"
9835 " iallocator script must be used or the"
9836 " new node given", errors.ECODE_INVAL)
9838 if remote_node is not None and iallocator is not None:
9839 raise errors.OpPrereqError("Give either the iallocator or the new"
9840 " secondary, not both", errors.ECODE_INVAL)
9842 elif remote_node is not None or iallocator is not None:
9843 # Not replacing the secondary
9844 raise errors.OpPrereqError("The iallocator and new node options can"
9845 " only be used when changing the"
9846 " secondary node", errors.ECODE_INVAL)
9848 @staticmethod
9849 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9850 """Compute a new secondary node using an IAllocator.
9852 """
9853 ial = IAllocator(lu.cfg, lu.rpc,
9854 mode=constants.IALLOCATOR_MODE_RELOC,
9855 name=instance_name,
9856 relocate_from=list(relocate_from))
9858 ial.Run(iallocator_name)
9860 if not ial.success:
9861 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9862 " %s" % (iallocator_name, ial.info),
9863 errors.ECODE_NORES)
9865 if len(ial.result) != ial.required_nodes:
9866 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9867 " of nodes (%s), required %s" %
9868 (iallocator_name,
9869 len(ial.result), ial.required_nodes),
9870 errors.ECODE_FAULT)
9872 remote_node_name = ial.result[0]
9874 lu.LogInfo("Selected new secondary for instance '%s': %s",
9875 instance_name, remote_node_name)
9877 return remote_node_name
9879 def _FindFaultyDisks(self, node_name):
9880 """Wrapper for L{_FindFaultyInstanceDisks}.
9883 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9886 def _CheckDisksActivated(self, instance):
9887 """Checks if the instance disks are activated.
9889 @param instance: The instance to check disks
9890 @return: True if they are activated, False otherwise
9892 """
9893 nodes = instance.all_nodes
9895 for idx, dev in enumerate(instance.disks):
9896 for node in nodes:
9897 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9898 self.cfg.SetDiskID(dev, node)
9900 result = self.rpc.call_blockdev_find(node, dev)
9902 if result.offline:
9903 continue
9904 elif result.fail_msg or not result.payload:
9905 return False
9907 return True
9909 def CheckPrereq(self):
9910 """Check prerequisites.
9912 This checks that the instance is in the cluster.
9914 """
9915 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9916 assert instance is not None, \
9917 "Cannot retrieve locked instance %s" % self.instance_name
9919 if instance.disk_template != constants.DT_DRBD8:
9920 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9921 " instances", errors.ECODE_INVAL)
9923 if len(instance.secondary_nodes) != 1:
9924 raise errors.OpPrereqError("The instance has a strange layout,"
9925 " expected one secondary but found %d" %
9926 len(instance.secondary_nodes),
9927 errors.ECODE_FAULT)
9929 if not self.delay_iallocator:
9930 self._CheckPrereq2()
9932 def _CheckPrereq2(self):
9933 """Check prerequisites, second part.
9935 This function should always be part of CheckPrereq. It was separated and is
9936 now called from Exec because during node evacuation iallocator was only
9937 called with an unmodified cluster model, not taking planned changes into
9938 account.
9940 """
9941 instance = self.instance
9942 secondary_node = instance.secondary_nodes[0]
9944 if self.iallocator_name is None:
9945 remote_node = self.remote_node
9946 else:
9947 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9948 instance.name, instance.secondary_nodes)
9950 if remote_node is None:
9951 self.remote_node_info = None
9952 else:
9953 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9954 "Remote node '%s' is not locked" % remote_node
9956 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9957 assert self.remote_node_info is not None, \
9958 "Cannot retrieve locked node %s" % remote_node
9960 if remote_node == self.instance.primary_node:
9961 raise errors.OpPrereqError("The specified node is the primary node of"
9962 " the instance", errors.ECODE_INVAL)
9964 if remote_node == secondary_node:
9965 raise errors.OpPrereqError("The specified node is already the"
9966 " secondary node of the instance",
9969 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9970 constants.REPLACE_DISK_CHG):
9971 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9972 errors.ECODE_INVAL)
9974 if self.mode == constants.REPLACE_DISK_AUTO:
9975 if not self._CheckDisksActivated(instance):
9976 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9977 " first" % self.instance_name,
9979 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9980 faulty_secondary = self._FindFaultyDisks(secondary_node)
9982 if faulty_primary and faulty_secondary:
9983 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9984 " one node and can not be repaired"
9985 " automatically" % self.instance_name,
9989 self.disks = faulty_primary
9990 self.target_node = instance.primary_node
9991 self.other_node = secondary_node
9992 check_nodes = [self.target_node, self.other_node]
9993 elif faulty_secondary:
9994 self.disks = faulty_secondary
9995 self.target_node = secondary_node
9996 self.other_node = instance.primary_node
9997 check_nodes = [self.target_node, self.other_node]
9998 else:
9999 self.disks = []
10000 check_nodes = []
10002 else:
10003 # Non-automatic modes
10004 if self.mode == constants.REPLACE_DISK_PRI:
10005 self.target_node = instance.primary_node
10006 self.other_node = secondary_node
10007 check_nodes = [self.target_node, self.other_node]
10009 elif self.mode == constants.REPLACE_DISK_SEC:
10010 self.target_node = secondary_node
10011 self.other_node = instance.primary_node
10012 check_nodes = [self.target_node, self.other_node]
10014 elif self.mode == constants.REPLACE_DISK_CHG:
10015 self.new_node = remote_node
10016 self.other_node = instance.primary_node
10017 self.target_node = secondary_node
10018 check_nodes = [self.new_node, self.other_node]
10020 _CheckNodeNotDrained(self.lu, remote_node)
10021 _CheckNodeVmCapable(self.lu, remote_node)
10023 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10024 assert old_node_info is not None
10025 if old_node_info.offline and not self.early_release:
10026 # doesn't make sense to delay the release
10027 self.early_release = True
10028 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10029 " early-release mode", secondary_node)
10031 else:
10032 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10033 self.mode)
10035 # If not specified, all disks should be replaced
10036 if not self.disks:
10037 self.disks = range(len(self.instance.disks))
10039 # TODO: compute disk parameters
10040 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10041 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10042 if primary_node_info.group != secondary_node_info.group:
10043 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10044 " different node groups; the disk parameters of the"
10045 " primary node's group will be applied.")
10047 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10049 for node in check_nodes:
10050 _CheckNodeOnline(self.lu, node)
10052 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10053 self.other_node,
10054 self.target_node]
10055 if node_name is not None)
10057 # Release unneeded node and node resource locks
10058 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10059 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10061 # Release any owned node group
10062 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10063 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10065 # Check whether disks are valid
10066 for disk_idx in self.disks:
10067 instance.FindDisk(disk_idx)
10069 # Get secondary node IP addresses
10070 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10071 in self.cfg.GetMultiNodeInfo(touched_nodes))
10073 def Exec(self, feedback_fn):
10074 """Execute disk replacement.
10076 This dispatches the disk replacement to the appropriate handler.
10078 """
10079 if self.delay_iallocator:
10080 self._CheckPrereq2()
10083 # Verify owned locks before starting operation
10084 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10085 assert set(owned_nodes) == set(self.node_secondary_ip), \
10086 ("Incorrect node locks, owning %s, expected %s" %
10087 (owned_nodes, self.node_secondary_ip.keys()))
10088 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10089 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10091 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10092 assert list(owned_instances) == [self.instance_name], \
10093 "Instance '%s' not locked" % self.instance_name
10095 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10096 "Should not own any node group lock at this point"
10098 if not self.disks:
10099 feedback_fn("No disks need replacement")
10100 return
10102 feedback_fn("Replacing disk(s) %s for %s" %
10103 (utils.CommaJoin(self.disks), self.instance.name))
10105 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10107 # Activate the instance disks if we're replacing them on a down instance
10108 if activate_disks:
10109 _StartInstanceDisks(self.lu, self.instance, True)
10111 try:
10112 # Should we replace the secondary node?
10113 if self.new_node is not None:
10114 fn = self._ExecDrbd8Secondary
10115 else:
10116 fn = self._ExecDrbd8DiskOnly
10118 result = fn(feedback_fn)
10119 finally:
10120 # Deactivate the instance disks if we're replacing them on a
10121 # down instance
10122 if activate_disks:
10123 _SafeShutdownInstanceDisks(self.lu, self.instance)
10125 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10127 if __debug__:
10128 # Verify owned locks
10129 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10130 nodes = frozenset(self.node_secondary_ip)
10131 assert ((self.early_release and not owned_nodes) or
10132 (not self.early_release and not (set(owned_nodes) - nodes))), \
10133 ("Not owning the correct locks, early_release=%s, owned=%r,"
10134 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10138 def _CheckVolumeGroup(self, nodes):
10139 self.lu.LogInfo("Checking volume groups")
10141 vgname = self.cfg.GetVGName()
10143 # Make sure volume group exists on all involved nodes
10144 results = self.rpc.call_vg_list(nodes)
10145 if not results:
10146 raise errors.OpExecError("Can't list volume groups on the nodes")
10148 for node in nodes:
10149 res = results[node]
10150 res.Raise("Error checking node %s" % node)
10151 if vgname not in res.payload:
10152 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10153 (vgname, node))
10155 def _CheckDisksExistence(self, nodes):
10156 # Check disk existence
10157 for idx, dev in enumerate(self.instance.disks):
10158 if idx not in self.disks:
10159 continue
10161 for node in nodes:
10162 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10163 self.cfg.SetDiskID(dev, node)
10165 result = self.rpc.call_blockdev_find(node, dev)
10167 msg = result.fail_msg
10168 if msg or not result.payload:
10170 msg = "disk not found"
10171 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10174 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10175 for idx, dev in enumerate(self.instance.disks):
10176 if idx not in self.disks:
10177 continue
10179 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10180 (idx, node_name))
10182 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10183 ldisk=ldisk):
10184 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10185 " replace disks for instance %s" %
10186 (node_name, self.instance.name))
10188 def _CreateNewStorage(self, node_name):
10189 """Create new storage on the primary or secondary node.
10191 This is only used for same-node replaces, not for changing the
10192 secondary node, hence we don't want to modify the existing disk.
10194 """
10196 iv_names = {}
10197 for idx, dev in enumerate(self.instance.disks):
10198 if idx not in self.disks:
10199 continue
10201 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10203 self.cfg.SetDiskID(dev, node_name)
10205 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10206 names = _GenerateUniqueNames(self.lu, lv_names)
10208 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10210 vg_data = dev.children[0].logical_id[0]
10211 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10212 logical_id=(vg_data, names[0]), params=data_p)
10213 vg_meta = dev.children[1].logical_id[0]
10214 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10215 logical_id=(vg_meta, names[1]), params=meta_p)
10217 new_lvs = [lv_data, lv_meta]
10218 old_lvs = [child.Copy() for child in dev.children]
10219 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10221 # we pass force_create=True to force the LVM creation
10222 for new_lv in new_lvs:
10223 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10224 _GetInstanceInfoText(self.instance), False)
10226 return iv_names
10228 def _CheckDevices(self, node_name, iv_names):
10229 for name, (dev, _, _) in iv_names.iteritems():
10230 self.cfg.SetDiskID(dev, node_name)
10232 result = self.rpc.call_blockdev_find(node_name, dev)
10234 msg = result.fail_msg
10235 if msg or not result.payload:
10237 msg = "disk not found"
10238 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10241 if result.payload.is_degraded:
10242 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10244 def _RemoveOldStorage(self, node_name, iv_names):
10245 for name, (_, old_lvs, _) in iv_names.iteritems():
10246 self.lu.LogInfo("Remove logical volumes for %s" % name)
10248 for lv in old_lvs:
10249 self.cfg.SetDiskID(lv, node_name)
10251 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10252 if msg:
10253 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10254 hint="remove unused LVs manually")
10256 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10257 """Replace a disk on the primary or secondary for DRBD 8.
10259 The algorithm for replace is quite complicated:
10261 1. for each disk to be replaced:
10263 1. create new LVs on the target node with unique names
10264 1. detach old LVs from the drbd device
10265 1. rename old LVs to name_replaced.<time_t>
10266 1. rename new LVs to old LVs
10267 1. attach the new LVs (with the old names now) to the drbd device
10269 1. wait for sync across all devices
10271 1. for each modified disk:
10273 1. remove old LVs (which have the name name_replaces.<time_t>)
10275 Failures are not very well handled.
10277 """
10279 steps_total = 6
10280 # Step: check device activation
10281 self.lu.LogStep(1, steps_total, "Check device existence")
10282 self._CheckDisksExistence([self.other_node, self.target_node])
10283 self._CheckVolumeGroup([self.target_node, self.other_node])
10285 # Step: check other node consistency
10286 self.lu.LogStep(2, steps_total, "Check peer consistency")
10287 self._CheckDisksConsistency(self.other_node,
10288 self.other_node == self.instance.primary_node,
10291 # Step: create new storage
10292 self.lu.LogStep(3, steps_total, "Allocate new storage")
10293 iv_names = self._CreateNewStorage(self.target_node)
10295 # Step: for each lv, detach+rename*2+attach
10296 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10297 for dev, old_lvs, new_lvs in iv_names.itervalues():
10298 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10300 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10301 old_lvs)
10302 result.Raise("Can't detach drbd from local storage on node"
10303 " %s for device %s" % (self.target_node, dev.iv_name))
10305 #cfg.Update(instance)
10307 # ok, we created the new LVs, so now we know we have the needed
10308 # storage; as such, we proceed on the target node to rename
10309 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10310 # using the assumption that logical_id == physical_id (which in
10311 # turn is the unique_id on that node)
10313 # FIXME(iustin): use a better name for the replaced LVs
10314 temp_suffix = int(time.time())
10315 ren_fn = lambda d, suff: (d.physical_id[0],
10316 d.physical_id[1] + "_replaced-%s" % suff)
10318 # Build the rename list based on what LVs exist on the node
10319 rename_old_to_new = []
10320 for to_ren in old_lvs:
10321 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10322 if not result.fail_msg and result.payload:
10324 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10326 self.lu.LogInfo("Renaming the old LVs on the target node")
10327 result = self.rpc.call_blockdev_rename(self.target_node,
10328 rename_old_to_new)
10329 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10331 # Now we rename the new LVs to the old LVs
10332 self.lu.LogInfo("Renaming the new LVs on the target node")
10333 rename_new_to_old = [(new, old.physical_id)
10334 for old, new in zip(old_lvs, new_lvs)]
10335 result = self.rpc.call_blockdev_rename(self.target_node,
10336 rename_new_to_old)
10337 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10339 # Intermediate steps of in memory modifications
10340 for old, new in zip(old_lvs, new_lvs):
10341 new.logical_id = old.logical_id
10342 self.cfg.SetDiskID(new, self.target_node)
10344 # We need to modify old_lvs so that removal later removes the
10345 # right LVs, not the newly added ones; note that old_lvs is a
10346 # copy here
10347 for disk in old_lvs:
10348 disk.logical_id = ren_fn(disk, temp_suffix)
10349 self.cfg.SetDiskID(disk, self.target_node)
10351 # Now that the new lvs have the old name, we can add them to the device
10352 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10353 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10354 new_lvs)
10355 msg = result.fail_msg
10356 if msg:
10357 for new_lv in new_lvs:
10358 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10359 new_lv).fail_msg
10360 if msg2:
10361 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10362 hint=("cleanup manually the unused logical"
10364 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
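# To summarize the rename dance for one disk (names illustrative; the real
# LV names carry unique prefixes from _GenerateUniqueNames, the new ones
# built from the ".diskN_data"/".diskN_meta" suffixes above):
#
#   disk0_data (old LV)  -> disk0_data_replaced-<time_t>
#   .disk0_data (new LV) -> disk0_data
#
# after which the new LVs, now carrying the old names, are re-attached to
# the DRBD device by call_blockdev_addchildren above.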
10366 cstep = itertools.count(5)
10368 if self.early_release:
10369 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10370 self._RemoveOldStorage(self.target_node, iv_names)
10371 # TODO: Check if releasing locks early still makes sense
10372 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10373 else:
10374 # Release all resource locks except those used by the instance
10375 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10376 keep=self.node_secondary_ip.keys())
10378 # Release all node locks while waiting for sync
10379 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10381 # TODO: Can the instance lock be downgraded here? Take the optional disk
10382 # shutdown in the caller into consideration.
10384 # Wait for sync
10385 # This can fail as the old devices are degraded and _WaitForSync
10386 # does a combined result over all disks, so we don't check its return value
10387 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10388 _WaitForSync(self.lu, self.instance)
10390 # Check all devices manually
10391 self._CheckDevices(self.instance.primary_node, iv_names)
10393 # Step: remove old storage
10394 if not self.early_release:
10395 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10396 self._RemoveOldStorage(self.target_node, iv_names)
10398 def _ExecDrbd8Secondary(self, feedback_fn):
10399 """Replace the secondary node for DRBD 8.
10401 The algorithm for replace is quite complicated:
10402 - for all disks of the instance:
10403 - create new LVs on the new node with same names
10404 - shutdown the drbd device on the old secondary
10405 - disconnect the drbd network on the primary
10406 - create the drbd device on the new secondary
10407 - network attach the drbd on the primary, using an artifice:
10408 the drbd code for Attach() will connect to the network if it
10409 finds a device which is connected to the good local disks but
10410 not network enabled
10411 - wait for sync across all devices
10412 - remove all disks from the old secondary
10414 Failures are not very well handled.
10416 """
10418 steps_total = 6
10419 pnode = self.instance.primary_node
10421 # Step: check device activation
10422 self.lu.LogStep(1, steps_total, "Check device existence")
10423 self._CheckDisksExistence([self.instance.primary_node])
10424 self._CheckVolumeGroup([self.instance.primary_node])
10426 # Step: check other node consistency
10427 self.lu.LogStep(2, steps_total, "Check peer consistency")
10428 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10430 # Step: create new storage
10431 self.lu.LogStep(3, steps_total, "Allocate new storage")
10432 for idx, dev in enumerate(self.instance.disks):
10433 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10434 (self.new_node, idx))
10435 # we pass force_create=True to force LVM creation
10436 for new_lv in dev.children:
10437 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10438 _GetInstanceInfoText(self.instance), False)
10440 # Step 4: drbd minors and drbd setups changes
10441 # after this, we must manually remove the drbd minors on both the
10442 # error and the success paths
10443 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10444 minors = self.cfg.AllocateDRBDMinor([self.new_node
10445 for dev in self.instance.disks],
10446 self.instance.name)
10447 logging.debug("Allocated minors %r", minors)
10449 iv_names = {}
10450 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10451 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10452 (self.new_node, idx))
10453 # create new devices on new_node; note that we create two IDs:
10454 # one without port, so the drbd will be activated without
10455 # networking information on the new node at this stage, and one
10456 # with network, for the latter activation in step 4
10457 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10458 if self.instance.primary_node == o_node1:
10459 p_minor = o_minor1
10460 else:
10461 assert self.instance.primary_node == o_node2, "Three-node instance?"
10462 p_minor = o_minor2
10464 new_alone_id = (self.instance.primary_node, self.new_node, None,
10465 p_minor, new_minor, o_secret)
10466 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10467 p_minor, new_minor, o_secret)
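# Shape of the two IDs built above (values illustrative): a DRBD8 logical_id
# is (nodeA, nodeB, port, minorA, minorB, secret); the "alone" variant
# carries no port, so the device is brought up without networking first:
#
#   new_alone_id = ("node1", "node4", None, 0, new_minor, "secret")
#   new_net_id   = ("node1", "node4", 11000, 0, new_minor, "secret")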
10469 iv_names[idx] = (dev, dev.children, new_net_id)
10470 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10471 new_net_id)
10472 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10473 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10474 logical_id=new_alone_id,
10475 children=dev.children,
10476 size=dev.size,
10477 params=drbd_params)
10478 try:
10479 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10480 _GetInstanceInfoText(self.instance), False)
10481 except errors.GenericError:
10482 self.cfg.ReleaseDRBDMinors(self.instance.name)
10483 raise
10485 # We have new devices, shutdown the drbd on the old secondary
10486 for idx, dev in enumerate(self.instance.disks):
10487 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10488 self.cfg.SetDiskID(dev, self.target_node)
10489 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10490 if msg:
10491 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10492 " node: %s" % (idx, msg),
10493 hint=("Please cleanup this device manually as"
10494 " soon as possible"))
10496 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10497 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10498 self.instance.disks)[pnode]
10500 msg = result.fail_msg
10501 if msg:
10502 # detaches didn't succeed (unlikely)
10503 self.cfg.ReleaseDRBDMinors(self.instance.name)
10504 raise errors.OpExecError("Can't detach the disks from the network on"
10505 " old node: %s" % (msg,))
10507 # if we managed to detach at least one, we update all the disks of
10508 # the instance to point to the new secondary
10509 self.lu.LogInfo("Updating instance configuration")
10510 for dev, _, new_logical_id in iv_names.itervalues():
10511 dev.logical_id = new_logical_id
10512 self.cfg.SetDiskID(dev, self.instance.primary_node)
10514 self.cfg.Update(self.instance, feedback_fn)
10516 # Release all node locks (the configuration has been updated)
10517 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10519 # and now perform the drbd attach
10520 self.lu.LogInfo("Attaching primary drbds to new secondary"
10521 " (standalone => connected)")
10522 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10523 self.new_node],
10524 self.node_secondary_ip,
10525 self.instance.disks,
10526 self.instance.name,
10527 False)
10528 for to_node, to_result in result.items():
10529 msg = to_result.fail_msg
10530 if msg:
10531 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10532 to_node, msg,
10533 hint=("please do a gnt-instance info to see the"
10534 " status of disks"))
10536 cstep = itertools.count(5)
10538 if self.early_release:
10539 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10540 self._RemoveOldStorage(self.target_node, iv_names)
10541 # TODO: Check if releasing locks early still makes sense
10542 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10543 else:
10544 # Release all resource locks except those used by the instance
10545 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10546 keep=self.node_secondary_ip.keys())
10548 # TODO: Can the instance lock be downgraded here? Take the optional disk
10549 # shutdown in the caller into consideration.
10551 # Wait for sync
10552 # This can fail as the old devices are degraded and _WaitForSync
10553 # does a combined result over all disks, so we don't check its return value
10554 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10555 _WaitForSync(self.lu, self.instance)
10557 # Check all devices manually
10558 self._CheckDevices(self.instance.primary_node, iv_names)
10560 # Step: remove old storage
10561 if not self.early_release:
10562 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10563 self._RemoveOldStorage(self.target_node, iv_names)
10566 class LURepairNodeStorage(NoHooksLU):
10567 """Repairs the volume group on a node.
10572 def CheckArguments(self):
10573 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10575 storage_type = self.op.storage_type
10577 if (constants.SO_FIX_CONSISTENCY not in
10578 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10579 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10580 " repaired" % storage_type,
10581 errors.ECODE_INVAL)
10583 def ExpandNames(self):
10584 self.needed_locks = {
10585 locking.LEVEL_NODE: [self.op.node_name],
10586 }
10588 def _CheckFaultyDisks(self, instance, node_name):
10589 """Ensure faulty disks abort the opcode or at least warn."""
10590 try:
10591 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10592 node_name, True):
10593 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10594 " node '%s'" % (instance.name, node_name),
10595 errors.ECODE_STATE)
10596 except errors.OpPrereqError, err:
10597 if self.op.ignore_consistency:
10598 self.proc.LogWarning(str(err.args[0]))
10599 else:
10600 raise
10602 def CheckPrereq(self):
10603 """Check prerequisites.
10606 # Check whether any instance on this node has faulty disks
10607 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10608 if inst.admin_state != constants.ADMINST_UP:
10609 continue
10610 check_nodes = set(inst.all_nodes)
10611 check_nodes.discard(self.op.node_name)
10612 for inst_node_name in check_nodes:
10613 self._CheckFaultyDisks(inst, inst_node_name)
10615 def Exec(self, feedback_fn):
10616 feedback_fn("Repairing storage unit '%s' on %s ..." %
10617 (self.op.name, self.op.node_name))
10619 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10620 result = self.rpc.call_storage_execute(self.op.node_name,
10621 self.op.storage_type, st_args,
10622 self.op.name,
10623 constants.SO_FIX_CONSISTENCY)
10624 result.Raise("Failed to repair storage unit '%s' on %s" %
10625 (self.op.name, self.op.node_name))
10628 class LUNodeEvacuate(NoHooksLU):
10629 """Evacuates instances off a list of nodes.
10634 _MODE2IALLOCATOR = {
10635 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10636 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10637 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10638 }
10639 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10640 assert (frozenset(_MODE2IALLOCATOR.values()) ==
10641 constants.IALLOCATOR_NEVAC_MODES)
10643 def CheckArguments(self):
10644 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10646 def ExpandNames(self):
10647 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10649 if self.op.remote_node is not None:
10650 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10651 assert self.op.remote_node
10653 if self.op.remote_node == self.op.node_name:
10654 raise errors.OpPrereqError("Can not use evacuated node as a new"
10655 " secondary node", errors.ECODE_INVAL)
10657 if self.op.mode != constants.NODE_EVAC_SEC:
10658 raise errors.OpPrereqError("Without the use of an iallocator only"
10659 " secondary instances can be evacuated",
10660 errors.ECODE_INVAL)
10663 self.share_locks = _ShareAll()
10664 self.needed_locks = {
10665 locking.LEVEL_INSTANCE: [],
10666 locking.LEVEL_NODEGROUP: [],
10667 locking.LEVEL_NODE: [],
10668 }
10670 # Determine nodes (via group) optimistically, needs verification once locks
10671 # have been acquired
10672 self.lock_nodes = self._DetermineNodes()
10674 def _DetermineNodes(self):
10675 """Gets the list of nodes to operate on.
10678 if self.op.remote_node is None:
10679 # Iallocator will choose any node(s) in the same group
10680 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10681 else:
10682 group_nodes = frozenset([self.op.remote_node])
10684 # Determine nodes to be locked
10685 return set([self.op.node_name]) | group_nodes
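# Example (hypothetical names): evacuating "node2" whose group also holds
# "node1" and "node3" yields {"node1", "node2", "node3"}, so every potential
# iallocator target is locked; with an explicit remote node only the
# evacuated node and that target end up in the set.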
10687 def _DetermineInstances(self):
10688 """Builds list of instances to operate on.
10691 assert self.op.mode in constants.NODE_EVAC_MODES
10693 if self.op.mode == constants.NODE_EVAC_PRI:
10694 # Primary instances only
10695 inst_fn = _GetNodePrimaryInstances
10696 assert self.op.remote_node is None, \
10697 "Evacuating primary instances requires iallocator"
10698 elif self.op.mode == constants.NODE_EVAC_SEC:
10699 # Secondary instances only
10700 inst_fn = _GetNodeSecondaryInstances
10701 else:
10702 # All instances
10703 assert self.op.mode == constants.NODE_EVAC_ALL
10704 inst_fn = _GetNodeInstances
10705 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10707 raise errors.OpPrereqError("Due to an issue with the iallocator"
10708 " interface it is not possible to evacuate"
10709 " all instances at once; specify explicitly"
10710 " whether to evacuate primary or secondary"
10712 errors.ECODE_INVAL)
10714 return inst_fn(self.cfg, self.op.node_name)
10716 def DeclareLocks(self, level):
10717 if level == locking.LEVEL_INSTANCE:
10718 # Lock instances optimistically, needs verification once node and group
10719 # locks have been acquired
10720 self.needed_locks[locking.LEVEL_INSTANCE] = \
10721 set(i.name for i in self._DetermineInstances())
10723 elif level == locking.LEVEL_NODEGROUP:
10724 # Lock node groups for all potential target nodes optimistically, needs
10725 # verification once nodes have been acquired
10726 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10727 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10729 elif level == locking.LEVEL_NODE:
10730 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10732 def CheckPrereq(self):
10734 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10735 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10736 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10738 need_nodes = self._DetermineNodes()
10740 if not owned_nodes.issuperset(need_nodes):
10741 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10742 " locks were acquired, current nodes are"
10743 " are '%s', used to be '%s'; retry the"
10745 (self.op.node_name,
10746 utils.CommaJoin(need_nodes),
10747 utils.CommaJoin(owned_nodes)),
10748 errors.ECODE_STATE)
10750 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10751 if owned_groups != wanted_groups:
10752 raise errors.OpExecError("Node groups changed since locks were acquired,"
10753 " current groups are '%s', used to be '%s';"
10754 " retry the operation" %
10755 (utils.CommaJoin(wanted_groups),
10756 utils.CommaJoin(owned_groups)))
10758 # Determine affected instances
10759 self.instances = self._DetermineInstances()
10760 self.instance_names = [i.name for i in self.instances]
10762 if set(self.instance_names) != owned_instances:
10763 raise errors.OpExecError("Instances on node '%s' changed since locks"
10764 " were acquired, current instances are '%s',"
10765 " used to be '%s'; retry the operation" %
10766 (self.op.node_name,
10767 utils.CommaJoin(self.instance_names),
10768 utils.CommaJoin(owned_instances)))
10770 if self.instance_names:
10771 self.LogInfo("Evacuating instances from node '%s': %s",
10772 self.op.node_name,
10773 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10774 else:
10775 self.LogInfo("No instances to evacuate from node '%s'",
10776 self.op.node_name)
10778 if self.op.remote_node is not None:
10779 for i in self.instances:
10780 if i.primary_node == self.op.remote_node:
10781 raise errors.OpPrereqError("Node %s is the primary node of"
10782 " instance %s, cannot use it as"
10784 (self.op.remote_node, i.name),
10785 errors.ECODE_INVAL)
10787 def Exec(self, feedback_fn):
10788 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10790 if not self.instance_names:
10791 # No instances to evacuate
10792 jobs = []
10794 elif self.op.iallocator is not None:
10795 # TODO: Implement relocation to other group
10796 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10797 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
10798 instances=list(self.instance_names))
10800 ial.Run(self.op.iallocator)
10802 if not ial.success:
10803 raise errors.OpPrereqError("Can't compute node evacuation using"
10804 " iallocator '%s': %s" %
10805 (self.op.iallocator, ial.info),
10806 errors.ECODE_NORES)
10808 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10810 elif self.op.remote_node is not None:
10811 assert self.op.mode == constants.NODE_EVAC_SEC
10812 jobs = [
10813 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10814 remote_node=self.op.remote_node,
10815 disks=[],
10816 mode=constants.REPLACE_DISK_CHG,
10817 early_release=self.op.early_release)]
10818 for instance_name in self.instance_names
10819 ]
10821 else:
10822 raise errors.ProgrammerError("No iallocator or remote node")
10824 return ResultWithJobs(jobs)
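# For the secondary-only mode above, the jobs list is one single-opcode job
# per instance, e.g. (sketch, names hypothetical):
#
#   jobs = [[opcodes.OpInstanceReplaceDisks(instance_name="inst1", ...)],
#           [opcodes.OpInstanceReplaceDisks(instance_name="inst2", ...)]]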
10827 def _SetOpEarlyRelease(early_release, op):
10828 """Sets C{early_release} flag on opcodes if available.
10832 op.early_release = early_release
10833 except AttributeError:
10834 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10836 return op
10839 def _NodeEvacDest(use_nodes, group, nodes):
10840 """Returns group or nodes depending on caller's choice.
10844 return utils.CommaJoin(nodes)
10849 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10850 """Unpacks the result of change-group and node-evacuate iallocator requests.
10852 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10853 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10855 @type lu: L{LogicalUnit}
10856 @param lu: Logical unit instance
10857 @type alloc_result: tuple/list
10858 @param alloc_result: Result from iallocator
10859 @type early_release: bool
10860 @param early_release: Whether to release locks early if possible
10861 @type use_nodes: bool
10862 @param use_nodes: Whether to display node names instead of groups
10864 """
10865 (moved, failed, jobs) = alloc_result
10867 if failed:
10868 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10869 for (name, reason) in failed)
10870 lu.LogWarning("Unable to evacuate instances %s", failreason)
10871 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
10873 if moved:
10874 lu.LogInfo("Instances to be moved: %s",
10875 utils.CommaJoin("%s (to %s)" %
10876 (name, _NodeEvacDest(use_nodes, group, nodes))
10877 for (name, group, nodes) in moved))
10879 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10880 map(opcodes.OpCode.LoadOpCode, ops))
10881 for ops in jobs]
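# A sketch of the alloc_result consumed above (values hypothetical): "moved"
# names instances with their target group/nodes, "failed" carries reasons,
# and "jobs" is a list of jobs, each a list of serialized opcodes:
#
#   alloc_result = ([("inst1", "group1", ["node3"])],           # moved
#                   [("inst2", "disk template not mirrored")],  # failed
#                   [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS"}]]) # jobs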
10884 class LUInstanceGrowDisk(LogicalUnit):
10885 """Grow a disk of an instance.
10888 HPATH = "disk-grow"
10889 HTYPE = constants.HTYPE_INSTANCE
10890 REQ_BGL = False
10892 def ExpandNames(self):
10893 self._ExpandAndLockInstance()
10894 self.needed_locks[locking.LEVEL_NODE] = []
10895 self.needed_locks[locking.LEVEL_NODE_RES] = []
10896 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
10898 def DeclareLocks(self, level):
10899 if level == locking.LEVEL_NODE:
10900 self._LockInstancesNodes()
10901 elif level == locking.LEVEL_NODE_RES:
10903 self.needed_locks[locking.LEVEL_NODE_RES] = \
10904 self.needed_locks[locking.LEVEL_NODE][:]
10906 def BuildHooksEnv(self):
10907 """Build hooks env.
10909 This runs on the master, the primary and all the secondaries.
10911 """
10912 env = {
10913 "DISK": self.op.disk,
10914 "AMOUNT": self.op.amount,
10915 }
10916 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10918 return env
10919 def BuildHooksNodes(self):
10920 """Build hooks nodes.
10923 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10926 def CheckPrereq(self):
10927 """Check prerequisites.
10929 This checks that the instance is in the cluster.
10931 """
10932 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10933 assert instance is not None, \
10934 "Cannot retrieve locked instance %s" % self.op.instance_name
10935 nodenames = list(instance.all_nodes)
10936 for node in nodenames:
10937 _CheckNodeOnline(self, node)
10939 self.instance = instance
10941 if instance.disk_template not in constants.DTS_GROWABLE:
10942 raise errors.OpPrereqError("Instance's disk layout does not support"
10943 " growing", errors.ECODE_INVAL)
10945 self.disk = instance.FindDisk(self.op.disk)
10947 if instance.disk_template not in (constants.DT_FILE,
10948 constants.DT_SHARED_FILE):
10949 # TODO: check the free disk space for file, when that feature will be
10950 # implemented
10951 _CheckNodesFreeDiskPerVG(self, nodenames,
10952 self.disk.ComputeGrowth(self.op.amount))
10954 def Exec(self, feedback_fn):
10955 """Execute disk grow.
10958 instance = self.instance
10961 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
10962 assert (self.owned_locks(locking.LEVEL_NODE) ==
10963 self.owned_locks(locking.LEVEL_NODE_RES))
10965 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10966 if not disks_ok:
10967 raise errors.OpExecError("Cannot activate block device to grow")
10969 feedback_fn("Growing disk %s of instance '%s' by %s" %
10970 (self.op.disk, instance.name,
10971 utils.FormatUnit(self.op.amount, "h")))
10973 # First run all grow ops in dry-run mode
10974 for node in instance.all_nodes:
10975 self.cfg.SetDiskID(disk, node)
10976 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10977 result.Raise("Grow request failed to node %s" % node)
10979 # We know that (as far as we can test) operations across different
10980 # nodes will succeed, time to run it for real
10981 for node in instance.all_nodes:
10982 self.cfg.SetDiskID(disk, node)
10983 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10984 result.Raise("Grow request failed to node %s" % node)
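# The two loops above act as a best-effort two-phase commit (a check, not a
# real transaction): every node first validates the grow with dry_run=True,
# and only if all nodes agree is it executed for real. Growing disk 0 by
# 1 GiB therefore issues, per node:
#
#   self.rpc.call_blockdev_grow(node, disk, 1024, True)   # dry-run
#   self.rpc.call_blockdev_grow(node, disk, 1024, False)  # real grow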
10986 # TODO: Rewrite code to work properly
10987 # DRBD goes into sync mode for a short amount of time after executing the
10988 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10989 # calling "resize" in sync mode fails. Sleeping for a short amount of
10990 # time is a work-around.
10991 time.sleep(5)
10993 disk.RecordGrow(self.op.amount)
10994 self.cfg.Update(instance, feedback_fn)
10996 # Changes have been recorded, release node lock
10997 _ReleaseLocks(self, locking.LEVEL_NODE)
10999 # Downgrade lock while waiting for sync
11000 self.glm.downgrade(locking.LEVEL_INSTANCE)
11002 if self.op.wait_for_sync:
11003 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11004 if disk_abort:
11005 self.proc.LogWarning("Disk sync-ing has not returned a good"
11006 " status; please check the instance")
11007 if instance.admin_state != constants.ADMINST_UP:
11008 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11009 elif instance.admin_state != constants.ADMINST_UP:
11010 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11011 " not supposed to be running because no wait for"
11012 " sync mode was requested")
11014 assert self.owned_locks(locking.LEVEL_NODE_RES)
11015 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11018 class LUInstanceQueryData(NoHooksLU):
11019 """Query runtime instance data.
11024 def ExpandNames(self):
11025 self.needed_locks = {}
11027 # Use locking if requested or when non-static information is wanted
11028 if not (self.op.static or self.op.use_locking):
11029 self.LogWarning("Non-static data requested, locks need to be acquired")
11030 self.op.use_locking = True
11032 if self.op.instances or not self.op.use_locking:
11033 # Expand instance names right here
11034 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11036 # Will use acquired locks
11037 self.wanted_names = None
11039 if self.op.use_locking:
11040 self.share_locks = _ShareAll()
11042 if self.wanted_names is None:
11043 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11045 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11047 self.needed_locks[locking.LEVEL_NODE] = []
11048 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11050 def DeclareLocks(self, level):
11051 if self.op.use_locking and level == locking.LEVEL_NODE:
11052 self._LockInstancesNodes()
11054 def CheckPrereq(self):
11055 """Check prerequisites.
11057 This only checks the optional instance list against the existing names.
11060 if self.wanted_names is None:
11061 assert self.op.use_locking, "Locking was not used"
11062 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11064 self.wanted_instances = \
11065 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11067 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11068 """Returns the status of a block device
11071 if self.op.static or not node:
11074 self.cfg.SetDiskID(dev, node)
11076 result = self.rpc.call_blockdev_find(node, dev)
11080 result.Raise("Can't compute disk status for %s" % instance_name)
11082 status = result.payload
11086 return (status.dev_path, status.major, status.minor,
11087 status.sync_percent, status.estimated_time,
11088 status.is_degraded, status.ldisk_status)
11090 def _ComputeDiskStatus(self, instance, snode, dev):
11091 """Compute block device status.
11094 if dev.dev_type in constants.LDS_DRBD:
11095 # we change the snode then (otherwise we use the one passed in)
11096 if dev.logical_id[0] == instance.primary_node:
11097 snode = dev.logical_id[1]
11099 snode = dev.logical_id[0]
11101 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11102 instance.name, dev)
11103 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11106 dev_children = map(compat.partial(self._ComputeDiskStatus,
11113 "iv_name": dev.iv_name,
11114 "dev_type": dev.dev_type,
11115 "logical_id": dev.logical_id,
11116 "physical_id": dev.physical_id,
11117 "pstatus": dev_pstatus,
11118 "sstatus": dev_sstatus,
11119 "children": dev_children,
11124 def Exec(self, feedback_fn):
11125 """Gather and return data"""
11128 cluster = self.cfg.GetClusterInfo()
11130 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11131 for i in self.wanted_instances)
11132 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11133 if self.op.static or pnode.offline:
11134 remote_state = None
11136 self.LogWarning("Primary node %s is marked offline, returning static"
11137 " information only for instance %s" %
11138 (pnode.name, instance.name))
11140 remote_info = self.rpc.call_instance_info(instance.primary_node,
11142 instance.hypervisor)
11143 remote_info.Raise("Error checking node %s" % instance.primary_node)
11144 remote_info = remote_info.payload
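# Translate the hypervisor's answer into a run state: reported running maps
# to "up", configured up but not reported maps to "down", anything else
# falls back to the configured admin state.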
11145 if remote_info and "state" in remote_info:
11146 remote_state = "up"
11148 if instance.admin_state == constants.ADMINST_UP:
11149 remote_state = "down"
11151 remote_state = instance.admin_state
11153 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11156 result[instance.name] = {
11157 "name": instance.name,
11158 "config_state": instance.admin_state,
11159 "run_state": remote_state,
11160 "pnode": instance.primary_node,
11161 "snodes": instance.secondary_nodes,
11163 # this happens to be the same format used for hooks
11164 "nics": _NICListToTuple(self, instance.nics),
11165 "disk_template": instance.disk_template,
11167 "hypervisor": instance.hypervisor,
11168 "network_port": instance.network_port,
11169 "hv_instance": instance.hvparams,
11170 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11171 "be_instance": instance.beparams,
11172 "be_actual": cluster.FillBE(instance),
11173 "os_instance": instance.osparams,
11174 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11175 "serial_no": instance.serial_no,
11176 "mtime": instance.mtime,
11177 "ctime": instance.ctime,
11178 "uuid": instance.uuid,
11184 class LUInstanceSetParams(LogicalUnit):
11185 """Modifies an instances's parameters.
11188 HPATH = "instance-modify"
11189 HTYPE = constants.HTYPE_INSTANCE
11192 def CheckArguments(self):
11193 if not (self.op.nics or self.op.disks or self.op.disk_template or
11194 self.op.hvparams or self.op.beparams or self.op.os_name or
11195 self.op.online_inst or self.op.offline_inst):
11196 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11198 if self.op.hvparams:
11199 _CheckGlobalHvParams(self.op.hvparams)
11203 for disk_op, disk_dict in self.op.disks:
11204 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11205 if disk_op == constants.DDM_REMOVE:
11206 disk_addremove += 1
11208 elif disk_op == constants.DDM_ADD:
11209 disk_addremove += 1
11211 if not isinstance(disk_op, int):
11212 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11213 if not isinstance(disk_dict, dict):
11214 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11215 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11217 if disk_op == constants.DDM_ADD:
11218 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11219 if mode not in constants.DISK_ACCESS_SET:
11220 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11221 errors.ECODE_INVAL)
11222 size = disk_dict.get(constants.IDISK_SIZE, None)
11224 raise errors.OpPrereqError("Required disk parameter size missing",
11225 errors.ECODE_INVAL)
11228 except (TypeError, ValueError), err:
11229 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11230 str(err), errors.ECODE_INVAL)
11231 disk_dict[constants.IDISK_SIZE] = size
11233 # modification of disk
11234 if constants.IDISK_SIZE in disk_dict:
11235 raise errors.OpPrereqError("Disk size change not possible, use"
11236 " grow-disk", errors.ECODE_INVAL)
11238 if disk_addremove > 1:
11239 raise errors.OpPrereqError("Only one disk add or remove operation"
11240 " supported at a time", errors.ECODE_INVAL)
11242 if self.op.disks and self.op.disk_template is not None:
11243 raise errors.OpPrereqError("Disk template conversion and other disk"
11244 " changes not supported at the same time",
11245 errors.ECODE_INVAL)
11247 if (self.op.disk_template and
11248 self.op.disk_template in constants.DTS_INT_MIRROR and
11249 self.op.remote_node is None):
11250 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11251 " one requires specifying a secondary node",
11252 errors.ECODE_INVAL)
11256 for nic_op, nic_dict in self.op.nics:
11257 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11258 if nic_op == constants.DDM_REMOVE:
11261 elif nic_op == constants.DDM_ADD:
11264 if not isinstance(nic_op, int):
11265 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11266 if not isinstance(nic_dict, dict):
11267 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11268 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11270 # nic_dict should be a dict
11271 nic_ip = nic_dict.get(constants.INIC_IP, None)
11272 if nic_ip is not None:
11273 if nic_ip.lower() == constants.VALUE_NONE:
11274 nic_dict[constants.INIC_IP] = None
11276 if not netutils.IPAddress.IsValid(nic_ip):
11277 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11278 errors.ECODE_INVAL)
11280 nic_bridge = nic_dict.get("bridge", None)
11281 nic_link = nic_dict.get(constants.INIC_LINK, None)
11282 if nic_bridge and nic_link:
11283 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11284 " at the same time", errors.ECODE_INVAL)
11285 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11286 nic_dict["bridge"] = None
11287 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11288 nic_dict[constants.INIC_LINK] = None
11290 if nic_op == constants.DDM_ADD:
11291 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11292 if nic_mac is None:
11293 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11295 if constants.INIC_MAC in nic_dict:
11296 nic_mac = nic_dict[constants.INIC_MAC]
11297 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11298 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11300 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11301 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11302 " modifying an existing nic",
11303 errors.ECODE_INVAL)
11305 if nic_addremove > 1:
11306 raise errors.OpPrereqError("Only one NIC add or remove operation"
11307 " supported at a time", errors.ECODE_INVAL)
11309 def ExpandNames(self):
11310 self._ExpandAndLockInstance()
11311 # Can't even acquire node locks in shared mode as upcoming changes in
11312 # Ganeti 2.6 will start to modify the node object on disk conversion
11313 self.needed_locks[locking.LEVEL_NODE] = []
11314 self.needed_locks[locking.LEVEL_NODE_RES] = []
11315 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11317 def DeclareLocks(self, level):
11318 if level == locking.LEVEL_NODE:
11319 self._LockInstancesNodes()
11320 if self.op.disk_template and self.op.remote_node:
11321 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11322 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11323 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11325 self.needed_locks[locking.LEVEL_NODE_RES] = \
11326 self.needed_locks[locking.LEVEL_NODE][:]
11328 def BuildHooksEnv(self):
11329 """Build hooks env.
11331 This runs on the master, primary and secondaries.
11335 if constants.BE_MINMEM in self.be_new:
11336 args["minmem"] = self.be_new[constants.BE_MINMEM]
11337 if constants.BE_MAXMEM in self.be_new:
11338 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11339 if constants.BE_VCPUS in self.be_new:
11340 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11341 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11342 # information at all.
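# Rebuild the hooks NIC list, applying any pending per-index NIC overrides
# plus a trailing add or remove operation.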
11345 nic_override = dict(self.op.nics)
11346 for idx, nic in enumerate(self.instance.nics):
11347 if idx in nic_override:
11348 this_nic_override = nic_override[idx]
11350 this_nic_override = {}
11351 if constants.INIC_IP in this_nic_override:
11352 ip = this_nic_override[constants.INIC_IP]
11355 if constants.INIC_MAC in this_nic_override:
11356 mac = this_nic_override[constants.INIC_MAC]
11359 if idx in self.nic_pnew:
11360 nicparams = self.nic_pnew[idx]
11362 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11363 mode = nicparams[constants.NIC_MODE]
11364 link = nicparams[constants.NIC_LINK]
11365 args["nics"].append((ip, mac, mode, link))
11366 if constants.DDM_ADD in nic_override:
11367 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11368 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11369 nicparams = self.nic_pnew[constants.DDM_ADD]
11370 mode = nicparams[constants.NIC_MODE]
11371 link = nicparams[constants.NIC_LINK]
11372 args["nics"].append((ip, mac, mode, link))
11373 elif constants.DDM_REMOVE in nic_override:
11374 del args["nics"][-1]
11376 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11377 if self.op.disk_template:
11378 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11382 def BuildHooksNodes(self):
11383 """Build hooks nodes.
11386 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11389 def CheckPrereq(self):
11390 """Check prerequisites.
11392 This only checks the instance list against the existing names.
11395 # checking the new params on the primary/secondary nodes
11397 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11398 cluster = self.cluster = self.cfg.GetClusterInfo()
11399 assert self.instance is not None, \
11400 "Cannot retrieve locked instance %s" % self.op.instance_name
11401 pnode = instance.primary_node
11402 nodelist = list(instance.all_nodes)
11403 pnode_info = self.cfg.GetNodeInfo(pnode)
11404 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11407 if self.op.os_name and not self.op.force:
11408 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11409 self.op.force_variant)
11410 instance_os = self.op.os_name
11412 instance_os = instance.os
11414 if self.op.disk_template:
11415 if instance.disk_template == self.op.disk_template:
11416 raise errors.OpPrereqError("Instance already has disk template %s" %
11417 instance.disk_template, errors.ECODE_INVAL)
11419 if (instance.disk_template,
11420 self.op.disk_template) not in self._DISK_CONVERSIONS:
11421 raise errors.OpPrereqError("Unsupported disk template conversion from"
11422 " %s to %s" % (instance.disk_template,
11423 self.op.disk_template),
11424 errors.ECODE_INVAL)
11425 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11426 msg="cannot change disk template")
11427 if self.op.disk_template in constants.DTS_INT_MIRROR:
11428 if self.op.remote_node == pnode:
11429 raise errors.OpPrereqError("Given new secondary node %s is the same"
11430 " as the primary node of the instance" %
11431 self.op.remote_node, errors.ECODE_STATE)
11432 _CheckNodeOnline(self, self.op.remote_node)
11433 _CheckNodeNotDrained(self, self.op.remote_node)
11434 # FIXME: here we assume that the old instance type is DT_PLAIN
11435 assert instance.disk_template == constants.DT_PLAIN
11436 disks = [{constants.IDISK_SIZE: d.size,
11437 constants.IDISK_VG: d.logical_id[0]}
11438 for d in instance.disks]
11439 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11440 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11442 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11443 if pnode_info.group != snode_info.group:
11444 self.LogWarning("The primary and secondary nodes are in two"
11445 " different node groups; the disk parameters"
11446 " from the first disk's node group will be"
11449 # hvparams processing
11450 if self.op.hvparams:
11451 hv_type = instance.hypervisor
11452 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11453 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11454 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11457 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11458 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11459 self.hv_proposed = self.hv_new = hv_new # the new actual values
11460 self.hv_inst = i_hvdict # the new dict (without defaults)
11462 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11464 self.hv_new = self.hv_inst = {}
11466 # beparams processing
11467 if self.op.beparams:
11468 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11470 objects.UpgradeBeParams(i_bedict)
11471 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11472 be_new = cluster.SimpleFillBE(i_bedict)
11473 self.be_proposed = self.be_new = be_new # the new actual values
11474 self.be_inst = i_bedict # the new dict (without defaults)
11476 self.be_new = self.be_inst = {}
11477 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11478 be_old = cluster.FillBE(instance)
11480 # CPU param validation -- checking every time a parameter is
11481 # changed to cover all cases where either CPU mask or vcpus have
11483 if (constants.BE_VCPUS in self.be_proposed and
11484 constants.HV_CPU_MASK in self.hv_proposed):
11486 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11487 # Verify mask is consistent with number of vCPUs. Can skip this
11488 # test if only 1 entry in the CPU mask, which means same mask
11489 # is applied to all vCPUs.
11490 if (len(cpu_list) > 1 and
11491 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11492 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11494 (self.be_proposed[constants.BE_VCPUS],
11495 self.hv_proposed[constants.HV_CPU_MASK]),
11496 errors.ECODE_INVAL)
11498 # Only perform this test if a new CPU mask is given
11499 if constants.HV_CPU_MASK in self.hv_new:
11500 # Calculate the largest CPU number requested
11501 max_requested_cpu = max(map(max, cpu_list))
11502 # Check that all of the instance's nodes have enough physical CPUs to
11503 # satisfy the requested CPU mask
11504 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11505 max_requested_cpu + 1, instance.hypervisor)
11507 # osparams processing
11508 if self.op.osparams:
11509 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11510 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11511 self.os_inst = i_osdict # the new dict (without defaults)
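# Memory checks: when maxmem is increased without --force, make sure the
# primary node (and, if auto_balance is set, every secondary) still has
# enough free memory for the instance.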
11517 #TODO(dynmem): do the appropriate check involving MINMEM
11518 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11519 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11520 mem_check_list = [pnode]
11521 if be_new[constants.BE_AUTO_BALANCE]:
11522 # either we changed auto_balance to yes or it was from before
11523 mem_check_list.extend(instance.secondary_nodes)
11524 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11525 instance.hypervisor)
11526 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11527 [instance.hypervisor])
11528 pninfo = nodeinfo[pnode]
11529 msg = pninfo.fail_msg
11531 # Assume the primary node is unreachable and go ahead
11532 self.warn.append("Can't get info from primary node %s: %s" %
11535 (_, _, (pnhvinfo, )) = pninfo.payload
11536 if not isinstance(pnhvinfo.get("memory_free", None), int):
11537 self.warn.append("Node data from primary node %s doesn't contain"
11538 " free memory information" % pnode)
11539 elif instance_info.fail_msg:
11540 self.warn.append("Can't get instance runtime information: %s" %
11541 instance_info.fail_msg)
11543 if instance_info.payload:
11544 current_mem = int(instance_info.payload["memory"])
11546 # Assume instance not running
11547 # (there is a slight race condition here, but it's not very
11548 # probable, and we have no other way to check)
11549 # TODO: Describe race condition
11551 #TODO(dynmem): do the appropriate check involving MINMEM
11552 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11553 pnhvinfo["memory_free"])
11555 raise errors.OpPrereqError("This change will prevent the instance"
11556 " from starting, due to %d MB of memory"
11557 " missing on its primary node" %
11559 errors.ECODE_NORES)
11561 if be_new[constants.BE_AUTO_BALANCE]:
11562 for node, nres in nodeinfo.items():
11563 if node not in instance.secondary_nodes:
11565 nres.Raise("Can't get info from secondary node %s" % node,
11566 prereq=True, ecode=errors.ECODE_STATE)
11567 (_, _, (nhvinfo, )) = nres.payload
11568 if not isinstance(nhvinfo.get("memory_free", None), int):
11569 raise errors.OpPrereqError("Secondary node %s didn't return free"
11570 " memory information" % node,
11571 errors.ECODE_STATE)
11572 #TODO(dynmem): do the appropriate check involving MINMEM
11573 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11574 raise errors.OpPrereqError("This change will prevent the instance"
11575 " from failover to its secondary node"
11576 " %s, due to not enough memory" % node,
11577 errors.ECODE_STATE)
11581 self.nic_pinst = {}
11582 for nic_op, nic_dict in self.op.nics:
11583 if nic_op == constants.DDM_REMOVE:
11584 if not instance.nics:
11585 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11586 errors.ECODE_INVAL)
11588 if nic_op != constants.DDM_ADD:
11590 if not instance.nics:
11591 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11592 " no NICs" % nic_op,
11593 errors.ECODE_INVAL)
11594 if nic_op < 0 or nic_op >= len(instance.nics):
11595 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11597 (nic_op, len(instance.nics) - 1),
11598 errors.ECODE_INVAL)
11599 old_nic_params = instance.nics[nic_op].nicparams
11600 old_nic_ip = instance.nics[nic_op].ip
11602 old_nic_params = {}
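# Keep only the keys of nic_dict that are recognized NIC parameters; a
# legacy "bridge" value is folded into the link parameter below.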
11605 update_params_dict = dict([(key, nic_dict[key])
11606 for key in constants.NICS_PARAMETERS
11607 if key in nic_dict])
11609 if "bridge" in nic_dict:
11610 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11612 new_nic_params = _GetUpdatedParams(old_nic_params,
11613 update_params_dict)
11614 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11615 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11616 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11617 self.nic_pinst[nic_op] = new_nic_params
11618 self.nic_pnew[nic_op] = new_filled_nic_params
11619 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11621 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11622 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11623 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11625 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11627 self.warn.append(msg)
11629 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11630 if new_nic_mode == constants.NIC_MODE_ROUTED:
11631 if constants.INIC_IP in nic_dict:
11632 nic_ip = nic_dict[constants.INIC_IP]
11634 nic_ip = old_nic_ip
11636 raise errors.OpPrereqError("Cannot set the nic ip to None"
11637 " on a routed nic", errors.ECODE_INVAL)
11638 if constants.INIC_MAC in nic_dict:
11639 nic_mac = nic_dict[constants.INIC_MAC]
11640 if nic_mac is None:
11641 raise errors.OpPrereqError("Cannot set the nic mac to None",
11642 errors.ECODE_INVAL)
11643 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11644 # otherwise generate the mac
11645 nic_dict[constants.INIC_MAC] = \
11646 self.cfg.GenerateMAC(self.proc.GetECId())
11648 # or validate/reserve the current one
11650 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11651 except errors.ReservationError:
11652 raise errors.OpPrereqError("MAC address %s already in use"
11653 " in cluster" % nic_mac,
11654 errors.ECODE_NOTUNIQUE)
11657 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11658 raise errors.OpPrereqError("Disk operations not supported for"
11659 " diskless instances",
11660 errors.ECODE_INVAL)
11661 for disk_op, _ in self.op.disks:
11662 if disk_op == constants.DDM_REMOVE:
11663 if len(instance.disks) == 1:
11664 raise errors.OpPrereqError("Cannot remove the last disk of"
11665 " an instance", errors.ECODE_INVAL)
11666 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11667 msg="cannot remove disks")
11669 if (disk_op == constants.DDM_ADD and
11670 len(instance.disks) >= constants.MAX_DISKS):
11671 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11672 " add more" % constants.MAX_DISKS,
11673 errors.ECODE_STATE)
11674 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11676 if disk_op < 0 or disk_op >= len(instance.disks):
11677 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11679 (disk_op, len(instance.disks)),
11680 errors.ECODE_INVAL)
11682 # disabling the instance
11683 if self.op.offline_inst:
11684 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11685 msg="cannot change instance state to offline")
11687 # enabling the instance
11688 if self.op.online_inst:
11689 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
11690 msg="cannot make instance go online")
11692 def _ConvertPlainToDrbd(self, feedback_fn):
11693 """Converts an instance from plain to drbd.
11696 feedback_fn("Converting template to drbd")
11697 instance = self.instance
11698 pnode = instance.primary_node
11699 snode = self.op.remote_node
11701 assert instance.disk_template == constants.DT_PLAIN
11703 # create a fake disk info for _GenerateDiskTemplate
11704 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11705 constants.IDISK_VG: d.logical_id[0]}
11706 for d in instance.disks]
11707 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11708 instance.name, pnode, [snode],
11709 disk_info, None, None, 0, feedback_fn,
11711 info = _GetInstanceInfoText(instance)
11712 feedback_fn("Creating aditional volumes...")
11713 # first, create the missing data and meta devices
11714 for disk in new_disks:
11715 # unfortunately this is... not too nice
11716 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11718 for child in disk.children:
11719 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11720 # at this stage, all new LVs have been created, we can rename the
11722 feedback_fn("Renaming original volumes...")
11723 rename_list = [(o, n.children[0].logical_id)
11724 for (o, n) in zip(instance.disks, new_disks)]
11725 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11726 result.Raise("Failed to rename original LVs")
11728 feedback_fn("Initializing DRBD devices...")
11729 # all child devices are in place, we can now create the DRBD devices
11730 for disk in new_disks:
11731 for node in [pnode, snode]:
11732 f_create = node == pnode
11733 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11735 # at this point, the instance has been modified
11736 instance.disk_template = constants.DT_DRBD8
11737 instance.disks = new_disks
11738 self.cfg.Update(instance, feedback_fn)
11740 # Release node locks while waiting for sync
11741 _ReleaseLocks(self, locking.LEVEL_NODE)
11743 # disks are created, waiting for sync
11744 disk_abort = not _WaitForSync(self, instance,
11745 oneshot=not self.op.wait_for_sync)
11747 raise errors.OpExecError("There are some degraded disks for"
11748 " this instance, please cleanup manually")
11750 # Node resource locks will be released by caller
11752 def _ConvertDrbdToPlain(self, feedback_fn):
11753 """Converts an instance from drbd to plain.
11756 instance = self.instance
11758 assert len(instance.secondary_nodes) == 1
11759 assert instance.disk_template == constants.DT_DRBD8
11761 pnode = instance.primary_node
11762 snode = instance.secondary_nodes[0]
11763 feedback_fn("Converting template to plain")
11765 old_disks = instance.disks
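# Keep only the data LV of each DRBD disk (its first child); the meta
# devices are removed from the primary node further down.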
11766 new_disks = [d.children[0] for d in old_disks]
11768 # copy over size and mode
11769 for parent, child in zip(old_disks, new_disks):
11770 child.size = parent.size
11771 child.mode = parent.mode
11773 # update instance structure
11774 instance.disks = new_disks
11775 instance.disk_template = constants.DT_PLAIN
11776 self.cfg.Update(instance, feedback_fn)
11778 # Release locks in case removing disks takes a while
11779 _ReleaseLocks(self, locking.LEVEL_NODE)
11781 feedback_fn("Removing volumes on the secondary node...")
11782 for disk in old_disks:
11783 self.cfg.SetDiskID(disk, snode)
11784 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11786 self.LogWarning("Could not remove block device %s on node %s,"
11787 " continuing anyway: %s", disk.iv_name, snode, msg)
11789 feedback_fn("Removing unneeded volumes on the primary node...")
11790 for idx, disk in enumerate(old_disks):
11791 meta = disk.children[1]
11792 self.cfg.SetDiskID(meta, pnode)
11793 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11795 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11796 " continuing anyway: %s", idx, pnode, msg)
11798 # this is a DRBD disk, return its port to the pool
11799 for disk in old_disks:
11800 tcp_port = disk.logical_id[2]
11801 self.cfg.AddTcpUdpPort(tcp_port)
11803 # Node resource locks will be released by caller
11805 def Exec(self, feedback_fn):
11806 """Modifies an instance.
11808 All parameters take effect only at the next restart of the instance.
11811 # Process here the warnings from CheckPrereq, as we don't have a
11812 # feedback_fn there.
11813 for warn in self.warn:
11814 feedback_fn("WARNING: %s" % warn)
11816 assert ((self.op.disk_template is None) ^
11817 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
11818 "Not owning any node resource locks"
11821 instance = self.instance
11823 for disk_op, disk_dict in self.op.disks:
11824 if disk_op == constants.DDM_REMOVE:
11825 # remove the last disk
11826 device = instance.disks.pop()
11827 device_idx = len(instance.disks)
11828 for node, disk in device.ComputeNodeTree(instance.primary_node):
11829 self.cfg.SetDiskID(disk, node)
11830 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11832 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11833 " continuing anyway", device_idx, node, msg)
11834 result.append(("disk/%d" % device_idx, "remove"))
11836 # if this is a DRBD disk, return its port to the pool
11837 if device.dev_type in constants.LDS_DRBD:
11838 tcp_port = device.logical_id[2]
11839 self.cfg.AddTcpUdpPort(tcp_port)
11840 elif disk_op == constants.DDM_ADD:
11842 if instance.disk_template in (constants.DT_FILE,
11843 constants.DT_SHARED_FILE):
11844 file_driver, file_path = instance.disks[0].logical_id
11845 file_path = os.path.dirname(file_path)
11847 file_driver = file_path = None
11848 disk_idx_base = len(instance.disks)
11849 new_disk = _GenerateDiskTemplate(self,
11850 instance.disk_template,
11851 instance.name, instance.primary_node,
11852 instance.secondary_nodes,
11858 self.diskparams)[0]
11859 instance.disks.append(new_disk)
11860 info = _GetInstanceInfoText(instance)
11862 logging.info("Creating volume %s for instance %s",
11863 new_disk.iv_name, instance.name)
11864 # Note: this needs to be kept in sync with _CreateDisks
11866 for node in instance.all_nodes:
11867 f_create = node == instance.primary_node
11869 _CreateBlockDev(self, node, instance, new_disk,
11870 f_create, info, f_create)
11871 except errors.OpExecError, err:
11872 self.LogWarning("Failed to create volume %s (%s) on"
11874 new_disk.iv_name, new_disk, node, err)
11875 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11876 (new_disk.size, new_disk.mode)))
11878 # change a given disk
11879 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11880 result.append(("disk.mode/%d" % disk_op,
11881 disk_dict[constants.IDISK_MODE]))
11883 if self.op.disk_template:
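# Disk template conversion: verify we hold the node and node-resource locks
# for every node involved, shut down the instance disks, then dispatch to
# the matching conversion helper from _DISK_CONVERSIONS.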
11885 check_nodes = set(instance.all_nodes)
11886 if self.op.remote_node:
11887 check_nodes.add(self.op.remote_node)
11888 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
11889 owned = self.owned_locks(level)
11890 assert not (check_nodes - owned), \
11891 ("Not owning the correct locks, owning %r, expected at least %r" %
11892 (owned, check_nodes))
11894 r_shut = _ShutdownInstanceDisks(self, instance)
11896 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11897 " proceed with disk template conversion")
11898 mode = (instance.disk_template, self.op.disk_template)
11900 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11902 self.cfg.ReleaseDRBDMinors(instance.name)
11904 result.append(("disk_template", self.op.disk_template))
11906 assert instance.disk_template == self.op.disk_template, \
11907 ("Expected disk template '%s', found '%s'" %
11908 (self.op.disk_template, instance.disk_template))
11910 # Release node and resource locks if there are any (they might already have
11911 # been released during disk conversion)
11912 _ReleaseLocks(self, locking.LEVEL_NODE)
11913 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11916 for nic_op, nic_dict in self.op.nics:
11917 if nic_op == constants.DDM_REMOVE:
11918 # remove the last nic
11919 del instance.nics[-1]
11920 result.append(("nic.%d" % len(instance.nics), "remove"))
11921 elif nic_op == constants.DDM_ADD:
11922 # mac and bridge should be set by now
11923 mac = nic_dict[constants.INIC_MAC]
11924 ip = nic_dict.get(constants.INIC_IP, None)
11925 nicparams = self.nic_pinst[constants.DDM_ADD]
11926 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11927 instance.nics.append(new_nic)
11928 result.append(("nic.%d" % (len(instance.nics) - 1),
11929 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11930 (new_nic.mac, new_nic.ip,
11931 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11932 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11935 for key in (constants.INIC_MAC, constants.INIC_IP):
11936 if key in nic_dict:
11937 setattr(instance.nics[nic_op], key, nic_dict[key])
11938 if nic_op in self.nic_pinst:
11939 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11940 for key, val in nic_dict.iteritems():
11941 result.append(("nic.%s/%d" % (key, nic_op), val))
11944 if self.op.hvparams:
11945 instance.hvparams = self.hv_inst
11946 for key, val in self.op.hvparams.iteritems():
11947 result.append(("hv/%s" % key, val))
11950 if self.op.beparams:
11951 instance.beparams = self.be_inst
11952 for key, val in self.op.beparams.iteritems():
11953 result.append(("be/%s" % key, val))
11956 if self.op.os_name:
11957 instance.os = self.op.os_name
11960 if self.op.osparams:
11961 instance.osparams = self.os_inst
11962 for key, val in self.op.osparams.iteritems():
11963 result.append(("os/%s" % key, val))
11965 # online/offline instance
11966 if self.op.online_inst:
11967 self.cfg.MarkInstanceDown(instance.name)
11968 result.append(("admin_state", constants.ADMINST_DOWN))
11969 if self.op.offline_inst:
11970 self.cfg.MarkInstanceOffline(instance.name)
11971 result.append(("admin_state", constants.ADMINST_OFFLINE))
11973 self.cfg.Update(instance, feedback_fn)
11975 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
11976 self.owned_locks(locking.LEVEL_NODE)), \
11977 "All node locks should have been released by now"
11981 _DISK_CONVERSIONS = {
11982 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11983 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11987 class LUInstanceChangeGroup(LogicalUnit):
11988 HPATH = "instance-change-group"
11989 HTYPE = constants.HTYPE_INSTANCE
11992 def ExpandNames(self):
11993 self.share_locks = _ShareAll()
11994 self.needed_locks = {
11995 locking.LEVEL_NODEGROUP: [],
11996 locking.LEVEL_NODE: [],
11999 self._ExpandAndLockInstance()
12001 if self.op.target_groups:
12002 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12003 self.op.target_groups)
12005 self.req_target_uuids = None
12007 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12009 def DeclareLocks(self, level):
12010 if level == locking.LEVEL_NODEGROUP:
12011 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12013 if self.req_target_uuids:
12014 lock_groups = set(self.req_target_uuids)
12016 # Lock all groups used by instance optimistically; this requires going
12017 # via the node before it's locked, requiring verification later on
12018 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12019 lock_groups.update(instance_groups)
12021 # No target groups, need to lock all of them
12022 lock_groups = locking.ALL_SET
12024 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12026 elif level == locking.LEVEL_NODE:
12027 if self.req_target_uuids:
12028 # Lock all nodes used by instances
12029 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12030 self._LockInstancesNodes()
12032 # Lock all nodes in all potential target groups
12033 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12034 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12035 member_nodes = [node_name
12036 for group in lock_groups
12037 for node_name in self.cfg.GetNodeGroup(group).members]
12038 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12040 # Lock all nodes as all groups are potential targets
12041 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12043 def CheckPrereq(self):
12044 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12045 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12046 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12048 assert (self.req_target_uuids is None or
12049 owned_groups.issuperset(self.req_target_uuids))
12050 assert owned_instances == set([self.op.instance_name])
12052 # Get instance information
12053 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12055 # Check if node groups for locked instance are still correct
12056 assert owned_nodes.issuperset(self.instance.all_nodes), \
12057 ("Instance %s's nodes changed while we kept the lock" %
12058 self.op.instance_name)
12060 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12063 if self.req_target_uuids:
12064 # User requested specific target groups
12065 self.target_uuids = self.req_target_uuids
12067 # All groups except those used by the instance are potential targets
12068 self.target_uuids = owned_groups - inst_groups
12070 conflicting_groups = self.target_uuids & inst_groups
12071 if conflicting_groups:
12072 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12073 " used by the instance '%s'" %
12074 (utils.CommaJoin(conflicting_groups),
12075 self.op.instance_name),
12076 errors.ECODE_INVAL)
12078 if not self.target_uuids:
12079 raise errors.OpPrereqError("There are no possible target groups",
12080 errors.ECODE_INVAL)
12082 def BuildHooksEnv(self):
12083 """Build hooks env.
12086 assert self.target_uuids
12089 "TARGET_GROUPS": " ".join(self.target_uuids),
12092 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12096 def BuildHooksNodes(self):
12097 """Build hooks nodes.
12100 mn = self.cfg.GetMasterNode()
12101 return ([mn], [mn])
12103 def Exec(self, feedback_fn):
12104 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12106 assert instances == [self.op.instance_name], "Instance not locked"
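# Let the instance allocator compute the relocation; the resulting
# evacuation plan is converted into jobs and handed back to the caller.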
12108 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12109 instances=instances, target_groups=list(self.target_uuids))
12111 ial.Run(self.op.iallocator)
12113 if not ial.success:
12114 raise errors.OpPrereqError("Can't compute solution for changing group of"
12115 " instance '%s' using iallocator '%s': %s" %
12116 (self.op.instance_name, self.op.iallocator,
12118 errors.ECODE_NORES)
12120 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12122 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12123 " instance '%s'", len(jobs), self.op.instance_name)
12125 return ResultWithJobs(jobs)
12128 class LUBackupQuery(NoHooksLU):
12129 """Query the exports list
12134 def ExpandNames(self):
12135 self.needed_locks = {}
12136 self.share_locks[locking.LEVEL_NODE] = 1
12137 if not self.op.nodes:
12138 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12140 self.needed_locks[locking.LEVEL_NODE] = \
12141 _GetWantedNodes(self, self.op.nodes)
12143 def Exec(self, feedback_fn):
12144 """Compute the list of all the exported system images.
12147 @return: a dictionary with the structure node->(export-list)
12148 where export-list is a list of the instances exported on
12152 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12153 rpcresult = self.rpc.call_export_list(self.nodes)
12155 for node in rpcresult:
12156 if rpcresult[node].fail_msg:
12157 result[node] = False
12159 result[node] = rpcresult[node].payload
12164 class LUBackupPrepare(NoHooksLU):
12165 """Prepares an instance for an export and returns useful information.
12170 def ExpandNames(self):
12171 self._ExpandAndLockInstance()
12173 def CheckPrereq(self):
12174 """Check prerequisites.
12177 instance_name = self.op.instance_name
12179 self.instance = self.cfg.GetInstanceInfo(instance_name)
12180 assert self.instance is not None, \
12181 "Cannot retrieve locked instance %s" % self.op.instance_name
12182 _CheckNodeOnline(self, self.instance.primary_node)
12184 self._cds = _GetClusterDomainSecret()
12186 def Exec(self, feedback_fn):
12187 """Prepares an instance for an export.
12190 instance = self.instance
12192 if self.op.mode == constants.EXPORT_MODE_REMOTE:
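# For remote exports, create a temporary X509 key/certificate on the
# primary node and hand back the handshake data, the HMAC-signed key name
# and the signed CA so the receiving cluster can verify them.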
12193 salt = utils.GenerateSecret(8)
12195 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12196 result = self.rpc.call_x509_cert_create(instance.primary_node,
12197 constants.RIE_CERT_VALIDITY)
12198 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12200 (name, cert_pem) = result.payload
12202 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12206 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12207 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12209 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12215 class LUBackupExport(LogicalUnit):
12216 """Export an instance to an image in the cluster.
12219 HPATH = "instance-export"
12220 HTYPE = constants.HTYPE_INSTANCE
12223 def CheckArguments(self):
12224 """Check the arguments.
12227 self.x509_key_name = self.op.x509_key_name
12228 self.dest_x509_ca_pem = self.op.destination_x509_ca
12230 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12231 if not self.x509_key_name:
12232 raise errors.OpPrereqError("Missing X509 key name for encryption",
12233 errors.ECODE_INVAL)
12235 if not self.dest_x509_ca_pem:
12236 raise errors.OpPrereqError("Missing destination X509 CA",
12237 errors.ECODE_INVAL)
12239 def ExpandNames(self):
12240 self._ExpandAndLockInstance()
12242 # Lock all nodes for local exports
12243 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12244 # FIXME: lock only instance primary and destination node
12246 # Sad but true, for now we have to lock all nodes, as we don't know where
12247 # the previous export might be, and in this LU we search for it and
12248 # remove it from its current node. In the future we could fix this by:
12249 # - making a tasklet to search (share-lock all), then create the
12250 # new one, then one to remove, after
12251 # - removing the removal operation altogether
12252 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12254 def DeclareLocks(self, level):
12255 """Last minute lock declaration."""
12256 # All nodes are locked anyway, so nothing to do here.
12258 def BuildHooksEnv(self):
12259 """Build hooks env.
12261 This will run on the master, primary node and target node.
12265 "EXPORT_MODE": self.op.mode,
12266 "EXPORT_NODE": self.op.target_node,
12267 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12268 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12269 # TODO: Generic function for boolean env variables
12270 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12273 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12277 def BuildHooksNodes(self):
12278 """Build hooks nodes.
12281 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12283 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12284 nl.append(self.op.target_node)
12288 def CheckPrereq(self):
12289 """Check prerequisites.
12291 This checks that the instance and node names are valid.
12294 instance_name = self.op.instance_name
12296 self.instance = self.cfg.GetInstanceInfo(instance_name)
12297 assert self.instance is not None, \
12298 "Cannot retrieve locked instance %s" % self.op.instance_name
12299 _CheckNodeOnline(self, self.instance.primary_node)
12301 if (self.op.remove_instance and
12302 self.instance.admin_state == constants.ADMINST_UP and
12303 not self.op.shutdown):
12304 raise errors.OpPrereqError("Can not remove instance without shutting it"
12307 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12308 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12309 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12310 assert self.dst_node is not None
12312 _CheckNodeOnline(self, self.dst_node.name)
12313 _CheckNodeNotDrained(self, self.dst_node.name)
12316 self.dest_disk_info = None
12317 self.dest_x509_ca = None
12319 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12320 self.dst_node = None
12322 if len(self.op.target_node) != len(self.instance.disks):
12323 raise errors.OpPrereqError(("Received destination information for %s"
12324 " disks, but instance %s has %s disks") %
12325 (len(self.op.target_node), instance_name,
12326 len(self.instance.disks)),
12327 errors.ECODE_INVAL)
12329 cds = _GetClusterDomainSecret()
12331 # Check X509 key name
12333 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12334 except (TypeError, ValueError), err:
12335 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12337 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12338 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12339 errors.ECODE_INVAL)
12341 # Load and verify CA
12343 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12344 except OpenSSL.crypto.Error, err:
12345 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12346 (err, ), errors.ECODE_INVAL)
12348 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12349 if errcode is not None:
12350 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12351 (msg, ), errors.ECODE_INVAL)
12353 self.dest_x509_ca = cert
12355 # Verify target information
12357 for idx, disk_data in enumerate(self.op.target_node):
12359 (host, port, magic) = \
12360 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12361 except errors.GenericError, err:
12362 raise errors.OpPrereqError("Target info for disk %s: %s" %
12363 (idx, err), errors.ECODE_INVAL)
12365 disk_info.append((host, port, magic))
12367 assert len(disk_info) == len(self.op.target_node)
12368 self.dest_disk_info = disk_info
12371 raise errors.ProgrammerError("Unhandled export mode %r" %
12374 # instance disk type verification
12375 # TODO: Implement export support for file-based disks
12376 for disk in self.instance.disks:
12377 if disk.dev_type == constants.LD_FILE:
12378 raise errors.OpPrereqError("Export not supported for instances with"
12379 " file-based disks", errors.ECODE_INVAL)
12381 def _CleanupExports(self, feedback_fn):
12382 """Removes exports of current instance from all other nodes.
12384 If an instance in a cluster with nodes A..D was exported to node C, its
12385 exports will be removed from the nodes A, B and D.
12388 assert self.op.mode != constants.EXPORT_MODE_REMOTE
12390 nodelist = self.cfg.GetNodeList()
12391 nodelist.remove(self.dst_node.name)
12393 # on one-node clusters nodelist will be empty after the removal; if we
12394 # proceeded, the backup would be removed because OpBackupQuery
12395 # substitutes an empty list with the full cluster node list.
12396 iname = self.instance.name
12398 feedback_fn("Removing old exports for instance %s" % iname)
12399 exportlist = self.rpc.call_export_list(nodelist)
12400 for node in exportlist:
12401 if exportlist[node].fail_msg:
12403 if iname in exportlist[node].payload:
12404 msg = self.rpc.call_export_remove(node, iname).fail_msg
12406 self.LogWarning("Could not remove older export for instance %s"
12407 " on node %s: %s", iname, node, msg)
12409 def Exec(self, feedback_fn):
12410 """Export an instance to an image in the cluster.
12413 assert self.op.mode in constants.EXPORT_MODES
12415 instance = self.instance
12416 src_node = instance.primary_node
12418 if self.op.shutdown:
12419 # shutdown the instance, but not the disks
12420 feedback_fn("Shutting down instance %s" % instance.name)
12421 result = self.rpc.call_instance_shutdown(src_node, instance,
12422 self.op.shutdown_timeout)
12423 # TODO: Maybe ignore failures if ignore_remove_failures is set
12424 result.Raise("Could not shutdown instance %s on"
12425 " node %s" % (instance.name, src_node))
12427 # set the disk IDs correctly since call_instance_start needs the
12428 # correct drbd minor to create the symlinks
12429 for disk in instance.disks:
12430 self.cfg.SetDiskID(disk, src_node)
12432 activate_disks = (instance.admin_state != constants.ADMINST_UP)
12435 # Activate the instance disks if we're exporting a stopped instance
12436 feedback_fn("Activating disks for %s" % instance.name)
12437 _StartInstanceDisks(self, instance, None)
12440 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12443 helper.CreateSnapshots()
12445 if (self.op.shutdown and
12446 instance.admin_state == constants.ADMINST_UP and
12447 not self.op.remove_instance):
12448 assert not activate_disks
12449 feedback_fn("Starting instance %s" % instance.name)
12450 result = self.rpc.call_instance_start(src_node,
12451 (instance, None, None), False)
12452 msg = result.fail_msg
12454 feedback_fn("Failed to start instance: %s" % msg)
12455 _ShutdownInstanceDisks(self, instance)
12456 raise errors.OpExecError("Could not start instance: %s" % msg)
12458 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12459 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12460 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12461 connect_timeout = constants.RIE_CONNECT_TIMEOUT
12462 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12464 (key_name, _, _) = self.x509_key_name
12467 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12470 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12471 key_name, dest_ca_pem,
12476 # Check for backwards compatibility
12477 assert len(dresults) == len(instance.disks)
12478 assert compat.all(isinstance(i, bool) for i in dresults), \
12479 "Not all results are boolean: %r" % dresults
12483 feedback_fn("Deactivating disks for %s" % instance.name)
12484 _ShutdownInstanceDisks(self, instance)
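# Collect a readable summary of what went wrong (finalization and/or
# individual disk exports) before failing the operation.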
12486 if not (compat.all(dresults) and fin_resu):
12489 failures.append("export finalization")
12490 if not compat.all(dresults):
12491 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12493 failures.append("disk export: disk(s) %s" % fdsk)
12495 raise errors.OpExecError("Export failed, errors in %s" %
12496 utils.CommaJoin(failures))
12498 # At this point, the export was successful, we can cleanup/finish
12500 # Remove instance if requested
12501 if self.op.remove_instance:
12502 feedback_fn("Removing instance %s" % instance.name)
12503 _RemoveInstance(self, feedback_fn, instance,
12504 self.op.ignore_remove_failures)
12506 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12507 self._CleanupExports(feedback_fn)
12509 return fin_resu, dresults
12512 class LUBackupRemove(NoHooksLU):
12513 """Remove exports related to the named instance.
12518 def ExpandNames(self):
12519 self.needed_locks = {}
12520 # We need all nodes to be locked in order for RemoveExport to work, but we
12521 # don't need to lock the instance itself, as nothing will happen to it (and
12522 # we can also remove exports for a removed instance)
12523 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12525 def Exec(self, feedback_fn):
12526 """Remove any export.
12529 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12530 # If the instance was not found we'll try with the name that was passed in.
12531 # This will only work if it was an FQDN, though.
12533 if not instance_name:
12535 instance_name = self.op.instance_name
12537 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12538 exportlist = self.rpc.call_export_list(locked_nodes)
12540 for node in exportlist:
12541 msg = exportlist[node].fail_msg
12543 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12545 if instance_name in exportlist[node].payload:
12547 result = self.rpc.call_export_remove(node, instance_name)
12548 msg = result.fail_msg
12550 logging.error("Could not remove export for instance %s"
12551 " on node %s: %s", instance_name, node, msg)
12553 if fqdn_warn and not found:
12554 feedback_fn("Export not found. If trying to remove an export belonging"
12555 " to a deleted instance please use its Fully Qualified"
12559 class LUGroupAdd(LogicalUnit):
12560 """Logical unit for creating node groups.
12563 HPATH = "group-add"
12564 HTYPE = constants.HTYPE_GROUP
12567 def ExpandNames(self):
12568 # We need the new group's UUID here so that we can create and acquire the
12569 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12570 # that it should not check whether the UUID exists in the configuration.
12571 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12572 self.needed_locks = {}
12573 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12575 def CheckPrereq(self):
12576 """Check prerequisites.
12578 This checks that the given group name is not an existing node group
12583 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12584 except errors.OpPrereqError:
12587 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12588 " node group (UUID: %s)" %
12589 (self.op.group_name, existing_uuid),
12590 errors.ECODE_EXISTS)
12592 if self.op.ndparams:
12593 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12595 if self.op.diskparams:
12596 for templ in constants.DISK_TEMPLATES:
12597 if templ not in self.op.diskparams:
12598 self.op.diskparams[templ] = {}
12599 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
12601 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
12603 def BuildHooksEnv(self):
12604 """Build hooks env.
12608 "GROUP_NAME": self.op.group_name,
12611 def BuildHooksNodes(self):
12612 """Build hooks nodes.
12615 mn = self.cfg.GetMasterNode()
12616 return ([mn], [mn])
12618 def Exec(self, feedback_fn):
12619 """Add the node group to the cluster.
12622 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12623 uuid=self.group_uuid,
12624 alloc_policy=self.op.alloc_policy,
12625 ndparams=self.op.ndparams,
12626 diskparams=self.op.diskparams)
12628 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12629 del self.remove_locks[locking.LEVEL_NODEGROUP]
12632 class LUGroupAssignNodes(NoHooksLU):
12633 """Logical unit for assigning nodes to groups.
12638 def ExpandNames(self):
12639 # These raise errors.OpPrereqError on their own:
12640 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12641 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12643 # We want to lock all the affected nodes and groups. We have readily
12644 # available the list of nodes, and the *destination* group. To gather the
12645 # list of "source" groups, we need to fetch node information later on.
12646 self.needed_locks = {
12647 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12648 locking.LEVEL_NODE: self.op.nodes,
12651 def DeclareLocks(self, level):
12652 if level == locking.LEVEL_NODEGROUP:
12653 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12655 # Try to get all affected nodes' groups without having the group or node
12656 # lock yet. Needs verification later in the code flow.
12657 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12659 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12661 def CheckPrereq(self):
12662 """Check prerequisites.
12665 assert self.needed_locks[locking.LEVEL_NODEGROUP]
12666 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12667 frozenset(self.op.nodes))
12669 expected_locks = (set([self.group_uuid]) |
12670 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12671 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12672 if actual_locks != expected_locks:
12673 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12674 " current groups are '%s', used to be '%s'" %
12675 (utils.CommaJoin(expected_locks),
12676 utils.CommaJoin(actual_locks)))
12678 self.node_data = self.cfg.GetAllNodesInfo()
12679 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12680 instance_data = self.cfg.GetAllInstancesInfo()
12682 if self.group is None:
12683 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12684 (self.op.group_name, self.group_uuid))
12686 (new_splits, previous_splits) = \
12687 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12688 for node in self.op.nodes],
12689 self.node_data, instance_data)
12692 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12694 if not self.op.force:
12695 raise errors.OpExecError("The following instances get split by this"
12696 " change and --force was not given: %s" %
12699 self.LogWarning("This operation will split the following instances: %s",
12702 if previous_splits:
12703 self.LogWarning("In addition, these already-split instances continue"
12704 " to be split across groups: %s",
12705 utils.CommaJoin(utils.NiceSort(previous_splits)))
12707 def Exec(self, feedback_fn):
12708 """Assign nodes to a new group.
12711 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12713 self.cfg.AssignGroupNodes(mods)

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR
    are considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as
      a consequence of this change, and a list of instances that were
      previously split and this change does not fix.

    """
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
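

# Illustrative sketch (not part of the LU above): because
# CheckAssignmentForSplitInstances is a pure static method, it can be
# exercised with hand-built stand-ins. The node and instance tuples below are
# hypothetical and only provide the attributes the check actually reads.
def _ExampleSplitCheck():
  """Runs the split check on toy data; returns (["inst1"], []).

  """
  import collections
  fake_node = collections.namedtuple("FakeNode", ["group"])
  fake_inst = collections.namedtuple("FakeInst", ["name", "primary_node",
                                                  "secondary_nodes",
                                                  "disk_template"])
  node_data = {
    "node1": fake_node(group="uuid-a"),
    "node2": fake_node(group="uuid-a"),
    }
  instance_data = {
    "inst1": fake_inst(name="inst1", primary_node="node1",
                       secondary_nodes=["node2"],
                       disk_template=constants.DT_DRBD8),
    }
  # Moving node2 to another group splits inst1, which mirrors its disks
  # across node1 and node2
  return LUGroupAssignNodes.CheckAssignmentForSplitInstances(
    [("node2", "uuid-b")], node_data, instance_data)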


class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for
    # the latter GetAllInstancesInfo() is not enough, for we have to go
    # through instance->node. Hence, we will need to process nodes even if we
    # only need instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)
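

# Illustrative sketch (hypothetical helper, not used by _GroupQuery): the
# core of _GetQueryData above is an inversion of the node->group mapping,
# restricted to the requested groups. Plain dicts stand in for configuration
# objects here.
def _ExampleGroupToNodes(node_to_group, wanted_groups):
  """Inverts a node->group mapping for the requested groups.

  """
  group_to_nodes = dict((uuid, []) for uuid in wanted_groups)
  for node, group in node_to_group.items():
    if group in group_to_nodes:
      group_to_nodes[group].append(node)
  return group_to_nodes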


class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.diskparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.diskparams:
      self.new_diskparams = dict()
      for templ in constants.DISK_TEMPLATES:
        if templ not in self.op.diskparams:
          self.op.diskparams[templ] = {}
        new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
                                             self.op.diskparams[templ])
        utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
        self.new_diskparams[templ] = new_templ_params

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.diskparams:
      self.group.diskparams = self.new_diskparams
      result.append(("diskparams", str(self.group.diskparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result
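

# Illustrative sketch (hypothetical, simplified): _GetUpdatedParams above
# merges the partial dict from the opcode over the group's stored parameters;
# the handling of special "reset to default" markers is omitted in this
# sketch, which only shows the direction of the merge.
def _ExampleMergeParams(stored, override):
  """Returns a copy of stored with override applied on top.

  """
  merged = dict(stored)
  merged.update(override)
  return merged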


class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This will raise errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid


class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name


class LUGroupEvacuate(LogicalUnit):
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)
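

# Illustrative sketch: at its core, the change-group request that
# LUGroupEvacuate.Exec hands to the iallocator (via the IAllocator class
# below) is this structure; the instance and group values are hypothetical.
_EXAMPLE_CHG_GROUP_REQUEST = {
  "type": constants.IALLOCATOR_MODE_CHG_GROUP,
  "instances": ["inst1.example.com"],
  "target_groups": ["uuid-of-target-group"],
  }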


class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
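

# Illustrative sketch (hypothetical helper): the heart of LUTagsSearch.Exec
# above is a compiled pattern applied to every (path, tag) pair; here with
# plain dicts instead of configuration objects.
def _ExampleTagSearch(pattern, tags_by_path):
  """Returns the (path, tag) pairs whose tag matches pattern.

  """
  regex = re.compile(pattern)
  return [(path, tag)
          for (path, tags) in tags_by_path.items()
          for tag in tags
          if regex.search(tag)]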


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
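

# Illustrative sketch: the missing-tag check in LUTagsDel.CheckPrereq above
# is plain set arithmetic, as in this hypothetical helper.
def _ExampleMissingTags(requested, current):
  """Returns the requested tags that are not currently set.

  """
  return frozenset(requested) - frozenset(current)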


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
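

# Illustrative sketch of the notification pattern used by
# LUTestJqueue._NotifyUsingSocket above: a throw-away Unix socket in a
# temporary directory, whose path is handed to the client via a callback.
# Simplified (no timeouts, no confirmation step); this helper is hypothetical.
def _ExampleNotifySocket(cb):
  """Creates a temporary Unix socket, calls cb with its path, waits once.

  """
  tmpdir = tempfile.mkdtemp()
  try:
    sockpath = utils.PathJoin(tmpdir, "sock")
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
      sock.bind(sockpath)
      sock.listen(1)
      cb(sockpath)
      (conn, _) = sock.accept()
      conn.close()
    finally:
      sock.close()
  finally:
    shutil.rmtree(tmpdir)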


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc_runner, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc_runner
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
                                        [hypervisor_name])
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # TODO(dynmem): compute the right data on MAX and MIN memory
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = _MakeLegacyNodeInfo(nresult.payload)

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))

        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MAXMEM]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_state == constants.ADMINST_UP:
              i_p_up_mem += beinfo[constants.BE_MAXMEM]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MAXMEM],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if (instance.disk_template in constants.DTS_INT_MIRROR and
        len(instance.secondary_nodes) != 1):
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
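

# Illustrative sketch: _NodesToGroups above is a pure static method, so it
# can be exercised with hand-built mappings (all values hypothetical).
def _ExampleNodesToGroups():
  """Maps a toy node list to sorted group names.

  """
  node2group = {"node1": "uuid-a", "node2": "uuid-b"}
  groups = {"uuid-a": {"name": "group-a"}}
  # "node3" is unknown and ignored; "uuid-b" has no group entry, so the UUID
  # itself is used; returns ["group-a", "uuid-b"]
  # pylint: disable=W0212
  return IAllocator._NodesToGroups(node2group, groups,
                                   ["node1", "node2", "node3"])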


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor)
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from))
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode '%s' in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
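

# Illustrative sketch: the dispatch above is a plain dict lookup keyed by
# query resource constants.
def _ExampleQueryDispatch():
  """Resolves a query type to its implementation class.

  """
  impl = _GetQueryImplementation(constants.QR_GROUP)
  assert impl is _GroupQuery
  return impl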