4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_UP = [constants.ADMINST_UP]
72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode result.
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcodes.OpCode}
93 @param jobs: A list of lists of opcode objects
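# A hedged usage sketch (OpTestDelay is only an illustrative opcode): an LU
# wanting to submit two follow-up jobs of one opcode each could end its Exec
# with
#
#   return ResultWithJobs([[opcodes.OpTestDelay(duration=1)],
#                          [opcodes.OpTestDelay(duration=2)]],
#                         other_result="done")
#
# The extra keyword arguments become additional return values alongside the
# submitted job IDs.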
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op validity.
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
168 This method is for doing a simple syntactic check and ensuring the
169 validity of opcode parameters, without any cluster-related
170 checks. While the same can be accomplished in ExpandNames and/or
171 CheckPrereq, doing these separately is better because:
173 - ExpandNames is left purely as a lock-related function
174 - CheckPrereq is run after we have acquired locks (and possibly waited for them)
177 The function is allowed to change the self.op attribute so that
178 later methods need no longer worry about missing parameters.
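# A minimal sketch of a CheckArguments override in a hypothetical LU; the
# "mode" slot and its values are made up for illustration only:
#
#   def CheckArguments(self):
#     if self.op.mode not in ("full", "quick"):
#       raise errors.OpPrereqError("Invalid mode '%s'" % self.op.mode,
#                                  errors.ECODE_INVAL)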
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
192 member, as a dict with lock levels as keys, and a list of needed lock names as values; rules:
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that level
197 - don't put anything for the BGL level
198 - if you want all locks at a level use locking.ALL_SET as a value
200 If you need to share locks (rather than acquire them exclusively) at one
201 level you can modify self.share_locks, setting a true value (usually 1) for
202 that level. By default locks are not shared.
204 This function can also define a list of tasklets, which then will be
205 executed in order instead of the usual LU-level CheckPrereq and Exec
206 functions, if those are not defined by the LU.
210 # Acquire all nodes and one instance
211 self.needed_locks = {
212 locking.LEVEL_NODE: locking.ALL_SET,
213 locking.LEVEL_INSTANCE: ['instance1.example.com'],
215 # Acquire just two nodes
216 self.needed_locks = {
217 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
220 self.needed_locks = {} # No, you can't leave it to the default value None
223 # The implementation of this method is mandatory only if the new LU is
224 # concurrent, so that old LUs don't need to be changed all at the same time.
227 self.needed_locks = {} # Exclusive LUs don't need locks.
229 raise NotImplementedError
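# A hedged sketch of an ExpandNames override that takes all node locks in
# shared mode (hypothetical LU, shown only to illustrate share_locks):
#
#   def ExpandNames(self):
#     self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
#     self.share_locks[locking.LEVEL_NODE] = 1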
231 def DeclareLocks(self, level):
232 """Declare LU locking needs for a level
234 While most LUs can just declare their locking needs at ExpandNames time,
235 sometimes there's the need to calculate some locks after having acquired
236 the ones before. This function is called just before acquiring locks at a
237 particular level, but after acquiring the ones at lower levels, and permits
238 such calculations. It can be used to modify self.needed_locks, and by
239 default it does nothing.
241 This function is only called if you have something already set in
242 self.needed_locks for the level.
244 @param level: Locking level which is going to be locked
245 @type level: member of ganeti.locking.LEVELS
249 def CheckPrereq(self):
250 """Check prerequisites for this LU.
252 This method should check that the prerequisites for the execution
253 of this LU are fulfilled. It can do internode communication, but
254 it should be idempotent - no cluster or system changes are allowed.
257 The method should raise errors.OpPrereqError in case something is
258 not fulfilled. Its return value is ignored.
260 This method should also update all the parameters of the opcode to
261 their canonical form if it hasn't been done by ExpandNames before.
264 if self.tasklets is not None:
265 for (idx, tl) in enumerate(self.tasklets):
266 logging.debug("Checking prerequisites for tasklet %s/%s",
267 idx + 1, len(self.tasklets))
272 def Exec(self, feedback_fn):
275 This method should implement the actual work. It should raise
276 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
280 if self.tasklets is not None:
281 for (idx, tl) in enumerate(self.tasklets):
282 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
285 raise NotImplementedError
287 def BuildHooksEnv(self):
288 """Build hooks environment for this LU.
291 @return: Dictionary containing the environment that will be used for
292 running the hooks for this LU. The keys of the dict must not be prefixed
293 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294 will extend the environment with additional variables. If no environment
295 should be defined, an empty dictionary should be returned (not C{None}).
296 @note: If the C{HPATH} attribute of the LU class is C{None}, this function will not be called.
300 raise NotImplementedError
302 def BuildHooksNodes(self):
303 """Build list of nodes to run LU's hooks.
305 @rtype: tuple; (list, list)
306 @return: Tuple containing a list of node names on which the hook
307 should run before the execution and a list of node names on which the
308 hook should run after the execution. No nodes should be returned as an
309 empty list (and not None).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function will not be called.
314 raise NotImplementedError
316 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317 """Notify the LU about the results of its hooks.
319 This method is called every time a hooks phase is executed, and notifies
320 the Logical Unit about the hooks' result. The LU can then use it to alter
321 its result based on the hooks. By default the method does nothing and the
322 previous result is passed back unchanged but any LU can define it if it
323 wants to use the local cluster hook-scripts somehow.
325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327 @param hook_results: the results of the multi-node hooks rpc call
328 @param feedback_fn: function used to send feedback back to the caller
329 @param lu_result: the previous Exec result this LU had, or None
331 @return: the new Exec result, based on the previous result
335 # The API must be kept, thus we ignore the unused-argument and
336 # could-be-a-function warnings
337 # pylint: disable=W0613,R0201
340 def _ExpandAndLockInstance(self):
341 """Helper function to expand and lock an instance.
343 Many LUs that work on an instance take its name in self.op.instance_name
344 and need to expand it and then declare the expanded name for locking. This
345 function does it, and then updates self.op.instance_name to the expanded
346 name. It also initializes needed_locks as a dict, if this hasn't been done before.
350 if self.needed_locks is None:
351 self.needed_locks = {}
353 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354 "_ExpandAndLockInstance called with instance-level locks set"
355 self.op.instance_name = _ExpandInstanceName(self.cfg,
356 self.op.instance_name)
357 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
359 def _LockInstancesNodes(self, primary_only=False,
360 level=locking.LEVEL_NODE):
361 """Helper function to declare instances' nodes for locking.
363 This function should be called after locking one or more instances to lock
364 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365 with all primary or secondary nodes for instances already locked and
366 present in self.needed_locks[locking.LEVEL_INSTANCE].
368 It should be called from DeclareLocks, and for safety only works if
369 self.recalculate_locks[locking.LEVEL_NODE] is set.
371 In the future it may grow parameters to just lock some instances' nodes, or
372 to just lock primary or secondary nodes, if needed.
374 It should be called in DeclareLocks in a way similar to::
376 if level == locking.LEVEL_NODE:
377 self._LockInstancesNodes()
379 @type primary_only: boolean
380 @param primary_only: only lock primary nodes of locked instances
381 @param level: Which lock level to use for locking nodes
384 assert level in self.recalculate_locks, \
385 "_LockInstancesNodes helper function called with no nodes to recalculate"
387 # TODO: check if we've really been called with the instance locks held
389 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390 # future we might want to have different behaviors depending on the value
391 # of self.recalculate_locks[locking.LEVEL_NODE]
393 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
394 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
395 wanted_nodes.append(instance.primary_node)
397 wanted_nodes.extend(instance.secondary_nodes)
399 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400 self.needed_locks[level] = wanted_nodes
401 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402 self.needed_locks[level].extend(wanted_nodes)
404 raise errors.ProgrammerError("Unknown recalculation mode")
406 del self.recalculate_locks[level]
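# A hedged end-to-end sketch of the usual recalculation pattern in an
# instance-level LU (illustrative only):
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()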
409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410 """Simple LU which runs no hooks.
412 This LU is intended as a parent for other LogicalUnits which will
413 run no hooks, in order to reduce duplicate code.
419 def BuildHooksEnv(self):
420 """Empty BuildHooksEnv for NoHooksLu.
422 This just raises an error.
425 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
427 def BuildHooksNodes(self):
428 """Empty BuildHooksNodes for NoHooksLU.
431 raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklets.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
477 raise NotImplementedError
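# A minimal, hypothetical tasklet sketch (not used anywhere in this module),
# only to show the expected shape of subclasses:
#
#   class _NoopTasklet(Tasklet):
#     def CheckPrereq(self):
#       pass  # nothing to verify
#
#     def Exec(self, feedback_fn):
#       feedback_fn("noop tasklet executed")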
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
524 raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names
530 def ExpandNames(self, lu):
531 """Expand names for this query.
533 See L{LogicalUnit.ExpandNames}.
536 raise NotImplementedError()
538 def DeclareLocks(self, lu, level):
539 """Declare locks for this query.
541 See L{LogicalUnit.DeclareLocks}.
544 raise NotImplementedError()
546 def _GetQueryData(self, lu):
547 """Collects all data for this query.
549 @return: Query data object
552 raise NotImplementedError()
554 def NewStyleQuery(self, lu):
555 """Collect data and execute query.
558 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559 sort_by_name=self.sort_by_name)
561 def OldStyleQuery(self, lu):
562 """Collect data and execute query.
565 return self.query.OldStyleQuery(self._GetQueryData(lu),
566 sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)
576 def _MakeLegacyNodeInfo(data):
577 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
579 Converts the data into a single dictionary. This is fine for most use cases,
580 but some require information from more than one volume group or hypervisor.
583 (bootid, (vg_info, ), (hv_info, )) = data
585 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
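# Illustrative input shape (field names abridged and values made up): with
#   data = ("boot-id", ({"vg_size": 20480, "vg_free": 10240},),
#           ({"memory_free": 4096},))
# the helper merges the volume group and hypervisor dicts into one flat dict
# that also carries the boot ID.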
590 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
591 """Checks if the owned node groups are still correct for an instance.
593 @type cfg: L{config.ConfigWriter}
594 @param cfg: The cluster configuration
595 @type instance_name: string
596 @param instance_name: Instance name
597 @type owned_groups: set or frozenset
598 @param owned_groups: List of currently owned node groups
601 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
603 if not owned_groups.issuperset(inst_groups):
604 raise errors.OpPrereqError("Instance %s's node groups changed since"
605 " locks were acquired, current groups are"
606 " are '%s', owning groups '%s'; retry the"
609 utils.CommaJoin(inst_groups),
610 utils.CommaJoin(owned_groups)),
616 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
617 """Checks if the instances in a node group are still correct.
619 @type cfg: L{config.ConfigWriter}
620 @param cfg: The cluster configuration
621 @type group_uuid: string
622 @param group_uuid: Node group UUID
623 @type owned_instances: set or frozenset
624 @param owned_instances: List of currently owned instances
627 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
628 if owned_instances != wanted_instances:
629 raise errors.OpPrereqError("Instances in node group '%s' changed since"
630 " locks were acquired, wanted '%s', have '%s';"
631 " retry the operation" %
633 utils.CommaJoin(wanted_instances),
634 utils.CommaJoin(owned_instances)),
637 return wanted_instances
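# Hedged usage sketch: group-level LUs typically re-check consistency in
# CheckPrereq after acquiring their locks, along these lines:
#
#   owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
#   _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)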
640 def _SupportsOob(cfg, node):
641 """Tells if node supports OOB.
643 @type cfg: L{config.ConfigWriter}
644 @param cfg: The cluster configuration
645 @type node: L{objects.Node}
646 @param node: The node
647 @return: The OOB script if supported or an empty string otherwise
650 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
653 def _GetWantedNodes(lu, nodes):
654 """Returns list of checked and expanded node names.
656 @type lu: L{LogicalUnit}
657 @param lu: the logical unit on whose behalf we execute
659 @param nodes: list of node names or None for all nodes
661 @return: the list of nodes, sorted
662 @raise errors.ProgrammerError: if the nodes parameter is wrong type
666 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
668 return utils.NiceSort(lu.cfg.GetNodeList())
671 def _GetWantedInstances(lu, instances):
672 """Returns list of checked and expanded instance names.
674 @type lu: L{LogicalUnit}
675 @param lu: the logical unit on whose behalf we execute
676 @type instances: list
677 @param instances: list of instance names or None for all instances
679 @return: the list of instances, sorted
680 @raise errors.OpPrereqError: if the instances parameter is wrong type
681 @raise errors.OpPrereqError: if any of the passed instances is not found
685 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
687 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
691 def _GetUpdatedParams(old_params, update_dict,
692 use_default=True, use_none=False):
693 """Return the new version of a parameter dictionary.
695 @type old_params: dict
696 @param old_params: old parameters
697 @type update_dict: dict
698 @param update_dict: dict containing new parameter values, or
699 constants.VALUE_DEFAULT to reset the parameter to its default
701 @type use_default: boolean
702 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
703 values as 'to be deleted' values
704 @type use_none: boolean
705 @param use_none: whether to recognise C{None} values as 'to be deleted' values
708 @return: the new parameter dictionary
711 params_copy = copy.deepcopy(old_params)
712 for key, val in update_dict.iteritems():
713 if ((use_default and val == constants.VALUE_DEFAULT) or
714 (use_none and val is None)):
720 params_copy[key] = val
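# Worked example (values made up): with use_default=True,
#   _GetUpdatedParams({"a": 1, "b": 2},
#                     {"a": constants.VALUE_DEFAULT, "c": 3})
# returns {"b": 2, "c": 3}: "a" is dropped from the overrides (falling back
# to its default) while "c" is added and "b" is left untouched.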
724 def _ReleaseLocks(lu, level, names=None, keep=None):
725 """Releases locks owned by an LU.
727 @type lu: L{LogicalUnit}
728 @param level: Lock level
729 @type names: list or None
730 @param names: Names of locks to release
731 @type keep: list or None
732 @param keep: Names of locks to retain
735 assert not (keep is not None and names is not None), \
736 "Only one of the 'names' and the 'keep' parameters can be given"
738 if names is not None:
739 should_release = names.__contains__
741 should_release = lambda name: name not in keep
743 should_release = None
745 owned = lu.owned_locks(level)
747 # Not owning any lock at this level, do nothing
754 # Determine which locks to release
756 if should_release(name):
761 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
763 # Release just some locks
764 lu.glm.release(level, names=release)
766 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
769 lu.glm.release(level)
771 assert not lu.glm.is_owned(level), "No locks should be owned"
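# Hedged usage sketch: once an LU has narrowed its work down to one node it
# can give back the node locks it no longer needs, e.g.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
#
# (self.op.node_name is only an illustrative opcode slot here.)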
774 def _MapInstanceDisksToNodes(instances):
775 """Creates a map from (node, volume) to instance name.
777 @type instances: list of L{objects.Instance}
778 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
781 return dict(((node, vol), inst.name)
782 for inst in instances
783 for (node, vols) in inst.MapLVsByNode().items()
787 def _RunPostHook(lu, node_name):
788 """Runs the post-hook for an opcode on a single node.
791 hm = lu.proc.BuildHooksManager(lu)
793 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
795 # pylint: disable=W0702
796 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
799 def _CheckOutputFields(static, dynamic, selected):
800 """Checks whether all selected fields are valid.
802 @type static: L{utils.FieldSet}
803 @param static: static fields set
804 @type dynamic: L{utils.FieldSet}
805 @param dynamic: dynamic fields set
812 delta = f.NonMatching(selected)
814 raise errors.OpPrereqError("Unknown output fields selected: %s"
815 % ",".join(delta), errors.ECODE_INVAL)
818 def _CheckGlobalHvParams(params):
819 """Validates that given hypervisor params are not global ones.
821 This will ensure that instances don't get customised versions of global parameters.
825 used_globals = constants.HVC_GLOBALS.intersection(params)
827 msg = ("The following hypervisor parameters are global and cannot"
828 " be customized at instance level, please modify them at"
829 " cluster level: %s" % utils.CommaJoin(used_globals))
830 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
833 def _CheckNodeOnline(lu, node, msg=None):
834 """Ensure that a given node is online.
836 @param lu: the LU on behalf of which we make the check
837 @param node: the node to check
838 @param msg: if passed, should be a message to replace the default one
839 @raise errors.OpPrereqError: if the node is offline
843 msg = "Can't use offline node"
844 if lu.cfg.GetNodeInfo(node).offline:
845 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
848 def _CheckNodeNotDrained(lu, node):
849 """Ensure that a given node is not drained.
851 @param lu: the LU on behalf of which we make the check
852 @param node: the node to check
853 @raise errors.OpPrereqError: if the node is drained
856 if lu.cfg.GetNodeInfo(node).drained:
857 raise errors.OpPrereqError("Can't use drained node %s" % node,
861 def _CheckNodeVmCapable(lu, node):
862 """Ensure that a given node is vm capable.
864 @param lu: the LU on behalf of which we make the check
865 @param node: the node to check
866 @raise errors.OpPrereqError: if the node is not vm capable
869 if not lu.cfg.GetNodeInfo(node).vm_capable:
870 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
874 def _CheckNodeHasOS(lu, node, os_name, force_variant):
875 """Ensure that a node supports a given OS.
877 @param lu: the LU on behalf of which we make the check
878 @param node: the node to check
879 @param os_name: the OS to query about
880 @param force_variant: whether to ignore variant errors
881 @raise errors.OpPrereqError: if the node is not supporting the OS
884 result = lu.rpc.call_os_get(node, os_name)
885 result.Raise("OS '%s' not in supported OS list for node %s" %
887 prereq=True, ecode=errors.ECODE_INVAL)
888 if not force_variant:
889 _CheckOSVariant(result.payload, os_name)
892 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
893 """Ensure that a node has the given secondary ip.
895 @type lu: L{LogicalUnit}
896 @param lu: the LU on behalf of which we make the check
898 @param node: the node to check
899 @type secondary_ip: string
900 @param secondary_ip: the ip to check
901 @type prereq: boolean
902 @param prereq: whether to throw a prerequisite or an execute error
903 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
904 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
907 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
908 result.Raise("Failure checking secondary ip on node %s" % node,
909 prereq=prereq, ecode=errors.ECODE_ENVIRON)
910 if not result.payload:
911 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
912 " please fix and re-run this command" % secondary_ip)
914 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
916 raise errors.OpExecError(msg)
919 def _GetClusterDomainSecret():
920 """Reads the cluster domain secret.
923 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
927 def _CheckInstanceState(lu, instance, req_states, msg=None):
928 """Ensure that an instance is in one of the required states.
930 @param lu: the LU on behalf of which we make the check
931 @param instance: the instance to check
932 @param msg: if passed, should be a message to replace the default one
933 @raise errors.OpPrereqError: if the instance is not in the required state
937 msg = "can't use instance from outside %s states" % ", ".join(req_states)
938 if instance.admin_state not in req_states:
939 raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
940 (instance, instance.admin_state, msg),
943 if constants.ADMINST_UP not in req_states:
944 pnode = instance.primary_node
945 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
946 ins_l.Raise("Can't contact node %s for instance information" % pnode,
947 prereq=True, ecode=errors.ECODE_ENVIRON)
949 if instance.name in ins_l.payload:
950 raise errors.OpPrereqError("Instance %s is running, %s" %
951 (instance.name, msg), errors.ECODE_STATE)
954 def _ExpandItemName(fn, name, kind):
955 """Expand an item name.
957 @param fn: the function to use for expansion
958 @param name: requested item name
959 @param kind: text description ('Node' or 'Instance')
960 @return: the resolved (full) name
961 @raise errors.OpPrereqError: if the item is not found
965 if full_name is None:
966 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
971 def _ExpandNodeName(cfg, name):
972 """Wrapper over L{_ExpandItemName} for nodes."""
973 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
976 def _ExpandInstanceName(cfg, name):
977 """Wrapper over L{_ExpandItemName} for instance."""
978 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
981 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
982 minmem, maxmem, vcpus, nics, disk_template, disks,
983 bep, hvp, hypervisor_name, tags):
984 """Builds instance related env variables for hooks
986 This builds the hook environment from individual variables.
989 @param name: the name of the instance
990 @type primary_node: string
991 @param primary_node: the name of the instance's primary node
992 @type secondary_nodes: list
993 @param secondary_nodes: list of secondary nodes as strings
994 @type os_type: string
995 @param os_type: the name of the instance's OS
997 @param status: the desired status of the instance
999 @param minmem: the minimum memory size of the instance
1000 @type maxmem: string
1001 @param maxmem: the maximum memory size of the instance
1003 @param vcpus: the count of VCPUs the instance has
1005 @param nics: list of tuples (ip, mac, mode, link) representing
1006 the NICs the instance has
1007 @type disk_template: string
1008 @param disk_template: the disk template of the instance
1010 @param disks: the list of (size, mode) pairs
1012 @param bep: the backend parameters for the instance
1014 @param hvp: the hypervisor parameters for the instance
1015 @type hypervisor_name: string
1016 @param hypervisor_name: the hypervisor for the instance
1018 @param tags: list of instance tags as strings
1020 @return: the hook environment for this instance
1025 "INSTANCE_NAME": name,
1026 "INSTANCE_PRIMARY": primary_node,
1027 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1028 "INSTANCE_OS_TYPE": os_type,
1029 "INSTANCE_STATUS": status,
1030 "INSTANCE_MINMEM": minmem,
1031 "INSTANCE_MAXMEM": maxmem,
1032 # TODO(2.7) remove deprecated "memory" value
1033 "INSTANCE_MEMORY": maxmem,
1034 "INSTANCE_VCPUS": vcpus,
1035 "INSTANCE_DISK_TEMPLATE": disk_template,
1036 "INSTANCE_HYPERVISOR": hypervisor_name,
1039 nic_count = len(nics)
1040 for idx, (ip, mac, mode, link) in enumerate(nics):
1043 env["INSTANCE_NIC%d_IP" % idx] = ip
1044 env["INSTANCE_NIC%d_MAC" % idx] = mac
1045 env["INSTANCE_NIC%d_MODE" % idx] = mode
1046 env["INSTANCE_NIC%d_LINK" % idx] = link
1047 if mode == constants.NIC_MODE_BRIDGED:
1048 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1052 env["INSTANCE_NIC_COUNT"] = nic_count
1055 disk_count = len(disks)
1056 for idx, (size, mode) in enumerate(disks):
1057 env["INSTANCE_DISK%d_SIZE" % idx] = size
1058 env["INSTANCE_DISK%d_MODE" % idx] = mode
1062 env["INSTANCE_DISK_COUNT"] = disk_count
1067 env["INSTANCE_TAGS"] = " ".join(tags)
1069 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1070 for key, value in source.items():
1071 env["INSTANCE_%s_%s" % (kind, key)] = value
1076 def _NICListToTuple(lu, nics):
1077 """Build a list of nic information tuples.
1079 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1080 value in LUInstanceQueryData.
1082 @type lu: L{LogicalUnit}
1083 @param lu: the logical unit on whose behalf we execute
1084 @type nics: list of L{objects.NIC}
1085 @param nics: list of nics to convert to hooks tuples
1089 cluster = lu.cfg.GetClusterInfo()
1093 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1094 mode = filled_params[constants.NIC_MODE]
1095 link = filled_params[constants.NIC_LINK]
1096 hooks_nics.append((ip, mac, mode, link))
1100 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1101 """Builds instance related env variables for hooks from an object.
1103 @type lu: L{LogicalUnit}
1104 @param lu: the logical unit on whose behalf we execute
1105 @type instance: L{objects.Instance}
1106 @param instance: the instance for which we should build the
1108 @type override: dict
1109 @param override: dictionary with key/values that will override
1112 @return: the hook environment dictionary
1115 cluster = lu.cfg.GetClusterInfo()
1116 bep = cluster.FillBE(instance)
1117 hvp = cluster.FillHV(instance)
1119 "name": instance.name,
1120 "primary_node": instance.primary_node,
1121 "secondary_nodes": instance.secondary_nodes,
1122 "os_type": instance.os,
1123 "status": instance.admin_state,
1124 "maxmem": bep[constants.BE_MAXMEM],
1125 "minmem": bep[constants.BE_MINMEM],
1126 "vcpus": bep[constants.BE_VCPUS],
1127 "nics": _NICListToTuple(lu, instance.nics),
1128 "disk_template": instance.disk_template,
1129 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1132 "hypervisor_name": instance.hypervisor,
1133 "tags": instance.tags,
1136 args.update(override)
1137 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1140 def _AdjustCandidatePool(lu, exceptions):
1141 """Adjust the candidate pool after node operations.
1144 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1146 lu.LogInfo("Promoted nodes to master candidate role: %s",
1147 utils.CommaJoin(node.name for node in mod_list))
1148 for name in mod_list:
1149 lu.context.ReaddNode(name)
1150 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1152 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1156 def _DecideSelfPromotion(lu, exceptions=None):
1157 """Decide whether I should promote myself as a master candidate.
1160 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1161 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1162 # the new node will increase mc_max by one, so:
1163 mc_should = min(mc_should + 1, cp_size)
1164 return mc_now < mc_should
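# Worked example (numbers made up): with candidate_pool_size = 10, mc_now = 3
# and mc_should = 3, the node being added bumps mc_should to min(3 + 1, 10)
# = 4; since 3 < 4 the new node decides to promote itself.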
1167 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1168 """Check that the brigdes needed by a list of nics exist.
1171 cluster = lu.cfg.GetClusterInfo()
1172 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1173 brlist = [params[constants.NIC_LINK] for params in paramslist
1174 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1176 result = lu.rpc.call_bridges_exist(target_node, brlist)
1177 result.Raise("Error checking bridges on destination node '%s'" %
1178 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1181 def _CheckInstanceBridgesExist(lu, instance, node=None):
1182 """Check that the brigdes needed by an instance exist.
1186 node = instance.primary_node
1187 _CheckNicsBridgesExist(lu, instance.nics, node)
1190 def _CheckOSVariant(os_obj, name):
1191 """Check whether an OS name conforms to the os variants specification.
1193 @type os_obj: L{objects.OS}
1194 @param os_obj: OS object to check
1196 @param name: OS name passed by the user, to check for validity
1199 variant = objects.OS.GetVariant(name)
1200 if not os_obj.supported_variants:
1202 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1203 " passed)" % (os_obj.name, variant),
1207 raise errors.OpPrereqError("OS name must include a variant",
1210 if variant not in os_obj.supported_variants:
1211 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1214 def _GetNodeInstancesInner(cfg, fn):
1215 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1218 def _GetNodeInstances(cfg, node_name):
1219 """Returns a list of all primary and secondary instances on a node.
1223 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1226 def _GetNodePrimaryInstances(cfg, node_name):
1227 """Returns primary instances on a node.
1230 return _GetNodeInstancesInner(cfg,
1231 lambda inst: node_name == inst.primary_node)
1234 def _GetNodeSecondaryInstances(cfg, node_name):
1235 """Returns secondary instances on a node.
1238 return _GetNodeInstancesInner(cfg,
1239 lambda inst: node_name in inst.secondary_nodes)
1242 def _GetStorageTypeArgs(cfg, storage_type):
1243 """Returns the arguments for a storage type.
1246 # Special case for file storage
1247 if storage_type == constants.ST_FILE:
1248 # storage.FileStorage wants a list of storage directories
1249 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1254 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1257 for dev in instance.disks:
1258 cfg.SetDiskID(dev, node_name)
1260 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1261 result.Raise("Failed to get disk status from node %s" % node_name,
1262 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1264 for idx, bdev_status in enumerate(result.payload):
1265 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1271 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1272 """Check the sanity of iallocator and node arguments and use the
1273 cluster-wide iallocator if appropriate.
1275 Check that at most one of (iallocator, node) is specified. If none is
1276 specified, then the LU's opcode's iallocator slot is filled with the
1277 cluster-wide default iallocator.
1279 @type iallocator_slot: string
1280 @param iallocator_slot: the name of the opcode iallocator slot
1281 @type node_slot: string
1282 @param node_slot: the name of the opcode target node slot
1285 node = getattr(lu.op, node_slot, None)
1286 iallocator = getattr(lu.op, iallocator_slot, None)
1288 if node is not None and iallocator is not None:
1289 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1291 elif node is None and iallocator is None:
1292 default_iallocator = lu.cfg.GetDefaultIAllocator()
1293 if default_iallocator:
1294 setattr(lu.op, iallocator_slot, default_iallocator)
1296 raise errors.OpPrereqError("No iallocator or node given and no"
1297 " cluster-wide default iallocator found;"
1298 " please specify either an iallocator or a"
1299 " node, or set a cluster-wide default"
1303 def _GetDefaultIAllocator(cfg, iallocator):
1304 """Decides on which iallocator to use.
1306 @type cfg: L{config.ConfigWriter}
1307 @param cfg: Cluster configuration object
1308 @type iallocator: string or None
1309 @param iallocator: Iallocator specified in opcode
1311 @return: Iallocator name
1315 # Use default iallocator
1316 iallocator = cfg.GetDefaultIAllocator()
1319 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1320 " opcode nor as a cluster-wide default",
1326 class LUClusterPostInit(LogicalUnit):
1327 """Logical unit for running hooks after cluster initialization.
1330 HPATH = "cluster-init"
1331 HTYPE = constants.HTYPE_CLUSTER
1333 def BuildHooksEnv(self):
1338 "OP_TARGET": self.cfg.GetClusterName(),
1341 def BuildHooksNodes(self):
1342 """Build hooks nodes.
1345 return ([], [self.cfg.GetMasterNode()])
1347 def Exec(self, feedback_fn):
1354 class LUClusterDestroy(LogicalUnit):
1355 """Logical unit for destroying the cluster.
1358 HPATH = "cluster-destroy"
1359 HTYPE = constants.HTYPE_CLUSTER
1361 def BuildHooksEnv(self):
1366 "OP_TARGET": self.cfg.GetClusterName(),
1369 def BuildHooksNodes(self):
1370 """Build hooks nodes.
1375 def CheckPrereq(self):
1376 """Check prerequisites.
1378 This checks whether the cluster is empty.
1380 Any errors are signaled by raising errors.OpPrereqError.
1383 master = self.cfg.GetMasterNode()
1385 nodelist = self.cfg.GetNodeList()
1386 if len(nodelist) != 1 or nodelist[0] != master:
1387 raise errors.OpPrereqError("There are still %d node(s) in"
1388 " this cluster." % (len(nodelist) - 1),
1390 instancelist = self.cfg.GetInstanceList()
1392 raise errors.OpPrereqError("There are still %d instance(s) in"
1393 " this cluster." % len(instancelist),
1396 def Exec(self, feedback_fn):
1397 """Destroys the cluster.
1400 master_params = self.cfg.GetMasterNetworkParameters()
1402 # Run post hooks on master node before it's removed
1403 _RunPostHook(self, master_params.name)
1405 ems = self.cfg.GetUseExternalMipScript()
1406 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1408 result.Raise("Could not disable the master role")
1410 return master_params.name
1413 def _VerifyCertificate(filename):
1414 """Verifies a certificate for L{LUClusterVerifyConfig}.
1416 @type filename: string
1417 @param filename: Path to PEM file
1421 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1422 utils.ReadFile(filename))
1423 except Exception, err: # pylint: disable=W0703
1424 return (LUClusterVerifyConfig.ETYPE_ERROR,
1425 "Failed to load X509 certificate %s: %s" % (filename, err))
1428 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1429 constants.SSL_CERT_EXPIRATION_ERROR)
1432 fnamemsg = "While verifying %s: %s" % (filename, msg)
1437 return (None, fnamemsg)
1438 elif errcode == utils.CERT_WARNING:
1439 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1440 elif errcode == utils.CERT_ERROR:
1441 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1443 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1446 def _GetAllHypervisorParameters(cluster, instances):
1447 """Compute the set of all hypervisor parameters.
1449 @type cluster: L{objects.Cluster}
1450 @param cluster: the cluster object
1451 @param instances: list of L{objects.Instance}
1452 @param instances: additional instances from which to obtain parameters
1453 @rtype: list of (origin, hypervisor, parameters)
1454 @return: a list with all parameters found, indicating the hypervisor they
1455 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1460 for hv_name in cluster.enabled_hypervisors:
1461 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1463 for os_name, os_hvp in cluster.os_hvp.items():
1464 for hv_name, hv_params in os_hvp.items():
1466 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1467 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1469 # TODO: collapse identical parameter values in a single one
1470 for instance in instances:
1471 if instance.hvparams:
1472 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1473 cluster.FillHV(instance)))
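# Illustrative shape of the result (hypervisor, OS and instance names made up):
#   [("cluster", "xen-pvm", {...cluster defaults...}),
#    ("os debian-image", "xen-pvm", {...defaults plus per-OS overrides...}),
#    ("instance web1", "xen-pvm", {...fully filled instance parameters...})]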
1478 class _VerifyErrors(object):
1479 """Mix-in for cluster/group verify LUs.
1481 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1482 self.op and self._feedback_fn to be available.)
1486 ETYPE_FIELD = "code"
1487 ETYPE_ERROR = "ERROR"
1488 ETYPE_WARNING = "WARNING"
1490 def _Error(self, ecode, item, msg, *args, **kwargs):
1491 """Format an error message.
1493 Based on the opcode's error_codes parameter, either format a
1494 parseable error code, or a simpler error string.
1496 This must be called only from Exec and functions called from Exec.
1499 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1500 itype, etxt, _ = ecode
1501 # first complete the msg
1504 # then format the whole message
1505 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1506 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1512 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1513 # and finally report it via the feedback_fn
1514 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
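# For illustration (node name made up): a CV_ENODEVERSION error on "node1" is
# reported roughly as
#   - ERROR:ENODEVERSION:node:node1:software version mismatch: ...
# when self.op.error_codes is set, and in the simpler form
#   - ERROR: node node1: software version mismatch: ...
# otherwise.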
1516 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1517 """Log an error message if the passed condition is True.
1521 or self.op.debug_simulate_errors) # pylint: disable=E1101
1523 # If the error code is in the list of ignored errors, demote the error to a warning
1525 (_, etxt, _) = ecode
1526 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1527 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1530 self._Error(ecode, *args, **kwargs)
1532 # do not mark the operation as failed for WARN cases only
1533 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1534 self.bad = self.bad or cond
1537 class LUClusterVerify(NoHooksLU):
1538 """Submits all jobs necessary to verify the cluster.
1543 def ExpandNames(self):
1544 self.needed_locks = {}
1546 def Exec(self, feedback_fn):
1549 if self.op.group_name:
1550 groups = [self.op.group_name]
1551 depends_fn = lambda: None
1553 groups = self.cfg.GetNodeGroupList()
1555 # Verify global configuration
1557 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1560 # Always depend on global verification
1561 depends_fn = lambda: [(-len(jobs), [])]
1563 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1564 ignore_errors=self.op.ignore_errors,
1565 depends=depends_fn())]
1566 for group in groups)
1568 # Fix up all parameters
1569 for op in itertools.chain(*jobs): # pylint: disable=W0142
1570 op.debug_simulate_errors = self.op.debug_simulate_errors
1571 op.verbose = self.op.verbose
1572 op.error_codes = self.op.error_codes
1574 op.skip_checks = self.op.skip_checks
1575 except AttributeError:
1576 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1578 return ResultWithJobs(jobs)
1581 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1582 """Verifies the cluster config.
1587 def _VerifyHVP(self, hvp_data):
1588 """Verifies locally the syntax of the hypervisor parameters.
1591 for item, hv_name, hv_params in hvp_data:
1592 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1595 hv_class = hypervisor.GetHypervisor(hv_name)
1596 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1597 hv_class.CheckParameterSyntax(hv_params)
1598 except errors.GenericError, err:
1599 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1601 def ExpandNames(self):
1602 # Information can be safely retrieved as the BGL is acquired in exclusive mode
1604 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1605 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1606 self.all_node_info = self.cfg.GetAllNodesInfo()
1607 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1608 self.needed_locks = {}
1610 def Exec(self, feedback_fn):
1611 """Verify integrity of cluster, performing various test on nodes.
1615 self._feedback_fn = feedback_fn
1617 feedback_fn("* Verifying cluster config")
1619 for msg in self.cfg.VerifyConfig():
1620 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1622 feedback_fn("* Verifying cluster certificate files")
1624 for cert_filename in constants.ALL_CERT_FILES:
1625 (errcode, msg) = _VerifyCertificate(cert_filename)
1626 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1628 feedback_fn("* Verifying hypervisor parameters")
1630 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1631 self.all_inst_info.values()))
1633 feedback_fn("* Verifying all nodes belong to an existing group")
1635 # We do this verification here because, should this bogus circumstance
1636 # occur, it would never be caught by VerifyGroup, which only acts on
1637 # nodes/instances reachable from existing node groups.
1639 dangling_nodes = set(node.name for node in self.all_node_info.values()
1640 if node.group not in self.all_group_info)
1642 dangling_instances = {}
1643 no_node_instances = []
1645 for inst in self.all_inst_info.values():
1646 if inst.primary_node in dangling_nodes:
1647 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1648 elif inst.primary_node not in self.all_node_info:
1649 no_node_instances.append(inst.name)
1654 utils.CommaJoin(dangling_instances.get(node.name,
1656 for node in dangling_nodes]
1658 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1660 "the following nodes (and their instances) belong to a non"
1661 " existing group: %s", utils.CommaJoin(pretty_dangling))
1663 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1665 "the following instances have a non-existing primary-node:"
1666 " %s", utils.CommaJoin(no_node_instances))
1671 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1672 """Verifies the status of a node group.
1675 HPATH = "cluster-verify"
1676 HTYPE = constants.HTYPE_CLUSTER
1679 _HOOKS_INDENT_RE = re.compile("^", re.M)
1681 class NodeImage(object):
1682 """A class representing the logical and physical status of a node.
1685 @ivar name: the node name to which this object refers
1686 @ivar volumes: a structure as returned from
1687 L{ganeti.backend.GetVolumeList} (runtime)
1688 @ivar instances: a list of running instances (runtime)
1689 @ivar pinst: list of configured primary instances (config)
1690 @ivar sinst: list of configured secondary instances (config)
1691 @ivar sbp: dictionary of {primary-node: list of instances} for all
1692 instances for which this node is secondary (config)
1693 @ivar mfree: free memory, as reported by hypervisor (runtime)
1694 @ivar dfree: free disk, as reported by the node (runtime)
1695 @ivar offline: the offline status (config)
1696 @type rpc_fail: boolean
1697 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1698 not whether the individual keys were correct) (runtime)
1699 @type lvm_fail: boolean
1700 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1701 @type hyp_fail: boolean
1702 @ivar hyp_fail: whether the RPC call didn't return the instance list
1703 @type ghost: boolean
1704 @ivar ghost: whether this is a known node or not (config)
1705 @type os_fail: boolean
1706 @ivar os_fail: whether the RPC call didn't return valid OS data
1708 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1709 @type vm_capable: boolean
1710 @ivar vm_capable: whether the node can host instances
1713 def __init__(self, offline=False, name=None, vm_capable=True):
1722 self.offline = offline
1723 self.vm_capable = vm_capable
1724 self.rpc_fail = False
1725 self.lvm_fail = False
1726 self.hyp_fail = False
1728 self.os_fail = False
1731 def ExpandNames(self):
1732 # This raises errors.OpPrereqError on its own:
1733 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1735 # Get instances in node group; this is unsafe and needs verification later
1736 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1738 self.needed_locks = {
1739 locking.LEVEL_INSTANCE: inst_names,
1740 locking.LEVEL_NODEGROUP: [self.group_uuid],
1741 locking.LEVEL_NODE: [],
1744 self.share_locks = _ShareAll()
1746 def DeclareLocks(self, level):
1747 if level == locking.LEVEL_NODE:
1748 # Get members of node group; this is unsafe and needs verification later
1749 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1751 all_inst_info = self.cfg.GetAllInstancesInfo()
1753 # In Exec(), we warn about mirrored instances that have primary and
1754 # secondary living in separate node groups. To fully verify that
1755 # volumes for these instances are healthy, we will need to do an
1756 # extra call to their secondaries. We ensure here those nodes will be locked.
1758 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1759 # Important: access only the instances whose lock is owned
1760 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1761 nodes.update(all_inst_info[inst].secondary_nodes)
1763 self.needed_locks[locking.LEVEL_NODE] = nodes
1765 def CheckPrereq(self):
1766 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1767 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1769 group_nodes = set(self.group_info.members)
1770 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1773 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1775 unlocked_instances = \
1776 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1779 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1780 utils.CommaJoin(unlocked_nodes))
1782 if unlocked_instances:
1783 raise errors.OpPrereqError("Missing lock for instances: %s" %
1784 utils.CommaJoin(unlocked_instances))
1786 self.all_node_info = self.cfg.GetAllNodesInfo()
1787 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1789 self.my_node_names = utils.NiceSort(group_nodes)
1790 self.my_inst_names = utils.NiceSort(group_instances)
1792 self.my_node_info = dict((name, self.all_node_info[name])
1793 for name in self.my_node_names)
1795 self.my_inst_info = dict((name, self.all_inst_info[name])
1796 for name in self.my_inst_names)
1798 # We detect here the nodes that will need the extra RPC calls for verifying
1799 # split LV volumes; they should be locked.
1800 extra_lv_nodes = set()
1802 for inst in self.my_inst_info.values():
1803 if inst.disk_template in constants.DTS_INT_MIRROR:
1804 group = self.my_node_info[inst.primary_node].group
1805 for nname in inst.secondary_nodes:
1806 if self.all_node_info[nname].group != group:
1807 extra_lv_nodes.add(nname)
1809 unlocked_lv_nodes = \
1810 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1812 if unlocked_lv_nodes:
1813 raise errors.OpPrereqError("these nodes could be locked: %s" %
1814 utils.CommaJoin(unlocked_lv_nodes))
1815 self.extra_lv_nodes = list(extra_lv_nodes)
1817 def _VerifyNode(self, ninfo, nresult):
1818 """Perform some basic validation on data returned from a node.
1820 - check the result data structure is well formed and has all the expected fields
1822 - check ganeti version
1824 @type ninfo: L{objects.Node}
1825 @param ninfo: the node to check
1826 @param nresult: the results from the node
1828 @return: whether overall this call was successful (and we can expect
1829 reasonable values in the response)
1833 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1835 # main result, nresult should be a non-empty dict
1836 test = not nresult or not isinstance(nresult, dict)
1837 _ErrorIf(test, constants.CV_ENODERPC, node,
1838 "unable to verify node: no data returned")
1842 # compares ganeti version
1843 local_version = constants.PROTOCOL_VERSION
1844 remote_version = nresult.get("version", None)
1845 test = not (remote_version and
1846 isinstance(remote_version, (list, tuple)) and
1847 len(remote_version) == 2)
1848 _ErrorIf(test, constants.CV_ENODERPC, node,
1849 "connection to node returned invalid data")
1853 test = local_version != remote_version[0]
1854 _ErrorIf(test, constants.CV_ENODEVERSION, node,
1855 "incompatible protocol versions: master %s,"
1856 " node %s", local_version, remote_version[0])
1860 # node seems compatible, we can actually try to look into its results
1862 # full package version
1863 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1864 constants.CV_ENODEVERSION, node,
1865 "software version mismatch: master %s, node %s",
1866 constants.RELEASE_VERSION, remote_version[1],
1867 code=self.ETYPE_WARNING)
1869 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1870 if ninfo.vm_capable and isinstance(hyp_result, dict):
1871 for hv_name, hv_result in hyp_result.iteritems():
1872 test = hv_result is not None
1873 _ErrorIf(test, constants.CV_ENODEHV, node,
1874 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1876 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1877 if ninfo.vm_capable and isinstance(hvp_result, list):
1878 for item, hv_name, hv_result in hvp_result:
1879 _ErrorIf(True, constants.CV_ENODEHV, node,
1880 "hypervisor %s parameter verify failure (source %s): %s",
1881 hv_name, item, hv_result)
1883 test = nresult.get(constants.NV_NODESETUP,
1884 ["Missing NODESETUP results"])
1885 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1890 def _VerifyNodeTime(self, ninfo, nresult,
1891 nvinfo_starttime, nvinfo_endtime):
1892 """Check the node time.
1894 @type ninfo: L{objects.Node}
1895 @param ninfo: the node to check
1896 @param nresult: the remote results for the node
1897 @param nvinfo_starttime: the start time of the RPC call
1898 @param nvinfo_endtime: the end time of the RPC call
1902 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1904 ntime = nresult.get(constants.NV_TIME, None)
1906 ntime_merged = utils.MergeTime(ntime)
1907 except (ValueError, TypeError):
1908 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1911 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1912 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1913 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1914 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1918 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1919 "Node time diverges by at least %s from master node time",
1922 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1923 """Check the node LVM results.
1925 @type ninfo: L{objects.Node}
1926 @param ninfo: the node to check
1927 @param nresult: the remote results for the node
1928 @param vg_name: the configured VG name
1935 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1937 # checks vg existence and size > 20G
1938 vglist = nresult.get(constants.NV_VGLIST, None)
1940 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1942 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1943 constants.MIN_VG_SIZE)
1944 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1947 pvlist = nresult.get(constants.NV_PVLIST, None)
1948 test = pvlist is None
1949 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1951 # check that ':' is not present in PV names, since it's a
1952 # special character for lvcreate (denotes the range of PEs to be used on a PV)
1954 for _, pvname, owner_vg in pvlist:
1955 test = ":" in pvname
1956 _ErrorIf(test, constants.CV_ENODELVM, node,
1957 "Invalid character ':' in PV '%s' of VG '%s'",
1960 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1961 """Check the node bridges.
1963 @type ninfo: L{objects.Node}
1964 @param ninfo: the node to check
1965 @param nresult: the remote results for the node
1966 @param bridges: the expected list of bridges
1973 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1975 missing = nresult.get(constants.NV_BRIDGES, None)
1976 test = not isinstance(missing, list)
1977 _ErrorIf(test, constants.CV_ENODENET, node,
1978 "did not return valid bridge information")
1980 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1981 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1983 def _VerifyNodeUserScripts(self, ninfo, nresult):
1984 """Check the results of user scripts presence and executability on the node
1986 @type ninfo: L{objects.Node}
1987 @param ninfo: the node to check
1988 @param nresult: the remote results for the node
1993 test = not constants.NV_USERSCRIPTS in nresult
1994 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
1995 "did not return user scripts information")
1997 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
1999 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2000 "user scripts not present or not executable: %s" %
2001 utils.CommaJoin(sorted(broken_scripts)))
2003 def _VerifyNodeNetwork(self, ninfo, nresult):
2004 """Check the node network connectivity results.
2006 @type ninfo: L{objects.Node}
2007 @param ninfo: the node to check
2008 @param nresult: the remote results for the node
2012 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2014 test = constants.NV_NODELIST not in nresult
2015 _ErrorIf(test, constants.CV_ENODESSH, node,
2016 "node hasn't returned node ssh connectivity data")
2018 if nresult[constants.NV_NODELIST]:
2019 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2020 _ErrorIf(True, constants.CV_ENODESSH, node,
2021 "ssh communication with node '%s': %s", a_node, a_msg)
2023 test = constants.NV_NODENETTEST not in nresult
2024 _ErrorIf(test, constants.CV_ENODENET, node,
2025 "node hasn't returned node tcp connectivity data")
2027 if nresult[constants.NV_NODENETTEST]:
2028 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2029 for anode in nlist:
2030 _ErrorIf(True, constants.CV_ENODENET, node,
2031 "tcp communication with node '%s': %s",
2032 anode, nresult[constants.NV_NODENETTEST][anode])
2034 test = constants.NV_MASTERIP not in nresult
2035 _ErrorIf(test, constants.CV_ENODENET, node,
2036 "node hasn't returned node master IP reachability data")
2038 if not nresult[constants.NV_MASTERIP]:
2039 if node == self.master_node:
2040 msg = "the master node cannot reach the master IP (not configured?)"
2041 else:
2042 msg = "cannot reach the master IP"
2043 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2045 def _VerifyInstance(self, instance, instanceconfig, node_image,
2047 """Verify an instance.
2049 This function checks to see if the required block devices are
2050 available on the instance's node.
2053 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2054 node_current = instanceconfig.primary_node
2056 node_vol_should = {}
2057 instanceconfig.MapLVsByNode(node_vol_should)
2059 for node in node_vol_should:
2060 n_img = node_image[node]
2061 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2062 # ignore missing volumes on offline or broken nodes
2064 for volume in node_vol_should[node]:
2065 test = volume not in n_img.volumes
2066 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2067 "volume %s missing on node %s", volume, node)
2069 if instanceconfig.admin_state == constants.ADMINST_UP:
2070 pri_img = node_image[node_current]
2071 test = instance not in pri_img.instances and not pri_img.offline
2072 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2073 "instance not running on its primary node %s",
2076 diskdata = [(nname, success, status, idx)
2077 for (nname, disks) in diskstatus.items()
2078 for idx, (success, status) in enumerate(disks)]
2080 for nname, success, bdev_status, idx in diskdata:
2081 # the 'ghost node' construction in Exec() ensures that we have a node_image entry here
2083 snode = node_image[nname]
2084 bad_snode = snode.ghost or snode.offline
2085 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2086 not success and not bad_snode,
2087 constants.CV_EINSTANCEFAULTYDISK, instance,
2088 "couldn't retrieve status for disk/%s on %s: %s",
2089 idx, nname, bdev_status)
2090 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2091 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2092 constants.CV_EINSTANCEFAULTYDISK, instance,
2093 "disk/%s on %s is faulty", idx, nname)
2095 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2096 """Verify if there are any unknown volumes in the cluster.
2098 The .os, .swap and backup volumes are ignored. All other volumes are
2099 reported as unknown.
2101 @type reserved: L{ganeti.utils.FieldSet}
2102 @param reserved: a FieldSet of reserved volume names
2105 for node, n_img in node_image.items():
2106 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2107 # skip non-healthy nodes
2109 for volume in n_img.volumes:
2110 test = ((node not in node_vol_should or
2111 volume not in node_vol_should[node]) and
2112 not reserved.Matches(volume))
2113 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2114 "volume %s is unknown", volume)
2116 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2117 """Verify N+1 Memory Resilience.
2119 Check that if one single node dies we can still start all the
2120 instances it was primary for.
2123 cluster_info = self.cfg.GetClusterInfo()
2124 for node, n_img in node_image.items():
2125 # This code checks that every node which is now listed as
2126 # secondary has enough memory to host all instances it is
2127 # supposed to should a single other node in the cluster fail.
2128 # FIXME: not ready for failover to an arbitrary node
2129 # FIXME: does not support file-backed instances
2130 # WARNING: we currently take into account down instances as well
2131 # as up ones, considering that even if they're down someone
2132 # might want to start them even in the event of a node failure.
2134 # we're skipping offline nodes from the N+1 warning, since
2135 # most likely we don't have good memory information from them;
2136 # we already list instances living on such nodes, and that's enough
2139 #TODO(dynmem): use MINMEM for checking
2140 #TODO(dynmem): also consider ballooning out other instances
2141 for prinode, instances in n_img.sbp.items():
2143 for instance in instances:
2144 bep = cluster_info.FillBE(instance_cfg[instance])
2145 if bep[constants.BE_AUTO_BALANCE]:
2146 needed_mem += bep[constants.BE_MAXMEM]
2147 test = n_img.mfree < needed_mem
2148 self._ErrorIf(test, constants.CV_ENODEN1, node,
2149 "not enough memory to accomodate instance failovers"
2150 " should node %s fail (%dMiB needed, %dMiB available)",
2151 prinode, needed_mem, n_img.mfree)
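# Minimal standalone sketch (an assumption, simplified from the loop above) of
# the N+1 memory test: for each primary node whose instances would fail over
# onto this node, sum the maximum memory of its auto-balanced instances and
# compare it against this node's free memory.
def _example_n_plus_one_ok(mfree, sbp, maxmem, auto_balance):
  # sbp: dict primary node -> list of instances secondary on this node
  for prinode, instances in sbp.items():
    needed = sum(maxmem[inst] for inst in instances if auto_balance[inst])
    if mfree < needed:
      return (False, prinode, needed)
  return (True, None, 0)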
2154 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2155 (files_all, files_opt, files_mc, files_vm)):
2156 """Verifies file checksums collected from all nodes.
2158 @param errorif: Callback for reporting errors
2159 @param nodeinfo: List of L{objects.Node} objects
2160 @param master_node: Name of master node
2161 @param all_nvinfo: RPC results
2164 # Define functions determining which nodes to consider for a file
2167 (files_mc, lambda node: (node.master_candidate or
2168 node.name == master_node)),
2169 (files_vm, lambda node: node.vm_capable),
2172 # Build mapping from filename to list of nodes which should have the file
2174 for (files, fn) in files2nodefn:
2176 filenodes = nodeinfo
2178 filenodes = filter(fn, nodeinfo)
2179 nodefiles.update((filename,
2180 frozenset(map(operator.attrgetter("name"), filenodes)))
2181 for filename in files)
2183 assert set(nodefiles) == (files_all | files_mc | files_vm)
2185 fileinfo = dict((filename, {}) for filename in nodefiles)
2186 ignore_nodes = set()
2188 for node in nodeinfo:
2190 ignore_nodes.add(node.name)
2193 nresult = all_nvinfo[node.name]
2195 if nresult.fail_msg or not nresult.payload:
2198 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2200 test = not (node_files and isinstance(node_files, dict))
2201 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2202 "Node did not return file checksum data")
2204 ignore_nodes.add(node.name)
2207 # Build per-checksum mapping from filename to nodes having it
2208 for (filename, checksum) in node_files.items():
2209 assert filename in nodefiles
2210 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2212 for (filename, checksums) in fileinfo.items():
2213 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2215 # Nodes having the file
2216 with_file = frozenset(node_name
2217 for nodes in fileinfo[filename].values()
2218 for node_name in nodes) - ignore_nodes
2220 expected_nodes = nodefiles[filename] - ignore_nodes
2222 # Nodes missing file
2223 missing_file = expected_nodes - with_file
2225 if filename in files_opt:
2227 errorif(missing_file and missing_file != expected_nodes,
2228 constants.CV_ECLUSTERFILECHECK, None,
2229 "File %s is optional, but it must exist on all or no"
2230 " nodes (not found on %s)",
2231 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2233 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2234 "File %s is missing from node(s) %s", filename,
2235 utils.CommaJoin(utils.NiceSort(missing_file)))
2237 # Warn if a node has a file it shouldn't
2238 unexpected = with_file - expected_nodes
2239 errorif(unexpected,
2240 constants.CV_ECLUSTERFILECHECK, None,
2241 "File %s should not exist on node(s) %s",
2242 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2244 # See if there are multiple versions of the file
2245 test = len(checksums) > 1
2247 variants = ["variant %s on %s" %
2248 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2249 for (idx, (checksum, nodes)) in
2250 enumerate(sorted(checksums.items()))]
2254 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2255 "File %s found with %s different checksums (%s)",
2256 filename, len(checksums), "; ".join(variants))
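# Example (made-up values) of the per-file checksum map built above: filename
# -> checksum -> set of node names. A file whose inner dict has more than one
# key has diverged across nodes and is reported with its variants.
_example_fileinfo = {
  "/var/lib/ganeti/config.data": {
    "1f9a...": set(["node1", "node2"]),
    "c0de...": set(["node3"]),   # node3 disagrees -> reported as a variant
  },
}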
2258 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2260 """Verifies and the node DRBD status.
2262 @type ninfo: L{objects.Node}
2263 @param ninfo: the node to check
2264 @param nresult: the remote results for the node
2265 @param instanceinfo: the dict of instances
2266 @param drbd_helper: the configured DRBD usermode helper
2267 @param drbd_map: the DRBD map as returned by
2268 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2272 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2275 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2276 test = (helper_result is None)
2277 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2278 "no drbd usermode helper returned")
2280 status, payload = helper_result
2281 test = not status
2282 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2283 "drbd usermode helper check unsuccessful: %s", payload)
2284 test = status and (payload != drbd_helper)
2285 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2286 "wrong drbd usermode helper: %s", payload)
2288 # compute the DRBD minors
2289 node_drbd = {}
2290 for minor, instance in drbd_map[node].items():
2291 test = instance not in instanceinfo
2292 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2293 "ghost instance '%s' in temporary DRBD map", instance)
2294 # ghost instance should not be running, but otherwise we
2295 # don't give double warnings (both ghost instance and
2296 # unallocated minor in use)
2298 node_drbd[minor] = (instance, False)
2300 instance = instanceinfo[instance]
2301 node_drbd[minor] = (instance.name,
2302 instance.admin_state == constants.ADMINST_UP)
2304 # and now check them
2305 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2306 test = not isinstance(used_minors, (tuple, list))
2307 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2308 "cannot parse drbd status file: %s", str(used_minors))
2310 # we cannot check drbd status
2313 for minor, (iname, must_exist) in node_drbd.items():
2314 test = minor not in used_minors and must_exist
2315 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2316 "drbd minor %d of instance %s is not active", minor, iname)
2317 for minor in used_minors:
2318 test = minor not in node_drbd
2319 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2320 "unallocated drbd minor %d is in use", minor)
2322 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2323 """Builds the node OS structures.
2325 @type ninfo: L{objects.Node}
2326 @param ninfo: the node to check
2327 @param nresult: the remote results for the node
2328 @param nimg: the node image object
2332 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2334 remote_os = nresult.get(constants.NV_OSLIST, None)
2335 test = (not isinstance(remote_os, list) or
2336 not compat.all(isinstance(v, list) and len(v) == 7
2337 for v in remote_os))
2339 _ErrorIf(test, constants.CV_ENODEOS, node,
2340 "node hasn't returned valid OS data")
2349 for (name, os_path, status, diagnose,
2350 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2352 if name not in os_dict:
2355 # parameters is a list of lists instead of list of tuples due to
2356 # JSON lacking a real tuple type, fix it:
2357 parameters = [tuple(v) for v in parameters]
2358 os_dict[name].append((os_path, status, diagnose,
2359 set(variants), set(parameters), set(api_ver)))
2361 nimg.oslist = os_dict
2363 def _VerifyNodeOS(self, ninfo, nimg, base):
2364 """Verifies the node OS list.
2366 @type ninfo: L{objects.Node}
2367 @param ninfo: the node to check
2368 @param nimg: the node image object
2369 @param base: the 'template' node we match against (e.g. from the master)
2373 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2375 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2377 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2378 for os_name, os_data in nimg.oslist.items():
2379 assert os_data, "Empty OS status for OS %s?!" % os_name
2380 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2381 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2382 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2383 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2384 "OS '%s' has multiple entries (first one shadows the rest): %s",
2385 os_name, utils.CommaJoin([v[0] for v in os_data]))
2386 # comparisons with the 'base' image
2387 test = os_name not in base.oslist
2388 _ErrorIf(test, constants.CV_ENODEOS, node,
2389 "Extra OS %s not present on reference node (%s)",
2393 assert base.oslist[os_name], "Base node has empty OS status?"
2394 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2396 # base OS is invalid, skipping
2398 for kind, a, b in [("API version", f_api, b_api),
2399 ("variants list", f_var, b_var),
2400 ("parameters", beautify_params(f_param),
2401 beautify_params(b_param))]:
2402 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2403 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2404 kind, os_name, base.name,
2405 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2407 # check any missing OSes
2408 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2409 _ErrorIf(missing, constants.CV_ENODEOS, node,
2410 "OSes present on reference node %s but missing on this node: %s",
2411 base.name, utils.CommaJoin(missing))
2413 def _VerifyOob(self, ninfo, nresult):
2414 """Verifies out of band functionality of a node.
2416 @type ninfo: L{objects.Node}
2417 @param ninfo: the node to check
2418 @param nresult: the remote results for the node
2422 # We just have to verify the paths on master and/or master candidates
2423 # as the oob helper is invoked on the master
2424 if ((ninfo.master_candidate or ninfo.master_capable) and
2425 constants.NV_OOB_PATHS in nresult):
2426 for path_result in nresult[constants.NV_OOB_PATHS]:
2427 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2429 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2430 """Verifies and updates the node volume data.
2432 This function will update a L{NodeImage}'s internal structures
2433 with data from the remote call.
2435 @type ninfo: L{objects.Node}
2436 @param ninfo: the node to check
2437 @param nresult: the remote results for the node
2438 @param nimg: the node image object
2439 @param vg_name: the configured VG name
2443 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2445 nimg.lvm_fail = True
2446 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2449 elif isinstance(lvdata, basestring):
2450 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2451 utils.SafeEncode(lvdata))
2452 elif not isinstance(lvdata, dict):
2453 _ErrorIf(True, constants.CV_ENODELVM, node,
2454 "rpc call to node failed (lvlist)")
2456 nimg.volumes = lvdata
2457 nimg.lvm_fail = False
2459 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2460 """Verifies and updates the node instance list.
2462 If the listing was successful, then updates this node's instance
2463 list. Otherwise, it marks the RPC call as failed for the instance list.
2466 @type ninfo: L{objects.Node}
2467 @param ninfo: the node to check
2468 @param nresult: the remote results for the node
2469 @param nimg: the node image object
2472 idata = nresult.get(constants.NV_INSTANCELIST, None)
2473 test = not isinstance(idata, list)
2474 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2475 "rpc call to node failed (instancelist): %s",
2476 utils.SafeEncode(str(idata)))
2478 nimg.hyp_fail = True
2480 nimg.instances = idata
2482 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2483 """Verifies and computes a node information map
2485 @type ninfo: L{objects.Node}
2486 @param ninfo: the node to check
2487 @param nresult: the remote results for the node
2488 @param nimg: the node image object
2489 @param vg_name: the configured VG name
2493 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2495 # try to read free memory (from the hypervisor)
2496 hv_info = nresult.get(constants.NV_HVINFO, None)
2497 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2498 _ErrorIf(test, constants.CV_ENODEHV, node,
2499 "rpc call to node failed (hvinfo)")
2502 nimg.mfree = int(hv_info["memory_free"])
2503 except (ValueError, TypeError):
2504 _ErrorIf(True, constants.CV_ENODERPC, node,
2505 "node returned invalid nodeinfo, check hypervisor")
2507 # FIXME: devise a free space model for file based instances as well
2508 if vg_name is not None:
2509 test = (constants.NV_VGLIST not in nresult or
2510 vg_name not in nresult[constants.NV_VGLIST])
2511 _ErrorIf(test, constants.CV_ENODELVM, node,
2512 "node didn't return data for the volume group '%s'"
2513 " - it is either missing or broken", vg_name)
2516 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2517 except (ValueError, TypeError):
2518 _ErrorIf(True, constants.CV_ENODERPC, node,
2519 "node returned invalid LVM info, check LVM status")
2521 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2522 """Gets per-disk status information for all instances.
2524 @type nodelist: list of strings
2525 @param nodelist: Node names
2526 @type node_image: dict of (name, L{objects.Node})
2527 @param node_image: Node objects
2528 @type instanceinfo: dict of (name, L{objects.Instance})
2529 @param instanceinfo: Instance objects
2530 @rtype: {instance: {node: [(success, payload)]}}
2531 @return: a dictionary of per-instance dictionaries with nodes as
2532 keys and disk information as values; the disk information is a
2533 list of tuples (success, payload)
2536 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2539 node_disks_devonly = {}
2540 diskless_instances = set()
2541 diskless = constants.DT_DISKLESS
2543 for nname in nodelist:
2544 node_instances = list(itertools.chain(node_image[nname].pinst,
2545 node_image[nname].sinst))
2546 diskless_instances.update(inst for inst in node_instances
2547 if instanceinfo[inst].disk_template == diskless)
2548 disks = [(inst, disk)
2549 for inst in node_instances
2550 for disk in instanceinfo[inst].disks]
2553 # No need to collect data
2556 node_disks[nname] = disks
2558 # Creating copies as SetDiskID below will modify the objects and that can
2559 # lead to incorrect data returned from nodes
2560 devonly = [dev.Copy() for (_, dev) in disks]
2563 self.cfg.SetDiskID(dev, nname)
2565 node_disks_devonly[nname] = devonly
2567 assert len(node_disks) == len(node_disks_devonly)
2569 # Collect data from all nodes with disks
2570 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2573 assert len(result) == len(node_disks)
2577 for (nname, nres) in result.items():
2578 disks = node_disks[nname]
2581 # No data from this node
2582 data = len(disks) * [(False, "node offline")]
2585 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2586 "while getting disk information: %s", msg)
2588 # No data from this node
2589 data = len(disks) * [(False, msg)]
2592 for idx, i in enumerate(nres.payload):
2593 if isinstance(i, (tuple, list)) and len(i) == 2:
2596 logging.warning("Invalid result from node %s, entry %d: %s", nname, idx, i)
2598 data.append((False, "Invalid result from the remote node"))
2600 for ((inst, _), status) in zip(disks, data):
2601 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2603 # Add empty entries for diskless instances.
2604 for inst in diskless_instances:
2605 assert inst not in instdisk
2608 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2609 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2610 compat.all(isinstance(s, (tuple, list)) and
2611 len(s) == 2 for s in statuses)
2612 for inst, nnames in instdisk.items()
2613 for nname, statuses in nnames.items())
2614 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2616 return instdisk
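# Illustrative example of the instdisk structure documented above (payloads
# are made up): instance -> node -> list of (success, payload) tuples, one
# entry per disk, with empty dicts for diskless instances.
_example_instdisk = {
  "instance1": {
    "node1": [(True, "degraded=False"), (True, "degraded=False")],
    "node2": [(False, "node offline"), (False, "node offline")],
  },
  "diskless-instance": {},
}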
2619 def _SshNodeSelector(group_uuid, all_nodes):
2620 """Create endless iterators for all potential SSH check hosts.
2623 nodes = [node for node in all_nodes
2624 if (node.group != group_uuid and
2626 keyfunc = operator.attrgetter("group")
2628 return map(itertools.cycle,
2629 [sorted(map(operator.attrgetter("name"), names))
2630 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2634 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2635 """Choose which nodes should talk to which other nodes.
2637 We will make nodes contact all nodes in their group, and one node from every other group.
2640 @warning: This algorithm has a known issue if one node group is much
2641 smaller than others (e.g. just one node). In such a case all other
2642 nodes will talk to the single node.
2645 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2646 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2648 return (online_nodes,
2649 dict((name, sorted([i.next() for i in sel]))
2650 for name in online_nodes))
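# Rough standalone sketch (assumed, simplified from the generator-based code
# above) of the selection idea: every online node checks all nodes of its own
# group plus one node from every other group, cycling through the members of
# the other groups so the extra checks are spread evenly.
import itertools

def _example_select_ssh_checks(own_group_nodes, other_groups):
  # other_groups: dict group uuid -> sorted list of node names in that group
  cyclers = dict((uuid, itertools.cycle(names))
                 for (uuid, names) in other_groups.items())
  return dict((name, sorted(next(c) for c in cyclers.values()))
              for name in sorted(own_group_nodes))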
2652 def BuildHooksEnv(self):
2655 Cluster-Verify hooks run only in the post phase; their failure is logged in
2656 the verify output and makes the verification fail.
2660 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2663 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2664 for node in self.my_node_info.values())
2668 def BuildHooksNodes(self):
2669 """Build hooks nodes.
2672 return ([], self.my_node_names)
2674 def Exec(self, feedback_fn):
2675 """Verify integrity of the node group, performing various test on nodes.
2678 # This method has too many local variables. pylint: disable=R0914
2679 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2681 if not self.my_node_names:
2683 feedback_fn("* Empty node group, skipping verification")
2687 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2688 verbose = self.op.verbose
2689 self._feedback_fn = feedback_fn
2691 vg_name = self.cfg.GetVGName()
2692 drbd_helper = self.cfg.GetDRBDHelper()
2693 cluster = self.cfg.GetClusterInfo()
2694 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2695 hypervisors = cluster.enabled_hypervisors
2696 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2698 i_non_redundant = [] # Non redundant instances
2699 i_non_a_balanced = [] # Non auto-balanced instances
2700 i_offline = 0 # Count of offline instances
2701 n_offline = 0 # Count of offline nodes
2702 n_drained = 0 # Count of nodes being drained
2703 node_vol_should = {}
2705 # FIXME: verify OS list
2708 filemap = _ComputeAncillaryFiles(cluster, False)
2710 # do local checksums
2711 master_node = self.master_node = self.cfg.GetMasterNode()
2712 master_ip = self.cfg.GetMasterIP()
2714 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2717 if self.cfg.GetUseExternalMipScript():
2718 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2720 node_verify_param = {
2721 constants.NV_FILELIST:
2722 utils.UniqueSequence(filename
2723 for files in filemap
2724 for filename in files),
2725 constants.NV_NODELIST:
2726 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2727 self.all_node_info.values()),
2728 constants.NV_HYPERVISOR: hypervisors,
2729 constants.NV_HVPARAMS:
2730 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2731 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2732 for node in node_data_list
2733 if not node.offline],
2734 constants.NV_INSTANCELIST: hypervisors,
2735 constants.NV_VERSION: None,
2736 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2737 constants.NV_NODESETUP: None,
2738 constants.NV_TIME: None,
2739 constants.NV_MASTERIP: (master_node, master_ip),
2740 constants.NV_OSLIST: None,
2741 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2742 constants.NV_USERSCRIPTS: user_scripts,
2745 if vg_name is not None:
2746 node_verify_param[constants.NV_VGLIST] = None
2747 node_verify_param[constants.NV_LVLIST] = vg_name
2748 node_verify_param[constants.NV_PVLIST] = [vg_name]
2749 node_verify_param[constants.NV_DRBDLIST] = None
2752 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2755 # FIXME: this needs to be changed per node-group, not cluster-wide
2757 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2758 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2759 bridges.add(default_nicpp[constants.NIC_LINK])
2760 for instance in self.my_inst_info.values():
2761 for nic in instance.nics:
2762 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2763 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2764 bridges.add(full_nic[constants.NIC_LINK])
2767 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2769 # Build our expected cluster state
2770 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2772 vm_capable=node.vm_capable))
2773 for node in node_data_list)
2777 for node in self.all_node_info.values():
2778 path = _SupportsOob(self.cfg, node)
2779 if path and path not in oob_paths:
2780 oob_paths.append(path)
2783 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2785 for instance in self.my_inst_names:
2786 inst_config = self.my_inst_info[instance]
2788 for nname in inst_config.all_nodes:
2789 if nname not in node_image:
2790 gnode = self.NodeImage(name=nname)
2791 gnode.ghost = (nname not in self.all_node_info)
2792 node_image[nname] = gnode
2794 inst_config.MapLVsByNode(node_vol_should)
2796 pnode = inst_config.primary_node
2797 node_image[pnode].pinst.append(instance)
2799 for snode in inst_config.secondary_nodes:
2800 nimg = node_image[snode]
2801 nimg.sinst.append(instance)
2802 if pnode not in nimg.sbp:
2803 nimg.sbp[pnode] = []
2804 nimg.sbp[pnode].append(instance)
2806 # At this point, we have the in-memory data structures complete,
2807 # except for the runtime information, which we'll gather next
2809 # Due to the way our RPC system works, exact response times cannot be
2810 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2811 # time before and after executing the request, we can at least have a time window.
2813 nvinfo_starttime = time.time()
2814 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2816 self.cfg.GetClusterName())
2817 nvinfo_endtime = time.time()
2819 if self.extra_lv_nodes and vg_name is not None:
2821 self.rpc.call_node_verify(self.extra_lv_nodes,
2822 {constants.NV_LVLIST: vg_name},
2823 self.cfg.GetClusterName())
2825 extra_lv_nvinfo = {}
2827 all_drbd_map = self.cfg.ComputeDRBDMap()
2829 feedback_fn("* Gathering disk information (%s nodes)" %
2830 len(self.my_node_names))
2831 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2834 feedback_fn("* Verifying configuration file consistency")
2836 # If not all nodes are being checked, we need to make sure the master node
2837 # and a non-checked vm_capable node are in the list.
2838 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2840 vf_nvinfo = all_nvinfo.copy()
2841 vf_node_info = list(self.my_node_info.values())
2842 additional_nodes = []
2843 if master_node not in self.my_node_info:
2844 additional_nodes.append(master_node)
2845 vf_node_info.append(self.all_node_info[master_node])
2846 # Add the first vm_capable node we find which is not included
2847 for node in absent_nodes:
2848 nodeinfo = self.all_node_info[node]
2849 if nodeinfo.vm_capable and not nodeinfo.offline:
2850 additional_nodes.append(node)
2851 vf_node_info.append(self.all_node_info[node])
2853 key = constants.NV_FILELIST
2854 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2855 {key: node_verify_param[key]},
2856 self.cfg.GetClusterName()))
2858 vf_nvinfo = all_nvinfo
2859 vf_node_info = self.my_node_info.values()
2861 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2863 feedback_fn("* Verifying node status")
2867 for node_i in node_data_list:
2868 node = node_i.name
2869 nimg = node_image[node]
2873 feedback_fn("* Skipping offline node %s" % (node,))
2877 if node == master_node:
2878 ntype = "master"
2879 elif node_i.master_candidate:
2880 ntype = "master candidate"
2881 elif node_i.drained:
2887 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2889 msg = all_nvinfo[node].fail_msg
2890 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s", msg)
2893 nimg.rpc_fail = True
2896 nresult = all_nvinfo[node].payload
2898 nimg.call_ok = self._VerifyNode(node_i, nresult)
2899 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2900 self._VerifyNodeNetwork(node_i, nresult)
2901 self._VerifyNodeUserScripts(node_i, nresult)
2902 self._VerifyOob(node_i, nresult)
2905 self._VerifyNodeLVM(node_i, nresult, vg_name)
2906 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2909 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2910 self._UpdateNodeInstances(node_i, nresult, nimg)
2911 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2912 self._UpdateNodeOS(node_i, nresult, nimg)
2914 if not nimg.os_fail:
2915 if refos_img is None:
2917 self._VerifyNodeOS(node_i, nimg, refos_img)
2918 self._VerifyNodeBridges(node_i, nresult, bridges)
2920 # Check whether all running instances are primary for the node. (This
2921 # can no longer be done from _VerifyInstance below, since some of the
2922 # wrong instances could be from other node groups.)
2923 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2925 for inst in non_primary_inst:
2926 # FIXME: investigate best way to handle offline insts
2927 if inst.admin_state == constants.ADMINST_OFFLINE:
2929 feedback_fn("* Skipping offline instance %s" % inst.name)
2932 test = inst in self.all_inst_info
2933 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2934 "instance should not run on node %s", node_i.name)
2935 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2936 "node is running unknown instance %s", inst)
2938 for node, result in extra_lv_nvinfo.items():
2939 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2940 node_image[node], vg_name)
2942 feedback_fn("* Verifying instance status")
2943 for instance in self.my_inst_names:
2945 feedback_fn("* Verifying instance %s" % instance)
2946 inst_config = self.my_inst_info[instance]
2947 self._VerifyInstance(instance, inst_config, node_image,
2949 inst_nodes_offline = []
2951 pnode = inst_config.primary_node
2952 pnode_img = node_image[pnode]
2953 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2954 constants.CV_ENODERPC, pnode, "instance %s, connection to"
2955 " primary node failed", instance)
2957 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2959 constants.CV_EINSTANCEBADNODE, instance,
2960 "instance is marked as running and lives on offline node %s",
2961 inst_config.primary_node)
2963 # If the instance is non-redundant we cannot survive losing its primary
2964 # node, so we are not N+1 compliant. On the other hand we have no disk
2965 # templates with more than one secondary so that situation is not well supported either.
2967 # FIXME: does not support file-backed instances
2968 if not inst_config.secondary_nodes:
2969 i_non_redundant.append(instance)
2971 _ErrorIf(len(inst_config.secondary_nodes) > 1,
2972 constants.CV_EINSTANCELAYOUT,
2973 instance, "instance has multiple secondary nodes: %s",
2974 utils.CommaJoin(inst_config.secondary_nodes),
2975 code=self.ETYPE_WARNING)
2977 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2978 pnode = inst_config.primary_node
2979 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2980 instance_groups = {}
2982 for node in instance_nodes:
2983 instance_groups.setdefault(self.all_node_info[node].group,
2987 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2988 # Sort so that we always list the primary node first.
2989 for group, nodes in sorted(instance_groups.items(),
2990 key=lambda (_, nodes): pnode in nodes,
2993 self._ErrorIf(len(instance_groups) > 1,
2994 constants.CV_EINSTANCESPLITGROUPS,
2995 instance, "instance has primary and secondary nodes in"
2996 " different groups: %s", utils.CommaJoin(pretty_list),
2997 code=self.ETYPE_WARNING)
2999 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3000 i_non_a_balanced.append(instance)
3002 for snode in inst_config.secondary_nodes:
3003 s_img = node_image[snode]
3004 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3005 snode, "instance %s, connection to secondary node failed", instance)
3009 inst_nodes_offline.append(snode)
3011 # warn that the instance lives on offline nodes
3012 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3013 "instance has offline secondary node(s) %s",
3014 utils.CommaJoin(inst_nodes_offline))
3015 # ... or ghost/non-vm_capable nodes
3016 for node in inst_config.all_nodes:
3017 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3018 instance, "instance lives on ghost node %s", node)
3019 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3020 instance, "instance lives on non-vm_capable node %s", node)
3022 feedback_fn("* Verifying orphan volumes")
3023 reserved = utils.FieldSet(*cluster.reserved_lvs)
3025 # We will get spurious "unknown volume" warnings if any node of this group
3026 # is secondary for an instance whose primary is in another group. To avoid
3027 # them, we find these instances and add their volumes to node_vol_should.
3028 for inst in self.all_inst_info.values():
3029 for secondary in inst.secondary_nodes:
3030 if (secondary in self.my_node_info
3031 and inst.name not in self.my_inst_info):
3032 inst.MapLVsByNode(node_vol_should)
3035 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3037 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3038 feedback_fn("* Verifying N+1 Memory redundancy")
3039 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3041 feedback_fn("* Other Notes")
3043 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3044 % len(i_non_redundant))
3046 if i_non_a_balanced:
3047 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3048 % len(i_non_a_balanced))
3051 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3054 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3057 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3061 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3062 """Analyze the post-hooks' result
3064 This method analyses the hook result, handles it, and sends some
3065 nicely-formatted feedback back to the user.
3067 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3068 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3069 @param hooks_results: the results of the multi-node hooks rpc call
3070 @param feedback_fn: function used to send feedback back to the caller
3071 @param lu_result: previous Exec result
3072 @return: the new Exec result, based on the previous result
3076 # We only really run POST phase hooks, only for non-empty groups,
3077 # and are only interested in their results
3078 if not self.my_node_names:
3081 elif phase == constants.HOOKS_PHASE_POST:
3082 # Used to change hooks' output to proper indentation
3083 feedback_fn("* Hooks Results")
3084 assert hooks_results, "invalid result from hooks"
3086 for node_name in hooks_results:
3087 res = hooks_results[node_name]
3089 test = msg and not res.offline
3090 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3091 "Communication failure in hooks execution: %s", msg)
3092 if res.offline or msg:
3093 # No need to investigate payload if node is offline or gave an error.
3096 for script, hkr, output in res.payload:
3097 test = hkr == constants.HKR_FAIL
3098 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3099 "Script %s failed, output:", script)
3101 output = self._HOOKS_INDENT_RE.sub(" ", output)
3102 feedback_fn("%s" % output)
3108 class LUClusterVerifyDisks(NoHooksLU):
3109 """Verifies the cluster disks status.
3114 def ExpandNames(self):
3115 self.share_locks = _ShareAll()
3116 self.needed_locks = {
3117 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3120 def Exec(self, feedback_fn):
3121 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3123 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3124 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3125 for group in group_names])
3128 class LUGroupVerifyDisks(NoHooksLU):
3129 """Verifies the status of all disks in a node group.
3134 def ExpandNames(self):
3135 # Raises errors.OpPrereqError on its own if group can't be found
3136 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3138 self.share_locks = _ShareAll()
3139 self.needed_locks = {
3140 locking.LEVEL_INSTANCE: [],
3141 locking.LEVEL_NODEGROUP: [],
3142 locking.LEVEL_NODE: [],
3145 def DeclareLocks(self, level):
3146 if level == locking.LEVEL_INSTANCE:
3147 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3149 # Lock instances optimistically, needs verification once node and group
3150 # locks have been acquired
3151 self.needed_locks[locking.LEVEL_INSTANCE] = \
3152 self.cfg.GetNodeGroupInstances(self.group_uuid)
3154 elif level == locking.LEVEL_NODEGROUP:
3155 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3157 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3158 set([self.group_uuid] +
3159 # Lock all groups used by instances optimistically; this requires
3160 # going via the node before it's locked, requiring verification later on
3163 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3164 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3166 elif level == locking.LEVEL_NODE:
3167 # This will only lock the nodes in the group to be verified which contain actual instances
3169 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3170 self._LockInstancesNodes()
3172 # Lock all nodes in group to be verified
3173 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3174 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3175 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3177 def CheckPrereq(self):
3178 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3179 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3180 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3182 assert self.group_uuid in owned_groups
3184 # Check if locked instances are still correct
3185 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3187 # Get instance information
3188 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3190 # Check if node groups for locked instances are still correct
3191 for (instance_name, inst) in self.instances.items():
3192 assert owned_nodes.issuperset(inst.all_nodes), \
3193 "Instance %s's nodes changed while we kept the lock" % instance_name
3195 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3198 assert self.group_uuid in inst_groups, \
3199 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3201 def Exec(self, feedback_fn):
3202 """Verify integrity of cluster disks.
3204 @rtype: tuple of three items
3205 @return: a tuple of (dict of node-to-node_error, list of instances
3206 which need activate-disks, dict of instance: (node, volume) for
3211 res_instances = set()
3214 nv_dict = _MapInstanceDisksToNodes([inst
3215 for inst in self.instances.values()
3216 if inst.admin_state == constants.ADMINST_UP])
3219 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3220 set(self.cfg.GetVmCapableNodeList()))
3222 node_lvs = self.rpc.call_lv_list(nodes, [])
3224 for (node, node_res) in node_lvs.items():
3225 if node_res.offline:
3228 msg = node_res.fail_msg
3230 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3231 res_nodes[node] = msg
3234 for lv_name, (_, _, lv_online) in node_res.payload.items():
3235 inst = nv_dict.pop((node, lv_name), None)
3236 if not (lv_online or inst is None):
3237 res_instances.add(inst)
3239 # any leftover items in nv_dict are missing LVs, let's arrange the data better
3241 for key, inst in nv_dict.iteritems():
3242 res_missing.setdefault(inst, []).append(list(key))
3244 return (res_nodes, list(res_instances), res_missing)
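# Illustrative example of the return value documented above (all values made
# up): per-node error messages, instances that need activate-disks, and
# missing LVs as instance -> list of [node, volume] pairs.
_example_verify_disks_result = (
  {"node3": "Error while enumerating LVs: connection refused"},
  ["instance2"],
  {"instance5": [["node1", "xenvg/disk0_data"]]},
)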
3247 class LUClusterRepairDiskSizes(NoHooksLU):
3248 """Verifies the cluster disks sizes.
3253 def ExpandNames(self):
3254 if self.op.instances:
3255 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3256 self.needed_locks = {
3257 locking.LEVEL_NODE_RES: [],
3258 locking.LEVEL_INSTANCE: self.wanted_names,
3260 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3262 self.wanted_names = None
3263 self.needed_locks = {
3264 locking.LEVEL_NODE_RES: locking.ALL_SET,
3265 locking.LEVEL_INSTANCE: locking.ALL_SET,
3267 self.share_locks = {
3268 locking.LEVEL_NODE_RES: 1,
3269 locking.LEVEL_INSTANCE: 0,
3272 def DeclareLocks(self, level):
3273 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3274 self._LockInstancesNodes(primary_only=True, level=level)
3276 def CheckPrereq(self):
3277 """Check prerequisites.
3279 This only checks the optional instance list against the existing names.
3282 if self.wanted_names is None:
3283 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3285 self.wanted_instances = \
3286 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3288 def _EnsureChildSizes(self, disk):
3289 """Ensure children of the disk have the needed disk size.
3291 This is valid mainly for DRBD8 and fixes an issue where the
3292 children have smaller disk size.
3294 @param disk: an L{ganeti.objects.Disk} object
3297 if disk.dev_type == constants.LD_DRBD8:
3298 assert disk.children, "Empty children for DRBD8?"
3299 fchild = disk.children[0]
3300 mismatch = fchild.size < disk.size
3302 self.LogInfo("Child disk has size %d, parent %d, fixing",
3303 fchild.size, disk.size)
3304 fchild.size = disk.size
3306 # and we recurse on this child only, not on the metadev
3307 return self._EnsureChildSizes(fchild) or mismatch
3308 else:
3309 return False
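# Hypothetical, simplified illustration of the recursion above: walking from a
# DRBD8 parent down through its first (data) children, any child smaller than
# its parent is grown to the parent's size.
def _example_ensure_child_sizes(sizes):
  # sizes: mutable list [parent, first child, grandchild, ...]; returns True
  # if any child had to be grown to match its parent
  changed = False
  for idx in range(1, len(sizes)):
    if sizes[idx] < sizes[idx - 1]:
      sizes[idx] = sizes[idx - 1]
      changed = True
  return changed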
3311 def Exec(self, feedback_fn):
3312 """Verify the size of cluster disks.
3315 # TODO: check child disks too
3316 # TODO: check differences in size between primary/secondary nodes
3318 for instance in self.wanted_instances:
3319 pnode = instance.primary_node
3320 if pnode not in per_node_disks:
3321 per_node_disks[pnode] = []
3322 for idx, disk in enumerate(instance.disks):
3323 per_node_disks[pnode].append((instance, idx, disk))
3325 assert not (frozenset(per_node_disks.keys()) -
3326 self.owned_locks(locking.LEVEL_NODE_RES)), \
3327 "Not owning correct locks"
3328 assert not self.owned_locks(locking.LEVEL_NODE)
3331 for node, dskl in per_node_disks.items():
3332 newl = [v[2].Copy() for v in dskl]
3334 self.cfg.SetDiskID(dsk, node)
3335 result = self.rpc.call_blockdev_getsize(node, newl)
3337 self.LogWarning("Failure in blockdev_getsize call to node"
3338 " %s, ignoring", node)
3340 if len(result.payload) != len(dskl):
3341 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3342 " result.payload=%s", node, len(dskl), result.payload)
3343 self.LogWarning("Invalid result from node %s, ignoring node results",
3346 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3348 self.LogWarning("Disk %d of instance %s did not return size"
3349 " information, ignoring", idx, instance.name)
3351 if not isinstance(size, (int, long)):
3352 self.LogWarning("Disk %d of instance %s did not return valid"
3353 " size information, ignoring", idx, instance.name)
3356 if size != disk.size:
3357 self.LogInfo("Disk %d of instance %s has mismatched size,"
3358 " correcting: recorded %d, actual %d", idx,
3359 instance.name, disk.size, size)
3361 self.cfg.Update(instance, feedback_fn)
3362 changed.append((instance.name, idx, size))
3363 if self._EnsureChildSizes(disk):
3364 self.cfg.Update(instance, feedback_fn)
3365 changed.append((instance.name, idx, disk.size))
3369 class LUClusterRename(LogicalUnit):
3370 """Rename the cluster.
3373 HPATH = "cluster-rename"
3374 HTYPE = constants.HTYPE_CLUSTER
3376 def BuildHooksEnv(self):
3381 "OP_TARGET": self.cfg.GetClusterName(),
3382 "NEW_NAME": self.op.name,
3385 def BuildHooksNodes(self):
3386 """Build hooks nodes.
3389 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3391 def CheckPrereq(self):
3392 """Verify that the passed name is a valid one.
3395 hostname = netutils.GetHostname(name=self.op.name,
3396 family=self.cfg.GetPrimaryIPFamily())
3398 new_name = hostname.name
3399 self.ip = new_ip = hostname.ip
3400 old_name = self.cfg.GetClusterName()
3401 old_ip = self.cfg.GetMasterIP()
3402 if new_name == old_name and new_ip == old_ip:
3403 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3404 " cluster has changed",
3406 if new_ip != old_ip:
3407 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3408 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3409 " reachable on the network" %
3410 new_ip, errors.ECODE_NOTUNIQUE)
3412 self.op.name = new_name
3414 def Exec(self, feedback_fn):
3415 """Rename the cluster.
3418 clustername = self.op.name
3421 # shutdown the master IP
3422 master_params = self.cfg.GetMasterNetworkParameters()
3423 ems = self.cfg.GetUseExternalMipScript()
3424 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3426 result.Raise("Could not disable the master role")
3429 cluster = self.cfg.GetClusterInfo()
3430 cluster.cluster_name = clustername
3431 cluster.master_ip = new_ip
3432 self.cfg.Update(cluster, feedback_fn)
3434 # update the known hosts file
3435 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3436 node_list = self.cfg.GetOnlineNodeList()
3438 node_list.remove(master_params.name)
3441 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3443 master_params.ip = new_ip
3444 result = self.rpc.call_node_activate_master_ip(master_params.name,
3446 msg = result.fail_msg
3448 self.LogWarning("Could not re-enable the master role on"
3449 " the master, please restart manually: %s", msg)
3454 def _ValidateNetmask(cfg, netmask):
3455 """Checks if a netmask is valid.
3457 @type cfg: L{config.ConfigWriter}
3458 @param cfg: The cluster configuration
3460 @param netmask: the netmask to be verified
3461 @raise errors.OpPrereqError: if the validation fails
3464 ip_family = cfg.GetPrimaryIPFamily()
3466 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3467 except errors.ProgrammerError:
3468 raise errors.OpPrereqError("Invalid primary ip family: %s." % ip_family, errors.ECODE_INVAL)
3470 if not ipcls.ValidateNetmask(netmask):
3471 raise errors.OpPrereqError("CIDR netmask (%s) not valid" % netmask, errors.ECODE_INVAL)
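# Hedged, standalone sketch of the idea behind the check above (the real code
# delegates to netutils.IPAddress.ValidateNetmask for the cluster's primary IP
# family): a CIDR prefix length is only valid if it fits the address family.
def _example_valid_prefix_len(prefix, ipv6=False):
  max_len = 128 if ipv6 else 32
  return isinstance(prefix, int) and 0 < prefix <= max_len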
3475 class LUClusterSetParams(LogicalUnit):
3476 """Change the parameters of the cluster.
3479 HPATH = "cluster-modify"
3480 HTYPE = constants.HTYPE_CLUSTER
3483 def CheckArguments(self):
3487 if self.op.uid_pool:
3488 uidpool.CheckUidPool(self.op.uid_pool)
3490 if self.op.add_uids:
3491 uidpool.CheckUidPool(self.op.add_uids)
3493 if self.op.remove_uids:
3494 uidpool.CheckUidPool(self.op.remove_uids)
3496 if self.op.master_netmask is not None:
3497 _ValidateNetmask(self.cfg, self.op.master_netmask)
3499 if self.op.diskparams:
3500 for dt_params in self.op.diskparams.values():
3501 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3503 def ExpandNames(self):
3504 # FIXME: in the future, modifying some other cluster params may not require
3505 # checking on all nodes.
3506 self.needed_locks = {
3507 locking.LEVEL_NODE: locking.ALL_SET,
3509 self.share_locks[locking.LEVEL_NODE] = 1
3511 def BuildHooksEnv(self):
3516 "OP_TARGET": self.cfg.GetClusterName(),
3517 "NEW_VG_NAME": self.op.vg_name,
3520 def BuildHooksNodes(self):
3521 """Build hooks nodes.
3524 mn = self.cfg.GetMasterNode()
3527 def CheckPrereq(self):
3528 """Check prerequisites.
3530 This checks whether the given params don't conflict and
3531 if the given volume group is valid.
3534 if self.op.vg_name is not None and not self.op.vg_name:
3535 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3536 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3537 " instances exist", errors.ECODE_INVAL)
3539 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3540 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3541 raise errors.OpPrereqError("Cannot disable drbd helper while"
3542 " drbd-based instances exist",
3545 node_list = self.owned_locks(locking.LEVEL_NODE)
3547 # if vg_name not None, checks given volume group on all nodes
3549 vglist = self.rpc.call_vg_list(node_list)
3550 for node in node_list:
3551 msg = vglist[node].fail_msg
3553 # ignoring down node
3554 self.LogWarning("Error while gathering data on node %s"
3555 " (ignoring node): %s", node, msg)
3557 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3559 constants.MIN_VG_SIZE)
3561 raise errors.OpPrereqError("Error on node '%s': %s" %
3562 (node, vgstatus), errors.ECODE_ENVIRON)
3564 if self.op.drbd_helper:
3565 # checks given drbd helper on all nodes
3566 helpers = self.rpc.call_drbd_helper(node_list)
3567 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3569 self.LogInfo("Not checking drbd helper on offline node %s", node)
3571 msg = helpers[node].fail_msg
3573 raise errors.OpPrereqError("Error checking drbd helper on node"
3574 " '%s': %s" % (node, msg),
3575 errors.ECODE_ENVIRON)
3576 node_helper = helpers[node].payload
3577 if node_helper != self.op.drbd_helper:
3578 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3579 (node, node_helper), errors.ECODE_ENVIRON)
3581 self.cluster = cluster = self.cfg.GetClusterInfo()
3582 # validate params changes
3583 if self.op.beparams:
3584 objects.UpgradeBeParams(self.op.beparams)
3585 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3586 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3588 if self.op.ndparams:
3589 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3590 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3592 # TODO: we need a more general way to handle resetting
3593 # cluster-level parameters to default values
3594 if self.new_ndparams["oob_program"] == "":
3595 self.new_ndparams["oob_program"] = \
3596 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3598 if self.op.nicparams:
3599 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3600 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3601 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3604 # check all instances for consistency
3605 for instance in self.cfg.GetAllInstancesInfo().values():
3606 for nic_idx, nic in enumerate(instance.nics):
3607 params_copy = copy.deepcopy(nic.nicparams)
3608 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3610 # check parameter syntax
3612 objects.NIC.CheckParameterSyntax(params_filled)
3613 except errors.ConfigurationError, err:
3614 nic_errors.append("Instance %s, nic/%d: %s" %
3615 (instance.name, nic_idx, err))
3617 # if we're moving instances to routed, check that they have an ip
3618 target_mode = params_filled[constants.NIC_MODE]
3619 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3620 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3621 " address" % (instance.name, nic_idx))
3623 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3624 "\n".join(nic_errors))
3626 # hypervisor list/parameters
3627 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3628 if self.op.hvparams:
3629 for hv_name, hv_dict in self.op.hvparams.items():
3630 if hv_name not in self.new_hvparams:
3631 self.new_hvparams[hv_name] = hv_dict
3633 self.new_hvparams[hv_name].update(hv_dict)
3635 # disk template parameters
3636 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3637 if self.op.diskparams:
3638 for dt_name, dt_params in self.op.diskparams.items():
3639 if dt_name not in self.op.diskparams:
3640 self.new_diskparams[dt_name] = dt_params
3642 self.new_diskparams[dt_name].update(dt_params)
3644 # os hypervisor parameters
3645 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3647 for os_name, hvs in self.op.os_hvp.items():
3648 if os_name not in self.new_os_hvp:
3649 self.new_os_hvp[os_name] = hvs
3651 for hv_name, hv_dict in hvs.items():
3652 if hv_name not in self.new_os_hvp[os_name]:
3653 self.new_os_hvp[os_name][hv_name] = hv_dict
3655 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3658 self.new_osp = objects.FillDict(cluster.osparams, {})
3659 if self.op.osparams:
3660 for os_name, osp in self.op.osparams.items():
3661 if os_name not in self.new_osp:
3662 self.new_osp[os_name] = {}
3664 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3667 if not self.new_osp[os_name]:
3668 # we removed all parameters
3669 del self.new_osp[os_name]
3671 # check the parameter validity (remote check)
3672 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3673 os_name, self.new_osp[os_name])
3675 # changes to the hypervisor list
3676 if self.op.enabled_hypervisors is not None:
3677 self.hv_list = self.op.enabled_hypervisors
3678 for hv in self.hv_list:
3679 # if the hypervisor doesn't already exist in the cluster
3680 # hvparams, we initialize it to empty, and then (in both
3681 # cases) we make sure to fill the defaults, as we might not
3682 # have a complete defaults list if the hypervisor wasn't enabled before
3684 if hv not in new_hvp:
3686 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3687 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3689 self.hv_list = cluster.enabled_hypervisors
3691 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3692 # either the enabled list has changed, or the parameters have, validate
3693 for hv_name, hv_params in self.new_hvparams.items():
3694 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3695 (self.op.enabled_hypervisors and
3696 hv_name in self.op.enabled_hypervisors)):
3697 # either this is a new hypervisor, or its parameters have changed
3698 hv_class = hypervisor.GetHypervisor(hv_name)
3699 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3700 hv_class.CheckParameterSyntax(hv_params)
3701 _CheckHVParams(self, node_list, hv_name, hv_params)
3704 # no need to check any newly-enabled hypervisors, since the
3705 # defaults have already been checked in the above code-block
3706 for os_name, os_hvp in self.new_os_hvp.items():
3707 for hv_name, hv_params in os_hvp.items():
3708 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3709 # we need to fill in the new os_hvp on top of the actual hv_p
3710 cluster_defaults = self.new_hvparams.get(hv_name, {})
3711 new_osp = objects.FillDict(cluster_defaults, hv_params)
3712 hv_class = hypervisor.GetHypervisor(hv_name)
3713 hv_class.CheckParameterSyntax(new_osp)
3714 _CheckHVParams(self, node_list, hv_name, new_osp)
3716 if self.op.default_iallocator:
3717 alloc_script = utils.FindFile(self.op.default_iallocator,
3718 constants.IALLOCATOR_SEARCH_PATH,
3720 if alloc_script is None:
3721 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3722 " specified" % self.op.default_iallocator,
3725 def Exec(self, feedback_fn):
3726 """Change the parameters of the cluster.
3729 if self.op.vg_name is not None:
3730 new_volume = self.op.vg_name
3733 if new_volume != self.cfg.GetVGName():
3734 self.cfg.SetVGName(new_volume)
3736 feedback_fn("Cluster LVM configuration already in desired"
3737 " state, not changing")
3738 if self.op.drbd_helper is not None:
3739 new_helper = self.op.drbd_helper
3742 if new_helper != self.cfg.GetDRBDHelper():
3743 self.cfg.SetDRBDHelper(new_helper)
3745 feedback_fn("Cluster DRBD helper already in desired state,"
3747 if self.op.hvparams:
3748 self.cluster.hvparams = self.new_hvparams
3750 self.cluster.os_hvp = self.new_os_hvp
3751 if self.op.enabled_hypervisors is not None:
3752 self.cluster.hvparams = self.new_hvparams
3753 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3754 if self.op.beparams:
3755 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3756 if self.op.nicparams:
3757 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3758 if self.op.osparams:
3759 self.cluster.osparams = self.new_osp
3760 if self.op.ndparams:
3761 self.cluster.ndparams = self.new_ndparams
3762 if self.op.diskparams:
3763 self.cluster.diskparams = self.new_diskparams
3765 if self.op.candidate_pool_size is not None:
3766 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3767 # we need to update the pool size here, otherwise the save will fail
3768 _AdjustCandidatePool(self, [])
3770 if self.op.maintain_node_health is not None:
3771 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
3772 feedback_fn("Note: CONFD was disabled at build time, node health"
3773 " maintenance is not useful (still enabling it)")
3774 self.cluster.maintain_node_health = self.op.maintain_node_health
3776 if self.op.prealloc_wipe_disks is not None:
3777 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3779 if self.op.add_uids is not None:
3780 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3782 if self.op.remove_uids is not None:
3783 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3785 if self.op.uid_pool is not None:
3786 self.cluster.uid_pool = self.op.uid_pool
3788 if self.op.default_iallocator is not None:
3789 self.cluster.default_iallocator = self.op.default_iallocator
3791 if self.op.reserved_lvs is not None:
3792 self.cluster.reserved_lvs = self.op.reserved_lvs
3794 if self.op.use_external_mip_script is not None:
3795 self.cluster.use_external_mip_script = self.op.use_external_mip_script
3797 def helper_os(aname, mods, desc):
3799 lst = getattr(self.cluster, aname)
3800 for key, val in mods:
3801 if key == constants.DDM_ADD:
3803 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3806 elif key == constants.DDM_REMOVE:
3810 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3812 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3814 if self.op.hidden_os:
3815 helper_os("hidden_os", self.op.hidden_os, "hidden")
3817 if self.op.blacklisted_os:
3818 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3820 if self.op.master_netdev:
3821 master_params = self.cfg.GetMasterNetworkParameters()
3822 ems = self.cfg.GetUseExternalMipScript()
3823 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3824 self.cluster.master_netdev)
3825 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3827 result.Raise("Could not disable the master ip")
3828 feedback_fn("Changing master_netdev from %s to %s" %
3829 (master_params.netdev, self.op.master_netdev))
3830 self.cluster.master_netdev = self.op.master_netdev
3832 if self.op.master_netmask:
3833 master_params = self.cfg.GetMasterNetworkParameters()
3834 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3835 result = self.rpc.call_node_change_master_netmask(master_params.name,
3836 master_params.netmask,
3837 self.op.master_netmask,
3839 master_params.netdev)
3841 msg = "Could not change the master IP netmask: %s" % result.fail_msg
3844 self.cluster.master_netmask = self.op.master_netmask
3846 self.cfg.Update(self.cluster, feedback_fn)
3848 if self.op.master_netdev:
3849 master_params = self.cfg.GetMasterNetworkParameters()
3850 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3851 self.op.master_netdev)
3852 ems = self.cfg.GetUseExternalMipScript()
3853 result = self.rpc.call_node_activate_master_ip(master_params.name,
3856 self.LogWarning("Could not re-enable the master ip on"
3857 " the master, please restart manually: %s",
3861 def _UploadHelper(lu, nodes, fname):
3862 """Helper for uploading a file and showing warnings.
3865 if os.path.exists(fname):
3866 result = lu.rpc.call_upload_file(nodes, fname)
3867 for to_node, to_result in result.items():
3868 msg = to_result.fail_msg
3870 msg = ("Copy of file %s to node %s failed: %s" %
3871 (fname, to_node, msg))
3872 lu.proc.LogWarning(msg)
3875 def _ComputeAncillaryFiles(cluster, redist):
3876 """Compute files external to Ganeti which need to be consistent.
3878 @type redist: boolean
3879 @param redist: Whether to include files which need to be redistributed
3882 # Compute files for all nodes
3884 constants.SSH_KNOWN_HOSTS_FILE,
3885 constants.CONFD_HMAC_KEY,
3886 constants.CLUSTER_DOMAIN_SECRET_FILE,
3887 constants.SPICE_CERT_FILE,
3888 constants.SPICE_CACERT_FILE,
3889 constants.RAPI_USERS_FILE,
3893 files_all.update(constants.ALL_CERT_FILES)
3894 files_all.update(ssconf.SimpleStore().GetFileList())
3896 # we need to ship at least the RAPI certificate
3897 files_all.add(constants.RAPI_CERT_FILE)
3899 if cluster.modify_etc_hosts:
3900 files_all.add(constants.ETC_HOSTS)
3902 # Files which are optional, these must:
3903 # - be present in one other category as well
3904 # - either exist or not exist on all nodes of that category (mc, vm all)
3906 constants.RAPI_USERS_FILE,
3909 # Files which should only be on master candidates
3913 files_mc.add(constants.CLUSTER_CONF_FILE)
3915   # FIXME: this should also be replicated but Ganeti doesn't support files_mc
3916   # replication
3917 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
3919 # Files which should only be on VM-capable nodes
3920 files_vm = set(filename
3921 for hv_name in cluster.enabled_hypervisors
3922 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3924 files_opt |= set(filename
3925 for hv_name in cluster.enabled_hypervisors
3926 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3928 # Filenames in each category must be unique
3929 all_files_set = files_all | files_mc | files_vm
3930 assert (len(all_files_set) ==
3931 sum(map(len, [files_all, files_mc, files_vm]))), \
3932 "Found file listed in more than one file list"
3934 # Optional files must be present in one other category
3935 assert all_files_set.issuperset(files_opt), \
3936 "Optional file not in a different required list"
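  # A rough picture of the tuple returned below (illustrative, not
  # exhaustive): files_all is replicated to every node (known_hosts, the
  # confd HMAC key, the various certificates), files_opt lists entries that
  # may legitimately be missing on some nodes (e.g. the RAPI users file),
  # files_mc is meant for master candidates only (the cluster configuration
  # file) and files_vm collects the per-hypervisor ancillary files.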
3938 return (files_all, files_opt, files_mc, files_vm)
3941 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3942 """Distribute additional files which are part of the cluster configuration.
3944 ConfigWriter takes care of distributing the config and ssconf files, but
3945 there are more files which should be distributed to all nodes. This function
3946 makes sure those are copied.
3948 @param lu: calling logical unit
3949 @param additional_nodes: list of nodes not in the config to distribute to
3950 @type additional_vm: boolean
3951 @param additional_vm: whether the additional nodes are vm-capable or not
3954 # Gather target nodes
3955 cluster = lu.cfg.GetClusterInfo()
3956 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3958 online_nodes = lu.cfg.GetOnlineNodeList()
3959 vm_nodes = lu.cfg.GetVmCapableNodeList()
3961 if additional_nodes is not None:
3962 online_nodes.extend(additional_nodes)
3964 vm_nodes.extend(additional_nodes)
3966 # Never distribute to master node
3967 for nodelist in [online_nodes, vm_nodes]:
3968 if master_info.name in nodelist:
3969 nodelist.remove(master_info.name)
3972 (files_all, _, files_mc, files_vm) = \
3973 _ComputeAncillaryFiles(cluster, True)
3975 # Never re-distribute configuration file from here
3976 assert not (constants.CLUSTER_CONF_FILE in files_all or
3977 constants.CLUSTER_CONF_FILE in files_vm)
3978 assert not files_mc, "Master candidates not handled in this function"
3981 (online_nodes, files_all),
3982 (vm_nodes, files_vm),
3986 for (node_list, files) in filemap:
3988 _UploadHelper(lu, node_list, fname)
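# Usage note: the call sites in this module (see LUClusterRedistConf,
# LUNodeRemove and LUNodeAdd below) invoke _RedistributeAncillaryFiles right
# after the configuration has been updated, optionally passing a freshly
# added node via additional_nodes; a minimal invocation is simply
# _RedistributeAncillaryFiles(lu).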
3991 class LUClusterRedistConf(NoHooksLU):
3992 """Force the redistribution of cluster configuration.
3994 This is a very simple LU.
3999 def ExpandNames(self):
4000 self.needed_locks = {
4001 locking.LEVEL_NODE: locking.ALL_SET,
4003 self.share_locks[locking.LEVEL_NODE] = 1
4005 def Exec(self, feedback_fn):
4006 """Redistribute the configuration.
4009 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4010 _RedistributeAncillaryFiles(self)
4013 class LUClusterActivateMasterIp(NoHooksLU):
4014 """Activate the master IP on the master node.
4017 def Exec(self, feedback_fn):
4018 """Activate the master IP.
4021 master_params = self.cfg.GetMasterNetworkParameters()
4022 ems = self.cfg.GetUseExternalMipScript()
4023 result = self.rpc.call_node_activate_master_ip(master_params.name,
4025 result.Raise("Could not activate the master IP")
4028 class LUClusterDeactivateMasterIp(NoHooksLU):
4029 """Deactivate the master IP on the master node.
4032 def Exec(self, feedback_fn):
4033 """Deactivate the master IP.
4036 master_params = self.cfg.GetMasterNetworkParameters()
4037 ems = self.cfg.GetUseExternalMipScript()
4038 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4040 result.Raise("Could not deactivate the master IP")
4043 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4044 """Sleep and poll for an instance's disk to sync.
4047 if not instance.disks or disks is not None and not disks:
4050 disks = _ExpandCheckDisks(instance, disks)
4053 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4055 node = instance.primary_node
4058 lu.cfg.SetDiskID(dev, node)
4060 # TODO: Convert to utils.Retry
4063 degr_retries = 10 # in seconds, as we sleep 1 second each time
4067 cumul_degraded = False
4068 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4069 msg = rstats.fail_msg
4071 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4074 raise errors.RemoteError("Can't contact node %s for mirror data,"
4075 " aborting." % node)
4078 rstats = rstats.payload
4080 for i, mstat in enumerate(rstats):
4082 lu.LogWarning("Can't compute data for node %s/%s",
4083 node, disks[i].iv_name)
4086 cumul_degraded = (cumul_degraded or
4087 (mstat.is_degraded and mstat.sync_percent is None))
4088 if mstat.sync_percent is not None:
4090 if mstat.estimated_time is not None:
4091 rem_time = ("%s remaining (estimated)" %
4092 utils.FormatSeconds(mstat.estimated_time))
4093 max_time = mstat.estimated_time
4095 rem_time = "no time estimate"
4096 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4097 (disks[i].iv_name, mstat.sync_percent, rem_time))
4099 # if we're done but degraded, let's do a few small retries, to
4100 # make sure we see a stable and not transient situation; therefore
4101 # we force restart of the loop
4102 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4103 logging.info("Degraded disks found, %d retries left", degr_retries)
4111 time.sleep(min(60, max_time))
4114 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4115 return not cumul_degraded
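# Usage sketch (not part of the original control flow): callers typically
# treat the boolean result as "all mirrors healthy", e.g.
#   disks_ok = _WaitForSync(lu, instance)
# and warn or abort when it is False, since that means at least one mirror
# was still degraded after the retry budget above ran out.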
4118 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4119 """Check that mirrors are not degraded.
4121 The ldisk parameter, if True, will change the test from the
4122 is_degraded attribute (which represents overall non-ok status for
4123 the device(s)) to the ldisk (representing the local storage status).
4126 lu.cfg.SetDiskID(dev, node)
4130 if on_primary or dev.AssembleOnSecondary():
4131 rstats = lu.rpc.call_blockdev_find(node, dev)
4132 msg = rstats.fail_msg
4134 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4136 elif not rstats.payload:
4137 lu.LogWarning("Can't find disk on node %s", node)
4141 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4143 result = result and not rstats.payload.is_degraded
4146 for child in dev.children:
4147 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
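  # Descriptive note, paraphrasing the docstring above: in the payload
  # returned by call_blockdev_find, is_degraded reflects the overall status
  # of the device while ldisk_status reports only the local storage; with
  # ldisk=True the check therefore requires the local disk to be in the
  # LDS_OKAY state instead of merely "not degraded".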
4152 class LUOobCommand(NoHooksLU):
4153 """Logical unit for OOB handling.
4157 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4159 def ExpandNames(self):
4160 """Gather locks we need.
4163 if self.op.node_names:
4164 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4165 lock_names = self.op.node_names
4167 lock_names = locking.ALL_SET
4169 self.needed_locks = {
4170 locking.LEVEL_NODE: lock_names,
4173 def CheckPrereq(self):
4174 """Check prerequisites.
4177 - the node exists in the configuration
4180 Any errors are signaled by raising errors.OpPrereqError.
4184 self.master_node = self.cfg.GetMasterNode()
4186 assert self.op.power_delay >= 0.0
4188 if self.op.node_names:
4189 if (self.op.command in self._SKIP_MASTER and
4190 self.master_node in self.op.node_names):
4191 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4192 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4194 if master_oob_handler:
4195 additional_text = ("run '%s %s %s' if you want to operate on the"
4196 " master regardless") % (master_oob_handler,
4200 additional_text = "it does not support out-of-band operations"
4202 raise errors.OpPrereqError(("Operating on the master node %s is not"
4203 " allowed for %s; %s") %
4204 (self.master_node, self.op.command,
4205 additional_text), errors.ECODE_INVAL)
4207 self.op.node_names = self.cfg.GetNodeList()
4208 if self.op.command in self._SKIP_MASTER:
4209 self.op.node_names.remove(self.master_node)
4211 if self.op.command in self._SKIP_MASTER:
4212 assert self.master_node not in self.op.node_names
4214 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4216 raise errors.OpPrereqError("Node %s not found" % node_name,
4219 self.nodes.append(node)
4221 if (not self.op.ignore_status and
4222 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4223 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4224 " not marked offline") % node_name,
4227 def Exec(self, feedback_fn):
4228 """Execute OOB and return result if we expect any.
4231 master_node = self.master_node
4234 for idx, node in enumerate(utils.NiceSort(self.nodes,
4235 key=lambda node: node.name)):
4236 node_entry = [(constants.RS_NORMAL, node.name)]
4237 ret.append(node_entry)
4239 oob_program = _SupportsOob(self.cfg, node)
4242 node_entry.append((constants.RS_UNAVAIL, None))
4245 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4246 self.op.command, oob_program, node.name)
4247 result = self.rpc.call_run_oob(master_node, oob_program,
4248 self.op.command, node.name,
4252 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4253 node.name, result.fail_msg)
4254 node_entry.append((constants.RS_NODATA, None))
4257 self._CheckPayload(result)
4258 except errors.OpExecError, err:
4259 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4261 node_entry.append((constants.RS_NODATA, None))
4263 if self.op.command == constants.OOB_HEALTH:
4264 # For health we should log important events
4265 for item, status in result.payload:
4266 if status in [constants.OOB_STATUS_WARNING,
4267 constants.OOB_STATUS_CRITICAL]:
4268 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4269 item, node.name, status)
4271 if self.op.command == constants.OOB_POWER_ON:
4273 elif self.op.command == constants.OOB_POWER_OFF:
4274 node.powered = False
4275 elif self.op.command == constants.OOB_POWER_STATUS:
4276 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4277 if powered != node.powered:
4278 logging.warning(("Recorded power state (%s) of node '%s' does not"
4279 " match actual power state (%s)"), node.powered,
4282 # For configuration changing commands we should update the node
4283 if self.op.command in (constants.OOB_POWER_ON,
4284 constants.OOB_POWER_OFF):
4285 self.cfg.Update(node, feedback_fn)
4287 node_entry.append((constants.RS_NORMAL, result.payload))
4289 if (self.op.command == constants.OOB_POWER_ON and
4290 idx < len(self.nodes) - 1):
4291 time.sleep(self.op.power_delay)
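    # Shape of the value accumulated above (a sketch, the node name is
    # hypothetical): every node contributes a list of (status, data)
    # tuples, e.g.
    #   [(constants.RS_NORMAL, "node1.example.com"),
    #    (constants.RS_NORMAL, <payload>)]
    # with RS_UNAVAIL/RS_NODATA entries substituted when OOB is not
    # supported or the RPC failed; "ret" collects one such list per node.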
4295 def _CheckPayload(self, result):
4296 """Checks if the payload is valid.
4298 @param result: RPC result
4299 @raises errors.OpExecError: If payload is not valid
4303 if self.op.command == constants.OOB_HEALTH:
4304 if not isinstance(result.payload, list):
4305 errs.append("command 'health' is expected to return a list but got %s" %
4306 type(result.payload))
4308 for item, status in result.payload:
4309 if status not in constants.OOB_STATUSES:
4310 errs.append("health item '%s' has invalid status '%s'" %
4313 if self.op.command == constants.OOB_POWER_STATUS:
4314 if not isinstance(result.payload, dict):
4315 errs.append("power-status is expected to return a dict but got %s" %
4316 type(result.payload))
4318 if self.op.command in [
4319 constants.OOB_POWER_ON,
4320 constants.OOB_POWER_OFF,
4321 constants.OOB_POWER_CYCLE,
4323 if result.payload is not None:
4324 errs.append("%s is expected to not return payload but got '%s'" %
4325 (self.op.command, result.payload))
4328 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4329 utils.CommaJoin(errs))
4332 class _OsQuery(_QueryBase):
4333 FIELDS = query.OS_FIELDS
4335 def ExpandNames(self, lu):
4336 # Lock all nodes in shared mode
4337 # Temporary removal of locks, should be reverted later
4338 # TODO: reintroduce locks when they are lighter-weight
4339 lu.needed_locks = {}
4340 #self.share_locks[locking.LEVEL_NODE] = 1
4341 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4343 # The following variables interact with _QueryBase._GetNames
4345 self.wanted = self.names
4347 self.wanted = locking.ALL_SET
4349 self.do_locking = self.use_locking
4351 def DeclareLocks(self, lu, level):
4355 def _DiagnoseByOS(rlist):
4356     """Remaps a per-node return list into a per-os per-node dictionary
4358 @param rlist: a map with node names as keys and OS objects as values
4361 @return: a dictionary with osnames as keys and as value another
4362 map, with nodes as keys and tuples of (path, status, diagnose,
4363 variants, parameters, api_versions) as values, eg::
4365 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4366 (/srv/..., False, "invalid api")],
4367 "node2": [(/srv/..., True, "", [], [])]}
4372 # we build here the list of nodes that didn't fail the RPC (at RPC
4373 # level), so that nodes with a non-responding node daemon don't
4374 # make all OSes invalid
4375 good_nodes = [node_name for node_name in rlist
4376 if not rlist[node_name].fail_msg]
4377 for node_name, nr in rlist.items():
4378 if nr.fail_msg or not nr.payload:
4380 for (name, path, status, diagnose, variants,
4381 params, api_versions) in nr.payload:
4382 if name not in all_os:
4383 # build a list of nodes for this os containing empty lists
4384 # for each node in node_list
4386 for nname in good_nodes:
4387 all_os[name][nname] = []
4388 # convert params from [name, help] to (name, help)
4389 params = [tuple(v) for v in params]
4390 all_os[name][node_name].append((path, status, diagnose,
4391 variants, params, api_versions))
4394 def _GetQueryData(self, lu):
4395     """Computes the list of OSes and their attributes.
4398 # Locking is not used
4399 assert not (compat.any(lu.glm.is_owned(level)
4400 for level in locking.LEVELS
4401 if level != locking.LEVEL_CLUSTER) or
4402 self.do_locking or self.use_locking)
4404 valid_nodes = [node.name
4405 for node in lu.cfg.GetAllNodesInfo().values()
4406 if not node.offline and node.vm_capable]
4407 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4408 cluster = lu.cfg.GetClusterInfo()
4412 for (os_name, os_data) in pol.items():
4413 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4414 hidden=(os_name in cluster.hidden_os),
4415 blacklisted=(os_name in cluster.blacklisted_os))
4419 api_versions = set()
4421 for idx, osl in enumerate(os_data.values()):
4422 info.valid = bool(info.valid and osl and osl[0][1])
4426 (node_variants, node_params, node_api) = osl[0][3:6]
4429 variants.update(node_variants)
4430 parameters.update(node_params)
4431 api_versions.update(node_api)
4433 # Filter out inconsistent values
4434 variants.intersection_update(node_variants)
4435 parameters.intersection_update(node_params)
4436 api_versions.intersection_update(node_api)
4438 info.variants = list(variants)
4439 info.parameters = list(parameters)
4440 info.api_versions = list(api_versions)
4442 data[os_name] = info
4444 # Prepare data in requested order
4445 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4449 class LUOsDiagnose(NoHooksLU):
4450 """Logical unit for OS diagnose/query.
4456 def _BuildFilter(fields, names):
4457 """Builds a filter for querying OSes.
4460 name_filter = qlang.MakeSimpleFilter("name", names)
4462 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4463 # respective field is not requested
4464 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4465 for fname in ["hidden", "blacklisted"]
4466 if fname not in fields]
4467 if "valid" not in fields:
4468 status_filter.append([qlang.OP_TRUE, "valid"])
4471 status_filter.insert(0, qlang.OP_AND)
4473 status_filter = None
4475 if name_filter and status_filter:
4476 return [qlang.OP_AND, name_filter, status_filter]
4480 return status_filter
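    # Example of the structure built here (hypothetical values): for
    # fields=["name", "variants"] and names=["debian-etch"] the result is
    # roughly
    #   [OP_AND,
    #    [OP_OR, [OP_EQUAL, "name", "debian-etch"]],
    #    [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
    #             [OP_NOT, [OP_TRUE, "blacklisted"]],
    #             [OP_TRUE, "valid"]]]
    # i.e. hidden, blacklisted or invalid OSes are filtered out unless those
    # fields were explicitly requested.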
4482 def CheckArguments(self):
4483 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4484 self.op.output_fields, False)
4486 def ExpandNames(self):
4487 self.oq.ExpandNames(self)
4489 def Exec(self, feedback_fn):
4490 return self.oq.OldStyleQuery(self)
4493 class LUNodeRemove(LogicalUnit):
4494 """Logical unit for removing a node.
4497 HPATH = "node-remove"
4498 HTYPE = constants.HTYPE_NODE
4500 def BuildHooksEnv(self):
4503 This doesn't run on the target node in the pre phase as a failed
4504 node would then be impossible to remove.
4508 "OP_TARGET": self.op.node_name,
4509 "NODE_NAME": self.op.node_name,
4512 def BuildHooksNodes(self):
4513 """Build hooks nodes.
4516 all_nodes = self.cfg.GetNodeList()
4518 all_nodes.remove(self.op.node_name)
4520 logging.warning("Node '%s', which is about to be removed, was not found"
4521 " in the list of all nodes", self.op.node_name)
4522 return (all_nodes, all_nodes)
4524 def CheckPrereq(self):
4525 """Check prerequisites.
4528 - the node exists in the configuration
4529 - it does not have primary or secondary instances
4530 - it's not the master
4532 Any errors are signaled by raising errors.OpPrereqError.
4535 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4536 node = self.cfg.GetNodeInfo(self.op.node_name)
4537 assert node is not None
4539 masternode = self.cfg.GetMasterNode()
4540 if node.name == masternode:
4541 raise errors.OpPrereqError("Node is the master node, failover to another"
4542 " node is required", errors.ECODE_INVAL)
4544 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4545 if node.name in instance.all_nodes:
4546 raise errors.OpPrereqError("Instance %s is still running on the node,"
4547 " please remove first" % instance_name,
4549 self.op.node_name = node.name
4552 def Exec(self, feedback_fn):
4553 """Removes the node from the cluster.
4557 logging.info("Stopping the node daemon and removing configs from node %s",
4560 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4562 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4565 # Promote nodes to master candidate as needed
4566 _AdjustCandidatePool(self, exceptions=[node.name])
4567 self.context.RemoveNode(node.name)
4569 # Run post hooks on the node before it's removed
4570 _RunPostHook(self, node.name)
4572 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4573 msg = result.fail_msg
4575 self.LogWarning("Errors encountered on the remote node while leaving"
4576 " the cluster: %s", msg)
4578 # Remove node from our /etc/hosts
4579 if self.cfg.GetClusterInfo().modify_etc_hosts:
4580 master_node = self.cfg.GetMasterNode()
4581 result = self.rpc.call_etc_hosts_modify(master_node,
4582 constants.ETC_HOSTS_REMOVE,
4584 result.Raise("Can't update hosts file with new host data")
4585 _RedistributeAncillaryFiles(self)
4588 class _NodeQuery(_QueryBase):
4589 FIELDS = query.NODE_FIELDS
4591 def ExpandNames(self, lu):
4592 lu.needed_locks = {}
4593 lu.share_locks = _ShareAll()
4596 self.wanted = _GetWantedNodes(lu, self.names)
4598 self.wanted = locking.ALL_SET
4600 self.do_locking = (self.use_locking and
4601 query.NQ_LIVE in self.requested_data)
4604 # If any non-static field is requested we need to lock the nodes
4605 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4607 def DeclareLocks(self, lu, level):
4610 def _GetQueryData(self, lu):
4611 """Computes the list of nodes and their attributes.
4614 all_info = lu.cfg.GetAllNodesInfo()
4616 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4618 # Gather data as requested
4619 if query.NQ_LIVE in self.requested_data:
4620 # filter out non-vm_capable nodes
4621 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4623 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4624 [lu.cfg.GetHypervisorType()])
4625 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4626 for (name, nresult) in node_data.items()
4627 if not nresult.fail_msg and nresult.payload)
4631 if query.NQ_INST in self.requested_data:
4632 node_to_primary = dict([(name, set()) for name in nodenames])
4633 node_to_secondary = dict([(name, set()) for name in nodenames])
4635 inst_data = lu.cfg.GetAllInstancesInfo()
4637 for inst in inst_data.values():
4638 if inst.primary_node in node_to_primary:
4639 node_to_primary[inst.primary_node].add(inst.name)
4640 for secnode in inst.secondary_nodes:
4641 if secnode in node_to_secondary:
4642 node_to_secondary[secnode].add(inst.name)
4644 node_to_primary = None
4645 node_to_secondary = None
4647 if query.NQ_OOB in self.requested_data:
4648 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4649 for name, node in all_info.iteritems())
4653 if query.NQ_GROUP in self.requested_data:
4654 groups = lu.cfg.GetAllNodeGroupsInfo()
4658 return query.NodeQueryData([all_info[name] for name in nodenames],
4659 live_data, lu.cfg.GetMasterNode(),
4660 node_to_primary, node_to_secondary, groups,
4661 oob_support, lu.cfg.GetClusterInfo())
4664 class LUNodeQuery(NoHooksLU):
4665 """Logical unit for querying nodes.
4668 # pylint: disable=W0142
4671 def CheckArguments(self):
4672 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4673 self.op.output_fields, self.op.use_locking)
4675 def ExpandNames(self):
4676 self.nq.ExpandNames(self)
4678 def DeclareLocks(self, level):
4679 self.nq.DeclareLocks(self, level)
4681 def Exec(self, feedback_fn):
4682 return self.nq.OldStyleQuery(self)
4685 class LUNodeQueryvols(NoHooksLU):
4686 """Logical unit for getting volumes on node(s).
4690 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4691 _FIELDS_STATIC = utils.FieldSet("node")
4693 def CheckArguments(self):
4694 _CheckOutputFields(static=self._FIELDS_STATIC,
4695 dynamic=self._FIELDS_DYNAMIC,
4696 selected=self.op.output_fields)
4698 def ExpandNames(self):
4699 self.share_locks = _ShareAll()
4700 self.needed_locks = {}
4702 if not self.op.nodes:
4703 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4705 self.needed_locks[locking.LEVEL_NODE] = \
4706 _GetWantedNodes(self, self.op.nodes)
4708 def Exec(self, feedback_fn):
4709     """Computes the list of volumes on the selected nodes.
4712 nodenames = self.owned_locks(locking.LEVEL_NODE)
4713 volumes = self.rpc.call_node_volumes(nodenames)
4715 ilist = self.cfg.GetAllInstancesInfo()
4716 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4719 for node in nodenames:
4720 nresult = volumes[node]
4723 msg = nresult.fail_msg
4725 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4728 node_vols = sorted(nresult.payload,
4729 key=operator.itemgetter("dev"))
4731 for vol in node_vols:
4733 for field in self.op.output_fields:
4736 elif field == "phys":
4740 elif field == "name":
4742 elif field == "size":
4743 val = int(float(vol["size"]))
4744 elif field == "instance":
4745 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4747 raise errors.ParameterError(field)
4748 node_output.append(str(val))
4750 output.append(node_output)
4755 class LUNodeQueryStorage(NoHooksLU):
4756 """Logical unit for getting information on storage units on node(s).
4759 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4762 def CheckArguments(self):
4763 _CheckOutputFields(static=self._FIELDS_STATIC,
4764 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4765 selected=self.op.output_fields)
4767 def ExpandNames(self):
4768 self.share_locks = _ShareAll()
4769 self.needed_locks = {}
4772 self.needed_locks[locking.LEVEL_NODE] = \
4773 _GetWantedNodes(self, self.op.nodes)
4775 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4777 def Exec(self, feedback_fn):
4778     """Computes the list of storage units on the selected nodes.
4781 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4783 # Always get name to sort by
4784 if constants.SF_NAME in self.op.output_fields:
4785 fields = self.op.output_fields[:]
4787 fields = [constants.SF_NAME] + self.op.output_fields
4789 # Never ask for node or type as it's only known to the LU
4790 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4791 while extra in fields:
4792 fields.remove(extra)
4794 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4795 name_idx = field_idx[constants.SF_NAME]
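    # Illustrative example (hypothetical request): if the caller asked for
    #   fields = [constants.SF_NAME, constants.SF_SIZE, constants.SF_FREE]
    # then field_idx maps each field to its position (SF_NAME -> 0, ...) and
    # name_idx is 0, so the rows returned by the storage RPC below can be
    # keyed and sorted by the unit's name.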
4797 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4798 data = self.rpc.call_storage_list(self.nodes,
4799 self.op.storage_type, st_args,
4800 self.op.name, fields)
4804 for node in utils.NiceSort(self.nodes):
4805 nresult = data[node]
4809 msg = nresult.fail_msg
4811 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4814 rows = dict([(row[name_idx], row) for row in nresult.payload])
4816 for name in utils.NiceSort(rows.keys()):
4821 for field in self.op.output_fields:
4822 if field == constants.SF_NODE:
4824 elif field == constants.SF_TYPE:
4825 val = self.op.storage_type
4826 elif field in field_idx:
4827 val = row[field_idx[field]]
4829 raise errors.ParameterError(field)
4838 class _InstanceQuery(_QueryBase):
4839 FIELDS = query.INSTANCE_FIELDS
4841 def ExpandNames(self, lu):
4842 lu.needed_locks = {}
4843 lu.share_locks = _ShareAll()
4846 self.wanted = _GetWantedInstances(lu, self.names)
4848 self.wanted = locking.ALL_SET
4850 self.do_locking = (self.use_locking and
4851 query.IQ_LIVE in self.requested_data)
4853 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4854 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4855 lu.needed_locks[locking.LEVEL_NODE] = []
4856 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4858 self.do_grouplocks = (self.do_locking and
4859 query.IQ_NODES in self.requested_data)
4861 def DeclareLocks(self, lu, level):
4863 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4864 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4866 # Lock all groups used by instances optimistically; this requires going
4867 # via the node before it's locked, requiring verification later on
4868 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4870 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4871 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4872 elif level == locking.LEVEL_NODE:
4873 lu._LockInstancesNodes() # pylint: disable=W0212
4876 def _CheckGroupLocks(lu):
4877 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4878 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4880 # Check if node groups for locked instances are still correct
4881 for instance_name in owned_instances:
4882 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4884 def _GetQueryData(self, lu):
4885 """Computes the list of instances and their attributes.
4888 if self.do_grouplocks:
4889 self._CheckGroupLocks(lu)
4891 cluster = lu.cfg.GetClusterInfo()
4892 all_info = lu.cfg.GetAllInstancesInfo()
4894 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4896 instance_list = [all_info[name] for name in instance_names]
4897 nodes = frozenset(itertools.chain(*(inst.all_nodes
4898 for inst in instance_list)))
4899 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4902 wrongnode_inst = set()
4904 # Gather data as requested
4905 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4907 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4909 result = node_data[name]
4911 # offline nodes will be in both lists
4912 assert result.fail_msg
4913 offline_nodes.append(name)
4915 bad_nodes.append(name)
4916 elif result.payload:
4917 for inst in result.payload:
4918 if inst in all_info:
4919 if all_info[inst].primary_node == name:
4920 live_data.update(result.payload)
4922 wrongnode_inst.add(inst)
4924 # orphan instance; we don't list it here as we don't
4925 # handle this case yet in the output of instance listing
4926 logging.warning("Orphan instance '%s' found on node %s",
4928 # else no instance is alive
4932 if query.IQ_DISKUSAGE in self.requested_data:
4933 disk_usage = dict((inst.name,
4934 _ComputeDiskSize(inst.disk_template,
4935 [{constants.IDISK_SIZE: disk.size}
4936 for disk in inst.disks]))
4937 for inst in instance_list)
4941 if query.IQ_CONSOLE in self.requested_data:
4943 for inst in instance_list:
4944 if inst.name in live_data:
4945 # Instance is running
4946 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4948 consinfo[inst.name] = None
4949 assert set(consinfo.keys()) == set(instance_names)
4953 if query.IQ_NODES in self.requested_data:
4954 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4956 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4957 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4958 for uuid in set(map(operator.attrgetter("group"),
4964 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4965 disk_usage, offline_nodes, bad_nodes,
4966 live_data, wrongnode_inst, consinfo,
4970 class LUQuery(NoHooksLU):
4971 """Query for resources/items of a certain kind.
4974 # pylint: disable=W0142
4977 def CheckArguments(self):
4978 qcls = _GetQueryImplementation(self.op.what)
4980 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
4982 def ExpandNames(self):
4983 self.impl.ExpandNames(self)
4985 def DeclareLocks(self, level):
4986 self.impl.DeclareLocks(self, level)
4988 def Exec(self, feedback_fn):
4989 return self.impl.NewStyleQuery(self)
4992 class LUQueryFields(NoHooksLU):
4993 """Query for resources/items of a certain kind.
4996 # pylint: disable=W0142
4999 def CheckArguments(self):
5000 self.qcls = _GetQueryImplementation(self.op.what)
5002 def ExpandNames(self):
5003 self.needed_locks = {}
5005 def Exec(self, feedback_fn):
5006 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5009 class LUNodeModifyStorage(NoHooksLU):
5010 """Logical unit for modifying a storage volume on a node.
5015 def CheckArguments(self):
5016 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5018 storage_type = self.op.storage_type
5021 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5023 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5024 " modified" % storage_type,
5027 diff = set(self.op.changes.keys()) - modifiable
5029 raise errors.OpPrereqError("The following fields can not be modified for"
5030 " storage units of type '%s': %r" %
5031 (storage_type, list(diff)),
5034 def ExpandNames(self):
5035 self.needed_locks = {
5036 locking.LEVEL_NODE: self.op.node_name,
5039 def Exec(self, feedback_fn):
5040     """Modifies the storage unit on the given node.
5043 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5044 result = self.rpc.call_storage_modify(self.op.node_name,
5045 self.op.storage_type, st_args,
5046 self.op.name, self.op.changes)
5047 result.Raise("Failed to modify storage unit '%s' on %s" %
5048 (self.op.name, self.op.node_name))
5051 class LUNodeAdd(LogicalUnit):
5052 """Logical unit for adding node to the cluster.
5056 HTYPE = constants.HTYPE_NODE
5057 _NFLAGS = ["master_capable", "vm_capable"]
5059 def CheckArguments(self):
5060 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5061 # validate/normalize the node name
5062 self.hostname = netutils.GetHostname(name=self.op.node_name,
5063 family=self.primary_ip_family)
5064 self.op.node_name = self.hostname.name
5066 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5067 raise errors.OpPrereqError("Cannot readd the master node",
5070 if self.op.readd and self.op.group:
5071 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5072 " being readded", errors.ECODE_INVAL)
5074 def BuildHooksEnv(self):
5077 This will run on all nodes before, and on all nodes + the new node after.
5081 "OP_TARGET": self.op.node_name,
5082 "NODE_NAME": self.op.node_name,
5083 "NODE_PIP": self.op.primary_ip,
5084 "NODE_SIP": self.op.secondary_ip,
5085 "MASTER_CAPABLE": str(self.op.master_capable),
5086 "VM_CAPABLE": str(self.op.vm_capable),
5089 def BuildHooksNodes(self):
5090 """Build hooks nodes.
5093 # Exclude added node
5094 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5095 post_nodes = pre_nodes + [self.op.node_name, ]
5097 return (pre_nodes, post_nodes)
5099 def CheckPrereq(self):
5100 """Check prerequisites.
5103 - the new node is not already in the config
5105 - its parameters (single/dual homed) matches the cluster
5107 Any errors are signaled by raising errors.OpPrereqError.
5111 hostname = self.hostname
5112 node = hostname.name
5113 primary_ip = self.op.primary_ip = hostname.ip
5114 if self.op.secondary_ip is None:
5115 if self.primary_ip_family == netutils.IP6Address.family:
5116         raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5117 " IPv4 address must be given as secondary",
5119 self.op.secondary_ip = primary_ip
5121 secondary_ip = self.op.secondary_ip
5122 if not netutils.IP4Address.IsValid(secondary_ip):
5123 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5124 " address" % secondary_ip, errors.ECODE_INVAL)
5126 node_list = cfg.GetNodeList()
5127 if not self.op.readd and node in node_list:
5128 raise errors.OpPrereqError("Node %s is already in the configuration" %
5129 node, errors.ECODE_EXISTS)
5130 elif self.op.readd and node not in node_list:
5131 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5134 self.changed_primary_ip = False
5136 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5137 if self.op.readd and node == existing_node_name:
5138 if existing_node.secondary_ip != secondary_ip:
5139 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5140 " address configuration as before",
5142 if existing_node.primary_ip != primary_ip:
5143 self.changed_primary_ip = True
5147 if (existing_node.primary_ip == primary_ip or
5148 existing_node.secondary_ip == primary_ip or
5149 existing_node.primary_ip == secondary_ip or
5150 existing_node.secondary_ip == secondary_ip):
5151 raise errors.OpPrereqError("New node ip address(es) conflict with"
5152 " existing node %s" % existing_node.name,
5153 errors.ECODE_NOTUNIQUE)
5155 # After this 'if' block, None is no longer a valid value for the
5156 # _capable op attributes
5158 old_node = self.cfg.GetNodeInfo(node)
5159 assert old_node is not None, "Can't retrieve locked node %s" % node
5160 for attr in self._NFLAGS:
5161 if getattr(self.op, attr) is None:
5162 setattr(self.op, attr, getattr(old_node, attr))
5164 for attr in self._NFLAGS:
5165 if getattr(self.op, attr) is None:
5166 setattr(self.op, attr, True)
5168 if self.op.readd and not self.op.vm_capable:
5169 pri, sec = cfg.GetNodeInstances(node)
5171 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5172 " flag set to false, but it already holds"
5173 " instances" % node,
5176 # check that the type of the node (single versus dual homed) is the
5177 # same as for the master
5178 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5179 master_singlehomed = myself.secondary_ip == myself.primary_ip
5180 newbie_singlehomed = secondary_ip == primary_ip
5181 if master_singlehomed != newbie_singlehomed:
5182 if master_singlehomed:
5183 raise errors.OpPrereqError("The master has no secondary ip but the"
5184 " new node has one",
5187 raise errors.OpPrereqError("The master has a secondary ip but the"
5188 " new node doesn't have one",
5191 # checks reachability
5192 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5193 raise errors.OpPrereqError("Node not reachable by ping",
5194 errors.ECODE_ENVIRON)
5196 if not newbie_singlehomed:
5197 # check reachability from my secondary ip to newbie's secondary ip
5198 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5199 source=myself.secondary_ip):
5200 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5201 " based ping to node daemon port",
5202 errors.ECODE_ENVIRON)
5209 if self.op.master_capable:
5210 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5212 self.master_candidate = False
5215 self.new_node = old_node
5217 node_group = cfg.LookupNodeGroup(self.op.group)
5218 self.new_node = objects.Node(name=node,
5219 primary_ip=primary_ip,
5220 secondary_ip=secondary_ip,
5221 master_candidate=self.master_candidate,
5222 offline=False, drained=False,
5225 if self.op.ndparams:
5226 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5228 def Exec(self, feedback_fn):
5229 """Adds the new node to the cluster.
5232 new_node = self.new_node
5233 node = new_node.name
5235 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5238 # We adding a new node so we assume it's powered
5239 new_node.powered = True
5241 # for re-adds, reset the offline/drained/master-candidate flags;
5242 # we need to reset here, otherwise offline would prevent RPC calls
5243 # later in the procedure; this also means that if the re-add
5244 # fails, we are left with a non-offlined, broken node
5246 new_node.drained = new_node.offline = False # pylint: disable=W0201
5247 self.LogInfo("Readding a node, the offline/drained flags were reset")
5248 # if we demote the node, we do cleanup later in the procedure
5249 new_node.master_candidate = self.master_candidate
5250 if self.changed_primary_ip:
5251 new_node.primary_ip = self.op.primary_ip
5253 # copy the master/vm_capable flags
5254 for attr in self._NFLAGS:
5255 setattr(new_node, attr, getattr(self.op, attr))
5257 # notify the user about any possible mc promotion
5258 if new_node.master_candidate:
5259 self.LogInfo("Node will be a master candidate")
5261 if self.op.ndparams:
5262 new_node.ndparams = self.op.ndparams
5264 new_node.ndparams = {}
5266 # check connectivity
5267 result = self.rpc.call_version([node])[node]
5268 result.Raise("Can't get version information from node %s" % node)
5269 if constants.PROTOCOL_VERSION == result.payload:
5270 logging.info("Communication to node %s fine, sw version %s match",
5271 node, result.payload)
5273 raise errors.OpExecError("Version mismatch master version %s,"
5274 " node version %s" %
5275 (constants.PROTOCOL_VERSION, result.payload))
5277 # Add node to our /etc/hosts, and add key to known_hosts
5278 if self.cfg.GetClusterInfo().modify_etc_hosts:
5279 master_node = self.cfg.GetMasterNode()
5280 result = self.rpc.call_etc_hosts_modify(master_node,
5281 constants.ETC_HOSTS_ADD,
5284 result.Raise("Can't update hosts file with new host data")
5286 if new_node.secondary_ip != new_node.primary_ip:
5287 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5290 node_verify_list = [self.cfg.GetMasterNode()]
5291 node_verify_param = {
5292 constants.NV_NODELIST: ([node], {}),
5293 # TODO: do a node-net-test as well?
5296 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5297 self.cfg.GetClusterName())
5298 for verifier in node_verify_list:
5299 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5300 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5302 for failed in nl_payload:
5303 feedback_fn("ssh/hostname verification failed"
5304 " (checking from %s): %s" %
5305 (verifier, nl_payload[failed]))
5306 raise errors.OpExecError("ssh/hostname verification failed")
5309 _RedistributeAncillaryFiles(self)
5310 self.context.ReaddNode(new_node)
5311 # make sure we redistribute the config
5312 self.cfg.Update(new_node, feedback_fn)
5313 # and make sure the new node will not have old files around
5314 if not new_node.master_candidate:
5315 result = self.rpc.call_node_demote_from_mc(new_node.name)
5316 msg = result.fail_msg
5318 self.LogWarning("Node failed to demote itself from master"
5319 " candidate status: %s" % msg)
5321 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5322 additional_vm=self.op.vm_capable)
5323 self.context.AddNode(new_node, self.proc.GetECId())
5326 class LUNodeSetParams(LogicalUnit):
5327 """Modifies the parameters of a node.
5329 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5330 to the node role (as _ROLE_*)
5331 @cvar _R2F: a dictionary from node role to tuples of flags
5332 @cvar _FLAGS: a list of attribute names corresponding to the flags
5335 HPATH = "node-modify"
5336 HTYPE = constants.HTYPE_NODE
5338 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5340 (True, False, False): _ROLE_CANDIDATE,
5341 (False, True, False): _ROLE_DRAINED,
5342 (False, False, True): _ROLE_OFFLINE,
5343 (False, False, False): _ROLE_REGULAR,
5345 _R2F = dict((v, k) for k, v in _F2R.items())
5346 _FLAGS = ["master_candidate", "drained", "offline"]
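  # Reading the tables above: a node with flags (master_candidate=True,
  # drained=False, offline=False) maps to _ROLE_CANDIDATE via _F2R, and
  # _R2F[_ROLE_CANDIDATE] gives back (True, False, False); _FLAGS lists the
  # node attributes in the same order as the tuples.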
5348 def CheckArguments(self):
5349 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5350 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5351 self.op.master_capable, self.op.vm_capable,
5352 self.op.secondary_ip, self.op.ndparams]
5353 if all_mods.count(None) == len(all_mods):
5354 raise errors.OpPrereqError("Please pass at least one modification",
5356 if all_mods.count(True) > 1:
5357 raise errors.OpPrereqError("Can't set the node into more than one"
5358 " state at the same time",
5361 # Boolean value that tells us whether we might be demoting from MC
5362 self.might_demote = (self.op.master_candidate == False or
5363 self.op.offline == True or
5364 self.op.drained == True or
5365 self.op.master_capable == False)
5367 if self.op.secondary_ip:
5368 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5369 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5370 " address" % self.op.secondary_ip,
5373 self.lock_all = self.op.auto_promote and self.might_demote
5374 self.lock_instances = self.op.secondary_ip is not None
5376 def _InstanceFilter(self, instance):
5377 """Filter for getting affected instances.
5380 return (instance.disk_template in constants.DTS_INT_MIRROR and
5381 self.op.node_name in instance.all_nodes)
5383 def ExpandNames(self):
5385 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5387 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5389 # Since modifying a node can have severe effects on currently running
5390 # operations the resource lock is at least acquired in shared mode
5391 self.needed_locks[locking.LEVEL_NODE_RES] = \
5392 self.needed_locks[locking.LEVEL_NODE]
5394 # Get node resource and instance locks in shared mode; they are not used
5395 # for anything but read-only access
5396 self.share_locks[locking.LEVEL_NODE_RES] = 1
5397 self.share_locks[locking.LEVEL_INSTANCE] = 1
5399 if self.lock_instances:
5400 self.needed_locks[locking.LEVEL_INSTANCE] = \
5401 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5403 def BuildHooksEnv(self):
5406 This runs on the master node.
5410 "OP_TARGET": self.op.node_name,
5411 "MASTER_CANDIDATE": str(self.op.master_candidate),
5412 "OFFLINE": str(self.op.offline),
5413 "DRAINED": str(self.op.drained),
5414 "MASTER_CAPABLE": str(self.op.master_capable),
5415 "VM_CAPABLE": str(self.op.vm_capable),
5418 def BuildHooksNodes(self):
5419 """Build hooks nodes.
5422 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5425 def CheckPrereq(self):
5426 """Check prerequisites.
5428 This only checks the instance list against the existing names.
5431 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5433 if self.lock_instances:
5434 affected_instances = \
5435 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5437 # Verify instance locks
5438 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5439 wanted_instances = frozenset(affected_instances.keys())
5440 if wanted_instances - owned_instances:
5441 raise errors.OpPrereqError("Instances affected by changing node %s's"
5442 " secondary IP address have changed since"
5443 " locks were acquired, wanted '%s', have"
5444 " '%s'; retry the operation" %
5446 utils.CommaJoin(wanted_instances),
5447 utils.CommaJoin(owned_instances)),
5450 affected_instances = None
5452 if (self.op.master_candidate is not None or
5453 self.op.drained is not None or
5454 self.op.offline is not None):
5455 # we can't change the master's node flags
5456 if self.op.node_name == self.cfg.GetMasterNode():
5457 raise errors.OpPrereqError("The master role can be changed"
5458 " only via master-failover",
5461 if self.op.master_candidate and not node.master_capable:
5462 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5463 " it a master candidate" % node.name,
5466 if self.op.vm_capable == False:
5467 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5469 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5470 " the vm_capable flag" % node.name,
5473 if node.master_candidate and self.might_demote and not self.lock_all:
5474 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5475 # check if after removing the current node, we're missing master
5477 (mc_remaining, mc_should, _) = \
5478 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5479 if mc_remaining < mc_should:
5480 raise errors.OpPrereqError("Not enough master candidates, please"
5481 " pass auto promote option to allow"
5482 " promotion", errors.ECODE_STATE)
5484 self.old_flags = old_flags = (node.master_candidate,
5485 node.drained, node.offline)
5486 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5487 self.old_role = old_role = self._F2R[old_flags]
5489 # Check for ineffective changes
5490 for attr in self._FLAGS:
5491 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5492 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5493 setattr(self.op, attr, None)
5495 # Past this point, any flag change to False means a transition
5496 # away from the respective state, as only real changes are kept
5498 # TODO: We might query the real power state if it supports OOB
5499 if _SupportsOob(self.cfg, node):
5500 if self.op.offline is False and not (node.powered or
5501 self.op.powered == True):
5502 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5503 " offline status can be reset") %
5505 elif self.op.powered is not None:
5506 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5507 " as it does not support out-of-band"
5508 " handling") % self.op.node_name)
5510 # If we're being deofflined/drained, we'll MC ourself if needed
5511 if (self.op.drained == False or self.op.offline == False or
5512 (self.op.master_capable and not node.master_capable)):
5513 if _DecideSelfPromotion(self):
5514 self.op.master_candidate = True
5515 self.LogInfo("Auto-promoting node to master candidate")
5517 # If we're no longer master capable, we'll demote ourselves from MC
5518 if self.op.master_capable == False and node.master_candidate:
5519 self.LogInfo("Demoting from master candidate")
5520 self.op.master_candidate = False
5523 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5524 if self.op.master_candidate:
5525 new_role = self._ROLE_CANDIDATE
5526 elif self.op.drained:
5527 new_role = self._ROLE_DRAINED
5528 elif self.op.offline:
5529 new_role = self._ROLE_OFFLINE
5530 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5531       # False is still in new flags, which means we're un-setting (the
5532       # offline/drained/master-candidate)
5533 new_role = self._ROLE_REGULAR
5534 else: # no new flags, nothing, keep old role
5537 self.new_role = new_role
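    # Worked example (hypothetical request): de-offlining a node with only
    # "offline=False" set, assuming the self-promotion check above did not
    # turn it into a master candidate, leaves master_candidate/drained
    # unset, so the elif chain falls through to the "False in new flags"
    # branch and new_role becomes _ROLE_REGULAR; the old/new role pair then
    # drives the offline-to-online handling below.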
5539 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5540 # Trying to transition out of offline status
5541 # TODO: Use standard RPC runner, but make sure it works when the node is
5542 # still marked offline
5543 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5545 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5546 " to report its version: %s" %
5547 (node.name, result.fail_msg),
5550 self.LogWarning("Transitioning node from offline to online state"
5551 " without using re-add. Please make sure the node"
5554 if self.op.secondary_ip:
5555 # Ok even without locking, because this can't be changed by any LU
5556 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5557 master_singlehomed = master.secondary_ip == master.primary_ip
5558 if master_singlehomed and self.op.secondary_ip:
5559 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5560 " homed cluster", errors.ECODE_INVAL)
5562 assert not (frozenset(affected_instances) -
5563 self.owned_locks(locking.LEVEL_INSTANCE))
5566 if affected_instances:
5567 raise errors.OpPrereqError("Cannot change secondary IP address:"
5568 " offline node has instances (%s)"
5569 " configured to use it" %
5570 utils.CommaJoin(affected_instances.keys()))
5572 # On online nodes, check that no instances are running, and that
5573 # the node has the new ip and we can reach it.
5574 for instance in affected_instances.values():
5575 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5576 msg="cannot change secondary ip")
5578 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5579 if master.name != node.name:
5580 # check reachability from master secondary ip to new secondary ip
5581 if not netutils.TcpPing(self.op.secondary_ip,
5582 constants.DEFAULT_NODED_PORT,
5583 source=master.secondary_ip):
5584 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5585 " based ping to node daemon port",
5586 errors.ECODE_ENVIRON)
5588 if self.op.ndparams:
5589 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5590 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5591 self.new_ndparams = new_ndparams
5593 def Exec(self, feedback_fn):
5598 old_role = self.old_role
5599 new_role = self.new_role
5603 if self.op.ndparams:
5604 node.ndparams = self.new_ndparams
5606 if self.op.powered is not None:
5607 node.powered = self.op.powered
5609 for attr in ["master_capable", "vm_capable"]:
5610 val = getattr(self.op, attr)
5612 setattr(node, attr, val)
5613 result.append((attr, str(val)))
5615 if new_role != old_role:
5616 # Tell the node to demote itself, if no longer MC and not offline
5617 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5618 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5620 self.LogWarning("Node failed to demote itself: %s", msg)
5622 new_flags = self._R2F[new_role]
5623 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5625 result.append((desc, str(nf)))
5626 (node.master_candidate, node.drained, node.offline) = new_flags
5628 # we locked all nodes, we adjust the CP before updating this node
5630 _AdjustCandidatePool(self, [node.name])
5632 if self.op.secondary_ip:
5633 node.secondary_ip = self.op.secondary_ip
5634 result.append(("secondary_ip", self.op.secondary_ip))
5636 # this will trigger configuration file update, if needed
5637 self.cfg.Update(node, feedback_fn)
5639     # this will trigger job queue propagation or cleanup if the mc
5640     # flag changed
5641 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5642 self.context.ReaddNode(node)
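
# Illustrative sketch, not part of the module: the flag bookkeeping used when
# a node changes role, reduced to plain Python. Only flags whose value
# actually changed end up in the result, mirroring the loop above; the
# function name and the example flag tuples are made up for illustration.
def _flag_changes_sketch(old_flags, new_flags, descriptions):
  """Sketch: pair up old/new flag values and keep only the changes."""
  changes = []
  for old, new, desc in zip(old_flags, new_flags, descriptions):
    if old != new:
      changes.append((desc, str(new)))
  return changes

# e.g. _flag_changes_sketch((True, False, False), (False, False, True),
#                           ("master_candidate", "drained", "offline"))
# yields [("master_candidate", "False"), ("offline", "True")].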
5647 class LUNodePowercycle(NoHooksLU):
5648 """Powercycles a node.
5653 def CheckArguments(self):
5654 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5655 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5656 raise errors.OpPrereqError("The node is the master and the force"
5657 " parameter was not set",
5660 def ExpandNames(self):
5661 """Locking for PowercycleNode.
5663 This is a last-resort option and shouldn't block on other
5664 jobs. Therefore, we grab no locks.
5667 self.needed_locks = {}
5669 def Exec(self, feedback_fn):
5673 result = self.rpc.call_node_powercycle(self.op.node_name,
5674 self.cfg.GetHypervisorType())
5675 result.Raise("Failed to schedule the reboot")
5676 return result.payload
5679 class LUClusterQuery(NoHooksLU):
5680 """Query cluster configuration.
5685 def ExpandNames(self):
5686 self.needed_locks = {}
5688 def Exec(self, feedback_fn):
5689 """Return cluster config.
5692 cluster = self.cfg.GetClusterInfo()
5695 # Filter just for enabled hypervisors
5696 for os_name, hv_dict in cluster.os_hvp.items():
5697 os_hvp[os_name] = {}
5698 for hv_name, hv_params in hv_dict.items():
5699 if hv_name in cluster.enabled_hypervisors:
5700 os_hvp[os_name][hv_name] = hv_params
5702 # Convert ip_family to ip_version
5703 primary_ip_version = constants.IP4_VERSION
5704 if cluster.primary_ip_family == netutils.IP6Address.family:
5705 primary_ip_version = constants.IP6_VERSION
5708 "software_version": constants.RELEASE_VERSION,
5709 "protocol_version": constants.PROTOCOL_VERSION,
5710 "config_version": constants.CONFIG_VERSION,
5711 "os_api_version": max(constants.OS_API_VERSIONS),
5712 "export_version": constants.EXPORT_VERSION,
5713 "architecture": (platform.architecture()[0], platform.machine()),
5714 "name": cluster.cluster_name,
5715 "master": cluster.master_node,
5716 "default_hypervisor": cluster.enabled_hypervisors[0],
5717 "enabled_hypervisors": cluster.enabled_hypervisors,
5718 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5719 for hypervisor_name in cluster.enabled_hypervisors]),
5721 "beparams": cluster.beparams,
5722 "osparams": cluster.osparams,
5723 "nicparams": cluster.nicparams,
5724 "ndparams": cluster.ndparams,
5725 "candidate_pool_size": cluster.candidate_pool_size,
5726 "master_netdev": cluster.master_netdev,
5727 "master_netmask": cluster.master_netmask,
5728 "use_external_mip_script": cluster.use_external_mip_script,
5729 "volume_group_name": cluster.volume_group_name,
5730 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5731 "file_storage_dir": cluster.file_storage_dir,
5732 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5733 "maintain_node_health": cluster.maintain_node_health,
5734 "ctime": cluster.ctime,
5735 "mtime": cluster.mtime,
5736 "uuid": cluster.uuid,
5737 "tags": list(cluster.GetTags()),
5738 "uid_pool": cluster.uid_pool,
5739 "default_iallocator": cluster.default_iallocator,
5740 "reserved_lvs": cluster.reserved_lvs,
5741 "primary_ip_version": primary_ip_version,
5742 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5743 "hidden_os": cluster.hidden_os,
5744 "blacklisted_os": cluster.blacklisted_os,
5750 class LUClusterConfigQuery(NoHooksLU):
5751 """Return configuration values.
5755 _FIELDS_DYNAMIC = utils.FieldSet()
5756 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5757 "watcher_pause", "volume_group_name")
5759 def CheckArguments(self):
5760 _CheckOutputFields(static=self._FIELDS_STATIC,
5761 dynamic=self._FIELDS_DYNAMIC,
5762 selected=self.op.output_fields)
5764 def ExpandNames(self):
5765 self.needed_locks = {}
5767 def Exec(self, feedback_fn):
5768 """Dump a representation of the cluster config to the standard output.
5772 for field in self.op.output_fields:
5773 if field == "cluster_name":
5774 entry = self.cfg.GetClusterName()
5775 elif field == "master_node":
5776 entry = self.cfg.GetMasterNode()
5777 elif field == "drain_flag":
5778 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5779 elif field == "watcher_pause":
5780 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5781 elif field == "volume_group_name":
5782 entry = self.cfg.GetVGName()
5784 raise errors.ParameterError(field)
5785 values.append(entry)
5789 class LUInstanceActivateDisks(NoHooksLU):
5790 """Bring up an instance's disks.
5795 def ExpandNames(self):
5796 self._ExpandAndLockInstance()
5797 self.needed_locks[locking.LEVEL_NODE] = []
5798 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5800 def DeclareLocks(self, level):
5801 if level == locking.LEVEL_NODE:
5802 self._LockInstancesNodes()
5804 def CheckPrereq(self):
5805 """Check prerequisites.
5807 This checks that the instance is in the cluster.
5810 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5811 assert self.instance is not None, \
5812 "Cannot retrieve locked instance %s" % self.op.instance_name
5813 _CheckNodeOnline(self, self.instance.primary_node)
5815 def Exec(self, feedback_fn):
5816 """Activate the disks.
5819 disks_ok, disks_info = \
5820 _AssembleInstanceDisks(self, self.instance,
5821 ignore_size=self.op.ignore_size)
5823 raise errors.OpExecError("Cannot activate block devices")
5828 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5830 """Prepare the block devices for an instance.
5832 This sets up the block devices on all nodes.
5834 @type lu: L{LogicalUnit}
5835 @param lu: the logical unit on whose behalf we execute
5836 @type instance: L{objects.Instance}
5837 @param instance: the instance for whose disks we assemble
5838 @type disks: list of L{objects.Disk} or None
5839 @param disks: which disks to assemble (or all, if None)
5840 @type ignore_secondaries: boolean
5841 @param ignore_secondaries: if true, errors on secondary nodes
5842 won't result in an error return from the function
5843 @type ignore_size: boolean
5844 @param ignore_size: if true, the current known size of the disk
5845 will not be used during the disk activation, useful for cases
5846 when the size is wrong
5847 @return: False if the operation failed, otherwise a list of
5848 (host, instance_visible_name, node_visible_name)
5849 with the mapping from node devices to instance devices
5854 iname = instance.name
5855 disks = _ExpandCheckDisks(instance, disks)
5857 # With the two-pass mechanism we try to reduce the window of
5858 # opportunity for the race condition of switching DRBD to primary
5859 # before handshaking occurred, but we do not eliminate it
5861 # The proper fix would be to wait (with some limits) until the
5862 # connection has been made and drbd transitions from WFConnection
5863 # into any other network-connected state (Connected, SyncTarget,
5866 # 1st pass, assemble on all nodes in secondary mode
5867 for idx, inst_disk in enumerate(disks):
5868 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5870 node_disk = node_disk.Copy()
5871 node_disk.UnsetSize()
5872 lu.cfg.SetDiskID(node_disk, node)
5873 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5874 msg = result.fail_msg
5876 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5877 " (is_primary=False, pass=1): %s",
5878 inst_disk.iv_name, node, msg)
5879 if not ignore_secondaries:
5882 # FIXME: race condition on drbd migration to primary
5884 # 2nd pass, do only the primary node
5885 for idx, inst_disk in enumerate(disks):
5888 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5889 if node != instance.primary_node:
5892 node_disk = node_disk.Copy()
5893 node_disk.UnsetSize()
5894 lu.cfg.SetDiskID(node_disk, node)
5895 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5896 msg = result.fail_msg
5898 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5899 " (is_primary=True, pass=2): %s",
5900 inst_disk.iv_name, node, msg)
5903 dev_path = result.payload
5905 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5907 # leave the disks configured for the primary node
5908 # this is a workaround that would be fixed better by
5909 # improving the logical/physical id handling
5911 lu.cfg.SetDiskID(disk, instance.primary_node)
5913 return disks_ok, device_info
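
# Illustrative sketch, not part of the module: the two-pass assembly pattern
# used above, reduced to plain data. "assemble_on_node" is a hypothetical
# stand-in for the per-node block device RPC; the sketch only shows the
# control flow (every node as secondary first, then the primary alone).
def _two_pass_assemble_sketch(disks, primary_node, assemble_on_node):
  """Sketch: assemble each disk as secondary everywhere, then as primary.

  @param disks: list of (iv_name, [node, ...]) pairs
  @param primary_node: name of the primary node
  @param assemble_on_node: callable(node, iv_name, is_primary) returning the
      device path on success or None on failure
  """
  disks_ok = True
  device_info = []
  # pass 1: secondary mode on every node hosting the disk
  for iv_name, nodes in disks:
    for node in nodes:
      if assemble_on_node(node, iv_name, False) is None:
        disks_ok = False
  # pass 2: primary mode, only on the primary node
  for iv_name, nodes in disks:
    if primary_node in nodes:
      path = assemble_on_node(primary_node, iv_name, True)
      if path is None:
        disks_ok = False
      else:
        device_info.append((primary_node, iv_name, path))
  return disks_ok, device_info

# e.g. with disks = [("sda", ["node1", "node2"])] and primary_node = "node1",
# assemble_on_node is called twice with is_primary=False and once, on node1
# only, with is_primary=True.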
5916 def _StartInstanceDisks(lu, instance, force):
5917 """Start the disks of an instance.
5920 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5921 ignore_secondaries=force)
5923 _ShutdownInstanceDisks(lu, instance)
5924 if force is not None and not force:
5925 lu.proc.LogWarning("", hint="If the message above refers to a"
5927 " you can retry the operation using '--force'.")
5928 raise errors.OpExecError("Disk consistency error")
5931 class LUInstanceDeactivateDisks(NoHooksLU):
5932 """Shutdown an instance's disks.
5937 def ExpandNames(self):
5938 self._ExpandAndLockInstance()
5939 self.needed_locks[locking.LEVEL_NODE] = []
5940 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5942 def DeclareLocks(self, level):
5943 if level == locking.LEVEL_NODE:
5944 self._LockInstancesNodes()
5946 def CheckPrereq(self):
5947 """Check prerequisites.
5949 This checks that the instance is in the cluster.
5952 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5953 assert self.instance is not None, \
5954 "Cannot retrieve locked instance %s" % self.op.instance_name
5956 def Exec(self, feedback_fn):
5957 """Deactivate the disks
5960 instance = self.instance
5962 _ShutdownInstanceDisks(self, instance)
5964 _SafeShutdownInstanceDisks(self, instance)
5967 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5968 """Shutdown block devices of an instance.
5970 This function checks if an instance is running, before calling
5971 _ShutdownInstanceDisks.
5974 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
5975 _ShutdownInstanceDisks(lu, instance, disks=disks)
5978 def _ExpandCheckDisks(instance, disks):
5979 """Return the instance disks selected by the disks list
5981 @type disks: list of L{objects.Disk} or None
5982 @param disks: selected disks
5983 @rtype: list of L{objects.Disk}
5984 @return: selected instance disks to act on
5988 return instance.disks
5990 if not set(disks).issubset(instance.disks):
5991 raise errors.ProgrammerError("Can only act on disks belonging to the"
5996 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5997 """Shutdown block devices of an instance.
5999 This does the shutdown on all nodes of the instance.
6001 If C{ignore_primary} is false, errors on the primary node are
6006 disks = _ExpandCheckDisks(instance, disks)
6009 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6010 lu.cfg.SetDiskID(top_disk, node)
6011 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6012 msg = result.fail_msg
6014 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6015 disk.iv_name, node, msg)
6016 if ((node == instance.primary_node and not ignore_primary) or
6017 (node != instance.primary_node and not result.offline)):
6022 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6023 """Checks if a node has enough free memory.
6025 This function checks if a given node has the needed amount of free
6026 memory. In case the node has less memory or we cannot get the
6027 information from the node, this function raises an OpPrereqError
6030 @type lu: C{LogicalUnit}
6031 @param lu: a logical unit from which we get configuration data
6033 @param node: the node to check
6034 @type reason: C{str}
6035 @param reason: string to use in the error message
6036 @type requested: C{int}
6037 @param requested: the amount of memory in MiB to check for
6038 @type hypervisor_name: C{str}
6039 @param hypervisor_name: the hypervisor to ask for memory stats
6040 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6041 we cannot check the node
6044 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6045 nodeinfo[node].Raise("Can't get data from node %s" % node,
6046 prereq=True, ecode=errors.ECODE_ENVIRON)
6047 (_, _, (hv_info, )) = nodeinfo[node].payload
6049 free_mem = hv_info.get("memory_free", None)
6050 if not isinstance(free_mem, int):
6051 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6052 " was '%s'" % (node, free_mem),
6053 errors.ECODE_ENVIRON)
6054 if requested > free_mem:
6055 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6056 " needed %s MiB, available %s MiB" %
6057 (node, reason, requested, free_mem),
6061 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6062 """Checks if nodes have enough free disk space in the all VGs.
6064 This function check if all given nodes have the needed amount of
6065 free disk. In case any node has less disk or we cannot get the
6066 information from the node, this function raise an OpPrereqError
6069 @type lu: C{LogicalUnit}
6070 @param lu: a logical unit from which we get configuration data
6071 @type nodenames: C{list}
6072 @param nodenames: the list of node names to check
6073 @type req_sizes: C{dict}
6074 @param req_sizes: the hash of vg and corresponding amount of disk in
6076 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6077 or we cannot check the node
6080 for vg, req_size in req_sizes.items():
6081 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
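
# Illustrative sketch, not part of the module: the shape of the req_sizes
# mapping and how the per-VG check fans out. "free_by_vg" is hypothetical
# data standing in for the per-node RPC results.
def _check_vg_space_sketch(free_by_vg, req_sizes):
  """Sketch: fail if any volume group lacks the requested space.

  @param free_by_vg: dict mapping vg name to free space in MiB
  @param req_sizes: dict mapping vg name to requested space in MiB
  """
  for vg, req_size in req_sizes.items():
    free = free_by_vg.get(vg)
    if free is None or req_size > free:
      raise ValueError("vg %s: need %d MiB, have %s MiB" % (vg, req_size, free))

# e.g. _check_vg_space_sketch({"xenvg": 20480}, {"xenvg": 10240}) passes,
# while requesting 40960 MiB from the same 20480 MiB pool raises ValueError.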
6084 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6085 """Checks if nodes have enough free disk space in the specified VG.
6087 This function checks if all given nodes have the needed amount of
6088 free disk. In case any node has less disk or we cannot get the
6089 information from the node, this function raises an OpPrereqError
6092 @type lu: C{LogicalUnit}
6093 @param lu: a logical unit from which we get configuration data
6094 @type nodenames: C{list}
6095 @param nodenames: the list of node names to check
6097 @param vg: the volume group to check
6098 @type requested: C{int}
6099 @param requested: the amount of disk in MiB to check for
6100 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6101 or we cannot check the node
6104 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6105 for node in nodenames:
6106 info = nodeinfo[node]
6107 info.Raise("Cannot get current information from node %s" % node,
6108 prereq=True, ecode=errors.ECODE_ENVIRON)
6109 (_, (vg_info, ), _) = info.payload
6110 vg_free = vg_info.get("vg_free", None)
6111 if not isinstance(vg_free, int):
6112 raise errors.OpPrereqError("Can't compute free disk space on node"
6113 " %s for vg %s, result was '%s'" %
6114 (node, vg, vg_free), errors.ECODE_ENVIRON)
6115 if requested > vg_free:
6116 raise errors.OpPrereqError("Not enough disk space on target node %s"
6117 " vg %s: required %d MiB, available %d MiB" %
6118 (node, vg, requested, vg_free),
6122 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6123 """Checks if nodes have enough physical CPUs
6125 This function checks if all given nodes have the needed number of
6126 physical CPUs. In case any node has fewer CPUs or we cannot get the
6127 information from the node, this function raises an OpPrereqError
6130 @type lu: C{LogicalUnit}
6131 @param lu: a logical unit from which we get configuration data
6132 @type nodenames: C{list}
6133 @param nodenames: the list of node names to check
6134 @type requested: C{int}
6135 @param requested: the minimum acceptable number of physical CPUs
6136 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6137 or we cannot check the node
6140 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6141 for node in nodenames:
6142 info = nodeinfo[node]
6143 info.Raise("Cannot get current information from node %s" % node,
6144 prereq=True, ecode=errors.ECODE_ENVIRON)
6145 (_, _, (hv_info, )) = info.payload
6146 num_cpus = hv_info.get("cpu_total", None)
6147 if not isinstance(num_cpus, int):
6148 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6149 " on node %s, result was '%s'" %
6150 (node, num_cpus), errors.ECODE_ENVIRON)
6151 if requested > num_cpus:
6152 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6153 "required" % (node, num_cpus, requested),
6157 class LUInstanceStartup(LogicalUnit):
6158 """Starts an instance.
6161 HPATH = "instance-start"
6162 HTYPE = constants.HTYPE_INSTANCE
6165 def CheckArguments(self):
6167 if self.op.beparams:
6168 # fill the beparams dict
6169 objects.UpgradeBeParams(self.op.beparams)
6170 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6172 def ExpandNames(self):
6173 self._ExpandAndLockInstance()
6175 def BuildHooksEnv(self):
6178 This runs on master, primary and secondary nodes of the instance.
6182 "FORCE": self.op.force,
6185 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6189 def BuildHooksNodes(self):
6190 """Build hooks nodes.
6193 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6196 def CheckPrereq(self):
6197 """Check prerequisites.
6199 This checks that the instance is in the cluster.
6202 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6203 assert self.instance is not None, \
6204 "Cannot retrieve locked instance %s" % self.op.instance_name
6207 if self.op.hvparams:
6208 # check hypervisor parameter syntax (locally)
6209 cluster = self.cfg.GetClusterInfo()
6210 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6211 filled_hvp = cluster.FillHV(instance)
6212 filled_hvp.update(self.op.hvparams)
6213 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6214 hv_type.CheckParameterSyntax(filled_hvp)
6215 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6217 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6219 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6221 if self.primary_offline and self.op.ignore_offline_nodes:
6222 self.proc.LogWarning("Ignoring offline primary node")
6224 if self.op.hvparams or self.op.beparams:
6225 self.proc.LogWarning("Overridden parameters are ignored")
6227 _CheckNodeOnline(self, instance.primary_node)
6229 bep = self.cfg.GetClusterInfo().FillBE(instance)
6231 # check bridges existence
6232 _CheckInstanceBridgesExist(self, instance)
6234 remote_info = self.rpc.call_instance_info(instance.primary_node,
6236 instance.hypervisor)
6237 remote_info.Raise("Error checking node %s" % instance.primary_node,
6238 prereq=True, ecode=errors.ECODE_ENVIRON)
6239 if not remote_info.payload: # not running already
6240 _CheckNodeFreeMemory(self, instance.primary_node,
6241 "starting instance %s" % instance.name,
6242 bep[constants.BE_MAXMEM], instance.hypervisor)
6244 def Exec(self, feedback_fn):
6245 """Start the instance.
6248 instance = self.instance
6249 force = self.op.force
6251 if not self.op.no_remember:
6252 self.cfg.MarkInstanceUp(instance.name)
6254 if self.primary_offline:
6255 assert self.op.ignore_offline_nodes
6256 self.proc.LogInfo("Primary node offline, marked instance as started")
6258 node_current = instance.primary_node
6260 _StartInstanceDisks(self, instance, force)
6263 self.rpc.call_instance_start(node_current,
6264 (instance, self.op.hvparams,
6266 self.op.startup_paused)
6267 msg = result.fail_msg
6269 _ShutdownInstanceDisks(self, instance)
6270 raise errors.OpExecError("Could not start instance: %s" % msg)
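
# Illustrative sketch, not part of the module: the start-with-rollback flow
# of the Exec method above, with hypothetical callables standing in for the
# config update, disk activation, start RPC and disk shutdown.
def _start_with_rollback_sketch(mark_up, start_disks, start_instance,
                                stop_disks):
  """Sketch: record intent, bring up disks, start; tear down on failure.

  @param start_instance: hypothetical callable returning an error message
      string on failure or None on success
  """
  mark_up()           # remember the desired state first
  start_disks()
  err = start_instance()
  if err:
    stop_disks()      # do not leave the disks assembled behind a failed start
    raise RuntimeError("could not start instance: %s" % err)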
6273 class LUInstanceReboot(LogicalUnit):
6274 """Reboot an instance.
6277 HPATH = "instance-reboot"
6278 HTYPE = constants.HTYPE_INSTANCE
6281 def ExpandNames(self):
6282 self._ExpandAndLockInstance()
6284 def BuildHooksEnv(self):
6287 This runs on master, primary and secondary nodes of the instance.
6291 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6292 "REBOOT_TYPE": self.op.reboot_type,
6293 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6296 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6300 def BuildHooksNodes(self):
6301 """Build hooks nodes.
6304 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6307 def CheckPrereq(self):
6308 """Check prerequisites.
6310 This checks that the instance is in the cluster.
6313 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6314 assert self.instance is not None, \
6315 "Cannot retrieve locked instance %s" % self.op.instance_name
6316 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6317 _CheckNodeOnline(self, instance.primary_node)
6319 # check bridges existence
6320 _CheckInstanceBridgesExist(self, instance)
6322 def Exec(self, feedback_fn):
6323 """Reboot the instance.
6326 instance = self.instance
6327 ignore_secondaries = self.op.ignore_secondaries
6328 reboot_type = self.op.reboot_type
6330 remote_info = self.rpc.call_instance_info(instance.primary_node,
6332 instance.hypervisor)
6333 remote_info.Raise("Error checking node %s" % instance.primary_node)
6334 instance_running = bool(remote_info.payload)
6336 node_current = instance.primary_node
6338 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6339 constants.INSTANCE_REBOOT_HARD]:
6340 for disk in instance.disks:
6341 self.cfg.SetDiskID(disk, node_current)
6342 result = self.rpc.call_instance_reboot(node_current, instance,
6344 self.op.shutdown_timeout)
6345 result.Raise("Could not reboot instance")
6347 if instance_running:
6348 result = self.rpc.call_instance_shutdown(node_current, instance,
6349 self.op.shutdown_timeout)
6350 result.Raise("Could not shutdown instance for full reboot")
6351 _ShutdownInstanceDisks(self, instance)
6353 self.LogInfo("Instance %s was already stopped, starting now",
6355 _StartInstanceDisks(self, instance, ignore_secondaries)
6356 result = self.rpc.call_instance_start(node_current,
6357 (instance, None, None), False)
6358 msg = result.fail_msg
6360 _ShutdownInstanceDisks(self, instance)
6361 raise errors.OpExecError("Could not start instance for"
6362 " full reboot: %s" % msg)
6364 self.cfg.MarkInstanceUp(instance.name)
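
# Illustrative sketch, not part of the module: the decision taken by the
# reboot Exec method above, reduced to a pure function. The "soft"/"hard"
# strings and the action names are made up for illustration; they stand in
# for the reboot-type constants and the RPC calls.
def _reboot_plan_sketch(instance_running, reboot_type):
  """Sketch: in-place reboot for running instances, full stop/start otherwise."""
  if instance_running and reboot_type in ("soft", "hard"):
    return ["hypervisor_reboot"]
  actions = []
  if instance_running:
    actions += ["shutdown_instance", "shutdown_disks"]
  actions += ["start_disks", "start_instance"]
  return actions

# e.g. a stopped instance asked for a hard reboot simply gets
# ["start_disks", "start_instance"].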
6367 class LUInstanceShutdown(LogicalUnit):
6368 """Shutdown an instance.
6371 HPATH = "instance-stop"
6372 HTYPE = constants.HTYPE_INSTANCE
6375 def ExpandNames(self):
6376 self._ExpandAndLockInstance()
6378 def BuildHooksEnv(self):
6381 This runs on master, primary and secondary nodes of the instance.
6384 env = _BuildInstanceHookEnvByObject(self, self.instance)
6385 env["TIMEOUT"] = self.op.timeout
6388 def BuildHooksNodes(self):
6389 """Build hooks nodes.
6392 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6395 def CheckPrereq(self):
6396 """Check prerequisites.
6398 This checks that the instance is in the cluster.
6401 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6402 assert self.instance is not None, \
6403 "Cannot retrieve locked instance %s" % self.op.instance_name
6405 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6407 self.primary_offline = \
6408 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6410 if self.primary_offline and self.op.ignore_offline_nodes:
6411 self.proc.LogWarning("Ignoring offline primary node")
6413 _CheckNodeOnline(self, self.instance.primary_node)
6415 def Exec(self, feedback_fn):
6416 """Shutdown the instance.
6419 instance = self.instance
6420 node_current = instance.primary_node
6421 timeout = self.op.timeout
6423 if not self.op.no_remember:
6424 self.cfg.MarkInstanceDown(instance.name)
6426 if self.primary_offline:
6427 assert self.op.ignore_offline_nodes
6428 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6430 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6431 msg = result.fail_msg
6433 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6435 _ShutdownInstanceDisks(self, instance)
6438 class LUInstanceReinstall(LogicalUnit):
6439 """Reinstall an instance.
6442 HPATH = "instance-reinstall"
6443 HTYPE = constants.HTYPE_INSTANCE
6446 def ExpandNames(self):
6447 self._ExpandAndLockInstance()
6449 def BuildHooksEnv(self):
6452 This runs on master, primary and secondary nodes of the instance.
6455 return _BuildInstanceHookEnvByObject(self, self.instance)
6457 def BuildHooksNodes(self):
6458 """Build hooks nodes.
6461 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6464 def CheckPrereq(self):
6465 """Check prerequisites.
6467 This checks that the instance is in the cluster and is not running.
6470 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6471 assert instance is not None, \
6472 "Cannot retrieve locked instance %s" % self.op.instance_name
6473 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6474 " offline, cannot reinstall")
6475 for node in instance.secondary_nodes:
6476 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6477 " cannot reinstall")
6479 if instance.disk_template == constants.DT_DISKLESS:
6480 raise errors.OpPrereqError("Instance '%s' has no disks" %
6481 self.op.instance_name,
6483 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6485 if self.op.os_type is not None:
6487 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6488 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6489 instance_os = self.op.os_type
6491 instance_os = instance.os
6493 nodelist = list(instance.all_nodes)
6495 if self.op.osparams:
6496 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6497 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6498 self.os_inst = i_osdict # the new dict (without defaults)
6502 self.instance = instance
6504 def Exec(self, feedback_fn):
6505 """Reinstall the instance.
6508 inst = self.instance
6510 if self.op.os_type is not None:
6511 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6512 inst.os = self.op.os_type
6513 # Write to configuration
6514 self.cfg.Update(inst, feedback_fn)
6516 _StartInstanceDisks(self, inst, None)
6518 feedback_fn("Running the instance OS create scripts...")
6519 # FIXME: pass debug option from opcode to backend
6520 result = self.rpc.call_instance_os_add(inst.primary_node,
6521 (inst, self.os_inst), True,
6522 self.op.debug_level)
6523 result.Raise("Could not install OS for instance %s on node %s" %
6524 (inst.name, inst.primary_node))
6526 _ShutdownInstanceDisks(self, inst)
6529 class LUInstanceRecreateDisks(LogicalUnit):
6530 """Recreate an instance's missing disks.
6533 HPATH = "instance-recreate-disks"
6534 HTYPE = constants.HTYPE_INSTANCE
6537 def CheckArguments(self):
6538 # normalise the disk list
6539 self.op.disks = sorted(frozenset(self.op.disks))
6541 def ExpandNames(self):
6542 self._ExpandAndLockInstance()
6543 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6545 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6546 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6548 self.needed_locks[locking.LEVEL_NODE] = []
6550 def DeclareLocks(self, level):
6551 if level == locking.LEVEL_NODE:
6552 # if we replace the nodes, we only need to lock the old primary,
6553 # otherwise we need to lock all nodes for disk re-creation
6554 primary_only = bool(self.op.nodes)
6555 self._LockInstancesNodes(primary_only=primary_only)
6556 elif level == locking.LEVEL_NODE_RES:
6558 self.needed_locks[locking.LEVEL_NODE_RES] = \
6559 self.needed_locks[locking.LEVEL_NODE][:]
6561 def BuildHooksEnv(self):
6564 This runs on master, primary and secondary nodes of the instance.
6567 return _BuildInstanceHookEnvByObject(self, self.instance)
6569 def BuildHooksNodes(self):
6570 """Build hooks nodes.
6573 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6576 def CheckPrereq(self):
6577 """Check prerequisites.
6579 This checks that the instance is in the cluster and is not running.
6582 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6583 assert instance is not None, \
6584 "Cannot retrieve locked instance %s" % self.op.instance_name
6586 if len(self.op.nodes) != len(instance.all_nodes):
6587 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6588 " %d replacement nodes were specified" %
6589 (instance.name, len(instance.all_nodes),
6590 len(self.op.nodes)),
6592 assert instance.disk_template != constants.DT_DRBD8 or \
6593 len(self.op.nodes) == 2
6594 assert instance.disk_template != constants.DT_PLAIN or \
6595 len(self.op.nodes) == 1
6596 primary_node = self.op.nodes[0]
6598 primary_node = instance.primary_node
6599 _CheckNodeOnline(self, primary_node)
6601 if instance.disk_template == constants.DT_DISKLESS:
6602 raise errors.OpPrereqError("Instance '%s' has no disks" %
6603 self.op.instance_name, errors.ECODE_INVAL)
6604 # if we replace nodes *and* the old primary is offline, we don't
6606 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6607 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6608 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6609 if not (self.op.nodes and old_pnode.offline):
6610 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6611 msg="cannot recreate disks")
6613 if not self.op.disks:
6614 self.op.disks = range(len(instance.disks))
6616 for idx in self.op.disks:
6617 if idx >= len(instance.disks):
6618 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6620 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6621 raise errors.OpPrereqError("Can't recreate disks partially and"
6622 " change the nodes at the same time",
6624 self.instance = instance
6626 def Exec(self, feedback_fn):
6627 """Recreate the disks.
6630 instance = self.instance
6632 assert (self.owned_locks(locking.LEVEL_NODE) ==
6633 self.owned_locks(locking.LEVEL_NODE_RES))
6636 mods = [] # keeps track of needed logical_id changes
6638 for idx, disk in enumerate(instance.disks):
6639 if idx not in self.op.disks: # disk idx has not been passed in
6642 # update secondaries for disks, if needed
6644 if disk.dev_type == constants.LD_DRBD8:
6645 # need to update the nodes and minors
6646 assert len(self.op.nodes) == 2
6647 assert len(disk.logical_id) == 6 # otherwise disk internals
6649 (_, _, old_port, _, _, old_secret) = disk.logical_id
6650 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6651 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6652 new_minors[0], new_minors[1], old_secret)
6653 assert len(disk.logical_id) == len(new_id)
6654 mods.append((idx, new_id))
6656 # now that we have passed all asserts above, we can apply the mods
6657 # in a single run (to avoid partial changes)
6658 for idx, new_id in mods:
6659 instance.disks[idx].logical_id = new_id
6661 # change primary node, if needed
6663 instance.primary_node = self.op.nodes[0]
6664 self.LogWarning("Changing the instance's nodes, you will have to"
6665 " remove any disks left on the older nodes manually")
6668 self.cfg.Update(instance, feedback_fn)
6670 _CreateDisks(self, instance, to_skip=to_skip)
6673 class LUInstanceRename(LogicalUnit):
6674 """Rename an instance.
6677 HPATH = "instance-rename"
6678 HTYPE = constants.HTYPE_INSTANCE
6680 def CheckArguments(self):
6684 if self.op.ip_check and not self.op.name_check:
6685 # TODO: make the ip check more flexible and not depend on the name check
6686 raise errors.OpPrereqError("IP address check requires a name check",
6689 def BuildHooksEnv(self):
6692 This runs on master, primary and secondary nodes of the instance.
6695 env = _BuildInstanceHookEnvByObject(self, self.instance)
6696 env["INSTANCE_NEW_NAME"] = self.op.new_name
6699 def BuildHooksNodes(self):
6700 """Build hooks nodes.
6703 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6706 def CheckPrereq(self):
6707 """Check prerequisites.
6709 This checks that the instance is in the cluster and is not running.
6712 self.op.instance_name = _ExpandInstanceName(self.cfg,
6713 self.op.instance_name)
6714 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6715 assert instance is not None
6716 _CheckNodeOnline(self, instance.primary_node)
6717 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6718 msg="cannot rename")
6719 self.instance = instance
6721 new_name = self.op.new_name
6722 if self.op.name_check:
6723 hostname = netutils.GetHostname(name=new_name)
6724 if hostname.name != new_name:
6725 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6727 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6728 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6729 " same as given hostname '%s'") %
6730 (hostname.name, self.op.new_name),
6732 new_name = self.op.new_name = hostname.name
6733 if (self.op.ip_check and
6734 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6735 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6736 (hostname.ip, new_name),
6737 errors.ECODE_NOTUNIQUE)
6739 instance_list = self.cfg.GetInstanceList()
6740 if new_name in instance_list and new_name != instance.name:
6741 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6742 new_name, errors.ECODE_EXISTS)
6744 def Exec(self, feedback_fn):
6745 """Rename the instance.
6748 inst = self.instance
6749 old_name = inst.name
6751 rename_file_storage = False
6752 if (inst.disk_template in constants.DTS_FILEBASED and
6753 self.op.new_name != inst.name):
6754 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6755 rename_file_storage = True
6757 self.cfg.RenameInstance(inst.name, self.op.new_name)
6758 # Change the instance lock. This is definitely safe while we hold the BGL.
6759 # Otherwise the new lock would have to be added in acquired mode.
6761 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6762 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6764 # re-read the instance from the configuration after rename
6765 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6767 if rename_file_storage:
6768 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6769 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6770 old_file_storage_dir,
6771 new_file_storage_dir)
6772 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6773 " (but the instance has been renamed in Ganeti)" %
6774 (inst.primary_node, old_file_storage_dir,
6775 new_file_storage_dir))
6777 _StartInstanceDisks(self, inst, None)
6779 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6780 old_name, self.op.debug_level)
6781 msg = result.fail_msg
6783 msg = ("Could not run OS rename script for instance %s on node %s"
6784 " (but the instance has been renamed in Ganeti): %s" %
6785 (inst.name, inst.primary_node, msg))
6786 self.proc.LogWarning(msg)
6788 _ShutdownInstanceDisks(self, inst)
6793 class LUInstanceRemove(LogicalUnit):
6794 """Remove an instance.
6797 HPATH = "instance-remove"
6798 HTYPE = constants.HTYPE_INSTANCE
6801 def ExpandNames(self):
6802 self._ExpandAndLockInstance()
6803 self.needed_locks[locking.LEVEL_NODE] = []
6804 self.needed_locks[locking.LEVEL_NODE_RES] = []
6805 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6807 def DeclareLocks(self, level):
6808 if level == locking.LEVEL_NODE:
6809 self._LockInstancesNodes()
6810 elif level == locking.LEVEL_NODE_RES:
6812 self.needed_locks[locking.LEVEL_NODE_RES] = \
6813 self.needed_locks[locking.LEVEL_NODE][:]
6815 def BuildHooksEnv(self):
6818 This runs on master, primary and secondary nodes of the instance.
6821 env = _BuildInstanceHookEnvByObject(self, self.instance)
6822 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6825 def BuildHooksNodes(self):
6826 """Build hooks nodes.
6829 nl = [self.cfg.GetMasterNode()]
6830 nl_post = list(self.instance.all_nodes) + nl
6831 return (nl, nl_post)
6833 def CheckPrereq(self):
6834 """Check prerequisites.
6836 This checks that the instance is in the cluster.
6839 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6840 assert self.instance is not None, \
6841 "Cannot retrieve locked instance %s" % self.op.instance_name
6843 def Exec(self, feedback_fn):
6844 """Remove the instance.
6847 instance = self.instance
6848 logging.info("Shutting down instance %s on node %s",
6849 instance.name, instance.primary_node)
6851 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6852 self.op.shutdown_timeout)
6853 msg = result.fail_msg
6855 if self.op.ignore_failures:
6856 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6858 raise errors.OpExecError("Could not shutdown instance %s on"
6860 (instance.name, instance.primary_node, msg))
6862 assert (self.owned_locks(locking.LEVEL_NODE) ==
6863 self.owned_locks(locking.LEVEL_NODE_RES))
6864 assert not (set(instance.all_nodes) -
6865 self.owned_locks(locking.LEVEL_NODE)), \
6866 "Not owning correct locks"
6868 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6871 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6872 """Utility function to remove an instance.
6875 logging.info("Removing block devices for instance %s", instance.name)
6877 if not _RemoveDisks(lu, instance):
6878 if not ignore_failures:
6879 raise errors.OpExecError("Can't remove instance's disks")
6880 feedback_fn("Warning: can't remove instance's disks")
6882 logging.info("Removing instance %s out of cluster config", instance.name)
6884 lu.cfg.RemoveInstance(instance.name)
6886 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6887 "Instance lock removal conflict"
6889 # Remove lock for the instance
6890 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
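
# Illustrative sketch, not part of the module: the removal order enforced by
# the helper above (disks first, then the configuration entry), with
# hypothetical callables and an explicit list of warnings instead of the
# feedback function.
def _remove_instance_sketch(remove_disks, remove_config, ignore_failures):
  """Sketch: remove disks, then the config entry; disk errors may be ignored.

  @param remove_disks: hypothetical callable returning True on success
  @param remove_config: hypothetical callable dropping the config entry
  @return: list of warning strings (empty on a clean removal)
  """
  warnings = []
  if not remove_disks():
    if not ignore_failures:
      raise RuntimeError("can't remove instance's disks")
    warnings.append("can't remove instance's disks, continuing anyway")
  remove_config()
  return warnings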
6893 class LUInstanceQuery(NoHooksLU):
6894 """Logical unit for querying instances.
6897 # pylint: disable=W0142
6900 def CheckArguments(self):
6901 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6902 self.op.output_fields, self.op.use_locking)
6904 def ExpandNames(self):
6905 self.iq.ExpandNames(self)
6907 def DeclareLocks(self, level):
6908 self.iq.DeclareLocks(self, level)
6910 def Exec(self, feedback_fn):
6911 return self.iq.OldStyleQuery(self)
6914 class LUInstanceFailover(LogicalUnit):
6915 """Failover an instance.
6918 HPATH = "instance-failover"
6919 HTYPE = constants.HTYPE_INSTANCE
6922 def CheckArguments(self):
6923 """Check the arguments.
6926 self.iallocator = getattr(self.op, "iallocator", None)
6927 self.target_node = getattr(self.op, "target_node", None)
6929 def ExpandNames(self):
6930 self._ExpandAndLockInstance()
6932 if self.op.target_node is not None:
6933 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6935 self.needed_locks[locking.LEVEL_NODE] = []
6936 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6938 ignore_consistency = self.op.ignore_consistency
6939 shutdown_timeout = self.op.shutdown_timeout
6940 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6943 ignore_consistency=ignore_consistency,
6944 shutdown_timeout=shutdown_timeout)
6945 self.tasklets = [self._migrater]
6947 def DeclareLocks(self, level):
6948 if level == locking.LEVEL_NODE:
6949 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6950 if instance.disk_template in constants.DTS_EXT_MIRROR:
6951 if self.op.target_node is None:
6952 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6954 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6955 self.op.target_node]
6956 del self.recalculate_locks[locking.LEVEL_NODE]
6958 self._LockInstancesNodes()
6960 def BuildHooksEnv(self):
6963 This runs on master, primary and secondary nodes of the instance.
6966 instance = self._migrater.instance
6967 source_node = instance.primary_node
6968 target_node = self.op.target_node
6970 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6971 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6972 "OLD_PRIMARY": source_node,
6973 "NEW_PRIMARY": target_node,
6976 if instance.disk_template in constants.DTS_INT_MIRROR:
6977 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6978 env["NEW_SECONDARY"] = source_node
6980 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6982 env.update(_BuildInstanceHookEnvByObject(self, instance))
6986 def BuildHooksNodes(self):
6987 """Build hooks nodes.
6990 instance = self._migrater.instance
6991 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6992 return (nl, nl + [instance.primary_node])
6995 class LUInstanceMigrate(LogicalUnit):
6996 """Migrate an instance.
6998 This is migration without shutting down, compared to the failover,
6999 which is done with shutdown.
7002 HPATH = "instance-migrate"
7003 HTYPE = constants.HTYPE_INSTANCE
7006 def ExpandNames(self):
7007 self._ExpandAndLockInstance()
7009 if self.op.target_node is not None:
7010 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7012 self.needed_locks[locking.LEVEL_NODE] = []
7013 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7015 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7016 cleanup=self.op.cleanup,
7018 fallback=self.op.allow_failover)
7019 self.tasklets = [self._migrater]
7021 def DeclareLocks(self, level):
7022 if level == locking.LEVEL_NODE:
7023 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7024 if instance.disk_template in constants.DTS_EXT_MIRROR:
7025 if self.op.target_node is None:
7026 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7028 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7029 self.op.target_node]
7030 del self.recalculate_locks[locking.LEVEL_NODE]
7032 self._LockInstancesNodes()
7034 def BuildHooksEnv(self):
7037 This runs on master, primary and secondary nodes of the instance.
7040 instance = self._migrater.instance
7041 source_node = instance.primary_node
7042 target_node = self.op.target_node
7043 env = _BuildInstanceHookEnvByObject(self, instance)
7045 "MIGRATE_LIVE": self._migrater.live,
7046 "MIGRATE_CLEANUP": self.op.cleanup,
7047 "OLD_PRIMARY": source_node,
7048 "NEW_PRIMARY": target_node,
7051 if instance.disk_template in constants.DTS_INT_MIRROR:
7052 env["OLD_SECONDARY"] = target_node
7053 env["NEW_SECONDARY"] = source_node
7055 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7059 def BuildHooksNodes(self):
7060 """Build hooks nodes.
7063 instance = self._migrater.instance
7064 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7065 return (nl, nl + [instance.primary_node])
7068 class LUInstanceMove(LogicalUnit):
7069 """Move an instance by data-copying.
7072 HPATH = "instance-move"
7073 HTYPE = constants.HTYPE_INSTANCE
7076 def ExpandNames(self):
7077 self._ExpandAndLockInstance()
7078 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7079 self.op.target_node = target_node
7080 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7081 self.needed_locks[locking.LEVEL_NODE_RES] = []
7082 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7084 def DeclareLocks(self, level):
7085 if level == locking.LEVEL_NODE:
7086 self._LockInstancesNodes(primary_only=True)
7087 elif level == locking.LEVEL_NODE_RES:
7089 self.needed_locks[locking.LEVEL_NODE_RES] = \
7090 self.needed_locks[locking.LEVEL_NODE][:]
7092 def BuildHooksEnv(self):
7095 This runs on master, primary and secondary nodes of the instance.
7099 "TARGET_NODE": self.op.target_node,
7100 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7102 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7105 def BuildHooksNodes(self):
7106 """Build hooks nodes.
7110 self.cfg.GetMasterNode(),
7111 self.instance.primary_node,
7112 self.op.target_node,
7116 def CheckPrereq(self):
7117 """Check prerequisites.
7119 This checks that the instance is in the cluster.
7122 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7123 assert self.instance is not None, \
7124 "Cannot retrieve locked instance %s" % self.op.instance_name
7126 node = self.cfg.GetNodeInfo(self.op.target_node)
7127 assert node is not None, \
7128 "Cannot retrieve locked node %s" % self.op.target_node
7130 self.target_node = target_node = node.name
7132 if target_node == instance.primary_node:
7133 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7134 (instance.name, target_node),
7137 bep = self.cfg.GetClusterInfo().FillBE(instance)
7139 for idx, dsk in enumerate(instance.disks):
7140 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7141 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7142 " cannot copy" % idx, errors.ECODE_STATE)
7144 _CheckNodeOnline(self, target_node)
7145 _CheckNodeNotDrained(self, target_node)
7146 _CheckNodeVmCapable(self, target_node)
7148 if instance.admin_state == constants.ADMINST_UP:
7149 # check memory requirements on the secondary node
7150 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7151 instance.name, bep[constants.BE_MAXMEM],
7152 instance.hypervisor)
7154 self.LogInfo("Not checking memory on the secondary node as"
7155 " instance will not be started")
7157 # check bridge existence
7158 _CheckInstanceBridgesExist(self, instance, node=target_node)
7160 def Exec(self, feedback_fn):
7161 """Move an instance.
7163 The move is done by shutting it down on its present node, copying
7164 the data over (slow) and starting it on the new node.
7167 instance = self.instance
7169 source_node = instance.primary_node
7170 target_node = self.target_node
7172 self.LogInfo("Shutting down instance %s on source node %s",
7173 instance.name, source_node)
7175 assert (self.owned_locks(locking.LEVEL_NODE) ==
7176 self.owned_locks(locking.LEVEL_NODE_RES))
7178 result = self.rpc.call_instance_shutdown(source_node, instance,
7179 self.op.shutdown_timeout)
7180 msg = result.fail_msg
7182 if self.op.ignore_consistency:
7183 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7184 " Proceeding anyway. Please make sure node"
7185 " %s is down. Error details: %s",
7186 instance.name, source_node, source_node, msg)
7188 raise errors.OpExecError("Could not shutdown instance %s on"
7190 (instance.name, source_node, msg))
7192 # create the target disks
7194 _CreateDisks(self, instance, target_node=target_node)
7195 except errors.OpExecError:
7196 self.LogWarning("Device creation failed, reverting...")
7198 _RemoveDisks(self, instance, target_node=target_node)
7200 self.cfg.ReleaseDRBDMinors(instance.name)
7203 cluster_name = self.cfg.GetClusterInfo().cluster_name
7206 # activate, get path, copy the data over
7207 for idx, disk in enumerate(instance.disks):
7208 self.LogInfo("Copying data for disk %d", idx)
7209 result = self.rpc.call_blockdev_assemble(target_node, disk,
7210 instance.name, True, idx)
7212 self.LogWarning("Can't assemble newly created disk %d: %s",
7213 idx, result.fail_msg)
7214 errs.append(result.fail_msg)
7216 dev_path = result.payload
7217 result = self.rpc.call_blockdev_export(source_node, disk,
7218 target_node, dev_path,
7221 self.LogWarning("Can't copy data over for disk %d: %s",
7222 idx, result.fail_msg)
7223 errs.append(result.fail_msg)
7227 self.LogWarning("Some disks failed to copy, aborting")
7229 _RemoveDisks(self, instance, target_node=target_node)
7231 self.cfg.ReleaseDRBDMinors(instance.name)
7232 raise errors.OpExecError("Errors during disk copy: %s" %
7235 instance.primary_node = target_node
7236 self.cfg.Update(instance, feedback_fn)
7238 self.LogInfo("Removing the disks on the original node")
7239 _RemoveDisks(self, instance, target_node=source_node)
7241 # Only start the instance if it's marked as up
7242 if instance.admin_state == constants.ADMINST_UP:
7243 self.LogInfo("Starting instance %s on node %s",
7244 instance.name, target_node)
7246 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7247 ignore_secondaries=True)
7249 _ShutdownInstanceDisks(self, instance)
7250 raise errors.OpExecError("Can't activate the instance's disks")
7252 result = self.rpc.call_instance_start(target_node,
7253 (instance, None, None), False)
7254 msg = result.fail_msg
7256 _ShutdownInstanceDisks(self, instance)
7257 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7258 (instance.name, target_node, msg))
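
# Illustrative sketch, not part of the module: the error-collecting copy loop
# used by the move above. "copy_one" is a hypothetical callable standing in
# for the assemble-and-export RPC pair; failures are collected and only
# reported once all disks have been attempted.
def _copy_disks_sketch(disk_indexes, copy_one):
  """Sketch: copy every disk, gather failures, abort only at the end.

  @param copy_one: callable(idx) returning an error message or None
  """
  errs = []
  for idx in disk_indexes:
    msg = copy_one(idx)
    if msg:
      errs.append("disk %d: %s" % (idx, msg))
  if errs:
    raise RuntimeError("errors during disk copy: %s" % "; ".join(errs))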
7261 class LUNodeMigrate(LogicalUnit):
7262 """Migrate all instances from a node.
7265 HPATH = "node-migrate"
7266 HTYPE = constants.HTYPE_NODE
7269 def CheckArguments(self):
7272 def ExpandNames(self):
7273 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7275 self.share_locks = _ShareAll()
7276 self.needed_locks = {
7277 locking.LEVEL_NODE: [self.op.node_name],
7280 def BuildHooksEnv(self):
7283 This runs on the master, the primary and all the secondaries.
7287 "NODE_NAME": self.op.node_name,
7290 def BuildHooksNodes(self):
7291 """Build hooks nodes.
7294 nl = [self.cfg.GetMasterNode()]
7297 def CheckPrereq(self):
7300 def Exec(self, feedback_fn):
7301 # Prepare jobs for migration instances
7303 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7306 iallocator=self.op.iallocator,
7307 target_node=self.op.target_node)]
7308 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7311 # TODO: Run iallocator in this opcode and pass correct placement options to
7312 # OpInstanceMigrate. Since other jobs can modify the cluster between
7313 # running the iallocator and the actual migration, a good consistency model
7314 # will have to be found.
7316 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7317 frozenset([self.op.node_name]))
7319 return ResultWithJobs(jobs)
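
# Illustrative sketch, not part of the module: the jobs value handed to
# ResultWithJobs above is a list of jobs, each job being a list of opcodes.
# Plain dicts and made-up instance names stand in for the real opcode
# objects here.
def _node_migrate_jobs_sketch(instance_names, target_node=None):
  """Sketch: one single-opcode job per primary instance on the node."""
  jobs = []
  for name in instance_names:
    jobs.append([{"op": "instance-migrate",
                  "instance_name": name,
                  "target_node": target_node}])
  return jobs

# e.g. _node_migrate_jobs_sketch(["inst1", "inst2"]) yields two jobs of one
# pseudo-opcode each, the structure the caller wraps in ResultWithJobs(...).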
7322 class TLMigrateInstance(Tasklet):
7323 """Tasklet class for instance migration.
7326 @ivar live: whether the migration will be done live or non-live;
7327 this variable is initialized only after CheckPrereq has run
7328 @type cleanup: boolean
7329 @ivar cleanup: Whether we clean up from a failed migration
7330 @type iallocator: string
7331 @ivar iallocator: The iallocator used to determine target_node
7332 @type target_node: string
7333 @ivar target_node: If given, the target_node to reallocate the instance to
7334 @type failover: boolean
7335 @ivar failover: Whether operation results in failover or migration
7336 @type fallback: boolean
7337 @ivar fallback: Whether fallback to failover is allowed if migration not
7339 @type ignore_consistency: boolean
7340 @ivar ignore_consistency: Whether we should ignore consistency between source
7342 @type shutdown_timeout: int
7343 @ivar shutdown_timeout: In case of failover timeout of the shutdown
7348 _MIGRATION_POLL_INTERVAL = 1 # seconds
7349 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7351 def __init__(self, lu, instance_name, cleanup=False,
7352 failover=False, fallback=False,
7353 ignore_consistency=False,
7354 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7355 """Initializes this class.
7358 Tasklet.__init__(self, lu)
7361 self.instance_name = instance_name
7362 self.cleanup = cleanup
7363 self.live = False # will be overridden later
7364 self.failover = failover
7365 self.fallback = fallback
7366 self.ignore_consistency = ignore_consistency
7367 self.shutdown_timeout = shutdown_timeout
7369 def CheckPrereq(self):
7370 """Check prerequisites.
7372 This checks that the instance is in the cluster.
7375 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7376 instance = self.cfg.GetInstanceInfo(instance_name)
7377 assert instance is not None
7378 self.instance = instance
7380 if (not self.cleanup and
7381 not instance.admin_state == constants.ADMINST_UP and
7382 not self.failover and self.fallback):
7383 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7384 " switching to failover")
7385 self.failover = True
7387 if instance.disk_template not in constants.DTS_MIRRORED:
7392 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7393 " %s" % (instance.disk_template, text),
7396 if instance.disk_template in constants.DTS_EXT_MIRROR:
7397 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7399 if self.lu.op.iallocator:
7400 self._RunAllocator()
7402 # We set self.target_node as it is required by
7404 self.target_node = self.lu.op.target_node
7406 # self.target_node is already populated, either directly or by the
7408 target_node = self.target_node
7409 if self.target_node == instance.primary_node:
7410 raise errors.OpPrereqError("Cannot migrate instance %s"
7411 " to its primary (%s)" %
7412 (instance.name, instance.primary_node))
7414 if len(self.lu.tasklets) == 1:
7415 # It is safe to release locks only when we're the only tasklet
7417 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7418 keep=[instance.primary_node, self.target_node])
7421 secondary_nodes = instance.secondary_nodes
7422 if not secondary_nodes:
7423 raise errors.ConfigurationError("No secondary node but using"
7424 " %s disk template" %
7425 instance.disk_template)
7426 target_node = secondary_nodes[0]
7427 if self.lu.op.iallocator or (self.lu.op.target_node and
7428 self.lu.op.target_node != target_node):
7430 text = "failed over"
7433 raise errors.OpPrereqError("Instances with disk template %s cannot"
7434 " be %s to arbitrary nodes"
7435 " (neither an iallocator nor a target"
7436 " node can be passed)" %
7437 (instance.disk_template, text),
7440 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7442 # check memory requirements on the secondary node
7443 if not self.failover or instance.admin_state == constants.ADMINST_UP:
7444 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7445 instance.name, i_be[constants.BE_MAXMEM],
7446 instance.hypervisor)
7448 self.lu.LogInfo("Not checking memory on the secondary node as"
7449 " instance will not be started")
7451 # check bridge existence
7452 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7454 if not self.cleanup:
7455 _CheckNodeNotDrained(self.lu, target_node)
7456 if not self.failover:
7457 result = self.rpc.call_instance_migratable(instance.primary_node,
7459 if result.fail_msg and self.fallback:
7460 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7462 self.failover = True
7464 result.Raise("Can't migrate, please use failover",
7465 prereq=True, ecode=errors.ECODE_STATE)
7467 assert not (self.failover and self.cleanup)
7469 if not self.failover:
7470 if self.lu.op.live is not None and self.lu.op.mode is not None:
7471 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7472 " parameters are accepted",
7474 if self.lu.op.live is not None:
7476 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7478 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7479 # reset the 'live' parameter to None so that repeated
7480 # invocations of CheckPrereq do not raise an exception
7481 self.lu.op.live = None
7482 elif self.lu.op.mode is None:
7483 # read the default value from the hypervisor
7484 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7486 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7488 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7490 # Failover is never live
7493 def _RunAllocator(self):
7494 """Run the allocator based on input opcode.
7497 ial = IAllocator(self.cfg, self.rpc,
7498 mode=constants.IALLOCATOR_MODE_RELOC,
7499 name=self.instance_name,
7500 # TODO See why hail breaks with a single node below
7501 relocate_from=[self.instance.primary_node,
7502 self.instance.primary_node],
7505 ial.Run(self.lu.op.iallocator)
7508 raise errors.OpPrereqError("Can't compute nodes using"
7509 " iallocator '%s': %s" %
7510 (self.lu.op.iallocator, ial.info),
7512 if len(ial.result) != ial.required_nodes:
7513 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7514 " of nodes (%s), required %s" %
7515 (self.lu.op.iallocator, len(ial.result),
7516 ial.required_nodes), errors.ECODE_FAULT)
7517 self.target_node = ial.result[0]
7518 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7519 self.instance_name, self.lu.op.iallocator,
7520 utils.CommaJoin(ial.result))
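
# Illustrative sketch, not part of the module: the two checks applied to an
# allocator reply above (did it succeed, did it return the right number of
# nodes), written against plain values instead of the IAllocator object.
def _check_allocator_reply_sketch(success, result, required_nodes, info):
  """Sketch: validate an allocator reply and return the chosen node."""
  if not success:
    raise ValueError("allocator failed: %s" % info)
  if len(result) != required_nodes:
    raise ValueError("allocator returned %d node(s), %d required" %
                     (len(result), required_nodes))
  return result[0]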
7522 def _WaitUntilSync(self):
7523 """Poll with custom rpc for disk sync.
7525 This uses our own step-based rpc call.
7528 self.feedback_fn("* wait until resync is done")
7532 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7534 self.instance.disks)
7536 for node, nres in result.items():
7537 nres.Raise("Cannot resync disks on node %s" % node)
7538 node_done, node_percent = nres.payload
7539 all_done = all_done and node_done
7540 if node_percent is not None:
7541 min_percent = min(min_percent, node_percent)
7543 if min_percent < 100:
7544 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7547 def _EnsureSecondary(self, node):
7548 """Demote a node to secondary.
7551 self.feedback_fn("* switching node %s to secondary mode" % node)
7553 for dev in self.instance.disks:
7554 self.cfg.SetDiskID(dev, node)
7556 result = self.rpc.call_blockdev_close(node, self.instance.name,
7557 self.instance.disks)
7558 result.Raise("Cannot change disk to secondary on node %s" % node)
7560 def _GoStandalone(self):
7561 """Disconnect from the network.
7564 self.feedback_fn("* changing into standalone mode")
7565 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7566 self.instance.disks)
7567 for node, nres in result.items():
7568 nres.Raise("Cannot disconnect disks node %s" % node)
7570 def _GoReconnect(self, multimaster):
7571 """Reconnect to the network.
7577 msg = "single-master"
7578 self.feedback_fn("* changing disks into %s mode" % msg)
7579 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7580 self.instance.disks,
7581 self.instance.name, multimaster)
7582 for node, nres in result.items():
7583 nres.Raise("Cannot change disks config on node %s" % node)
7585 def _ExecCleanup(self):
7586 """Try to cleanup after a failed migration.
7588 The cleanup is done by:
7589 - check that the instance is running only on one node
7590 (and update the config if needed)
7591 - change disks on its secondary node to secondary
7592 - wait until disks are fully synchronized
7593 - disconnect from the network
7594 - change disks into single-master mode
7595 - wait again until disks are fully synchronized
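If the instance turns out to be running on the target node, the migration
actually succeeded; the configuration is then updated to make that node the
new primary before the disks are demoted and resynchronized.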
7598 instance = self.instance
7599 target_node = self.target_node
7600 source_node = self.source_node
7602 # check running on only one node
7603 self.feedback_fn("* checking where the instance actually runs"
7604 " (if this hangs, the hypervisor might be in"
7606 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7607 for node, result in ins_l.items():
7608 result.Raise("Can't contact node %s" % node)
7610 runningon_source = instance.name in ins_l[source_node].payload
7611 runningon_target = instance.name in ins_l[target_node].payload
7613 if runningon_source and runningon_target:
7614 raise errors.OpExecError("Instance seems to be running on two nodes,"
7615 " or the hypervisor is confused; you will have"
7616 " to ensure manually that it runs only on one"
7617 " and restart this operation")
7619 if not (runningon_source or runningon_target):
7620 raise errors.OpExecError("Instance does not seem to be running at all;"
7621 " in this case it's safer to repair by"
7622 " running 'gnt-instance stop' to ensure disk"
7623 " shutdown, and then restarting it")
7625 if runningon_target:
7626 # the migration has actually succeeded, we need to update the config
7627 self.feedback_fn("* instance running on secondary node (%s),"
7628 " updating config" % target_node)
7629 instance.primary_node = target_node
7630 self.cfg.Update(instance, self.feedback_fn)
7631 demoted_node = source_node
7633 self.feedback_fn("* instance confirmed to be running on its"
7634 " primary node (%s)" % source_node)
7635 demoted_node = target_node
7637 if instance.disk_template in constants.DTS_INT_MIRROR:
7638 self._EnsureSecondary(demoted_node)
7640 self._WaitUntilSync()
7641 except errors.OpExecError:
7642 # we ignore here errors, since if the device is standalone, it
7643 # won't be able to sync
7645 self._GoStandalone()
7646 self._GoReconnect(False)
7647 self._WaitUntilSync()
7649 self.feedback_fn("* done")
7651 def _RevertDiskStatus(self):
7652 """Try to revert the disk status after a failed migration.
7655 target_node = self.target_node
7656 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7660 self._EnsureSecondary(target_node)
7661 self._GoStandalone()
7662 self._GoReconnect(False)
7663 self._WaitUntilSync()
7664 except errors.OpExecError, err:
7665 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7666 " please try to recover the instance manually;"
7667 " error '%s'" % str(err))
7669 def _AbortMigration(self):
7670 """Call the hypervisor code to abort a started migration.
7673 instance = self.instance
7674 target_node = self.target_node
7675 source_node = self.source_node
7676 migration_info = self.migration_info
7678 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7682 abort_msg = abort_result.fail_msg
7684 logging.error("Aborting migration failed on target node %s: %s",
7685 target_node, abort_msg)
7686 # Don't raise an exception here, as we still have to try to revert the
7687 # disk status, even if this step failed.
7689 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7690 instance, False, self.live)
7691 abort_msg = abort_result.fail_msg
7693 logging.error("Aborting migration failed on source node %s: %s",
7694 source_node, abort_msg)
7696 def _ExecMigration(self):
7697 """Migrate an instance.
7699 The migrate is done by:
7700 - change the disks into dual-master mode
7701 - wait until disks are fully synchronized again
7702 - migrate the instance
7703 - change disks on the new secondary node (the old primary) to secondary
7704 - wait until disks are fully synchronized
7705 - change disks into single-master mode
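The disk mode changes are skipped for externally mirrored disk templates,
which need no DRBD handling.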
7708 instance = self.instance
7709 target_node = self.target_node
7710 source_node = self.source_node
7712 # Check for hypervisor version mismatch and warn the user.
7713 nodeinfo = self.rpc.call_node_info([source_node, target_node],
7714 None, [self.instance.hypervisor])
7715 for ninfo in nodeinfo.values():
7716 ninfo.Raise("Unable to retrieve node information from node '%s'" %
7718 (_, _, (src_info, )) = nodeinfo[source_node].payload
7719 (_, _, (dst_info, )) = nodeinfo[target_node].payload
7721 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
7722 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
7723 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
7724 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
7725 if src_version != dst_version:
7726 self.feedback_fn("* warning: hypervisor version mismatch between"
7727 " source (%s) and target (%s) node" %
7728 (src_version, dst_version))
7730 self.feedback_fn("* checking disk consistency between source and target")
7731 for dev in instance.disks:
7732 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7733 raise errors.OpExecError("Disk %s is degraded or not fully"
7734 " synchronized on target node,"
7735 " aborting migration" % dev.iv_name)
7737 # First get the migration information from the remote node
7738 result = self.rpc.call_migration_info(source_node, instance)
7739 msg = result.fail_msg
7741 log_err = ("Failed fetching source migration information from %s: %s" %
7743 logging.error(log_err)
7744 raise errors.OpExecError(log_err)
7746 self.migration_info = migration_info = result.payload
7748 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7749 # Then switch the disks to master/master mode
7750 self._EnsureSecondary(target_node)
7751 self._GoStandalone()
7752 self._GoReconnect(True)
7753 self._WaitUntilSync()
7755 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7756 result = self.rpc.call_accept_instance(target_node,
7759 self.nodes_ip[target_node])
7761 msg = result.fail_msg
7763 logging.error("Instance pre-migration failed, trying to revert"
7764 " disk status: %s", msg)
7765 self.feedback_fn("Pre-migration failed, aborting")
7766 self._AbortMigration()
7767 self._RevertDiskStatus()
7768 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7769 (instance.name, msg))
7771 self.feedback_fn("* migrating instance to %s" % target_node)
7772 result = self.rpc.call_instance_migrate(source_node, instance,
7773 self.nodes_ip[target_node],
7775 msg = result.fail_msg
7777 logging.error("Instance migration failed, trying to revert"
7778 " disk status: %s", msg)
7779 self.feedback_fn("Migration failed, aborting")
7780 self._AbortMigration()
7781 self._RevertDiskStatus()
7782 raise errors.OpExecError("Could not migrate instance %s: %s" %
7783 (instance.name, msg))
7785 self.feedback_fn("* starting memory transfer")
7786 last_feedback = time.time()
7788 result = self.rpc.call_instance_get_migration_status(source_node,
7790 msg = result.fail_msg
7791 ms = result.payload # MigrationStatus instance
7792 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7793 logging.error("Instance migration failed, trying to revert"
7794 " disk status: %s", msg)
7795 self.feedback_fn("Migration failed, aborting")
7796 self._AbortMigration()
7797 self._RevertDiskStatus()
7798 raise errors.OpExecError("Could not migrate instance %s: %s" %
7799 (instance.name, msg))
7801 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7802 self.feedback_fn("* memory transfer complete")
7805 if (utils.TimeoutExpired(last_feedback,
7806 self._MIGRATION_FEEDBACK_INTERVAL) and
7807 ms.transferred_ram is not None):
7808 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7809 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7810 last_feedback = time.time()
7812 time.sleep(self._MIGRATION_POLL_INTERVAL)
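# The loop above polls the source node until the hypervisor stops reporting
# the migration as active, emitting transfer progress at most once per
# feedback interval; a failed status aborts the migration and reverts the
# disk configuration.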
7814 result = self.rpc.call_instance_finalize_migration_src(source_node,
7818 msg = result.fail_msg
7820 logging.error("Instance migration succeeded, but finalization failed"
7821 " on the source node: %s", msg)
7822 raise errors.OpExecError("Could not finalize instance migration: %s" %
7825 instance.primary_node = target_node
7827 # distribute new instance config to the other nodes
7828 self.cfg.Update(instance, self.feedback_fn)
7830 result = self.rpc.call_instance_finalize_migration_dst(target_node,
7834 msg = result.fail_msg
7836 logging.error("Instance migration succeeded, but finalization failed"
7837 " on the target node: %s", msg)
7838 raise errors.OpExecError("Could not finalize instance migration: %s" %
7841 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7842 self._EnsureSecondary(source_node)
7843 self._WaitUntilSync()
7844 self._GoStandalone()
7845 self._GoReconnect(False)
7846 self._WaitUntilSync()
7848 self.feedback_fn("* done")
7850 def _ExecFailover(self):
7851 """Failover an instance.
7853 The failover is done by shutting it down on its present node and
7854 starting it on the secondary.
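Disk consistency is checked first (unless the primary node is offline or
consistency checks are explicitly ignored), and the instance is only started
on the target node if its admin state is marked as up.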
7857 instance = self.instance
7858 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7860 source_node = instance.primary_node
7861 target_node = self.target_node
7863 if instance.admin_state == constants.ADMINST_UP:
7864 self.feedback_fn("* checking disk consistency between source and target")
7865 for dev in instance.disks:
7866 # for drbd, these are drbd over lvm
7867 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7868 if primary_node.offline:
7869 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7871 (primary_node.name, dev.iv_name, target_node))
7872 elif not self.ignore_consistency:
7873 raise errors.OpExecError("Disk %s is degraded on target node,"
7874 " aborting failover" % dev.iv_name)
7876 self.feedback_fn("* not checking disk consistency as instance is not"
7879 self.feedback_fn("* shutting down instance on source node")
7880 logging.info("Shutting down instance %s on node %s",
7881 instance.name, source_node)
7883 result = self.rpc.call_instance_shutdown(source_node, instance,
7884 self.shutdown_timeout)
7885 msg = result.fail_msg
7887 if self.ignore_consistency or primary_node.offline:
7888 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7889 " proceeding anyway; please make sure node"
7890 " %s is down; error details: %s",
7891 instance.name, source_node, source_node, msg)
7893 raise errors.OpExecError("Could not shutdown instance %s on"
7895 (instance.name, source_node, msg))
7897 self.feedback_fn("* deactivating the instance's disks on source node")
7898 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7899 raise errors.OpExecError("Can't shut down the instance's disks")
7901 instance.primary_node = target_node
7902 # distribute new instance config to the other nodes
7903 self.cfg.Update(instance, self.feedback_fn)
7905 # Only start the instance if it's marked as up
7906 if instance.admin_state == constants.ADMINST_UP:
7907 self.feedback_fn("* activating the instance's disks on target node %s" %
7909 logging.info("Starting instance %s on node %s",
7910 instance.name, target_node)
7912 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7913 ignore_secondaries=True)
7915 _ShutdownInstanceDisks(self.lu, instance)
7916 raise errors.OpExecError("Can't activate the instance's disks")
7918 self.feedback_fn("* starting the instance on the target node %s" %
7920 result = self.rpc.call_instance_start(target_node, (instance, None, None),
7922 msg = result.fail_msg
7924 _ShutdownInstanceDisks(self.lu, instance)
7925 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7926 (instance.name, target_node, msg))
7928 def Exec(self, feedback_fn):
7929 """Perform the migration.
7932 self.feedback_fn = feedback_fn
7933 self.source_node = self.instance.primary_node
7935 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7936 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7937 self.target_node = self.instance.secondary_nodes[0]
7938 # Otherwise self.target_node has been populated either
7939 # directly, or through an iallocator.
7941 self.all_nodes = [self.source_node, self.target_node]
7942 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7943 in self.cfg.GetMultiNodeInfo(self.all_nodes))
7946 feedback_fn("Failover instance %s" % self.instance.name)
7947 self._ExecFailover()
7949 feedback_fn("Migrating instance %s" % self.instance.name)
7952 return self._ExecCleanup()
7954 return self._ExecMigration()
7957 def _CreateBlockDev(lu, node, instance, device, force_create,
7959 """Create a tree of block devices on a given node.
7961 If this device type has to be created on secondaries, create it and
7964 If not, just recurse to children keeping the same 'force' value.
7966 @param lu: the lu on whose behalf we execute
7967 @param node: the node on which to create the device
7968 @type instance: L{objects.Instance}
7969 @param instance: the instance which owns the device
7970 @type device: L{objects.Disk}
7971 @param device: the device to create
7972 @type force_create: boolean
7973 @param force_create: whether to force creation of this device; this
7974 will be changed to True whenever we find a device which has the
7975 CreateOnSecondary() attribute
7976 @param info: the extra 'metadata' we should attach to the device
7977 (this will be represented as a LVM tag)
7978 @type force_open: boolean
7979 @param force_open: this parameter will be passed to the
7980 L{backend.BlockdevCreate} function where it specifies
7981 whether we run on primary or not, and it affects both
7982 the child assembly and the device's own Open() execution
7985 if device.CreateOnSecondary():
7989 for child in device.children:
7990 _CreateBlockDev(lu, node, instance, child, force_create,
7993 if not force_create:
7996 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7999 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8000 """Create a single block device on a given node.
8002 This will not recurse over children of the device, so they must be
8005 @param lu: the lu on whose behalf we execute
8006 @param node: the node on which to create the device
8007 @type instance: L{objects.Instance}
8008 @param instance: the instance which owns the device
8009 @type device: L{objects.Disk}
8010 @param device: the device to create
8011 @param info: the extra 'metadata' we should attach to the device
8012 (this will be represented as a LVM tag)
8013 @type force_open: boolean
8014 @param force_open: this parameter will be passed to the
8015 L{backend.BlockdevCreate} function where it specifies
8016 whether we run on primary or not, and it affects both
8017 the child assembly and the device's own Open() execution
8020 lu.cfg.SetDiskID(device, node)
8021 result = lu.rpc.call_blockdev_create(node, device, device.size,
8022 instance.name, force_open, info)
8023 result.Raise("Can't create block device %s on"
8024 " node %s for instance %s" % (device, node, instance.name))
8025 if device.physical_id is None:
8026 device.physical_id = result.payload
8029 def _GenerateUniqueNames(lu, exts):
8030 """Generate a suitable LV name.
8032 This will generate a logical volume name for the given instance.
8037 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8038 results.append("%s%s" % (new_id, val))
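# e.g. for exts [".disk0", ".disk1"] this returns names such as
# "<unique id>.disk0" and "<unique id>.disk1", where the unique ID is the
# one generated above (illustrative)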
8042 def _ComputeLDParams(disk_template, disk_params):
8043 """Computes Logical Disk parameters from Disk Template parameters.
8045 @type disk_template: string
8046 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8047 @type disk_params: dict
8048 @param disk_params: disk template parameters; dict(template_name -> parameters
8050 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8051 contains the LD parameters of the node. The tree is flattened in-order.
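For example, DT_DRBD8 yields three entries (the DRBD8 device followed by
its two LV children for data and metadata), while DT_PLAIN yields a single
LV entry.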
8054 if disk_template not in constants.DISK_TEMPLATES:
8055 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8058 if disk_template == constants.DT_DRBD8:
8059 result.append(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8])
8060 result.append(constants.DISK_LD_DEFAULTS[constants.LD_LV])
8061 result.append(constants.DISK_LD_DEFAULTS[constants.LD_LV])
8062 elif (disk_template == constants.DT_FILE or
8063 disk_template == constants.DT_SHARED_FILE):
8064 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8065 elif disk_template == constants.DT_PLAIN:
8066 result.append(constants.DISK_LD_DEFAULTS[constants.LD_LV])
8067 elif disk_template == constants.DT_BLOCK:
8068 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8073 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8074 iv_name, p_minor, s_minor, drbd_params, data_params,
8076 """Generate a drbd8 device complete with its children.
8079 assert len(vgnames) == len(names) == 2
8080 port = lu.cfg.AllocatePort()
8081 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8083 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8084 logical_id=(vgnames[0], names[0]),
8086 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8087 logical_id=(vgnames[1], names[1]),
8089 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8090 logical_id=(primary, secondary, port,
8093 children=[dev_data, dev_meta],
8094 iv_name=iv_name, params=drbd_params)
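# The DRBD8 disk's logical_id ties together the two node names, the TCP port
# and the shared secret allocated above (plus the per-node minors); its two
# LV children hold the data and the fixed-size DRBD metadata.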
8098 def _GenerateDiskTemplate(lu, template_name,
8099 instance_name, primary_node,
8100 secondary_nodes, disk_info,
8101 file_storage_dir, file_driver,
8102 base_index, feedback_fn, disk_params):
8103 """Generate the entire disk layout for a given template type.
8106 #TODO: compute space requirements
8108 vgname = lu.cfg.GetVGName()
8109 disk_count = len(disk_info)
8111 ld_params = _ComputeLDParams(template_name, disk_params)
8112 if template_name == constants.DT_DISKLESS:
8114 elif template_name == constants.DT_PLAIN:
8115 if len(secondary_nodes) != 0:
8116 raise errors.ProgrammerError("Wrong template configuration")
8118 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8119 for i in range(disk_count)])
8120 for idx, disk in enumerate(disk_info):
8121 disk_index = idx + base_index
8122 vg = disk.get(constants.IDISK_VG, vgname)
8123 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8124 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8125 size=disk[constants.IDISK_SIZE],
8126 logical_id=(vg, names[idx]),
8127 iv_name="disk/%d" % disk_index,
8128 mode=disk[constants.IDISK_MODE],
8129 params=ld_params[0])
8130 disks.append(disk_dev)
8131 elif template_name == constants.DT_DRBD8:
8132 drbd_params, data_params, meta_params = ld_params
8133 if len(secondary_nodes) != 1:
8134 raise errors.ProgrammerError("Wrong template configuration")
8135 remote_node = secondary_nodes[0]
8136 minors = lu.cfg.AllocateDRBDMinor(
8137 [primary_node, remote_node] * len(disk_info), instance_name)
8140 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8141 for i in range(disk_count)]):
8142 names.append(lv_prefix + "_data")
8143 names.append(lv_prefix + "_meta")
8144 for idx, disk in enumerate(disk_info):
8145 disk_index = idx + base_index
8146 data_vg = disk.get(constants.IDISK_VG, vgname)
8147 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
8148 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8149 disk[constants.IDISK_SIZE],
8151 names[idx * 2:idx * 2 + 2],
8152 "disk/%d" % disk_index,
8153 minors[idx * 2], minors[idx * 2 + 1],
8154 drbd_params, data_params, meta_params)
8155 disk_dev.mode = disk[constants.IDISK_MODE]
8156 disks.append(disk_dev)
8157 elif template_name == constants.DT_FILE:
8158 if len(secondary_nodes) != 0:
8159 raise errors.ProgrammerError("Wrong template configuration")
8161 opcodes.RequireFileStorage()
8163 for idx, disk in enumerate(disk_info):
8164 disk_index = idx + base_index
8165 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8166 size=disk[constants.IDISK_SIZE],
8167 iv_name="disk/%d" % disk_index,
8168 logical_id=(file_driver,
8169 "%s/disk%d" % (file_storage_dir,
8171 mode=disk[constants.IDISK_MODE],
8172 params=ld_params[0])
8173 disks.append(disk_dev)
8174 elif template_name == constants.DT_SHARED_FILE:
8175 if len(secondary_nodes) != 0:
8176 raise errors.ProgrammerError("Wrong template configuration")
8178 opcodes.RequireSharedFileStorage()
8180 for idx, disk in enumerate(disk_info):
8181 disk_index = idx + base_index
8182 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8183 size=disk[constants.IDISK_SIZE],
8184 iv_name="disk/%d" % disk_index,
8185 logical_id=(file_driver,
8186 "%s/disk%d" % (file_storage_dir,
8188 mode=disk[constants.IDISK_MODE],
8189 params=ld_params[0])
8190 disks.append(disk_dev)
8191 elif template_name == constants.DT_BLOCK:
8192 if len(secondary_nodes) != 0:
8193 raise errors.ProgrammerError("Wrong template configuration")
8195 for idx, disk in enumerate(disk_info):
8196 disk_index = idx + base_index
8197 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8198 size=disk[constants.IDISK_SIZE],
8199 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8200 disk[constants.IDISK_ADOPT]),
8201 iv_name="disk/%d" % disk_index,
8202 mode=disk[constants.IDISK_MODE],
8203 params=ld_params[0])
8204 disks.append(disk_dev)
8207 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8211 def _GetInstanceInfoText(instance):
8212 """Compute that text that should be added to the disk's metadata.
8215 return "originstname+%s" % instance.name
8218 def _CalcEta(time_taken, written, total_size):
8219 """Calculates the ETA based on size written and total size.
8221 @param time_taken: The time taken so far
8222 @param written: amount written so far
8223 @param total_size: The total size of data to be written
8224 @return: The remaining time in seconds
8227 avg_time = time_taken / float(written)
8228 return (total_size - written) * avg_time
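# Worked example (illustrative): if 1024 MiB were written in 60 seconds out
# of a 4096 MiB total, the average is 60/1024 s per MiB and the ETA is
# (4096 - 1024) * 60/1024 = 180 seconds.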
8231 def _WipeDisks(lu, instance):
8232 """Wipes instance disks.
8234 @type lu: L{LogicalUnit}
8235 @param lu: the logical unit on whose behalf we execute
8236 @type instance: L{objects.Instance}
8237 @param instance: the instance whose disks we should create
8238 @return: the success of the wipe
8241 node = instance.primary_node
8243 for device in instance.disks:
8244 lu.cfg.SetDiskID(device, node)
8246 logging.info("Pause sync of instance %s disks", instance.name)
8247 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8249 for idx, success in enumerate(result.payload):
8251 logging.warn("pause-sync of instance %s for disks %d failed",
8255 for idx, device in enumerate(instance.disks):
8256 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8257 # MAX_WIPE_CHUNK at max
8258 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8259 constants.MIN_WIPE_CHUNK_PERCENT)
8260 # we _must_ make this an int, otherwise rounding errors will
8262 wipe_chunk_size = int(wipe_chunk_size)
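# Example (assuming the usual constants MAX_WIPE_CHUNK = 1024 MiB and
# MIN_WIPE_CHUNK_PERCENT = 10): a 20 GiB disk is wiped in 1024 MiB chunks,
# while a 5 GiB disk uses 512 MiB chunks.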
8264 lu.LogInfo("* Wiping disk %d", idx)
8265 logging.info("Wiping disk %d for instance %s, node %s using"
8266 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8271 start_time = time.time()
8273 while offset < size:
8274 wipe_size = min(wipe_chunk_size, size - offset)
8275 logging.debug("Wiping disk %d, offset %s, chunk %s",
8276 idx, offset, wipe_size)
8277 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8278 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8279 (idx, offset, wipe_size))
8282 if now - last_output >= 60:
8283 eta = _CalcEta(now - start_time, offset, size)
8284 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8285 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8288 logging.info("Resume sync of instance %s disks", instance.name)
8290 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8292 for idx, success in enumerate(result.payload):
8294 lu.LogWarning("Resume sync of disk %d failed, please have a"
8295 " look at the status and troubleshoot the issue", idx)
8296 logging.warn("resume-sync of instance %s for disks %d failed",
8300 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8301 """Create all disks for an instance.
8303 This abstracts away some work from AddInstance.
8305 @type lu: L{LogicalUnit}
8306 @param lu: the logical unit on whose behalf we execute
8307 @type instance: L{objects.Instance}
8308 @param instance: the instance whose disks we should create
8310 @param to_skip: list of indices to skip
8311 @type target_node: string
8312 @param target_node: if passed, overrides the target node for creation
8314 @return: the success of the creation
8317 info = _GetInstanceInfoText(instance)
8318 if target_node is None:
8319 pnode = instance.primary_node
8320 all_nodes = instance.all_nodes
8325 if instance.disk_template in constants.DTS_FILEBASED:
8326 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8327 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8329 result.Raise("Failed to create directory '%s' on"
8330 " node %s" % (file_storage_dir, pnode))
8332 # Note: this needs to be kept in sync with adding of disks in
8333 # LUInstanceSetParams
8334 for idx, device in enumerate(instance.disks):
8335 if to_skip and idx in to_skip:
8337 logging.info("Creating volume %s for instance %s",
8338 device.iv_name, instance.name)
8340 for node in all_nodes:
8341 f_create = node == pnode
8342 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8345 def _RemoveDisks(lu, instance, target_node=None):
8346 """Remove all disks for an instance.
8348 This abstracts away some work from `AddInstance()` and
8349 `RemoveInstance()`. Note that in case some of the devices couldn't
8350 be removed, the removal will continue with the other ones (compare
8351 with `_CreateDisks()`).
8353 @type lu: L{LogicalUnit}
8354 @param lu: the logical unit on whose behalf we execute
8355 @type instance: L{objects.Instance}
8356 @param instance: the instance whose disks we should remove
8357 @type target_node: string
8358 @param target_node: used to override the node on which to remove the disks
8360 @return: the success of the removal
8363 logging.info("Removing block devices for instance %s", instance.name)
8366 for device in instance.disks:
8368 edata = [(target_node, device)]
8370 edata = device.ComputeNodeTree(instance.primary_node)
8371 for node, disk in edata:
8372 lu.cfg.SetDiskID(disk, node)
8373 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8375 lu.LogWarning("Could not remove block device %s on node %s,"
8376 " continuing anyway: %s", device.iv_name, node, msg)
8379 # if this is a DRBD disk, return its port to the pool
8380 if device.dev_type in constants.LDS_DRBD:
8381 tcp_port = device.logical_id[2]
8382 lu.cfg.AddTcpUdpPort(tcp_port)
8384 if instance.disk_template == constants.DT_FILE:
8385 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8389 tgt = instance.primary_node
8390 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8392 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8393 file_storage_dir, instance.primary_node, result.fail_msg)
8399 def _ComputeDiskSizePerVG(disk_template, disks):
8400 """Compute disk size requirements in the volume group
8403 def _compute(disks, payload):
8404 """Universal algorithm.
8409 vgs[disk[constants.IDISK_VG]] = \
8410 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
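# e.g. a single 10 GiB disk with a 128 MiB payload in its VG contributes
# {"<vg name>": 10368} to the result (illustrative values)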
8414 # Required free disk space as a function of disk and swap space
8416 constants.DT_DISKLESS: {},
8417 constants.DT_PLAIN: _compute(disks, 0),
8418 # 128 MB are added for drbd metadata for each disk
8419 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8420 constants.DT_FILE: {},
8421 constants.DT_SHARED_FILE: {},
8424 if disk_template not in req_size_dict:
8425 raise errors.ProgrammerError("Disk template '%s' size requirement"
8426 " is unknown" % disk_template)
8428 return req_size_dict[disk_template]
8431 def _ComputeDiskSize(disk_template, disks):
8432 """Compute disk size requirements in the volume group
8435 # Required free disk space as a function of disk and swap space
8437 constants.DT_DISKLESS: None,
8438 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8439 # 128 MB are added for drbd metadata for each disk
8441 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8442 constants.DT_FILE: None,
8443 constants.DT_SHARED_FILE: 0,
8444 constants.DT_BLOCK: 0,
8447 if disk_template not in req_size_dict:
8448 raise errors.ProgrammerError("Disk template '%s' size requirement"
8449 " is unknown" % disk_template)
8451 return req_size_dict[disk_template]
8454 def _FilterVmNodes(lu, nodenames):
8455 """Filters out non-vm_capable nodes from a list.
8457 @type lu: L{LogicalUnit}
8458 @param lu: the logical unit for which we check
8459 @type nodenames: list
8460 @param nodenames: the list of nodes on which we should check
8462 @return: the list of vm-capable nodes
8465 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8466 return [name for name in nodenames if name not in vm_nodes]
8469 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8470 """Hypervisor parameter validation.
8472 This function abstracts the hypervisor parameter validation so that it
8473 can be used in both instance create and instance modify.
8475 @type lu: L{LogicalUnit}
8476 @param lu: the logical unit for which we check
8477 @type nodenames: list
8478 @param nodenames: the list of nodes on which we should check
8479 @type hvname: string
8480 @param hvname: the name of the hypervisor we should use
8481 @type hvparams: dict
8482 @param hvparams: the parameters which we need to check
8483 @raise errors.OpPrereqError: if the parameters are not valid
8486 nodenames = _FilterVmNodes(lu, nodenames)
8488 cluster = lu.cfg.GetClusterInfo()
8489 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8491 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8492 for node in nodenames:
8496 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8499 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8500 """OS parameters validation.
8502 @type lu: L{LogicalUnit}
8503 @param lu: the logical unit for which we check
8504 @type required: boolean
8505 @param required: whether the validation should fail if the OS is not
8507 @type nodenames: list
8508 @param nodenames: the list of nodes on which we should check
8509 @type osname: string
8510 @param osname: the name of the OS we should use
8511 @type osparams: dict
8512 @param osparams: the parameters which we need to check
8513 @raise errors.OpPrereqError: if the parameters are not valid
8516 nodenames = _FilterVmNodes(lu, nodenames)
8517 result = lu.rpc.call_os_validate(nodenames, required, osname,
8518 [constants.OS_VALIDATE_PARAMETERS],
8520 for node, nres in result.items():
8521 # we don't check for offline cases since this should be run only
8522 # against the master node and/or an instance's nodes
8523 nres.Raise("OS Parameters validation failed on node %s" % node)
8524 if not nres.payload:
8525 lu.LogInfo("OS %s not found on node %s, validation skipped",
8529 class LUInstanceCreate(LogicalUnit):
8530 """Create an instance.
8533 HPATH = "instance-add"
8534 HTYPE = constants.HTYPE_INSTANCE
8537 def CheckArguments(self):
8541 # do not require name_check to ease forward/backward compatibility
8543 if self.op.no_install and self.op.start:
8544 self.LogInfo("No-installation mode selected, disabling startup")
8545 self.op.start = False
8546 # validate/normalize the instance name
8547 self.op.instance_name = \
8548 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8550 if self.op.ip_check and not self.op.name_check:
8551 # TODO: make the ip check more flexible and not depend on the name check
8552 raise errors.OpPrereqError("Cannot do IP address check without a name"
8553 " check", errors.ECODE_INVAL)
8555 # check nics' parameter names
8556 for nic in self.op.nics:
8557 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8559 # check disks. parameter names and consistent adopt/no-adopt strategy
8560 has_adopt = has_no_adopt = False
8561 for disk in self.op.disks:
8562 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8563 if constants.IDISK_ADOPT in disk:
8567 if has_adopt and has_no_adopt:
8568 raise errors.OpPrereqError("Either all disks are adopted or none is",
8571 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8572 raise errors.OpPrereqError("Disk adoption is not supported for the"
8573 " '%s' disk template" %
8574 self.op.disk_template,
8576 if self.op.iallocator is not None:
8577 raise errors.OpPrereqError("Disk adoption not allowed with an"
8578 " iallocator script", errors.ECODE_INVAL)
8579 if self.op.mode == constants.INSTANCE_IMPORT:
8580 raise errors.OpPrereqError("Disk adoption not allowed for"
8581 " instance import", errors.ECODE_INVAL)
8583 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8584 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8585 " but no 'adopt' parameter given" %
8586 self.op.disk_template,
8589 self.adopt_disks = has_adopt
8591 # instance name verification
8592 if self.op.name_check:
8593 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8594 self.op.instance_name = self.hostname1.name
8595 # used in CheckPrereq for ip ping check
8596 self.check_ip = self.hostname1.ip
8598 self.check_ip = None
8600 # file storage checks
8601 if (self.op.file_driver and
8602 not self.op.file_driver in constants.FILE_DRIVER):
8603 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8604 self.op.file_driver, errors.ECODE_INVAL)
8606 if self.op.disk_template == constants.DT_FILE:
8607 opcodes.RequireFileStorage()
8608 elif self.op.disk_template == constants.DT_SHARED_FILE:
8609 opcodes.RequireSharedFileStorage()
8611 ### Node/iallocator related checks
8612 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8614 if self.op.pnode is not None:
8615 if self.op.disk_template in constants.DTS_INT_MIRROR:
8616 if self.op.snode is None:
8617 raise errors.OpPrereqError("The networked disk templates need"
8618 " a mirror node", errors.ECODE_INVAL)
8620 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8622 self.op.snode = None
8624 self._cds = _GetClusterDomainSecret()
8626 if self.op.mode == constants.INSTANCE_IMPORT:
8627 # On import force_variant must be True, because if we forced it at
8628 # initial install, our only chance when importing it back is that it
8630 self.op.force_variant = True
8632 if self.op.no_install:
8633 self.LogInfo("No-installation mode has no effect during import")
8635 elif self.op.mode == constants.INSTANCE_CREATE:
8636 if self.op.os_type is None:
8637 raise errors.OpPrereqError("No guest OS specified",
8639 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8640 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8641 " installation" % self.op.os_type,
8643 if self.op.disk_template is None:
8644 raise errors.OpPrereqError("No disk template specified",
8647 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8648 # Check handshake to ensure both clusters have the same domain secret
8649 src_handshake = self.op.source_handshake
8650 if not src_handshake:
8651 raise errors.OpPrereqError("Missing source handshake",
8654 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8657 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8660 # Load and check source CA
8661 self.source_x509_ca_pem = self.op.source_x509_ca
8662 if not self.source_x509_ca_pem:
8663 raise errors.OpPrereqError("Missing source X509 CA",
8667 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8669 except OpenSSL.crypto.Error, err:
8670 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8671 (err, ), errors.ECODE_INVAL)
8673 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8674 if errcode is not None:
8675 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8678 self.source_x509_ca = cert
8680 src_instance_name = self.op.source_instance_name
8681 if not src_instance_name:
8682 raise errors.OpPrereqError("Missing source instance name",
8685 self.source_instance_name = \
8686 netutils.GetHostname(name=src_instance_name).name
8689 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8690 self.op.mode, errors.ECODE_INVAL)
8692 def ExpandNames(self):
8693 """ExpandNames for CreateInstance.
8695 Figure out the right locks for instance creation.
8698 self.needed_locks = {}
8700 instance_name = self.op.instance_name
8701 # this is just a preventive check, but someone might still add this
8702 # instance in the meantime, and creation will fail at lock-add time
8703 if instance_name in self.cfg.GetInstanceList():
8704 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8705 instance_name, errors.ECODE_EXISTS)
8707 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8709 if self.op.iallocator:
8710 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8711 # specifying a group on instance creation and then selecting nodes from
8713 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8714 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8716 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8717 nodelist = [self.op.pnode]
8718 if self.op.snode is not None:
8719 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8720 nodelist.append(self.op.snode)
8721 self.needed_locks[locking.LEVEL_NODE] = nodelist
8722 # Lock resources of instance's primary and secondary nodes (copy to
8723 # prevent accidental modification)
8724 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8726 # in case of import lock the source node too
8727 if self.op.mode == constants.INSTANCE_IMPORT:
8728 src_node = self.op.src_node
8729 src_path = self.op.src_path
8731 if src_path is None:
8732 self.op.src_path = src_path = self.op.instance_name
8734 if src_node is None:
8735 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8736 self.op.src_node = None
8737 if os.path.isabs(src_path):
8738 raise errors.OpPrereqError("Importing an instance from a path"
8739 " requires a source node option",
8742 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8743 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8744 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8745 if not os.path.isabs(src_path):
8746 self.op.src_path = src_path = \
8747 utils.PathJoin(constants.EXPORT_DIR, src_path)
8749 def _RunAllocator(self):
8750 """Run the allocator based on input opcode.
8753 nics = [n.ToDict() for n in self.nics]
8754 ial = IAllocator(self.cfg, self.rpc,
8755 mode=constants.IALLOCATOR_MODE_ALLOC,
8756 name=self.op.instance_name,
8757 disk_template=self.op.disk_template,
8760 vcpus=self.be_full[constants.BE_VCPUS],
8761 memory=self.be_full[constants.BE_MAXMEM],
8764 hypervisor=self.op.hypervisor,
8767 ial.Run(self.op.iallocator)
8770 raise errors.OpPrereqError("Can't compute nodes using"
8771 " iallocator '%s': %s" %
8772 (self.op.iallocator, ial.info),
8774 if len(ial.result) != ial.required_nodes:
8775 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8776 " of nodes (%s), required %s" %
8777 (self.op.iallocator, len(ial.result),
8778 ial.required_nodes), errors.ECODE_FAULT)
8779 self.op.pnode = ial.result[0]
8780 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8781 self.op.instance_name, self.op.iallocator,
8782 utils.CommaJoin(ial.result))
8783 if ial.required_nodes == 2:
8784 self.op.snode = ial.result[1]
8786 def BuildHooksEnv(self):
8789 This runs on master, primary and secondary nodes of the instance.
8793 "ADD_MODE": self.op.mode,
8795 if self.op.mode == constants.INSTANCE_IMPORT:
8796 env["SRC_NODE"] = self.op.src_node
8797 env["SRC_PATH"] = self.op.src_path
8798 env["SRC_IMAGES"] = self.src_images
8800 env.update(_BuildInstanceHookEnv(
8801 name=self.op.instance_name,
8802 primary_node=self.op.pnode,
8803 secondary_nodes=self.secondaries,
8804 status=self.op.start,
8805 os_type=self.op.os_type,
8806 minmem=self.be_full[constants.BE_MINMEM],
8807 maxmem=self.be_full[constants.BE_MAXMEM],
8808 vcpus=self.be_full[constants.BE_VCPUS],
8809 nics=_NICListToTuple(self, self.nics),
8810 disk_template=self.op.disk_template,
8811 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8812 for d in self.disks],
8815 hypervisor_name=self.op.hypervisor,
8821 def BuildHooksNodes(self):
8822 """Build hooks nodes.
8825 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8828 def _ReadExportInfo(self):
8829 """Reads the export information from disk.
8831 It will override the opcode source node and path with the actual
8832 information, if these two were not specified before.
8834 @return: the export information
8837 assert self.op.mode == constants.INSTANCE_IMPORT
8839 src_node = self.op.src_node
8840 src_path = self.op.src_path
8842 if src_node is None:
8843 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8844 exp_list = self.rpc.call_export_list(locked_nodes)
8846 for node in exp_list:
8847 if exp_list[node].fail_msg:
8849 if src_path in exp_list[node].payload:
8851 self.op.src_node = src_node = node
8852 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8856 raise errors.OpPrereqError("No export found for relative path %s" %
8857 src_path, errors.ECODE_INVAL)
8859 _CheckNodeOnline(self, src_node)
8860 result = self.rpc.call_export_info(src_node, src_path)
8861 result.Raise("No export or invalid export found in dir %s" % src_path)
8863 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8864 if not export_info.has_section(constants.INISECT_EXP):
8865 raise errors.ProgrammerError("Corrupted export config",
8866 errors.ECODE_ENVIRON)
8868 ei_version = export_info.get(constants.INISECT_EXP, "version")
8869 if (int(ei_version) != constants.EXPORT_VERSION):
8870 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8871 (ei_version, constants.EXPORT_VERSION),
8872 errors.ECODE_ENVIRON)
8875 def _ReadExportParams(self, einfo):
8876 """Use export parameters as defaults.
8878 In case the opcode doesn't specify (as in override) some instance
8879 parameters, then try to use them from the export information, if
8883 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8885 if self.op.disk_template is None:
8886 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8887 self.op.disk_template = einfo.get(constants.INISECT_INS,
8889 if self.op.disk_template not in constants.DISK_TEMPLATES:
8890 raise errors.OpPrereqError("Disk template specified in configuration"
8891 " file is not one of the allowed values:"
8892 " %s" % " ".join(constants.DISK_TEMPLATES))
8894 raise errors.OpPrereqError("No disk template specified and the export"
8895 " is missing the disk_template information",
8898 if not self.op.disks:
8900 # TODO: import the disk iv_name too
8901 for idx in range(constants.MAX_DISKS):
8902 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8903 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8904 disks.append({constants.IDISK_SIZE: disk_sz})
8905 self.op.disks = disks
8906 if not disks and self.op.disk_template != constants.DT_DISKLESS:
8907 raise errors.OpPrereqError("No disk info specified and the export"
8908 " is missing the disk information",
8911 if not self.op.nics:
8913 for idx in range(constants.MAX_NICS):
8914 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8916 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8917 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8924 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8925 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8927 if (self.op.hypervisor is None and
8928 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8929 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8931 if einfo.has_section(constants.INISECT_HYP):
8932 # use the export parameters but do not override the ones
8933 # specified by the user
8934 for name, value in einfo.items(constants.INISECT_HYP):
8935 if name not in self.op.hvparams:
8936 self.op.hvparams[name] = value
8938 if einfo.has_section(constants.INISECT_BEP):
8939 # use the parameters, without overriding
8940 for name, value in einfo.items(constants.INISECT_BEP):
8941 if name not in self.op.beparams:
8942 self.op.beparams[name] = value
8943 # Compatibility for the old "memory" be param
8944 if name == constants.BE_MEMORY:
8945 if constants.BE_MAXMEM not in self.op.beparams:
8946 self.op.beparams[constants.BE_MAXMEM] = value
8947 if constants.BE_MINMEM not in self.op.beparams:
8948 self.op.beparams[constants.BE_MINMEM] = value
8950 # try to read the parameters old style, from the main section
8951 for name in constants.BES_PARAMETERS:
8952 if (name not in self.op.beparams and
8953 einfo.has_option(constants.INISECT_INS, name)):
8954 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8956 if einfo.has_section(constants.INISECT_OSP):
8957 # use the parameters, without overriding
8958 for name, value in einfo.items(constants.INISECT_OSP):
8959 if name not in self.op.osparams:
8960 self.op.osparams[name] = value
8962 def _RevertToDefaults(self, cluster):
8963 """Revert the instance parameters to the default values.
8967 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8968 for name in self.op.hvparams.keys():
8969 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8970 del self.op.hvparams[name]
8972 be_defs = cluster.SimpleFillBE({})
8973 for name in self.op.beparams.keys():
8974 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8975 del self.op.beparams[name]
8977 nic_defs = cluster.SimpleFillNIC({})
8978 for nic in self.op.nics:
8979 for name in constants.NICS_PARAMETERS:
8980 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8983 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8984 for name in self.op.osparams.keys():
8985 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8986 del self.op.osparams[name]
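# Values identical to the cluster defaults are dropped so that only explicit
# overrides end up stored on the instance; the instance then keeps following
# any later changes to the cluster-wide defaults.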
8988 def _CalculateFileStorageDir(self):
8989 """Calculate final instance file storage dir.
8992 # file storage dir calculation/check
8993 self.instance_file_storage_dir = None
8994 if self.op.disk_template in constants.DTS_FILEBASED:
8995 # build the full file storage dir path
8998 if self.op.disk_template == constants.DT_SHARED_FILE:
8999 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9001 get_fsd_fn = self.cfg.GetFileStorageDir
9003 cfg_storagedir = get_fsd_fn()
9004 if not cfg_storagedir:
9005 raise errors.OpPrereqError("Cluster file storage dir not defined")
9006 joinargs.append(cfg_storagedir)
9008 if self.op.file_storage_dir is not None:
9009 joinargs.append(self.op.file_storage_dir)
9011 joinargs.append(self.op.instance_name)
9013 # pylint: disable=W0142
9014 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
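# i.e. <cluster (shared-)file storage dir>[/<op.file_storage_dir>]/<instance name>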
9016 def CheckPrereq(self):
9017 """Check prerequisites.
9020 self._CalculateFileStorageDir()
9022 if self.op.mode == constants.INSTANCE_IMPORT:
9023 export_info = self._ReadExportInfo()
9024 self._ReadExportParams(export_info)
9026 if (not self.cfg.GetVGName() and
9027 self.op.disk_template not in constants.DTS_NOT_LVM):
9028 raise errors.OpPrereqError("Cluster does not support lvm-based"
9029 " instances", errors.ECODE_STATE)
9031 if (self.op.hypervisor is None or
9032 self.op.hypervisor == constants.VALUE_AUTO):
9033 self.op.hypervisor = self.cfg.GetHypervisorType()
9035 cluster = self.cfg.GetClusterInfo()
9036 enabled_hvs = cluster.enabled_hypervisors
9037 if self.op.hypervisor not in enabled_hvs:
9038 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9039 " cluster (%s)" % (self.op.hypervisor,
9040 ",".join(enabled_hvs)),
9043 # Check tag validity
9044 for tag in self.op.tags:
9045 objects.TaggableObject.ValidateTag(tag)
9047 # check hypervisor parameter syntax (locally)
9048 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9049 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9051 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9052 hv_type.CheckParameterSyntax(filled_hvp)
9053 self.hv_full = filled_hvp
9054 # check that we don't specify global parameters on an instance
9055 _CheckGlobalHvParams(self.op.hvparams)
9057 # fill and remember the beparams dict
9058 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9059 for param, value in self.op.beparams.iteritems():
9060 if value == constants.VALUE_AUTO:
9061 self.op.beparams[param] = default_beparams[param]
9062 objects.UpgradeBeParams(self.op.beparams)
9063 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9064 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9066 # build os parameters
9067 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9069 # now that hvp/bep are in final format, let's reset to defaults,
9071 if self.op.identify_defaults:
9072 self._RevertToDefaults(cluster)
9076 for idx, nic in enumerate(self.op.nics):
9077 nic_mode_req = nic.get(constants.INIC_MODE, None)
9078 nic_mode = nic_mode_req
9079 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9080 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9082 # in routed mode, for the first nic, the default ip is 'auto'
9083 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9084 default_ip_mode = constants.VALUE_AUTO
9086 default_ip_mode = constants.VALUE_NONE
9088 # ip validity checks
9089 ip = nic.get(constants.INIC_IP, default_ip_mode)
9090 if ip is None or ip.lower() == constants.VALUE_NONE:
9092 elif ip.lower() == constants.VALUE_AUTO:
9093 if not self.op.name_check:
9094 raise errors.OpPrereqError("IP address set to auto but name checks"
9095 " have been skipped",
9097 nic_ip = self.hostname1.ip
9099 if not netutils.IPAddress.IsValid(ip):
9100 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9104 # TODO: check the ip address for uniqueness
9105 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9106 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9109 # MAC address verification
9110 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9111 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9112 mac = utils.NormalizeAndValidateMac(mac)
9115 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9116 except errors.ReservationError:
9117 raise errors.OpPrereqError("MAC address %s already in use"
9118 " in cluster" % mac,
9119 errors.ECODE_NOTUNIQUE)
9121 # Build nic parameters
9122 link = nic.get(constants.INIC_LINK, None)
9123 if link == constants.VALUE_AUTO:
9124 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9127 nicparams[constants.NIC_MODE] = nic_mode
9129 nicparams[constants.NIC_LINK] = link
9131 check_params = cluster.SimpleFillNIC(nicparams)
9132 objects.NIC.CheckParameterSyntax(check_params)
9133 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9135 # disk checks/pre-build
9136 default_vg = self.cfg.GetVGName()
9138 for disk in self.op.disks:
9139 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9140 if mode not in constants.DISK_ACCESS_SET:
9141 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9142 mode, errors.ECODE_INVAL)
9143 size = disk.get(constants.IDISK_SIZE, None)
9145 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9148 except (TypeError, ValueError):
9149 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9152 data_vg = disk.get(constants.IDISK_VG, default_vg)
9154 constants.IDISK_SIZE: size,
9155 constants.IDISK_MODE: mode,
9156 constants.IDISK_VG: data_vg,
9157 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
9159 if constants.IDISK_ADOPT in disk:
9160 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9161 self.disks.append(new_disk)
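# Each entry is a dict of IDISK_* values, e.g. (illustrative):
# {IDISK_SIZE: 10240, IDISK_MODE: "rw", IDISK_VG: "xenvg",
#  IDISK_METAVG: "xenvg"}, plus IDISK_ADOPT when adopting existing volumes.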
9163 if self.op.mode == constants.INSTANCE_IMPORT:
9165 for idx in range(len(self.disks)):
9166 option = "disk%d_dump" % idx
9167 if export_info.has_option(constants.INISECT_INS, option):
9168 # FIXME: are the old os-es, disk sizes, etc. useful?
9169 export_name = export_info.get(constants.INISECT_INS, option)
9170 image = utils.PathJoin(self.op.src_path, export_name)
9171 disk_images.append(image)
9173 disk_images.append(False)
9175 self.src_images = disk_images
9177 old_name = export_info.get(constants.INISECT_INS, "name")
9178 if self.op.instance_name == old_name:
9179 for idx, nic in enumerate(self.nics):
9180 if nic.mac == constants.VALUE_AUTO:
9181 nic_mac_ini = "nic%d_mac" % idx
9182 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9184 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9186 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9187 if self.op.ip_check:
9188 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9189 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9190 (self.check_ip, self.op.instance_name),
9191 errors.ECODE_NOTUNIQUE)
9193 #### mac address generation
9194 # By generating here the mac address both the allocator and the hooks get
9195 # the real final mac address rather than the 'auto' or 'generate' value.
9196 # There is a race condition between the generation and the instance object
9197 # creation, which means that we know the mac is valid now, but we're not
9198 # sure it will be when we actually add the instance. If things go bad
9199 # adding the instance will abort because of a duplicate mac, and the
9200 # creation job will fail.
9201 for nic in self.nics:
9202 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9203 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9207 if self.op.iallocator is not None:
9208 self._RunAllocator()
9210 # Release all unneeded node locks
9211 _ReleaseLocks(self, locking.LEVEL_NODE,
9212 keep=filter(None, [self.op.pnode, self.op.snode,
9215 #### node related checks
9217 # check primary node
9218 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9219 assert self.pnode is not None, \
9220 "Cannot retrieve locked node %s" % self.op.pnode
9222 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9223 pnode.name, errors.ECODE_STATE)
9225 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9226 pnode.name, errors.ECODE_STATE)
9227 if not pnode.vm_capable:
9228 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9229 " '%s'" % pnode.name, errors.ECODE_STATE)
9231 self.secondaries = []
9233 # mirror node verification
9234 if self.op.disk_template in constants.DTS_INT_MIRROR:
9235 if self.op.snode == pnode.name:
9236 raise errors.OpPrereqError("The secondary node cannot be the"
9237 " primary node", errors.ECODE_INVAL)
9238 _CheckNodeOnline(self, self.op.snode)
9239 _CheckNodeNotDrained(self, self.op.snode)
9240 _CheckNodeVmCapable(self, self.op.snode)
9241 self.secondaries.append(self.op.snode)
9243 snode = self.cfg.GetNodeInfo(self.op.snode)
9244 if pnode.group != snode.group:
9245 self.LogWarning("The primary and secondary nodes are in two"
9246 " different node groups; the disk parameters"
9247 " from the first disk's node group will be"
9250 nodenames = [pnode.name] + self.secondaries
9252 # disk parameters (not customizable at instance or node level)
9253 # just use the primary node parameters, ignoring the secondary.
9254 self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams
9256 if not self.adopt_disks:
9257 # Check lv size requirements, if not adopting
9258 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9259 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9261 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9262 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9263 disk[constants.IDISK_ADOPT])
9264 for disk in self.disks])
9265 if len(all_lvs) != len(self.disks):
9266 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9268 for lv_name in all_lvs:
9270 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9271 # to ReserveLV uses the same syntax
9272 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9273 except errors.ReservationError:
9274 raise errors.OpPrereqError("LV named %s used by another instance" %
9275 lv_name, errors.ECODE_NOTUNIQUE)
9277 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9278 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9280 node_lvs = self.rpc.call_lv_list([pnode.name],
9281 vg_names.payload.keys())[pnode.name]
9282 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9283 node_lvs = node_lvs.payload
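# node_lvs maps "vg/lv" names to attribute tuples; below, element 0 is used as
# the LV size and element 2 as the online/in-use flag.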
9285 delta = all_lvs.difference(node_lvs.keys())
9287 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9288 utils.CommaJoin(delta),
9290 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9292 raise errors.OpPrereqError("Online logical volumes found, cannot"
9293 " adopt: %s" % utils.CommaJoin(online_lvs),
9295 # update the size of disk based on what is found
9296 for dsk in self.disks:
9297 dsk[constants.IDISK_SIZE] = \
9298 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9299 dsk[constants.IDISK_ADOPT])][0]))
9301 elif self.op.disk_template == constants.DT_BLOCK:
9302 # Normalize and de-duplicate device paths
9303 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9304 for disk in self.disks])
9305 if len(all_disks) != len(self.disks):
9306 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9308 baddisks = [d for d in all_disks
9309 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9311 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9312 " cannot be adopted" %
9313 (", ".join(baddisks),
9314 constants.ADOPTABLE_BLOCKDEV_ROOT),
9317 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9318 list(all_disks))[pnode.name]
9319 node_disks.Raise("Cannot get block device information from node %s" %
9321 node_disks = node_disks.payload
9322 delta = all_disks.difference(node_disks.keys())
9324 raise errors.OpPrereqError("Missing block device(s): %s" %
9325 utils.CommaJoin(delta),
9327 for dsk in self.disks:
9328 dsk[constants.IDISK_SIZE] = \
9329 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9331 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9333 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9334 # check OS parameters (remotely)
9335 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9337 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9339 # memory check on primary node
9340 # TODO(dynmem): use MINMEM for checking
9342 _CheckNodeFreeMemory(self, self.pnode.name,
9343 "creating instance %s" % self.op.instance_name,
9344 self.be_full[constants.BE_MAXMEM],
9347 self.dry_run_result = list(nodenames)
9349 def Exec(self, feedback_fn):
9350 """Create and add the instance to the cluster.
9353 instance = self.op.instance_name
9354 pnode_name = self.pnode.name
9356 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9357 self.owned_locks(locking.LEVEL_NODE)), \
9358 "Node locks differ from node resource locks"
9360 ht_kind = self.op.hypervisor
9361 if ht_kind in constants.HTS_REQ_PORT:
9362 network_port = self.cfg.AllocatePort()
9366 disks = _GenerateDiskTemplate(self,
9367 self.op.disk_template,
9368 instance, pnode_name,
9371 self.instance_file_storage_dir,
9372 self.op.file_driver,
9377 iobj = objects.Instance(name=instance, os=self.op.os_type,
9378 primary_node=pnode_name,
9379 nics=self.nics, disks=disks,
9380 disk_template=self.op.disk_template,
9381 admin_state=constants.ADMINST_DOWN,
9382 network_port=network_port,
9383 beparams=self.op.beparams,
9384 hvparams=self.op.hvparams,
9385 hypervisor=self.op.hypervisor,
9386 osparams=self.op.osparams,
9390 for tag in self.op.tags:
9393 if self.adopt_disks:
9394 if self.op.disk_template == constants.DT_PLAIN:
9395 # rename LVs to the newly-generated names; we need to construct
9396 # 'fake' LV disks with the old data, plus the new unique_id
9397 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9399 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9400 rename_to.append(t_dsk.logical_id)
9401 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9402 self.cfg.SetDiskID(t_dsk, pnode_name)
9403 result = self.rpc.call_blockdev_rename(pnode_name,
9404 zip(tmp_disks, rename_to))
9405 result.Raise("Failed to rename adopted LVs")
9407 feedback_fn("* creating instance disks...")
9409 _CreateDisks(self, iobj)
9410 except errors.OpExecError:
9411 self.LogWarning("Device creation failed, reverting...")
9413 _RemoveDisks(self, iobj)
9415 self.cfg.ReleaseDRBDMinors(instance)
9418 feedback_fn("adding instance %s to cluster config" % instance)
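# Passing the execution context id lets the config writer match this addition
# with the MAC/LV reservations made earlier under the same id.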
9420 self.cfg.AddInstance(iobj, self.proc.GetECId())
9422 # Declare that we don't want to remove the instance lock anymore, as we've
9423 # added the instance to the config
9424 del self.remove_locks[locking.LEVEL_INSTANCE]
9426 if self.op.mode == constants.INSTANCE_IMPORT:
9427 # Release unused nodes
9428 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9431 _ReleaseLocks(self, locking.LEVEL_NODE)
9434 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9435 feedback_fn("* wiping instance disks...")
9437 _WipeDisks(self, iobj)
9438 except errors.OpExecError, err:
9439 logging.exception("Wiping disks failed")
9440 self.LogWarning("Wiping instance disks failed (%s)", err)
9444 # Something is already wrong with the disks, don't do anything else
9446 elif self.op.wait_for_sync:
9447 disk_abort = not _WaitForSync(self, iobj)
9448 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9449 # make sure the disks are not degraded (still sync-ing is ok)
9450 feedback_fn("* checking mirrors status")
9451 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9456 _RemoveDisks(self, iobj)
9457 self.cfg.RemoveInstance(iobj.name)
9458 # Make sure the instance lock gets removed
9459 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9460 raise errors.OpExecError("There are some degraded disks for"
9463 # Release all node resource locks
9464 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9466 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9467 if self.op.mode == constants.INSTANCE_CREATE:
9468 if not self.op.no_install:
9469 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9470 not self.op.wait_for_sync)
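# Pausing DRBD resync while the OS is installed keeps the resync traffic from
# competing with the installation I/O; it is skipped when the caller asked to
# wait for sync anyway.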
9472 feedback_fn("* pausing disk sync to install instance OS")
9473 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9475 for idx, success in enumerate(result.payload):
9477 logging.warn("pause-sync of instance %s for disk %d failed",
9480 feedback_fn("* running the instance OS create scripts...")
9481 # FIXME: pass debug option from opcode to backend
9483 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9484 self.op.debug_level)
9486 feedback_fn("* resuming disk sync")
9487 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9489 for idx, success in enumerate(result.payload):
9491 logging.warn("resume-sync of instance %s for disk %d failed",
9494 os_add_result.Raise("Could not add os for instance %s"
9495 " on node %s" % (instance, pnode_name))
9497 elif self.op.mode == constants.INSTANCE_IMPORT:
9498 feedback_fn("* running the instance OS import scripts...")
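# One DiskTransfer is built per exported image below: the file on the source
# node is fed to the import script of the matching target disk.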
9502 for idx, image in enumerate(self.src_images):
9506 # FIXME: pass debug option from opcode to backend
9507 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9508 constants.IEIO_FILE, (image, ),
9509 constants.IEIO_SCRIPT,
9510 (iobj.disks[idx], idx),
9512 transfers.append(dt)
9515 masterd.instance.TransferInstanceData(self, feedback_fn,
9516 self.op.src_node, pnode_name,
9517 self.pnode.secondary_ip,
9519 if not compat.all(import_result):
9520 self.LogWarning("Some disks for instance %s on node %s were not"
9521 " imported successfully" % (instance, pnode_name))
9523 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9524 feedback_fn("* preparing remote import...")
9525 # The source cluster will stop the instance before attempting to make a
9526 # connection. In some cases stopping an instance can take a long time,
9527 # hence the shutdown timeout is added to the connection timeout.
9528 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9529 self.op.source_shutdown_timeout)
9530 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9532 assert iobj.primary_node == self.pnode.name
9534 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9535 self.source_x509_ca,
9536 self._cds, timeouts)
9537 if not compat.all(disk_results):
9538 # TODO: Should the instance still be started, even if some disks
9539 # failed to import (valid for local imports, too)?
9540 self.LogWarning("Some disks for instance %s on node %s were not"
9541 " imported successfully" % (instance, pnode_name))
9543 # Run rename script on newly imported instance
9544 assert iobj.name == instance
9545 feedback_fn("Running rename script for %s" % instance)
9546 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9547 self.source_instance_name,
9548 self.op.debug_level)
9550 self.LogWarning("Failed to run rename script for %s on node"
9551 " %s: %s" % (instance, pnode_name, result.fail_msg))
9554 # also checked in the prereq part
9555 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9558 assert not self.owned_locks(locking.LEVEL_NODE_RES)
9561 iobj.admin_state = constants.ADMINST_UP
9562 self.cfg.Update(iobj, feedback_fn)
9563 logging.info("Starting instance %s on node %s", instance, pnode_name)
9564 feedback_fn("* starting instance...")
9565 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9567 result.Raise("Could not start instance")
9569 return list(iobj.all_nodes)
9572 class LUInstanceConsole(NoHooksLU):
9573 """Connect to an instance's console.
9575 This is somewhat special in that it returns the command line that
9576 you need to run on the master node in order to connect to the console.
9582 def ExpandNames(self):
9583 self.share_locks = _ShareAll()
9584 self._ExpandAndLockInstance()
9586 def CheckPrereq(self):
9587 """Check prerequisites.
9589 This checks that the instance is in the cluster.
9592 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9593 assert self.instance is not None, \
9594 "Cannot retrieve locked instance %s" % self.op.instance_name
9595 _CheckNodeOnline(self, self.instance.primary_node)
9597 def Exec(self, feedback_fn):
9598 """Connect to the console of an instance
9601 instance = self.instance
9602 node = instance.primary_node
9604 node_insts = self.rpc.call_instance_list([node],
9605 [instance.hypervisor])[node]
9606 node_insts.Raise("Can't get node information from %s" % node)
9608 if instance.name not in node_insts.payload:
9609 if instance.admin_state == constants.ADMINST_UP:
9610 state = constants.INSTST_ERRORDOWN
9611 elif instance.admin_state == constants.ADMINST_DOWN:
9612 state = constants.INSTST_ADMINDOWN
9614 state = constants.INSTST_ADMINOFFLINE
9615 raise errors.OpExecError("Instance %s is not running (state %s)" %
9616 (instance.name, state))
9618 logging.debug("Connecting to console of %s on %s", instance.name, node)
9620 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9623 def _GetInstanceConsole(cluster, instance):
9624 """Returns console information for an instance.
9626 @type cluster: L{objects.Cluster}
9627 @type instance: L{objects.Instance}
9631 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9632 # beparams and hvparams are passed separately, to avoid editing the
9633 # instance and then saving the defaults in the instance itself.
9634 hvparams = cluster.FillHV(instance)
9635 beparams = cluster.FillBE(instance)
9636 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9638 assert console.instance == instance.name
9639 assert console.Validate()
9641 return console.ToDict()
9644 class LUInstanceReplaceDisks(LogicalUnit):
9645 """Replace the disks of an instance.
9648 HPATH = "mirrors-replace"
9649 HTYPE = constants.HTYPE_INSTANCE
9652 def CheckArguments(self):
9653 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9656 def ExpandNames(self):
9657 self._ExpandAndLockInstance()
9659 assert locking.LEVEL_NODE not in self.needed_locks
9660 assert locking.LEVEL_NODE_RES not in self.needed_locks
9661 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9663 assert self.op.iallocator is None or self.op.remote_node is None, \
9664 "Conflicting options"
9666 if self.op.remote_node is not None:
9667 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9669 # Warning: do not remove the locking of the new secondary here
9670 # unless DRBD8.AddChildren is changed to work in parallel;
9671 # currently it doesn't since parallel invocations of
9672 # FindUnusedMinor will conflict
9673 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9674 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9676 self.needed_locks[locking.LEVEL_NODE] = []
9677 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9679 if self.op.iallocator is not None:
9680 # iallocator will select a new node in the same group
9681 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9683 self.needed_locks[locking.LEVEL_NODE_RES] = []
9685 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9686 self.op.iallocator, self.op.remote_node,
9687 self.op.disks, False, self.op.early_release)
9689 self.tasklets = [self.replacer]
9691 def DeclareLocks(self, level):
9692 if level == locking.LEVEL_NODEGROUP:
9693 assert self.op.remote_node is None
9694 assert self.op.iallocator is not None
9695 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9697 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9698 # Lock all groups used by instance optimistically; this requires going
9699 # via the node before it's locked, requiring verification later on
9700 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9701 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9703 elif level == locking.LEVEL_NODE:
9704 if self.op.iallocator is not None:
9705 assert self.op.remote_node is None
9706 assert not self.needed_locks[locking.LEVEL_NODE]
9708 # Lock member nodes of all locked groups
9709 self.needed_locks[locking.LEVEL_NODE] = [node_name
9710 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9711 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9713 self._LockInstancesNodes()
9714 elif level == locking.LEVEL_NODE_RES:
9716 self.needed_locks[locking.LEVEL_NODE_RES] = \
9717 self.needed_locks[locking.LEVEL_NODE]
9719 def BuildHooksEnv(self):
9722 This runs on the master, the primary and all the secondaries.
9725 instance = self.replacer.instance
9727 "MODE": self.op.mode,
9728 "NEW_SECONDARY": self.op.remote_node,
9729 "OLD_SECONDARY": instance.secondary_nodes[0],
9731 env.update(_BuildInstanceHookEnvByObject(self, instance))
9734 def BuildHooksNodes(self):
9735 """Build hooks nodes.
9738 instance = self.replacer.instance
9740 self.cfg.GetMasterNode(),
9741 instance.primary_node,
9743 if self.op.remote_node is not None:
9744 nl.append(self.op.remote_node)
9747 def CheckPrereq(self):
9748 """Check prerequisites.
9751 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9752 self.op.iallocator is None)
9754 # Verify if node group locks are still correct
9755 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9757 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9759 return LogicalUnit.CheckPrereq(self)
9762 class TLReplaceDisks(Tasklet):
9763 """Replaces disks for an instance.
9765 Note: Locking is not within the scope of this class.
9768 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9769 disks, delay_iallocator, early_release):
9770 """Initializes this class.
9773 Tasklet.__init__(self, lu)
9776 self.instance_name = instance_name
9778 self.iallocator_name = iallocator_name
9779 self.remote_node = remote_node
9781 self.delay_iallocator = delay_iallocator
9782 self.early_release = early_release
9785 self.instance = None
9786 self.new_node = None
9787 self.target_node = None
9788 self.other_node = None
9789 self.remote_node_info = None
9790 self.node_secondary_ip = None
9793 def CheckArguments(mode, remote_node, iallocator):
9794 """Helper function for users of this class.
9797 # check for valid parameter combination
9798 if mode == constants.REPLACE_DISK_CHG:
9799 if remote_node is None and iallocator is None:
9800 raise errors.OpPrereqError("When changing the secondary either an"
9801 " iallocator script must be used or the"
9802 " new node given", errors.ECODE_INVAL)
9804 if remote_node is not None and iallocator is not None:
9805 raise errors.OpPrereqError("Give either the iallocator or the new"
9806 " secondary, not both", errors.ECODE_INVAL)
9808 elif remote_node is not None or iallocator is not None:
9809 # Not replacing the secondary
9810 raise errors.OpPrereqError("The iallocator and new node options can"
9811 " only be used when changing the"
9812 " secondary node", errors.ECODE_INVAL)
9815 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9816 """Compute a new secondary node using an IAllocator.
9819 ial = IAllocator(lu.cfg, lu.rpc,
9820 mode=constants.IALLOCATOR_MODE_RELOC,
9822 relocate_from=list(relocate_from))
9824 ial.Run(iallocator_name)
9827 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9828 " %s" % (iallocator_name, ial.info),
9831 if len(ial.result) != ial.required_nodes:
9832 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9833 " of nodes (%s), required %s" %
9835 len(ial.result), ial.required_nodes),
9838 remote_node_name = ial.result[0]
9840 lu.LogInfo("Selected new secondary for instance '%s': %s",
9841 instance_name, remote_node_name)
9843 return remote_node_name
9845 def _FindFaultyDisks(self, node_name):
9846 """Wrapper for L{_FindFaultyInstanceDisks}.
9849 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9852 def _CheckDisksActivated(self, instance):
9853 """Checks if the instance disks are activated.
9855 @param instance: The instance to check disks
9856 @return: True if they are activated, False otherwise
9859 nodes = instance.all_nodes
9861 for idx, dev in enumerate(instance.disks):
9863 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9864 self.cfg.SetDiskID(dev, node)
9866 result = self.rpc.call_blockdev_find(node, dev)
9870 elif result.fail_msg or not result.payload:
9875 def CheckPrereq(self):
9876 """Check prerequisites.
9878 This checks that the instance is in the cluster.
9881 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9882 assert instance is not None, \
9883 "Cannot retrieve locked instance %s" % self.instance_name
9885 if instance.disk_template != constants.DT_DRBD8:
9886 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9887 " instances", errors.ECODE_INVAL)
9889 if len(instance.secondary_nodes) != 1:
9890 raise errors.OpPrereqError("The instance has a strange layout,"
9891 " expected one secondary but found %d" %
9892 len(instance.secondary_nodes),
9895 if not self.delay_iallocator:
9896 self._CheckPrereq2()
9898 def _CheckPrereq2(self):
9899 """Check prerequisites, second part.
9901 This function should always be part of CheckPrereq. It was separated and is
9902 now called from Exec because during node evacuation iallocator was only
9903 called with an unmodified cluster model, not taking planned changes into account.
9907 instance = self.instance
9908 secondary_node = instance.secondary_nodes[0]
9910 if self.iallocator_name is None:
9911 remote_node = self.remote_node
9913 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9914 instance.name, instance.secondary_nodes)
9916 if remote_node is None:
9917 self.remote_node_info = None
9919 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9920 "Remote node '%s' is not locked" % remote_node
9922 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9923 assert self.remote_node_info is not None, \
9924 "Cannot retrieve locked node %s" % remote_node
9926 if remote_node == self.instance.primary_node:
9927 raise errors.OpPrereqError("The specified node is the primary node of"
9928 " the instance", errors.ECODE_INVAL)
9930 if remote_node == secondary_node:
9931 raise errors.OpPrereqError("The specified node is already the"
9932 " secondary node of the instance",
9935 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9936 constants.REPLACE_DISK_CHG):
9937 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9940 if self.mode == constants.REPLACE_DISK_AUTO:
9941 if not self._CheckDisksActivated(instance):
9942 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9943 " first" % self.instance_name,
9945 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9946 faulty_secondary = self._FindFaultyDisks(secondary_node)
9948 if faulty_primary and faulty_secondary:
9949 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9950 " one node and can not be repaired"
9951 " automatically" % self.instance_name,
9955 self.disks = faulty_primary
9956 self.target_node = instance.primary_node
9957 self.other_node = secondary_node
9958 check_nodes = [self.target_node, self.other_node]
9959 elif faulty_secondary:
9960 self.disks = faulty_secondary
9961 self.target_node = secondary_node
9962 self.other_node = instance.primary_node
9963 check_nodes = [self.target_node, self.other_node]
9969 # Non-automatic modes
9970 if self.mode == constants.REPLACE_DISK_PRI:
9971 self.target_node = instance.primary_node
9972 self.other_node = secondary_node
9973 check_nodes = [self.target_node, self.other_node]
9975 elif self.mode == constants.REPLACE_DISK_SEC:
9976 self.target_node = secondary_node
9977 self.other_node = instance.primary_node
9978 check_nodes = [self.target_node, self.other_node]
9980 elif self.mode == constants.REPLACE_DISK_CHG:
9981 self.new_node = remote_node
9982 self.other_node = instance.primary_node
9983 self.target_node = secondary_node
9984 check_nodes = [self.new_node, self.other_node]
9986 _CheckNodeNotDrained(self.lu, remote_node)
9987 _CheckNodeVmCapable(self.lu, remote_node)
9989 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9990 assert old_node_info is not None
9991 if old_node_info.offline and not self.early_release:
9992 # doesn't make sense to delay the release
9993 self.early_release = True
9994 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9995 " early-release mode", secondary_node)
9998 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10001 # If not specified all disks should be replaced
10003 self.disks = range(len(self.instance.disks))
10005 # TODO: compute disk parameters
10006 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10007 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10008 if primary_node_info.group != secondary_node_info.group:
10009 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10010 " different node groups; the disk parameters of the"
10011 " primary node's group will be applied.")
10013 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10015 for node in check_nodes:
10016 _CheckNodeOnline(self.lu, node)
10018 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10021 if node_name is not None)
10023 # Release unneeded node and node resource locks
10024 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10025 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10027 # Release any owned node group
10028 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10029 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10031 # Check whether disks are valid
10032 for disk_idx in self.disks:
10033 instance.FindDisk(disk_idx)
10035 # Get secondary node IP addresses
10036 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10037 in self.cfg.GetMultiNodeInfo(touched_nodes))
10039 def Exec(self, feedback_fn):
10040 """Execute disk replacement.
10042 This dispatches the disk replacement to the appropriate handler.
10045 if self.delay_iallocator:
10046 self._CheckPrereq2()
10049 # Verify owned locks before starting operation
10050 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10051 assert set(owned_nodes) == set(self.node_secondary_ip), \
10052 ("Incorrect node locks, owning %s, expected %s" %
10053 (owned_nodes, self.node_secondary_ip.keys()))
10054 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10055 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10057 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10058 assert list(owned_instances) == [self.instance_name], \
10059 "Instance '%s' not locked" % self.instance_name
10061 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10062 "Should not own any node group lock at this point"
10065 feedback_fn("No disks need replacement")
10068 feedback_fn("Replacing disk(s) %s for %s" %
10069 (utils.CommaJoin(self.disks), self.instance.name))
10071 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10073 # Activate the instance disks if we're replacing them on a down instance
10075 _StartInstanceDisks(self.lu, self.instance, True)
10078 # Should we replace the secondary node?
10079 if self.new_node is not None:
10080 fn = self._ExecDrbd8Secondary
10082 fn = self._ExecDrbd8DiskOnly
10084 result = fn(feedback_fn)
10086 # Deactivate the instance disks if we're replacing them on a
10089 _SafeShutdownInstanceDisks(self.lu, self.instance)
10091 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10094 # Verify owned locks
10095 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10096 nodes = frozenset(self.node_secondary_ip)
10097 assert ((self.early_release and not owned_nodes) or
10098 (not self.early_release and not (set(owned_nodes) - nodes))), \
10099 ("Not owning the correct locks, early_release=%s, owned=%r,"
10100 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10104 def _CheckVolumeGroup(self, nodes):
10105 self.lu.LogInfo("Checking volume groups")
10107 vgname = self.cfg.GetVGName()
10109 # Make sure volume group exists on all involved nodes
10110 results = self.rpc.call_vg_list(nodes)
10112 raise errors.OpExecError("Can't list volume groups on the nodes")
10115 res = results[node]
10116 res.Raise("Error checking node %s" % node)
10117 if vgname not in res.payload:
10118 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10121 def _CheckDisksExistence(self, nodes):
10122 # Check disk existence
10123 for idx, dev in enumerate(self.instance.disks):
10124 if idx not in self.disks:
10128 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10129 self.cfg.SetDiskID(dev, node)
10131 result = self.rpc.call_blockdev_find(node, dev)
10133 msg = result.fail_msg
10134 if msg or not result.payload:
10136 msg = "disk not found"
10137 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10140 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10141 for idx, dev in enumerate(self.instance.disks):
10142 if idx not in self.disks:
10145 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10148 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10150 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10151 " replace disks for instance %s" %
10152 (node_name, self.instance.name))
10154 def _CreateNewStorage(self, node_name):
10155 """Create new storage on the primary or secondary node.
10157 This is only used for same-node replaces, not for changing the
10158 secondary node, hence we don't want to modify the existing disk.
10163 for idx, dev in enumerate(self.instance.disks):
10164 if idx not in self.disks:
10167 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10169 self.cfg.SetDiskID(dev, node_name)
10171 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10172 names = _GenerateUniqueNames(self.lu, lv_names)
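# Each DRBD disk is backed by two LVs: a data LV of the disk's size and a small
# metadata LV (DRBD_META_SIZE); fresh, uniquely named LVs are created for both.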
10174 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10176 vg_data = dev.children[0].logical_id[0]
10177 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10178 logical_id=(vg_data, names[0]), params=data_p)
10179 vg_meta = dev.children[1].logical_id[0]
10180 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10181 logical_id=(vg_meta, names[1]), params=meta_p)
10183 new_lvs = [lv_data, lv_meta]
10184 old_lvs = [child.Copy() for child in dev.children]
10185 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10187 # we pass force_create=True to force the LVM creation
10188 for new_lv in new_lvs:
10189 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10190 _GetInstanceInfoText(self.instance), False)
10194 def _CheckDevices(self, node_name, iv_names):
10195 for name, (dev, _, _) in iv_names.iteritems():
10196 self.cfg.SetDiskID(dev, node_name)
10198 result = self.rpc.call_blockdev_find(node_name, dev)
10200 msg = result.fail_msg
10201 if msg or not result.payload:
10203 msg = "disk not found"
10204 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10207 if result.payload.is_degraded:
10208 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10210 def _RemoveOldStorage(self, node_name, iv_names):
10211 for name, (_, old_lvs, _) in iv_names.iteritems():
10212 self.lu.LogInfo("Remove logical volumes for %s" % name)
10215 self.cfg.SetDiskID(lv, node_name)
10217 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10219 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10220 hint="remove unused LVs manually")
10222 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10223 """Replace a disk on the primary or secondary for DRBD 8.
10225 The algorithm for replace is quite complicated:
10227 1. for each disk to be replaced:
10229 1. create new LVs on the target node with unique names
10230 1. detach old LVs from the drbd device
10231 1. rename old LVs to name_replaced.<time_t>
10232 1. rename new LVs to old LVs
10233 1. attach the new LVs (with the old names now) to the drbd device
10235 1. wait for sync across all devices
10237 1. for each modified disk:
10239 1. remove old LVs (which have the name name_replaced.<time_t>)
10241 Failures are not very well handled.
10246 # Step: check device activation
10247 self.lu.LogStep(1, steps_total, "Check device existence")
10248 self._CheckDisksExistence([self.other_node, self.target_node])
10249 self._CheckVolumeGroup([self.target_node, self.other_node])
10251 # Step: check other node consistency
10252 self.lu.LogStep(2, steps_total, "Check peer consistency")
10253 self._CheckDisksConsistency(self.other_node,
10254 self.other_node == self.instance.primary_node,
10257 # Step: create new storage
10258 self.lu.LogStep(3, steps_total, "Allocate new storage")
10259 iv_names = self._CreateNewStorage(self.target_node)
10261 # Step: for each lv, detach+rename*2+attach
10262 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10263 for dev, old_lvs, new_lvs in iv_names.itervalues():
10264 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10266 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10268 result.Raise("Can't detach drbd from local storage on node"
10269 " %s for device %s" % (self.target_node, dev.iv_name))
10271 #cfg.Update(instance)
10273 # ok, we created the new LVs, so now we know we have the needed
10274 # storage; as such, we proceed on the target node to rename
10275 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10276 # using the assumption that logical_id == physical_id (which in
10277 # turn is the unique_id on that node)
10279 # FIXME(iustin): use a better name for the replaced LVs
10280 temp_suffix = int(time.time())
10281 ren_fn = lambda d, suff: (d.physical_id[0],
10282 d.physical_id[1] + "_replaced-%s" % suff)
10284 # Build the rename list based on what LVs exist on the node
10285 rename_old_to_new = []
10286 for to_ren in old_lvs:
10287 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10288 if not result.fail_msg and result.payload:
10290 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10292 self.lu.LogInfo("Renaming the old LVs on the target node")
10293 result = self.rpc.call_blockdev_rename(self.target_node,
10295 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10297 # Now we rename the new LVs to the old LVs
10298 self.lu.LogInfo("Renaming the new LVs on the target node")
10299 rename_new_to_old = [(new, old.physical_id)
10300 for old, new in zip(old_lvs, new_lvs)]
10301 result = self.rpc.call_blockdev_rename(self.target_node,
10303 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10305 # Intermediate steps of in memory modifications
10306 for old, new in zip(old_lvs, new_lvs):
10307 new.logical_id = old.logical_id
10308 self.cfg.SetDiskID(new, self.target_node)
10310 # We need to modify old_lvs so that removal later removes the
10311 # right LVs, not the newly added ones; note that old_lvs is a copy here
10313 for disk in old_lvs:
10314 disk.logical_id = ren_fn(disk, temp_suffix)
10315 self.cfg.SetDiskID(disk, self.target_node)
10317 # Now that the new lvs have the old name, we can add them to the device
10318 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10319 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10321 msg = result.fail_msg
10323 for new_lv in new_lvs:
10324 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10327 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10328 hint=("cleanup manually the unused logical"
10330 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10332 cstep = itertools.count(5)
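# Steps 1-4 above have fixed numbers; the numbering of the remaining steps
# depends on whether old storage is removed early, hence the counter.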
10334 if self.early_release:
10335 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10336 self._RemoveOldStorage(self.target_node, iv_names)
10337 # TODO: Check if releasing locks early still makes sense
10338 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10340 # Release all resource locks except those used by the instance
10341 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10342 keep=self.node_secondary_ip.keys())
10344 # Release all node locks while waiting for sync
10345 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10347 # TODO: Can the instance lock be downgraded here? Take the optional disk
10348 # shutdown in the caller into consideration.
10351 # This can fail as the old devices are degraded and _WaitForSync
10352 # does a combined result over all disks, so we don't check its return value
10353 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10354 _WaitForSync(self.lu, self.instance)
10356 # Check all devices manually
10357 self._CheckDevices(self.instance.primary_node, iv_names)
10359 # Step: remove old storage
10360 if not self.early_release:
10361 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10362 self._RemoveOldStorage(self.target_node, iv_names)
10364 def _ExecDrbd8Secondary(self, feedback_fn):
10365 """Replace the secondary node for DRBD 8.
10367 The algorithm for replace is quite complicated:
10368 - for all disks of the instance:
10369 - create new LVs on the new node with same names
10370 - shutdown the drbd device on the old secondary
10371 - disconnect the drbd network on the primary
10372 - create the drbd device on the new secondary
10373 - network attach the drbd on the primary, using an artifice:
10374 the drbd code for Attach() will connect to the network if it
10375 finds a device which is connected to the good local disks but
10376 not network enabled
10377 - wait for sync across all devices
10378 - remove all disks from the old secondary
10380 Failures are not very well handled.
10385 pnode = self.instance.primary_node
10387 # Step: check device activation
10388 self.lu.LogStep(1, steps_total, "Check device existence")
10389 self._CheckDisksExistence([self.instance.primary_node])
10390 self._CheckVolumeGroup([self.instance.primary_node])
10392 # Step: check other node consistency
10393 self.lu.LogStep(2, steps_total, "Check peer consistency")
10394 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10396 # Step: create new storage
10397 self.lu.LogStep(3, steps_total, "Allocate new storage")
10398 for idx, dev in enumerate(self.instance.disks):
10399 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10400 (self.new_node, idx))
10401 # we pass force_create=True to force LVM creation
10402 for new_lv in dev.children:
10403 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10404 _GetInstanceInfoText(self.instance), False)
10406 # Step 4: drbd minors and drbd setup changes
10407 # after this, we must manually remove the drbd minors on both the
10408 # error and the success paths
10409 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
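# Reserve one new DRBD minor on the new node for every instance disk; these
# reservations are released again (ReleaseDRBDMinors) on the failure paths
# below.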
10410 minors = self.cfg.AllocateDRBDMinor([self.new_node
10411 for dev in self.instance.disks],
10412 self.instance.name)
10413 logging.debug("Allocated minors %r", minors)
10416 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10417 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10418 (self.new_node, idx))
10419 # create new devices on new_node; note that we create two IDs:
10420 # one without port, so the drbd will be activated without
10421 # networking information on the new node at this stage, and one
10422 # with network, for the later network activation below
10423 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10424 if self.instance.primary_node == o_node1:
10427 assert self.instance.primary_node == o_node2, "Three-node instance?"
10430 new_alone_id = (self.instance.primary_node, self.new_node, None,
10431 p_minor, new_minor, o_secret)
10432 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10433 p_minor, new_minor, o_secret)
10435 iv_names[idx] = (dev, dev.children, new_net_id)
10436 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10438 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10439 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10440 logical_id=new_alone_id,
10441 children=dev.children,
10443 params=drbd_params)
10445 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10446 _GetInstanceInfoText(self.instance), False)
10447 except errors.GenericError:
10448 self.cfg.ReleaseDRBDMinors(self.instance.name)
10451 # We have new devices, shutdown the drbd on the old secondary
10452 for idx, dev in enumerate(self.instance.disks):
10453 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10454 self.cfg.SetDiskID(dev, self.target_node)
10455 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10457 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10458 "node: %s" % (idx, msg),
10459 hint=("Please cleanup this device manually as"
10460 " soon as possible"))
10462 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10463 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10464 self.instance.disks)[pnode]
10466 msg = result.fail_msg
10468 # detaches didn't succeed (unlikely)
10469 self.cfg.ReleaseDRBDMinors(self.instance.name)
10470 raise errors.OpExecError("Can't detach the disks from the network on"
10471 " old node: %s" % (msg,))
10473 # if we managed to detach at least one, we update all the disks of
10474 # the instance to point to the new secondary
10475 self.lu.LogInfo("Updating instance configuration")
10476 for dev, _, new_logical_id in iv_names.itervalues():
10477 dev.logical_id = new_logical_id
10478 self.cfg.SetDiskID(dev, self.instance.primary_node)
10480 self.cfg.Update(self.instance, feedback_fn)
10482 # Release all node locks (the configuration has been updated)
10483 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10485 # and now perform the drbd attach
10486 self.lu.LogInfo("Attaching primary drbds to new secondary"
10487 " (standalone => connected)")
10488 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10490 self.node_secondary_ip,
10491 self.instance.disks,
10492 self.instance.name,
10494 for to_node, to_result in result.items():
10495 msg = to_result.fail_msg
10497 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10499 hint=("please do a gnt-instance info to see the"
10500 " status of disks"))
10502 cstep = itertools.count(5)
10504 if self.early_release:
10505 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10506 self._RemoveOldStorage(self.target_node, iv_names)
10507 # TODO: Check if releasing locks early still makes sense
10508 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10510 # Release all resource locks except those used by the instance
10511 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10512 keep=self.node_secondary_ip.keys())
10514 # TODO: Can the instance lock be downgraded here? Take the optional disk
10515 # shutdown in the caller into consideration.
10518 # This can fail as the old devices are degraded and _WaitForSync
10519 # does a combined result over all disks, so we don't check its return value
10520 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10521 _WaitForSync(self.lu, self.instance)
10523 # Check all devices manually
10524 self._CheckDevices(self.instance.primary_node, iv_names)
10526 # Step: remove old storage
10527 if not self.early_release:
10528 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10529 self._RemoveOldStorage(self.target_node, iv_names)
10532 class LURepairNodeStorage(NoHooksLU):
10533 """Repairs the volume group on a node.
10538 def CheckArguments(self):
10539 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10541 storage_type = self.op.storage_type
10543 if (constants.SO_FIX_CONSISTENCY not in
10544 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10545 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10546 " repaired" % storage_type,
10547 errors.ECODE_INVAL)
10549 def ExpandNames(self):
10550 self.needed_locks = {
10551 locking.LEVEL_NODE: [self.op.node_name],
10554 def _CheckFaultyDisks(self, instance, node_name):
10555 """Ensure faulty disks abort the opcode or at least warn."""
10557 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10559 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10560 " node '%s'" % (instance.name, node_name),
10561 errors.ECODE_STATE)
10562 except errors.OpPrereqError, err:
10563 if self.op.ignore_consistency:
10564 self.proc.LogWarning(str(err.args[0]))
10568 def CheckPrereq(self):
10569 """Check prerequisites.
10572 # Check whether any instance on this node has faulty disks
10573 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10574 if inst.admin_state != constants.ADMINST_UP:
10576 check_nodes = set(inst.all_nodes)
10577 check_nodes.discard(self.op.node_name)
10578 for inst_node_name in check_nodes:
10579 self._CheckFaultyDisks(inst, inst_node_name)
10581 def Exec(self, feedback_fn):
10582 feedback_fn("Repairing storage unit '%s' on %s ..." %
10583 (self.op.name, self.op.node_name))
10585 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10586 result = self.rpc.call_storage_execute(self.op.node_name,
10587 self.op.storage_type, st_args,
10589 constants.SO_FIX_CONSISTENCY)
10590 result.Raise("Failed to repair storage unit '%s' on %s" %
10591 (self.op.name, self.op.node_name))
10594 class LUNodeEvacuate(NoHooksLU):
10595 """Evacuates instances off a list of nodes.
10600 _MODE2IALLOCATOR = {
10601 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10602 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10603 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10605 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10606 assert (frozenset(_MODE2IALLOCATOR.values()) ==
10607 constants.IALLOCATOR_NEVAC_MODES)
10609 def CheckArguments(self):
10610 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10612 def ExpandNames(self):
10613 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10615 if self.op.remote_node is not None:
10616 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10617 assert self.op.remote_node
10619 if self.op.remote_node == self.op.node_name:
10620 raise errors.OpPrereqError("Can not use evacuated node as a new"
10621 " secondary node", errors.ECODE_INVAL)
10623 if self.op.mode != constants.NODE_EVAC_SEC:
10624 raise errors.OpPrereqError("Without the use of an iallocator only"
10625 " secondary instances can be evacuated",
10626 errors.ECODE_INVAL)
10629 self.share_locks = _ShareAll()
10630 self.needed_locks = {
10631 locking.LEVEL_INSTANCE: [],
10632 locking.LEVEL_NODEGROUP: [],
10633 locking.LEVEL_NODE: [],
10636 # Determine nodes (via group) optimistically, needs verification once locks
10637 # have been acquired
10638 self.lock_nodes = self._DetermineNodes()
10640 def _DetermineNodes(self):
10641 """Gets the list of nodes to operate on.
10644 if self.op.remote_node is None:
10645 # Iallocator will choose any node(s) in the same group
10646 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10648 group_nodes = frozenset([self.op.remote_node])
10650 # Determine nodes to be locked
10651 return set([self.op.node_name]) | group_nodes
10653 def _DetermineInstances(self):
10654 """Builds list of instances to operate on.
10657 assert self.op.mode in constants.NODE_EVAC_MODES
10659 if self.op.mode == constants.NODE_EVAC_PRI:
10660 # Primary instances only
10661 inst_fn = _GetNodePrimaryInstances
10662 assert self.op.remote_node is None, \
10663 "Evacuating primary instances requires iallocator"
10664 elif self.op.mode == constants.NODE_EVAC_SEC:
10665 # Secondary instances only
10666 inst_fn = _GetNodeSecondaryInstances
10669 assert self.op.mode == constants.NODE_EVAC_ALL
10670 inst_fn = _GetNodeInstances
10671 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10673 raise errors.OpPrereqError("Due to an issue with the iallocator"
10674 " interface it is not possible to evacuate"
10675 " all instances at once; specify explicitly"
10676 " whether to evacuate primary or secondary"
10678 errors.ECODE_INVAL)
10680 return inst_fn(self.cfg, self.op.node_name)
10682 def DeclareLocks(self, level):
10683 if level == locking.LEVEL_INSTANCE:
10684 # Lock instances optimistically, needs verification once node and group
10685 # locks have been acquired
10686 self.needed_locks[locking.LEVEL_INSTANCE] = \
10687 set(i.name for i in self._DetermineInstances())
10689 elif level == locking.LEVEL_NODEGROUP:
10690 # Lock node groups for all potential target nodes optimistically, needs
10691 # verification once nodes have been acquired
10692 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10693 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10695 elif level == locking.LEVEL_NODE:
10696 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10698 def CheckPrereq(self):
10700 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10701 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10702 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10704 need_nodes = self._DetermineNodes()
10706 if not owned_nodes.issuperset(need_nodes):
10707 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10708 " locks were acquired, current nodes are"
10709 " are '%s', used to be '%s'; retry the"
10711 (self.op.node_name,
10712 utils.CommaJoin(need_nodes),
10713 utils.CommaJoin(owned_nodes)),
10714 errors.ECODE_STATE)
10716 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10717 if owned_groups != wanted_groups:
10718 raise errors.OpExecError("Node groups changed since locks were acquired,"
10719 " current groups are '%s', used to be '%s';"
10720 " retry the operation" %
10721 (utils.CommaJoin(wanted_groups),
10722 utils.CommaJoin(owned_groups)))
10724 # Determine affected instances
10725 self.instances = self._DetermineInstances()
10726 self.instance_names = [i.name for i in self.instances]
10728 if set(self.instance_names) != owned_instances:
10729 raise errors.OpExecError("Instances on node '%s' changed since locks"
10730 " were acquired, current instances are '%s',"
10731 " used to be '%s'; retry the operation" %
10732 (self.op.node_name,
10733 utils.CommaJoin(self.instance_names),
10734 utils.CommaJoin(owned_instances)))
10736 if self.instance_names:
10737 self.LogInfo("Evacuating instances from node '%s': %s",
10739 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10741 self.LogInfo("No instances to evacuate from node '%s'",
10744 if self.op.remote_node is not None:
10745 for i in self.instances:
10746 if i.primary_node == self.op.remote_node:
10747 raise errors.OpPrereqError("Node %s is the primary node of"
10748 " instance %s, cannot use it as"
10750 (self.op.remote_node, i.name),
10751 errors.ECODE_INVAL)
10753 def Exec(self, feedback_fn):
10754 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10756 if not self.instance_names:
10757 # No instances to evacuate
10760 elif self.op.iallocator is not None:
10761 # TODO: Implement relocation to other group
10762 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10763 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
10764 instances=list(self.instance_names))
10766 ial.Run(self.op.iallocator)
10768 if not ial.success:
10769 raise errors.OpPrereqError("Can't compute node evacuation using"
10770 " iallocator '%s': %s" %
10771 (self.op.iallocator, ial.info),
10772 errors.ECODE_NORES)
10774 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10776 elif self.op.remote_node is not None:
10777 assert self.op.mode == constants.NODE_EVAC_SEC
10779 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10780 remote_node=self.op.remote_node,
10782 mode=constants.REPLACE_DISK_CHG,
10783 early_release=self.op.early_release)]
10784 for instance_name in self.instance_names
10788 raise errors.ProgrammerError("No iallocator or remote node")
10790 return ResultWithJobs(jobs)
10793 def _SetOpEarlyRelease(early_release, op):
10794 """Sets C{early_release} flag on opcodes if available.
10798 op.early_release = early_release
10799 except AttributeError:
10800 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10805 def _NodeEvacDest(use_nodes, group, nodes):
10806 """Returns group or nodes depending on caller's choice.
10810 return utils.CommaJoin(nodes)
10815 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10816 """Unpacks the result of change-group and node-evacuate iallocator requests.
10818 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10819 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10821 @type lu: L{LogicalUnit}
10822 @param lu: Logical unit instance
10823 @type alloc_result: tuple/list
10824 @param alloc_result: Result from iallocator
10825 @type early_release: bool
10826 @param early_release: Whether to release locks early if possible
10827 @type use_nodes: bool
10828 @param use_nodes: Whether to display node names instead of groups
10831 (moved, failed, jobs) = alloc_result
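# The iallocator result is a triple: instances that can be moved (with their
# destination), instances that could not be evacuated (with a reason), and the
# corresponding job definitions as lists of serialized opcodes.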
10834 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10835 for (name, reason) in failed)
10836 lu.LogWarning("Unable to evacuate instances %s", failreason)
10837 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
10840 lu.LogInfo("Instances to be moved: %s",
10841 utils.CommaJoin("%s (to %s)" %
10842 (name, _NodeEvacDest(use_nodes, group, nodes))
10843 for (name, group, nodes) in moved))
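# Each entry in 'jobs' is one job, i.e. a list of serialized opcodes;
# deserialize them and propagate the early_release flag to opcodes that
# support it.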
10845 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10846 map(opcodes.OpCode.LoadOpCode, ops))
10850 class LUInstanceGrowDisk(LogicalUnit):
10851 """Grow a disk of an instance.
10854 HPATH = "disk-grow"
10855 HTYPE = constants.HTYPE_INSTANCE
10858 def ExpandNames(self):
10859 self._ExpandAndLockInstance()
10860 self.needed_locks[locking.LEVEL_NODE] = []
10861 self.needed_locks[locking.LEVEL_NODE_RES] = []
10862 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
10864 def DeclareLocks(self, level):
10865 if level == locking.LEVEL_NODE:
10866 self._LockInstancesNodes()
10867 elif level == locking.LEVEL_NODE_RES:
10869 self.needed_locks[locking.LEVEL_NODE_RES] = \
10870 self.needed_locks[locking.LEVEL_NODE][:]
10872 def BuildHooksEnv(self):
10873 """Build hooks env.
10875 This runs on the master, the primary and all the secondaries.
10879 "DISK": self.op.disk,
10880 "AMOUNT": self.op.amount,
10882 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10885 def BuildHooksNodes(self):
10886 """Build hooks nodes.
10889 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10892 def CheckPrereq(self):
10893 """Check prerequisites.
10895 This checks that the instance is in the cluster.
10898 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10899 assert instance is not None, \
10900 "Cannot retrieve locked instance %s" % self.op.instance_name
10901 nodenames = list(instance.all_nodes)
10902 for node in nodenames:
10903 _CheckNodeOnline(self, node)
10905 self.instance = instance
10907 if instance.disk_template not in constants.DTS_GROWABLE:
10908 raise errors.OpPrereqError("Instance's disk layout does not support"
10909 " growing", errors.ECODE_INVAL)
10911 self.disk = instance.FindDisk(self.op.disk)
10913 if instance.disk_template not in (constants.DT_FILE,
10914 constants.DT_SHARED_FILE):
10915 # TODO: check the free disk space for file, when that feature will be
10917 _CheckNodesFreeDiskPerVG(self, nodenames,
10918 self.disk.ComputeGrowth(self.op.amount))
10920 def Exec(self, feedback_fn):
10921 """Execute disk grow.
10924 instance = self.instance
10927 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
10928 assert (self.owned_locks(locking.LEVEL_NODE) ==
10929 self.owned_locks(locking.LEVEL_NODE_RES))
10931 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10933 raise errors.OpExecError("Cannot activate block device to grow")
10935 feedback_fn("Growing disk %s of instance '%s' by %s" %
10936 (self.op.disk, instance.name,
10937 utils.FormatUnit(self.op.amount, "h")))
10939 # First run all grow ops in dry-run mode
10940 for node in instance.all_nodes:
10941 self.cfg.SetDiskID(disk, node)
10942 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10943 result.Raise("Grow request failed to node %s" % node)
10945 # We know that (as far as we can test) operations across different
10946 # nodes will succeed, time to run it for real
10947 for node in instance.all_nodes:
10948 self.cfg.SetDiskID(disk, node)
10949 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10950 result.Raise("Grow request failed to node %s" % node)
10952 # TODO: Rewrite code to work properly
10953 # DRBD goes into sync mode for a short amount of time after executing the
10954 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10955 # calling "resize" in sync mode fails. Sleeping for a short amount of
10956 # time is a work-around.
10959 disk.RecordGrow(self.op.amount)
10960 self.cfg.Update(instance, feedback_fn)
10962 # Changes have been recorded, release node lock
10963 _ReleaseLocks(self, locking.LEVEL_NODE)
10965 # Downgrade lock while waiting for sync
10966 self.glm.downgrade(locking.LEVEL_INSTANCE)
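# Waiting for sync can take a long time; holding the instance lock only in
# shared mode meanwhile lets read-only operations on the instance proceed.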
10968 if self.op.wait_for_sync:
10969 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10971 self.proc.LogWarning("Disk sync-ing has not returned a good"
10972 " status; please check the instance")
10973 if instance.admin_state != constants.ADMINST_UP:
10974 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10975 elif instance.admin_state != constants.ADMINST_UP:
10976 self.proc.LogWarning("Not shutting down the disk even though the instance"
10977 " is not supposed to be running, because"
10978 " wait_for_sync was not requested")
10980 assert self.owned_locks(locking.LEVEL_NODE_RES)
10981 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
10984 class LUInstanceQueryData(NoHooksLU):
10985 """Query runtime instance data.
10990 def ExpandNames(self):
10991 self.needed_locks = {}
10993 # Use locking if requested or when non-static information is wanted
10994 if not (self.op.static or self.op.use_locking):
10995 self.LogWarning("Non-static data requested, locks need to be acquired")
10996 self.op.use_locking = True
10998 if self.op.instances or not self.op.use_locking:
10999 # Expand instance names right here
11000 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11002 # Will use acquired locks
11003 self.wanted_names = None
11005 if self.op.use_locking:
11006 self.share_locks = _ShareAll()
11008 if self.wanted_names is None:
11009 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11011 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11013 self.needed_locks[locking.LEVEL_NODE] = []
11014 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11016 def DeclareLocks(self, level):
11017 if self.op.use_locking and level == locking.LEVEL_NODE:
11018 self._LockInstancesNodes()
11020 def CheckPrereq(self):
11021 """Check prerequisites.
11023 This only checks the optional instance list against the existing names.
11026 if self.wanted_names is None:
11027 assert self.op.use_locking, "Locking was not used"
11028 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11030 self.wanted_instances = \
11031 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11033 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11034 """Returns the status of a block device
11037 if self.op.static or not node:
11040 self.cfg.SetDiskID(dev, node)
11042 result = self.rpc.call_blockdev_find(node, dev)
11046 result.Raise("Can't compute disk status for %s" % instance_name)
11048 status = result.payload
11052 return (status.dev_path, status.major, status.minor,
11053 status.sync_percent, status.estimated_time,
11054 status.is_degraded, status.ldisk_status)
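# The tuple returned above therefore has the shape
#   (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
#    ldisk_status)
# e.g. ("/dev/drbd0", 147, 0, 95.2, 34, True, <ldisk status>) for a DRBD
# device that is still resyncing (values illustrative only).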
11056 def _ComputeDiskStatus(self, instance, snode, dev):
11057 """Compute block device status.
11060 if dev.dev_type in constants.LDS_DRBD:
11061 # we change the snode then (otherwise we use the one passed in)
11062 if dev.logical_id[0] == instance.primary_node:
11063 snode = dev.logical_id[1]
11065 snode = dev.logical_id[0]
11067 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11068 instance.name, dev)
11069 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11072 dev_children = map(compat.partial(self._ComputeDiskStatus,
11079 "iv_name": dev.iv_name,
11080 "dev_type": dev.dev_type,
11081 "logical_id": dev.logical_id,
11082 "physical_id": dev.physical_id,
11083 "pstatus": dev_pstatus,
11084 "sstatus": dev_sstatus,
11085 "children": dev_children,
11090 def Exec(self, feedback_fn):
11091 """Gather and return data"""
11094 cluster = self.cfg.GetClusterInfo()
11096 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11097 for i in self.wanted_instances)
11098 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11099 if self.op.static or pnode.offline:
11100 remote_state = None
11102 self.LogWarning("Primary node %s is marked offline, returning static"
11103 " information only for instance %s" %
11104 (pnode.name, instance.name))
11106 remote_info = self.rpc.call_instance_info(instance.primary_node,
11108 instance.hypervisor)
11109 remote_info.Raise("Error checking node %s" % instance.primary_node)
11110 remote_info = remote_info.payload
11111 if remote_info and "state" in remote_info:
11112 remote_state = "up"
11114 if instance.admin_state == constants.ADMINST_UP:
11115 remote_state = "down"
11117 remote_state = instance.admin_state
11119 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11122 result[instance.name] = {
11123 "name": instance.name,
11124 "config_state": instance.admin_state,
11125 "run_state": remote_state,
11126 "pnode": instance.primary_node,
11127 "snodes": instance.secondary_nodes,
11129 # this happens to be the same format used for hooks
11130 "nics": _NICListToTuple(self, instance.nics),
11131 "disk_template": instance.disk_template,
11133 "hypervisor": instance.hypervisor,
11134 "network_port": instance.network_port,
11135 "hv_instance": instance.hvparams,
11136 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11137 "be_instance": instance.beparams,
11138 "be_actual": cluster.FillBE(instance),
11139 "os_instance": instance.osparams,
11140 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11141 "serial_no": instance.serial_no,
11142 "mtime": instance.mtime,
11143 "ctime": instance.ctime,
11144 "uuid": instance.uuid,
11150 class LUInstanceSetParams(LogicalUnit):
11151 """Modifies an instances's parameters.
11154 HPATH = "instance-modify"
11155 HTYPE = constants.HTYPE_INSTANCE
11158 def CheckArguments(self):
11159 if not (self.op.nics or self.op.disks or self.op.disk_template or
11160 self.op.hvparams or self.op.beparams or self.op.os_name or
11161 self.op.online_inst or self.op.offline_inst):
11162 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11164 if self.op.hvparams:
11165 _CheckGlobalHvParams(self.op.hvparams)
11169 for disk_op, disk_dict in self.op.disks:
11170 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11171 if disk_op == constants.DDM_REMOVE:
11172 disk_addremove += 1
11174 elif disk_op == constants.DDM_ADD:
11175 disk_addremove += 1
11177 if not isinstance(disk_op, int):
11178 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11179 if not isinstance(disk_dict, dict):
11180 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11181 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11183 if disk_op == constants.DDM_ADD:
11184 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11185 if mode not in constants.DISK_ACCESS_SET:
11186 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11187 errors.ECODE_INVAL)
11188 size = disk_dict.get(constants.IDISK_SIZE, None)
11190 raise errors.OpPrereqError("Required disk parameter size missing",
11191 errors.ECODE_INVAL)
11194 except (TypeError, ValueError), err:
11195 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11196 str(err), errors.ECODE_INVAL)
11197 disk_dict[constants.IDISK_SIZE] = size
11199 # modification of disk
11200 if constants.IDISK_SIZE in disk_dict:
11201 raise errors.OpPrereqError("Disk size change not possible, use"
11202 " grow-disk", errors.ECODE_INVAL)
11204 if disk_addremove > 1:
11205 raise errors.OpPrereqError("Only one disk add or remove operation"
11206 " supported at a time", errors.ECODE_INVAL)
11208 if self.op.disks and self.op.disk_template is not None:
11209 raise errors.OpPrereqError("Disk template conversion and other disk"
11210 " changes not supported at the same time",
11211 errors.ECODE_INVAL)
11213 if (self.op.disk_template and
11214 self.op.disk_template in constants.DTS_INT_MIRROR and
11215 self.op.remote_node is None):
11216 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11217 " one requires specifying a secondary node",
11218 errors.ECODE_INVAL)
11222 for nic_op, nic_dict in self.op.nics:
11223 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11224 if nic_op == constants.DDM_REMOVE:
11227 elif nic_op == constants.DDM_ADD:
11230 if not isinstance(nic_op, int):
11231 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11232 if not isinstance(nic_dict, dict):
11233 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11234 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11236 # nic_dict should be a dict
11237 nic_ip = nic_dict.get(constants.INIC_IP, None)
11238 if nic_ip is not None:
11239 if nic_ip.lower() == constants.VALUE_NONE:
11240 nic_dict[constants.INIC_IP] = None
11242 if not netutils.IPAddress.IsValid(nic_ip):
11243 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11244 errors.ECODE_INVAL)
11246 nic_bridge = nic_dict.get("bridge", None)
11247 nic_link = nic_dict.get(constants.INIC_LINK, None)
11248 if nic_bridge and nic_link:
11249 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11250 " at the same time", errors.ECODE_INVAL)
11251 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11252 nic_dict["bridge"] = None
11253 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11254 nic_dict[constants.INIC_LINK] = None
11256 if nic_op == constants.DDM_ADD:
11257 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11258 if nic_mac is None:
11259 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11261 if constants.INIC_MAC in nic_dict:
11262 nic_mac = nic_dict[constants.INIC_MAC]
11263 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11264 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11266 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11267 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11268 " modifying an existing nic",
11269 errors.ECODE_INVAL)
11271 if nic_addremove > 1:
11272 raise errors.OpPrereqError("Only one NIC add or remove operation"
11273 " supported at a time", errors.ECODE_INVAL)
11275 def ExpandNames(self):
11276 self._ExpandAndLockInstance()
11277 # Can't even acquire node locks in shared mode as upcoming changes in
11278 # Ganeti 2.6 will start to modify the node object on disk conversion
11279 self.needed_locks[locking.LEVEL_NODE] = []
11280 self.needed_locks[locking.LEVEL_NODE_RES] = []
11281 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11283 def DeclareLocks(self, level):
11284 if level == locking.LEVEL_NODE:
11285 self._LockInstancesNodes()
11286 if self.op.disk_template and self.op.remote_node:
11287 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11288 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11289 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11291 self.needed_locks[locking.LEVEL_NODE_RES] = \
11292 self.needed_locks[locking.LEVEL_NODE][:]
11294 def BuildHooksEnv(self):
11295 """Build hooks env.
11297 This runs on the master, primary and secondaries.
11301 if constants.BE_MINMEM in self.be_new:
11302 args["minmem"] = self.be_new[constants.BE_MINMEM]
11303 if constants.BE_MAXMEM in self.be_new:
11304 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11305 if constants.BE_VCPUS in self.be_new:
11306 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11307 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11308 # information at all.
11311 nic_override = dict(self.op.nics)
11312 for idx, nic in enumerate(self.instance.nics):
11313 if idx in nic_override:
11314 this_nic_override = nic_override[idx]
11316 this_nic_override = {}
11317 if constants.INIC_IP in this_nic_override:
11318 ip = this_nic_override[constants.INIC_IP]
11321 if constants.INIC_MAC in this_nic_override:
11322 mac = this_nic_override[constants.INIC_MAC]
11325 if idx in self.nic_pnew:
11326 nicparams = self.nic_pnew[idx]
11328 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11329 mode = nicparams[constants.NIC_MODE]
11330 link = nicparams[constants.NIC_LINK]
11331 args["nics"].append((ip, mac, mode, link))
11332 if constants.DDM_ADD in nic_override:
11333 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11334 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11335 nicparams = self.nic_pnew[constants.DDM_ADD]
11336 mode = nicparams[constants.NIC_MODE]
11337 link = nicparams[constants.NIC_LINK]
11338 args["nics"].append((ip, mac, mode, link))
11339 elif constants.DDM_REMOVE in nic_override:
11340 del args["nics"][-1]
11342 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11343 if self.op.disk_template:
11344 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11348 def BuildHooksNodes(self):
11349 """Build hooks nodes.
11352 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11355 def CheckPrereq(self):
11356 """Check prerequisites.
11358 This only checks the instance list against the existing names.
11361 # checking the new params on the primary/secondary nodes
11363 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11364 cluster = self.cluster = self.cfg.GetClusterInfo()
11365 assert self.instance is not None, \
11366 "Cannot retrieve locked instance %s" % self.op.instance_name
11367 pnode = instance.primary_node
11368 nodelist = list(instance.all_nodes)
11369 pnode_info = self.cfg.GetNodeInfo(pnode)
11370 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11373 if self.op.os_name and not self.op.force:
11374 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11375 self.op.force_variant)
11376 instance_os = self.op.os_name
11378 instance_os = instance.os
11380 if self.op.disk_template:
11381 if instance.disk_template == self.op.disk_template:
11382 raise errors.OpPrereqError("Instance already has disk template %s" %
11383 instance.disk_template, errors.ECODE_INVAL)
11385 if (instance.disk_template,
11386 self.op.disk_template) not in self._DISK_CONVERSIONS:
11387 raise errors.OpPrereqError("Unsupported disk template conversion from"
11388 " %s to %s" % (instance.disk_template,
11389 self.op.disk_template),
11390 errors.ECODE_INVAL)
11391 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11392 msg="cannot change disk template")
11393 if self.op.disk_template in constants.DTS_INT_MIRROR:
11394 if self.op.remote_node == pnode:
11395 raise errors.OpPrereqError("Given new secondary node %s is the same"
11396 " as the primary node of the instance" %
11397 self.op.remote_node, errors.ECODE_STATE)
11398 _CheckNodeOnline(self, self.op.remote_node)
11399 _CheckNodeNotDrained(self, self.op.remote_node)
11400 # FIXME: here we assume that the old instance type is DT_PLAIN
11401 assert instance.disk_template == constants.DT_PLAIN
11402 disks = [{constants.IDISK_SIZE: d.size,
11403 constants.IDISK_VG: d.logical_id[0]}
11404 for d in instance.disks]
11405 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11406 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11408 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11409 if pnode_info.group != snode_info.group:
11410 self.LogWarning("The primary and secondary nodes are in two"
11411 " different node groups; the disk parameters"
11412 " from the first disk's node group will be"
11415 # hvparams processing
11416 if self.op.hvparams:
11417 hv_type = instance.hypervisor
11418 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11419 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11420 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11423 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11424 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11425 self.hv_proposed = self.hv_new = hv_new # the new actual values
11426 self.hv_inst = i_hvdict # the new dict (without defaults)
11428 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11430 self.hv_new = self.hv_inst = {}
11432 # beparams processing
11433 if self.op.beparams:
11434 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11436 objects.UpgradeBeParams(i_bedict)
11437 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11438 be_new = cluster.SimpleFillBE(i_bedict)
11439 self.be_proposed = self.be_new = be_new # the new actual values
11440 self.be_inst = i_bedict # the new dict (without defaults)
11442 self.be_new = self.be_inst = {}
11443 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11444 be_old = cluster.FillBE(instance)
11446 # CPU param validation -- checking every time a parameter is
11447 # changed, to cover all cases where either CPU mask or vcpus have been changed
11449 if (constants.BE_VCPUS in self.be_proposed and
11450 constants.HV_CPU_MASK in self.hv_proposed):
11452 cpu_list = utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11453 # Verify mask is consistent with number of vCPUs. Can skip this
11454 # test if only 1 entry in the CPU mask, which means same mask
11455 # is applied to all vCPUs.
11456 if (len(cpu_list) > 1 and
11457 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11458 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11460 (self.be_proposed[constants.BE_VCPUS],
11461 self.hv_proposed[constants.HV_CPU_MASK]),
11462 errors.ECODE_INVAL)
11464 # Only perform this test if a new CPU mask is given
11465 if constants.HV_CPU_MASK in self.hv_new:
11466 # Calculate the largest CPU number requested
11467 max_requested_cpu = max(map(max, cpu_list))
11468 # Check that all of the instance's nodes have enough physical CPUs to
11469 # satisfy the requested CPU mask
11470 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11471 max_requested_cpu + 1, instance.hypervisor)
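# Worked example of the two checks above (illustrative only, assuming the
# usual colon-separated per-vCPU syntax accepted by ParseMultiCpuMask):
# a mask such as "0-1:2-3:4" yields three entries and is therefore only
# accepted together with BE_VCPUS == 3, and since its highest CPU number is
# 4, every node hosting the instance must expose at least 5 physical CPUs;
# a single-entry mask like "0-3" applies to all vCPUs and passes the first
# check for any vCPU count.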
11473 # osparams processing
11474 if self.op.osparams:
11475 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11476 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11477 self.os_inst = i_osdict # the new dict (without defaults)
11483 #TODO(dynmem): do the appropriate check involving MINMEM
11484 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11485 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11486 mem_check_list = [pnode]
11487 if be_new[constants.BE_AUTO_BALANCE]:
11488 # either we changed auto_balance to yes or it was from before
11489 mem_check_list.extend(instance.secondary_nodes)
11490 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11491 instance.hypervisor)
11492 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11493 [instance.hypervisor])
11494 pninfo = nodeinfo[pnode]
11495 msg = pninfo.fail_msg
11497 # Assume the primary node is unreachable and go ahead
11498 self.warn.append("Can't get info from primary node %s: %s" %
11501 (_, _, (pnhvinfo, )) = pninfo.payload
11502 if not isinstance(pnhvinfo.get("memory_free", None), int):
11503 self.warn.append("Node data from primary node %s doesn't contain"
11504 " free memory information" % pnode)
11505 elif instance_info.fail_msg:
11506 self.warn.append("Can't get instance runtime information: %s" %
11507 instance_info.fail_msg)
11509 if instance_info.payload:
11510 current_mem = int(instance_info.payload["memory"])
11512 # Assume instance not running
11513 # (there is a slight race condition here, but it's not very
11514 # probable, and we have no other way to check)
11515 # TODO: Describe race condition
11517 #TODO(dynmem): do the appropriate check involving MINMEM
11518 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11519 pnhvinfo["memory_free"])
11521 raise errors.OpPrereqError("This change will prevent the instance"
11522 " from starting, due to %d MB of memory"
11523 " missing on its primary node" %
11525 errors.ECODE_NORES)
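# Worked example of the memory check above (numbers illustrative only):
# raising BE_MAXMEM to 4096 MB while the instance currently uses 2048 MB
# and the primary node reports 1024 MB free gives
#   miss_mem = 4096 - 2048 - 1024 = 1024
# which is positive, so the change is refused with ECODE_NORES.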
11527 if be_new[constants.BE_AUTO_BALANCE]:
11528 for node, nres in nodeinfo.items():
11529 if node not in instance.secondary_nodes:
11531 nres.Raise("Can't get info from secondary node %s" % node,
11532 prereq=True, ecode=errors.ECODE_STATE)
11533 (_, _, (nhvinfo, )) = nres.payload
11534 if not isinstance(nhvinfo.get("memory_free", None), int):
11535 raise errors.OpPrereqError("Secondary node %s didn't return free"
11536 " memory information" % node,
11537 errors.ECODE_STATE)
11538 #TODO(dynmem): do the appropriate check involving MINMEM
11539 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11540 raise errors.OpPrereqError("This change will prevent the instance"
11541 " from failover to its secondary node"
11542 " %s, due to not enough memory" % node,
11543 errors.ECODE_STATE)
11547 self.nic_pinst = {}
11548 for nic_op, nic_dict in self.op.nics:
11549 if nic_op == constants.DDM_REMOVE:
11550 if not instance.nics:
11551 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11552 errors.ECODE_INVAL)
11554 if nic_op != constants.DDM_ADD:
11556 if not instance.nics:
11557 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11558 " no NICs" % nic_op,
11559 errors.ECODE_INVAL)
11560 if nic_op < 0 or nic_op >= len(instance.nics):
11561 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11563 (nic_op, len(instance.nics) - 1),
11564 errors.ECODE_INVAL)
11565 old_nic_params = instance.nics[nic_op].nicparams
11566 old_nic_ip = instance.nics[nic_op].ip
11568 old_nic_params = {}
11571 update_params_dict = dict([(key, nic_dict[key])
11572 for key in constants.NICS_PARAMETERS
11573 if key in nic_dict])
11575 if "bridge" in nic_dict:
11576 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11578 new_nic_params = _GetUpdatedParams(old_nic_params,
11579 update_params_dict)
11580 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11581 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11582 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11583 self.nic_pinst[nic_op] = new_nic_params
11584 self.nic_pnew[nic_op] = new_filled_nic_params
11585 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11587 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11588 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11589 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11591 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11593 self.warn.append(msg)
11595 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11596 if new_nic_mode == constants.NIC_MODE_ROUTED:
11597 if constants.INIC_IP in nic_dict:
11598 nic_ip = nic_dict[constants.INIC_IP]
11600 nic_ip = old_nic_ip
11602 raise errors.OpPrereqError("Cannot set the nic ip to None"
11603 " on a routed nic", errors.ECODE_INVAL)
11604 if constants.INIC_MAC in nic_dict:
11605 nic_mac = nic_dict[constants.INIC_MAC]
11606 if nic_mac is None:
11607 raise errors.OpPrereqError("Cannot set the nic mac to None",
11608 errors.ECODE_INVAL)
11609 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11610 # otherwise generate the mac
11611 nic_dict[constants.INIC_MAC] = \
11612 self.cfg.GenerateMAC(self.proc.GetECId())
11614 # or validate/reserve the current one
11616 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11617 except errors.ReservationError:
11618 raise errors.OpPrereqError("MAC address %s already in use"
11619 " in cluster" % nic_mac,
11620 errors.ECODE_NOTUNIQUE)
11623 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11624 raise errors.OpPrereqError("Disk operations not supported for"
11625 " diskless instances",
11626 errors.ECODE_INVAL)
11627 for disk_op, _ in self.op.disks:
11628 if disk_op == constants.DDM_REMOVE:
11629 if len(instance.disks) == 1:
11630 raise errors.OpPrereqError("Cannot remove the last disk of"
11631 " an instance", errors.ECODE_INVAL)
11632 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11633 msg="cannot remove disks")
11635 if (disk_op == constants.DDM_ADD and
11636 len(instance.disks) >= constants.MAX_DISKS):
11637 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11638 " add more" % constants.MAX_DISKS,
11639 errors.ECODE_STATE)
11640 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11642 if disk_op < 0 or disk_op >= len(instance.disks):
11643 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11645 (disk_op, len(instance.disks)),
11646 errors.ECODE_INVAL)
11648 # disabling the instance
11649 if self.op.offline_inst:
11650 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11651 msg="cannot change instance state to offline")
11653 # enabling the instance
11654 if self.op.online_inst:
11655 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
11656 msg="cannot make instance go online")
11658 def _ConvertPlainToDrbd(self, feedback_fn):
11659 """Converts an instance from plain to drbd.
11662 feedback_fn("Converting template to drbd")
11663 instance = self.instance
11664 pnode = instance.primary_node
11665 snode = self.op.remote_node
11667 assert instance.disk_template == constants.DT_PLAIN
11669 # create a fake disk info for _GenerateDiskTemplate
11670 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11671 constants.IDISK_VG: d.logical_id[0]}
11672 for d in instance.disks]
11673 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11674 instance.name, pnode, [snode],
11675 disk_info, None, None, 0, feedback_fn,
11677 info = _GetInstanceInfoText(instance)
11678 feedback_fn("Creating aditional volumes...")
11679 # first, create the missing data and meta devices
11680 for disk in new_disks:
11681 # unfortunately this is... not too nice
11682 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11684 for child in disk.children:
11685 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11686 # at this stage, all new LVs have been created, we can rename the old ones
11688 feedback_fn("Renaming original volumes...")
11689 rename_list = [(o, n.children[0].logical_id)
11690 for (o, n) in zip(instance.disks, new_disks)]
11691 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11692 result.Raise("Failed to rename original LVs")
11694 feedback_fn("Initializing DRBD devices...")
11695 # all child devices are in place, we can now create the DRBD devices
11696 for disk in new_disks:
11697 for node in [pnode, snode]:
11698 f_create = node == pnode
11699 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11701 # at this point, the instance has been modified
11702 instance.disk_template = constants.DT_DRBD8
11703 instance.disks = new_disks
11704 self.cfg.Update(instance, feedback_fn)
11706 # Release node locks while waiting for sync
11707 _ReleaseLocks(self, locking.LEVEL_NODE)
11709 # disks are created, waiting for sync
11710 disk_abort = not _WaitForSync(self, instance,
11711 oneshot=not self.op.wait_for_sync)
11713 raise errors.OpExecError("There are some degraded disks for"
11714 " this instance, please cleanup manually")
11716 # Node resource locks will be released by caller
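# In short, the conversion above reuses the existing data LVs: the missing
# meta (and, on the new secondary, data) volumes are created first, the old
# LVs are then renamed to the logical IDs expected as the data children of
# the new DRBD disks, and only after that are the DRBD devices themselves
# assembled on both nodes and the configuration updated.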
11718 def _ConvertDrbdToPlain(self, feedback_fn):
11719 """Converts an instance from drbd to plain.
11722 instance = self.instance
11724 assert len(instance.secondary_nodes) == 1
11725 assert instance.disk_template == constants.DT_DRBD8
11727 pnode = instance.primary_node
11728 snode = instance.secondary_nodes[0]
11729 feedback_fn("Converting template to plain")
11731 old_disks = instance.disks
11732 new_disks = [d.children[0] for d in old_disks]
11734 # copy over size and mode
11735 for parent, child in zip(old_disks, new_disks):
11736 child.size = parent.size
11737 child.mode = parent.mode
11739 # update instance structure
11740 instance.disks = new_disks
11741 instance.disk_template = constants.DT_PLAIN
11742 self.cfg.Update(instance, feedback_fn)
11744 # Release locks in case removing disks takes a while
11745 _ReleaseLocks(self, locking.LEVEL_NODE)
11747 feedback_fn("Removing volumes on the secondary node...")
11748 for disk in old_disks:
11749 self.cfg.SetDiskID(disk, snode)
11750 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11752 self.LogWarning("Could not remove block device %s on node %s,"
11753 " continuing anyway: %s", disk.iv_name, snode, msg)
11755 feedback_fn("Removing unneeded volumes on the primary node...")
11756 for idx, disk in enumerate(old_disks):
11757 meta = disk.children[1]
11758 self.cfg.SetDiskID(meta, pnode)
11759 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11761 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11762 " continuing anyway: %s", idx, pnode, msg)
11764 # this is a DRBD disk, return its port to the pool
11765 for disk in old_disks:
11766 tcp_port = disk.logical_id[2]
11767 self.cfg.AddTcpUdpPort(tcp_port)
11769 # Node resource locks will be released by caller
11771 def Exec(self, feedback_fn):
11772 """Modifies an instance.
11774 All parameters take effect only at the next restart of the instance.
11777 # Process here the warnings from CheckPrereq, as we don't have a
11778 # feedback_fn there.
11779 for warn in self.warn:
11780 feedback_fn("WARNING: %s" % warn)
11782 assert ((self.op.disk_template is None) ^
11783 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
11784 "Not owning any node resource locks"
11787 instance = self.instance
11789 for disk_op, disk_dict in self.op.disks:
11790 if disk_op == constants.DDM_REMOVE:
11791 # remove the last disk
11792 device = instance.disks.pop()
11793 device_idx = len(instance.disks)
11794 for node, disk in device.ComputeNodeTree(instance.primary_node):
11795 self.cfg.SetDiskID(disk, node)
11796 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11798 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11799 " continuing anyway", device_idx, node, msg)
11800 result.append(("disk/%d" % device_idx, "remove"))
11802 # if this is a DRBD disk, return its port to the pool
11803 if device.dev_type in constants.LDS_DRBD:
11804 tcp_port = device.logical_id[2]
11805 self.cfg.AddTcpUdpPort(tcp_port)
11806 elif disk_op == constants.DDM_ADD:
11808 if instance.disk_template in (constants.DT_FILE,
11809 constants.DT_SHARED_FILE):
11810 file_driver, file_path = instance.disks[0].logical_id
11811 file_path = os.path.dirname(file_path)
11813 file_driver = file_path = None
11814 disk_idx_base = len(instance.disks)
11815 new_disk = _GenerateDiskTemplate(self,
11816 instance.disk_template,
11817 instance.name, instance.primary_node,
11818 instance.secondary_nodes,
11824 self.diskparams)[0]
11825 instance.disks.append(new_disk)
11826 info = _GetInstanceInfoText(instance)
11828 logging.info("Creating volume %s for instance %s",
11829 new_disk.iv_name, instance.name)
11830 # Note: this needs to be kept in sync with _CreateDisks
11832 for node in instance.all_nodes:
11833 f_create = node == instance.primary_node
11835 _CreateBlockDev(self, node, instance, new_disk,
11836 f_create, info, f_create)
11837 except errors.OpExecError, err:
11838 self.LogWarning("Failed to create volume %s (%s) on"
11840 new_disk.iv_name, new_disk, node, err)
11841 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11842 (new_disk.size, new_disk.mode)))
11844 # change a given disk
11845 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11846 result.append(("disk.mode/%d" % disk_op,
11847 disk_dict[constants.IDISK_MODE]))
11849 if self.op.disk_template:
11851 check_nodes = set(instance.all_nodes)
11852 if self.op.remote_node:
11853 check_nodes.add(self.op.remote_node)
11854 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
11855 owned = self.owned_locks(level)
11856 assert not (check_nodes - owned), \
11857 ("Not owning the correct locks, owning %r, expected at least %r" %
11858 (owned, check_nodes))
11860 r_shut = _ShutdownInstanceDisks(self, instance)
11862 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11863 " proceed with disk template conversion")
11864 mode = (instance.disk_template, self.op.disk_template)
11866 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11868 self.cfg.ReleaseDRBDMinors(instance.name)
11870 result.append(("disk_template", self.op.disk_template))
11872 assert instance.disk_template == self.op.disk_template, \
11873 ("Expected disk template '%s', found '%s'" %
11874 (self.op.disk_template, instance.disk_template))
11876 # Release node and resource locks if there are any (they might already have
11877 # been released during disk conversion)
11878 _ReleaseLocks(self, locking.LEVEL_NODE)
11879 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11882 for nic_op, nic_dict in self.op.nics:
11883 if nic_op == constants.DDM_REMOVE:
11884 # remove the last nic
11885 del instance.nics[-1]
11886 result.append(("nic.%d" % len(instance.nics), "remove"))
11887 elif nic_op == constants.DDM_ADD:
11888 # mac and bridge should be set by now
11889 mac = nic_dict[constants.INIC_MAC]
11890 ip = nic_dict.get(constants.INIC_IP, None)
11891 nicparams = self.nic_pinst[constants.DDM_ADD]
11892 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11893 instance.nics.append(new_nic)
11894 result.append(("nic.%d" % (len(instance.nics) - 1),
11895 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11896 (new_nic.mac, new_nic.ip,
11897 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11898 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11901 for key in (constants.INIC_MAC, constants.INIC_IP):
11902 if key in nic_dict:
11903 setattr(instance.nics[nic_op], key, nic_dict[key])
11904 if nic_op in self.nic_pinst:
11905 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11906 for key, val in nic_dict.iteritems():
11907 result.append(("nic.%s/%d" % (key, nic_op), val))
11910 if self.op.hvparams:
11911 instance.hvparams = self.hv_inst
11912 for key, val in self.op.hvparams.iteritems():
11913 result.append(("hv/%s" % key, val))
11916 if self.op.beparams:
11917 instance.beparams = self.be_inst
11918 for key, val in self.op.beparams.iteritems():
11919 result.append(("be/%s" % key, val))
11922 if self.op.os_name:
11923 instance.os = self.op.os_name
11926 if self.op.osparams:
11927 instance.osparams = self.os_inst
11928 for key, val in self.op.osparams.iteritems():
11929 result.append(("os/%s" % key, val))
11931 # online/offline instance
11932 if self.op.online_inst:
11933 self.cfg.MarkInstanceDown(instance.name)
11934 result.append(("admin_state", constants.ADMINST_DOWN))
11935 if self.op.offline_inst:
11936 self.cfg.MarkInstanceOffline(instance.name)
11937 result.append(("admin_state", constants.ADMINST_OFFLINE))
11939 self.cfg.Update(instance, feedback_fn)
11941 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
11942 self.owned_locks(locking.LEVEL_NODE)), \
11943 "All node locks should have been released by now"
11947 _DISK_CONVERSIONS = {
11948 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11949 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11953 class LUInstanceChangeGroup(LogicalUnit):
11954 HPATH = "instance-change-group"
11955 HTYPE = constants.HTYPE_INSTANCE
11958 def ExpandNames(self):
11959 self.share_locks = _ShareAll()
11960 self.needed_locks = {
11961 locking.LEVEL_NODEGROUP: [],
11962 locking.LEVEL_NODE: [],
11965 self._ExpandAndLockInstance()
11967 if self.op.target_groups:
11968 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11969 self.op.target_groups)
11971 self.req_target_uuids = None
11973 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11975 def DeclareLocks(self, level):
11976 if level == locking.LEVEL_NODEGROUP:
11977 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11979 if self.req_target_uuids:
11980 lock_groups = set(self.req_target_uuids)
11982 # Lock all groups used by instance optimistically; this requires going
11983 # via the node before it's locked, requiring verification later on
11984 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11985 lock_groups.update(instance_groups)
11987 # No target groups, need to lock all of them
11988 lock_groups = locking.ALL_SET
11990 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11992 elif level == locking.LEVEL_NODE:
11993 if self.req_target_uuids:
11994 # Lock all nodes used by instances
11995 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11996 self._LockInstancesNodes()
11998 # Lock all nodes in all potential target groups
11999 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12000 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12001 member_nodes = [node_name
12002 for group in lock_groups
12003 for node_name in self.cfg.GetNodeGroup(group).members]
12004 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12006 # Lock all nodes as all groups are potential targets
12007 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12009 def CheckPrereq(self):
12010 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12011 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12012 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12014 assert (self.req_target_uuids is None or
12015 owned_groups.issuperset(self.req_target_uuids))
12016 assert owned_instances == set([self.op.instance_name])
12018 # Get instance information
12019 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12021 # Check if node groups for locked instance are still correct
12022 assert owned_nodes.issuperset(self.instance.all_nodes), \
12023 ("Instance %s's nodes changed while we kept the lock" %
12024 self.op.instance_name)
12026 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12029 if self.req_target_uuids:
12030 # User requested specific target groups
12031 self.target_uuids = self.req_target_uuids
12033 # All groups except those used by the instance are potential targets
12034 self.target_uuids = owned_groups - inst_groups
12036 conflicting_groups = self.target_uuids & inst_groups
12037 if conflicting_groups:
12038 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12039 " used by the instance '%s'" %
12040 (utils.CommaJoin(conflicting_groups),
12041 self.op.instance_name),
12042 errors.ECODE_INVAL)
12044 if not self.target_uuids:
12045 raise errors.OpPrereqError("There are no possible target groups",
12046 errors.ECODE_INVAL)
12048 def BuildHooksEnv(self):
12049 """Build hooks env.
12052 assert self.target_uuids
12055 "TARGET_GROUPS": " ".join(self.target_uuids),
12058 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12062 def BuildHooksNodes(self):
12063 """Build hooks nodes.
12066 mn = self.cfg.GetMasterNode()
12067 return ([mn], [mn])
12069 def Exec(self, feedback_fn):
12070 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12072 assert instances == [self.op.instance_name], "Instance not locked"
12074 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12075 instances=instances, target_groups=list(self.target_uuids))
12077 ial.Run(self.op.iallocator)
12079 if not ial.success:
12080 raise errors.OpPrereqError("Can't compute solution for changing group of"
12081 " instance '%s' using iallocator '%s': %s" %
12082 (self.op.instance_name, self.op.iallocator,
12084 errors.ECODE_NORES)
12086 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12088 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12089 " instance '%s'", len(jobs), self.op.instance_name)
12091 return ResultWithJobs(jobs)
12094 class LUBackupQuery(NoHooksLU):
12095 """Query the exports list
12100 def ExpandNames(self):
12101 self.needed_locks = {}
12102 self.share_locks[locking.LEVEL_NODE] = 1
12103 if not self.op.nodes:
12104 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12106 self.needed_locks[locking.LEVEL_NODE] = \
12107 _GetWantedNodes(self, self.op.nodes)
12109 def Exec(self, feedback_fn):
12110 """Compute the list of all the exported system images.
12113 @return: a dictionary with the structure node->(export-list)
12114 where export-list is a list of the instances exported on that node.
12118 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12119 rpcresult = self.rpc.call_export_list(self.nodes)
12121 for node in rpcresult:
12122 if rpcresult[node].fail_msg:
12123 result[node] = False
12125 result[node] = rpcresult[node].payload
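# The resulting structure maps each queried node either to False (the export
# list could not be retrieved) or to its list of exports, e.g.
# (illustrative only):
#   {"node1.example.com": ["inst1.example.com"], "node2.example.com": False}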
12130 class LUBackupPrepare(NoHooksLU):
12131 """Prepares an instance for an export and returns useful information.
12136 def ExpandNames(self):
12137 self._ExpandAndLockInstance()
12139 def CheckPrereq(self):
12140 """Check prerequisites.
12143 instance_name = self.op.instance_name
12145 self.instance = self.cfg.GetInstanceInfo(instance_name)
12146 assert self.instance is not None, \
12147 "Cannot retrieve locked instance %s" % self.op.instance_name
12148 _CheckNodeOnline(self, self.instance.primary_node)
12150 self._cds = _GetClusterDomainSecret()
12152 def Exec(self, feedback_fn):
12153 """Prepares an instance for an export.
12156 instance = self.instance
12158 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12159 salt = utils.GenerateSecret(8)
12161 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12162 result = self.rpc.call_x509_cert_create(instance.primary_node,
12163 constants.RIE_CERT_VALIDITY)
12164 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12166 (name, cert_pem) = result.payload
12168 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12172 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12173 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12175 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12181 class LUBackupExport(LogicalUnit):
12182 """Export an instance to an image in the cluster.
12185 HPATH = "instance-export"
12186 HTYPE = constants.HTYPE_INSTANCE
12189 def CheckArguments(self):
12190 """Check the arguments.
12193 self.x509_key_name = self.op.x509_key_name
12194 self.dest_x509_ca_pem = self.op.destination_x509_ca
12196 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12197 if not self.x509_key_name:
12198 raise errors.OpPrereqError("Missing X509 key name for encryption",
12199 errors.ECODE_INVAL)
12201 if not self.dest_x509_ca_pem:
12202 raise errors.OpPrereqError("Missing destination X509 CA",
12203 errors.ECODE_INVAL)
12205 def ExpandNames(self):
12206 self._ExpandAndLockInstance()
12208 # Lock all nodes for local exports
12209 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12210 # FIXME: lock only instance primary and destination node
12212 # Sad but true, for now we have to lock all nodes, as we don't know where
12213 # the previous export might be, and in this LU we search for it and
12214 # remove it from its current node. In the future we could fix this by:
12215 # - making a tasklet to search (share-lock all), then create the
12216 # new one, then one to remove, after
12217 # - removing the removal operation altogether
12218 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12220 def DeclareLocks(self, level):
12221 """Last minute lock declaration."""
12222 # All nodes are locked anyway, so nothing to do here.
12224 def BuildHooksEnv(self):
12225 """Build hooks env.
12227 This will run on the master, primary node and target node.
12231 "EXPORT_MODE": self.op.mode,
12232 "EXPORT_NODE": self.op.target_node,
12233 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12234 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12235 # TODO: Generic function for boolean env variables
12236 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12239 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12243 def BuildHooksNodes(self):
12244 """Build hooks nodes.
12247 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12249 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12250 nl.append(self.op.target_node)
12254 def CheckPrereq(self):
12255 """Check prerequisites.
12257 This checks that the instance and node names are valid.
12260 instance_name = self.op.instance_name
12262 self.instance = self.cfg.GetInstanceInfo(instance_name)
12263 assert self.instance is not None, \
12264 "Cannot retrieve locked instance %s" % self.op.instance_name
12265 _CheckNodeOnline(self, self.instance.primary_node)
12267 if (self.op.remove_instance and
12268 self.instance.admin_state == constants.ADMINST_UP and
12269 not self.op.shutdown):
12270 raise errors.OpPrereqError("Can not remove instance without shutting it"
12273 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12274 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12275 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12276 assert self.dst_node is not None
12278 _CheckNodeOnline(self, self.dst_node.name)
12279 _CheckNodeNotDrained(self, self.dst_node.name)
12282 self.dest_disk_info = None
12283 self.dest_x509_ca = None
12285 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12286 self.dst_node = None
12288 if len(self.op.target_node) != len(self.instance.disks):
12289 raise errors.OpPrereqError(("Received destination information for %s"
12290 " disks, but instance %s has %s disks") %
12291 (len(self.op.target_node), instance_name,
12292 len(self.instance.disks)),
12293 errors.ECODE_INVAL)
12295 cds = _GetClusterDomainSecret()
12297 # Check X509 key name
12299 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12300 except (TypeError, ValueError), err:
12301 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12303 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12304 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12305 errors.ECODE_INVAL)
12307 # Load and verify CA
12309 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12310 except OpenSSL.crypto.Error, err:
12311 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12312 (err, ), errors.ECODE_INVAL)
12314 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12315 if errcode is not None:
12316 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12317 (msg, ), errors.ECODE_INVAL)
12319 self.dest_x509_ca = cert
12321 # Verify target information
12323 for idx, disk_data in enumerate(self.op.target_node):
12325 (host, port, magic) = \
12326 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12327 except errors.GenericError, err:
12328 raise errors.OpPrereqError("Target info for disk %s: %s" %
12329 (idx, err), errors.ECODE_INVAL)
12331 disk_info.append((host, port, magic))
12333 assert len(disk_info) == len(self.op.target_node)
12334 self.dest_disk_info = disk_info
12337 raise errors.ProgrammerError("Unhandled export mode %r" %
12340 # instance disk type verification
12341 # TODO: Implement export support for file-based disks
12342 for disk in self.instance.disks:
12343 if disk.dev_type == constants.LD_FILE:
12344 raise errors.OpPrereqError("Export not supported for instances with"
12345 " file-based disks", errors.ECODE_INVAL)
12347 def _CleanupExports(self, feedback_fn):
12348 """Removes exports of current instance from all other nodes.
12350 If an instance in a cluster with nodes A..D was exported to node C, its
12351 exports will be removed from the nodes A, B and D.
12354 assert self.op.mode != constants.EXPORT_MODE_REMOTE
12356 nodelist = self.cfg.GetNodeList()
12357 nodelist.remove(self.dst_node.name)
12359 # on one-node clusters nodelist will be empty after the removal;
12360 # if we proceeded, the backup would be removed because OpBackupQuery
12361 # substitutes an empty list with the full cluster node list.
12362 iname = self.instance.name
12364 feedback_fn("Removing old exports for instance %s" % iname)
12365 exportlist = self.rpc.call_export_list(nodelist)
12366 for node in exportlist:
12367 if exportlist[node].fail_msg:
12369 if iname in exportlist[node].payload:
12370 msg = self.rpc.call_export_remove(node, iname).fail_msg
12372 self.LogWarning("Could not remove older export for instance %s"
12373 " on node %s: %s", iname, node, msg)
12375 def Exec(self, feedback_fn):
12376 """Export an instance to an image in the cluster.
12379 assert self.op.mode in constants.EXPORT_MODES
12381 instance = self.instance
12382 src_node = instance.primary_node
12384 if self.op.shutdown:
12385 # shut down the instance, but not the disks
12386 feedback_fn("Shutting down instance %s" % instance.name)
12387 result = self.rpc.call_instance_shutdown(src_node, instance,
12388 self.op.shutdown_timeout)
12389 # TODO: Maybe ignore failures if ignore_remove_failures is set
12390 result.Raise("Could not shutdown instance %s on"
12391 " node %s" % (instance.name, src_node))
12393 # set the disk IDs correctly since call_instance_start needs the
12394 # correct drbd minor to create the symlinks
12395 for disk in instance.disks:
12396 self.cfg.SetDiskID(disk, src_node)
12398 activate_disks = (instance.admin_state != constants.ADMINST_UP)
12401 # Activate the instance disks if we're exporting a stopped instance
12402 feedback_fn("Activating disks for %s" % instance.name)
12403 _StartInstanceDisks(self, instance, None)
12406 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12409 helper.CreateSnapshots()
12411 if (self.op.shutdown and
12412 instance.admin_state == constants.ADMINST_UP and
12413 not self.op.remove_instance):
12414 assert not activate_disks
12415 feedback_fn("Starting instance %s" % instance.name)
12416 result = self.rpc.call_instance_start(src_node,
12417 (instance, None, None), False)
12418 msg = result.fail_msg
12420 feedback_fn("Failed to start instance: %s" % msg)
12421 _ShutdownInstanceDisks(self, instance)
12422 raise errors.OpExecError("Could not start instance: %s" % msg)
12424 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12425 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12426 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12427 connect_timeout = constants.RIE_CONNECT_TIMEOUT
12428 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12430 (key_name, _, _) = self.x509_key_name
12433 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12436 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12437 key_name, dest_ca_pem,
12442 # Check for backwards compatibility
12443 assert len(dresults) == len(instance.disks)
12444 assert compat.all(isinstance(i, bool) for i in dresults), \
12445 "Not all results are boolean: %r" % dresults
12449 feedback_fn("Deactivating disks for %s" % instance.name)
12450 _ShutdownInstanceDisks(self, instance)
12452 if not (compat.all(dresults) and fin_resu):
12455 failures.append("export finalization")
12456 if not compat.all(dresults):
12457 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12459 failures.append("disk export: disk(s) %s" % fdsk)
12461 raise errors.OpExecError("Export failed, errors in %s" %
12462 utils.CommaJoin(failures))
12464 # At this point, the export was successful, we can clean up and finish
12466 # Remove instance if requested
12467 if self.op.remove_instance:
12468 feedback_fn("Removing instance %s" % instance.name)
12469 _RemoveInstance(self, feedback_fn, instance,
12470 self.op.ignore_remove_failures)
12472 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12473 self._CleanupExports(feedback_fn)
12475 return fin_resu, dresults
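# The return value is a pair (fin_resu, dresults): the overall finalization
# status plus one boolean per instance disk, e.g. (True, [True, True]) for
# a fully successful export of a two-disk instance (illustrative only).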
12478 class LUBackupRemove(NoHooksLU):
12479 """Remove exports related to the named instance.
12484 def ExpandNames(self):
12485 self.needed_locks = {}
12486 # We need all nodes to be locked in order for RemoveExport to work, but we
12487 # don't need to lock the instance itself, as nothing will happen to it (and
12488 # we can remove exports also for a removed instance)
12489 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12491 def Exec(self, feedback_fn):
12492 """Remove any export.
12495 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12496 # If the instance was not found we'll try with the name that was passed in.
12497 # This will only work if it was an FQDN, though.
12499 if not instance_name:
12501 instance_name = self.op.instance_name
12503 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12504 exportlist = self.rpc.call_export_list(locked_nodes)
12506 for node in exportlist:
12507 msg = exportlist[node].fail_msg
12509 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12511 if instance_name in exportlist[node].payload:
12513 result = self.rpc.call_export_remove(node, instance_name)
12514 msg = result.fail_msg
12516 logging.error("Could not remove export for instance %s"
12517 " on node %s: %s", instance_name, node, msg)
12519 if fqdn_warn and not found:
12520 feedback_fn("Export not found. If trying to remove an export belonging"
12521 " to a deleted instance please use its Fully Qualified"
12525 class LUGroupAdd(LogicalUnit):
12526 """Logical unit for creating node groups.
12529 HPATH = "group-add"
12530 HTYPE = constants.HTYPE_GROUP
12533 def ExpandNames(self):
12534 # We need the new group's UUID here so that we can create and acquire the
12535 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12536 # that it should not check whether the UUID exists in the configuration.
12537 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12538 self.needed_locks = {}
12539 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12541 def CheckPrereq(self):
12542 """Check prerequisites.
12544 This checks that the given group name does not already exist as a node group.
12549 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12550 except errors.OpPrereqError:
12553 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12554 " node group (UUID: %s)" %
12555 (self.op.group_name, existing_uuid),
12556 errors.ECODE_EXISTS)
12558 if self.op.ndparams:
12559 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12561 if self.op.diskparams:
12562 for templ in constants.DISK_TEMPLATES:
12563 if templ not in self.op.diskparams:
12564 self.op.diskparams[templ] = {}
12565 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
12567 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
12569 def BuildHooksEnv(self):
12570 """Build hooks env.
12574 "GROUP_NAME": self.op.group_name,
12577 def BuildHooksNodes(self):
12578 """Build hooks nodes.
12581 mn = self.cfg.GetMasterNode()
12582 return ([mn], [mn])
12584 def Exec(self, feedback_fn):
12585 """Add the node group to the cluster.
12588 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12589 uuid=self.group_uuid,
12590 alloc_policy=self.op.alloc_policy,
12591 ndparams=self.op.ndparams,
12592 diskparams=self.op.diskparams)
12594 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12595 del self.remove_locks[locking.LEVEL_NODEGROUP]
12598 class LUGroupAssignNodes(NoHooksLU):
12599 """Logical unit for assigning nodes to groups.
12604 def ExpandNames(self):
12605 # These raise errors.OpPrereqError on their own:
12606 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12607 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12609 # We want to lock all the affected nodes and groups. We have readily
12610 # available the list of nodes, and the *destination* group. To gather the
12611 # list of "source" groups, we need to fetch node information later on.
12612 self.needed_locks = {
12613 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12614 locking.LEVEL_NODE: self.op.nodes,
12617 def DeclareLocks(self, level):
12618 if level == locking.LEVEL_NODEGROUP:
12619 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12621 # Try to get all affected nodes' groups without having the group or node
12622 # lock yet. Needs verification later in the code flow.
12623 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12625 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12627 def CheckPrereq(self):
12628 """Check prerequisites.
12631 assert self.needed_locks[locking.LEVEL_NODEGROUP]
12632 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12633 frozenset(self.op.nodes))
12635 expected_locks = (set([self.group_uuid]) |
12636 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12637 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12638 if actual_locks != expected_locks:
12639 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12640 " current groups are '%s', used to be '%s'" %
12641 (utils.CommaJoin(expected_locks),
12642 utils.CommaJoin(actual_locks)))
12644 self.node_data = self.cfg.GetAllNodesInfo()
12645 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12646 instance_data = self.cfg.GetAllInstancesInfo()
12648 if self.group is None:
12649 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12650 (self.op.group_name, self.group_uuid))
12652 (new_splits, previous_splits) = \
12653 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12654 for node in self.op.nodes],
12655 self.node_data, instance_data)
12658 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12660 if not self.op.force:
12661 raise errors.OpExecError("The following instances get split by this"
12662 " change and --force was not given: %s" %
12665 self.LogWarning("This operation will split the following instances: %s",
12668 if previous_splits:
12669 self.LogWarning("In addition, these already-split instances continue"
12670 " to be split across groups: %s",
12671 utils.CommaJoin(utils.NiceSort(previous_splits)))
12673 def Exec(self, feedback_fn):
12674 """Assign nodes to a new group.
12677 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12679 self.cfg.AssignGroupNodes(mods)
12682 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12683 """Check for split instances after a node assignment.
12685 This method considers a series of node assignments as an atomic operation,
12686 and returns information about split instances after applying the set of changes.
12689 In particular, it returns information about newly split instances, and
12690 instances that were already split, and remain so after the change.
12692 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
12695 @type changes: list of (node_name, new_group_uuid) pairs.
12696 @param changes: list of node assignments to consider.
12697 @param node_data: a dict with data for all nodes
12698 @param instance_data: a dict with all instances to consider
12699 @rtype: a two-tuple
12700 @return: a list of instances that were previously okay and result split as a
12701 consequence of this change, and a list of instances that were previously
12702 split and this change does not fix.
12705 changed_nodes = dict((node, group) for node, group in changes
12706 if node_data[node].group != group)
12708 all_split_instances = set()
12709 previously_split_instances = set()
12711 def InstanceNodes(instance):
12712 return [instance.primary_node] + list(instance.secondary_nodes)
    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)
12720 if len(set(node_data[node].group for node in instance_nodes)) > 1:
12721 previously_split_instances.add(inst.name)
12723 if len(set(changed_nodes.get(node, node_data[node].group)
12724 for node in instance_nodes)) > 1:
12725 all_split_instances.add(inst.name)
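    # Newly split instances are those split after the changes but not before;
    # instances split both before and after are reported as still-split.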
12727 return (list(all_split_instances - previously_split_instances),
12728 list(previously_split_instances & all_split_instances))
12731 class _GroupQuery(_QueryBase):
12732 FIELDS = query.GROUP_FIELDS
12734 def ExpandNames(self, lu):
12735 lu.needed_locks = {}
12737 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12738 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)
  def DeclareLocks(self, lu, level):
    pass
12765 def _GetQueryData(self, lu):
12766 """Computes the list of node groups and their attributes.
12769 do_nodes = query.GQ_NODE in self.requested_data
12770 do_instances = query.GQ_INST in self.requested_data
12772 group_to_nodes = None
12773 group_to_instances = None
12775 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12776 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12777 # latter GetAllInstancesInfo() is not enough, for we have to go through
12778 # instance->node. Hence, we will need to process nodes even if we only need
12779 # instance information.
12780 if do_nodes or do_instances:
12781 all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}
12785 for node in all_nodes.values():
12786 if node.group in group_to_nodes:
12787 group_to_nodes[node.group].append(node.name)
12788 node_to_group[node.name] = node.group
      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None
12803 return query.GroupQueryData([self._all_groups[uuid]
12804 for uuid in self.wanted],
12805 group_to_nodes, group_to_instances)
12808 class LUGroupQuery(NoHooksLU):
12809 """Logical unit for querying node groups.
12814 def CheckArguments(self):
12815 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12816 self.op.output_fields, False)
12818 def ExpandNames(self):
12819 self.gq.ExpandNames(self)
12821 def DeclareLocks(self, level):
12822 self.gq.DeclareLocks(self, level)
12824 def Exec(self, feedback_fn):
12825 return self.gq.OldStyleQuery(self)
12828 class LUGroupSetParams(LogicalUnit):
12829 """Modifies the parameters of a node group.
12832 HPATH = "group-modify"
12833 HTYPE = constants.HTYPE_GROUP
  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.diskparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
12847 def ExpandNames(self):
12848 # This raises errors.OpPrereqError on its own:
12849 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12851 self.needed_locks = {
12852 locking.LEVEL_NODEGROUP: [self.group_uuid],
12855 def CheckPrereq(self):
12856 """Check prerequisites.
12859 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12861 if self.group is None:
12862 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12863 (self.op.group_name, self.group_uuid))
12865 if self.op.ndparams:
12866 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12867 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12868 self.new_ndparams = new_ndparams
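    # Disk parameters are merged per template: start from the group's current
    # values and apply the requested overrides on top, validating their types.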
12870 if self.op.diskparams:
12871 self.new_diskparams = dict()
12872 for templ in constants.DISK_TEMPLATES:
12873 if templ not in self.op.diskparams:
12874 self.op.diskparams[templ] = {}
12875 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
12876 self.op.diskparams[templ])
12877 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
12878 self.new_diskparams[templ] = new_templ_params
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }
12889 def BuildHooksNodes(self):
12890 """Build hooks nodes.
12893 mn = self.cfg.GetMasterNode()
12894 return ([mn], [mn])
  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.diskparams:
      self.group.diskparams = self.new_diskparams
      result.append(("diskparams", str(self.group.diskparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result
12917 class LUGroupRemove(LogicalUnit):
12918 HPATH = "group-remove"
12919 HTYPE = constants.HTYPE_GROUP
12922 def ExpandNames(self):
    # This will raise errors.OpPrereqError on its own:
12924 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12925 self.needed_locks = {
12926 locking.LEVEL_NODEGROUP: [self.group_uuid],
12929 def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)
12949 # Verify the cluster would not be left group-less.
12950 if len(self.cfg.GetNodeGroupList()) == 1:
12951 raise errors.OpPrereqError("Group '%s' is the only group,"
12952 " cannot be removed" %
12953 self.op.group_name,
12954 errors.ECODE_STATE)
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])
  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))
12981 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
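    # The group no longer exists in the configuration, so have its lock removed
    # instead of merely released.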
12984 class LUGroupRename(LogicalUnit):
12985 HPATH = "group-rename"
12986 HTYPE = constants.HTYPE_GROUP
12989 def ExpandNames(self):
12990 # This raises errors.OpPrereqError on its own:
12991 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12993 self.needed_locks = {
12994 locking.LEVEL_NODEGROUP: [self.group_uuid],
  def CheckPrereq(self):
    """Check prerequisites.

    Ensures the requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }
13022 def BuildHooksNodes(self):
13023 """Build hooks nodes.
13026 mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)
13035 return (run_nodes, run_nodes)
  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))
13047 group.name = self.op.new_name
13048 self.cfg.Update(group, feedback_fn)
13050 return self.op.new_name
13053 class LUGroupEvacuate(LogicalUnit):
13054 HPATH = "group-evacuate"
13055 HTYPE = constants.HTYPE_GROUP
13058 def ExpandNames(self):
13059 # This raises errors.OpPrereqError on its own:
13060 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13062 if self.op.target_groups:
13063 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13064 self.op.target_groups)
13066 self.req_target_uuids = []
    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)
13075 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13077 self.share_locks = _ShareAll()
13078 self.needed_locks = {
13079 locking.LEVEL_INSTANCE: [],
13080 locking.LEVEL_NODEGROUP: [],
13081 locking.LEVEL_NODE: [],
13084 def DeclareLocks(self, level):
13085 if level == locking.LEVEL_INSTANCE:
13086 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13088 # Lock instances optimistically, needs verification once node and group
13089 # locks have been acquired
13090 self.needed_locks[locking.LEVEL_INSTANCE] = \
13091 self.cfg.GetNodeGroupInstances(self.group_uuid)
13093 elif level == locking.LEVEL_NODEGROUP:
13094 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13112 elif level == locking.LEVEL_NODE:
13113 # This will only lock the nodes in the group to be evacuated which
13114 # contain actual instances
13115 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13116 self._LockInstancesNodes()
13118 # Lock all nodes in group to be evacuated and target groups
13119 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13120 assert self.group_uuid in owned_groups
13121 member_nodes = [node_name
13122 for group in owned_groups
13123 for node_name in self.cfg.GetNodeGroup(group).members]
13124 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13126 def CheckPrereq(self):
13127 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13128 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13129 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13131 assert owned_groups.issuperset(self.req_target_uuids)
13132 assert self.group_uuid in owned_groups
13134 # Check if locked instances are still correct
13135 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13137 # Get instance information
13138 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13140 # Check if node groups for locked instances are still correct
13141 for instance_name in owned_instances:
13142 inst = self.instances[instance_name]
13143 assert owned_nodes.issuperset(inst.all_nodes), \
13144 "Instance %s's nodes changed while we kept the lock" % instance_name
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)
      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13152 if self.req_target_uuids:
13153 # User requested specific target groups
13154 self.target_uuids = self.req_target_uuids
13156 # All groups except the one to be evacuated are potential targets
13157 self.target_uuids = [group_uuid for group_uuid in owned_groups
13158 if group_uuid != self.group_uuid]
13160 if not self.target_uuids:
13161 raise errors.OpPrereqError("There are no possible target groups",
13162 errors.ECODE_INVAL)
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }
13173 def BuildHooksNodes(self):
13174 """Build hooks nodes.
13177 mn = self.cfg.GetMasterNode()
13179 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13181 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13183 return (run_nodes, run_nodes)
13185 def Exec(self, feedback_fn):
13186 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13188 assert self.group_uuid not in self.target_uuids
13190 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13191 instances=instances, target_groups=self.target_uuids)
13193 ial.Run(self.op.iallocator)
13195 if not ial.success:
13196 raise errors.OpPrereqError("Can't compute group evacuation using"
13197 " iallocator '%s': %s" %
13198 (self.op.iallocator, ial.info),
13199 errors.ECODE_NORES)
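    # Turn the iallocator result into actual jobs; they are submitted by the
    # processor through the ResultWithJobs wrapper returned below.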
13201 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13203 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13204 len(jobs), self.op.group_name)
13206 return ResultWithJobs(jobs)
13209 class TagsLU(NoHooksLU): # pylint: disable=W0223
13210 """Generic tags LU.
13212 This is an abstract class which is the parent of all the other tags LUs.
13215 def ExpandNames(self):
13216 self.group_uuid = None
13217 self.needed_locks = {}
13218 if self.op.kind == constants.TAG_NODE:
13219 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13220 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13221 elif self.op.kind == constants.TAG_INSTANCE:
13222 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13223 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13224 elif self.op.kind == constants.TAG_NODEGROUP:
13225 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13227 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13228 # not possible to acquire the BGL based on opcode parameters)
13230 def CheckPrereq(self):
13231 """Check prerequisites.
13234 if self.op.kind == constants.TAG_CLUSTER:
13235 self.target = self.cfg.GetClusterInfo()
13236 elif self.op.kind == constants.TAG_NODE:
13237 self.target = self.cfg.GetNodeInfo(self.op.name)
13238 elif self.op.kind == constants.TAG_INSTANCE:
13239 self.target = self.cfg.GetInstanceInfo(self.op.name)
13240 elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
13247 class LUTagsGet(TagsLU):
13248 """Returns the tags of a given object.
13253 def ExpandNames(self):
13254 TagsLU.ExpandNames(self)
13256 # Share locks as this is only a read operation
13257 self.share_locks = _ShareAll()
13259 def Exec(self, feedback_fn):
13260 """Returns the tag list.
13263 return list(self.target.GetTags())
13266 class LUTagsSearch(NoHooksLU):
13267 """Searches the tags for a given pattern.
13272 def ExpandNames(self):
13273 self.needed_locks = {}
13275 def CheckPrereq(self):
13276 """Check prerequisites.
    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)
13287 def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
13307 class LUTagsSet(TagsLU):
13308 """Sets a tag on a given object.
13313 def CheckPrereq(self):
13314 """Check prerequisites.
13316 This checks the type and length of the tag name and value.
13319 TagsLU.CheckPrereq(self)
13320 for tag in self.op.tags:
13321 objects.TaggableObject.ValidateTag(tag)
  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)
13335 class LUTagsDel(TagsLU):
13336 """Delete a list of tags from a given object.
13341 def CheckPrereq(self):
13342 """Check prerequisites.
13344 This checks that we have the given tag.
13347 TagsLU.CheckPrereq(self)
13348 for tag in self.op.tags:
13349 objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)
13360 def Exec(self, feedback_fn):
13361 """Remove the tag from the object.
13364 for tag in self.op.tags:
13365 self.target.RemoveTag(tag)
13366 self.cfg.Update(self.target, feedback_fn)
13369 class LUTestDelay(NoHooksLU):
13370 """Sleep for a specified amount of time.
  This LU sleeps on the master and/or nodes for a specified amount of
  time.
13378 def ExpandNames(self):
13379 """Expand names and set required locks.
13381 This expands the node list, if any.
13384 self.needed_locks = {}
13385 if self.op.on_nodes:
13386 # _GetWantedNodes can be used here, but is not always appropriate to use
13387 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13388 # more information.
13389 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13390 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13392 def _TestDelay(self):
13393 """Do the actual sleep.
13396 if self.op.on_master:
13397 if not utils.TestDelay(self.op.duration):
13398 raise errors.OpExecError("Error during master delay test")
13399 if self.op.on_nodes:
13400 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13401 for node, node_result in result.items():
13402 node_result.Raise("Failure during rpc call to node %s" % node)
  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
13417 class LUTestJqueue(NoHooksLU):
13418 """Utility LU to test some aspects of the job queue.
13423 # Must be lower than default timeout for WaitForJobChange to see whether it
13424 # notices changed jobs
13425 _CLIENT_CONNECT_TIMEOUT = 20.0
13426 _CLIENT_CONFIRM_TIMEOUT = 60.0
  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
13430 """Opens a Unix socket and waits for another program to connect.
13433 @param cb: Callback to send socket name to client
13434 @type errcls: class
13435 @param errcls: Exception class to use for errors
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()
13478 def _SendNotification(self, test, arg, sockname):
13479 """Sends a notification to the client.
13482 @param test: Test name
13483 @param arg: Test argument (depends on test)
13484 @type sockname: string
13485 @param sockname: Socket path
13488 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @param prereq: Whether this is a prereq-phase test
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)
13509 def CheckArguments(self):
13510 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13511 self.expandnames_calls = 0
13513 def ExpandNames(self):
13514 checkargs_calls = getattr(self, "checkargs_calls", 0)
13515 if checkargs_calls < 1:
13516 raise errors.ProgrammerError("CheckArguments was not called")
13518 self.expandnames_calls += 1
13520 if self.op.notify_waitlock:
13521 self._Notify(True, constants.JQT_EXPANDNAMES, None)
13523 self.LogInfo("Expanding names")
13525 # Get lock on master node (just to get a lock, not for a particular reason)
13526 self.needed_locks = {
13527 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13530 def Exec(self, feedback_fn):
13531 if self.expandnames_calls < 1:
13532 raise errors.ProgrammerError("ExpandNames was not called")
13534 if self.op.notify_exec:
13535 self._Notify(False, constants.JQT_EXEC, None)
13537 self.LogInfo("Executing")
13539 if self.op.log_messages:
13540 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13541 for idx, msg in enumerate(self.op.log_messages):
13542 self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
      # Report how many test messages have been sent
      self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
13553 class IAllocator(object):
13554 """IAllocator framework.
  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage
13566 # pylint: disable=R0902
13567 # lots of instance attributes
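  # Typical usage (see e.g. LUGroupEvacuate.Exec above): construct an instance
  # with the desired mode and its keyword arguments, call Run() with the name
  # of the allocator script, then check "success"/"info" and use "result":
  #
  #   ial = IAllocator(cfg, rpc_runner, constants.IALLOCATOR_MODE_CHG_GROUP,
  #                    instances=instances, target_groups=target_uuids)
  #   ial.Run(op.iallocator)
  #   if not ial.success:
  #     raise errors.OpPrereqError(...)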
  def __init__(self, cfg, rpc_runner, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc_runner
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
13577 self.os = self.tags = self.nics = self.vcpus = None
13578 self.hypervisor = None
13579 self.relocate_from = None
13581 self.instances = None
13582 self.evac_mode = None
13583 self.target_groups = []
13585 self.required_nodes = None
13586 # init result fields
13587 self.success = self.info = self.result = None
    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)

    self._BuildInputData(compat.partial(fn, self), keydata)
13609 def _ComputeClusterData(self):
13610 """Compute the generic allocator input data.
13612 This is the data that is independent of the actual operation.
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
13626 iinfo = cfg.GetAllInstancesInfo().values()
13627 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13630 node_list = [n.name for n in ninfo.values() if n.vm_capable]
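    # Choose the hypervisor whose parameters are used for the node queries
    # below: the requested one for allocations, the instance's own for
    # relocations, and the first enabled hypervisor otherwise.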
13632 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13633 hypervisor_name = self.hypervisor
13634 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13635 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13637 hypervisor_name = cluster_info.enabled_hypervisors[0]
    node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
                                        [hypervisor_name])
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
13645 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13647 config_ndata = self._ComputeBasicNodeData(ninfo)
13648 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13649 i_list, config_ndata)
13650 assert len(data["nodes"]) == len(ninfo), \
13651 "Incomplete node data computed"
13653 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13655 self.in_data = data
  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng
  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
13672 """Compute global node data.
13675 @returns: a dict of name: (node dict, node config)
13678 # fill in static (config-based) values
13679 node_results = dict((ninfo.name, {
13680 "tags": list(ninfo.GetTags()),
13681 "primary_ip": ninfo.primary_ip,
13682 "secondary_ip": ninfo.secondary_ip,
13683 "offline": ninfo.offline,
13684 "drained": ninfo.drained,
13685 "master_candidate": ninfo.master_candidate,
13686 "group": ninfo.group,
13687 "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())
13692 return node_results
  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config
13702 #TODO(dynmem): compute the right data on MAX and MIN memory
13703 # make a copy of the current dict
13704 node_results = dict(node_results)
13705 for nname, nresult in node_data.items():
13706 assert nname in node_results, "Missing basic data for node %s" % nname
13707 ninfo = node_cfg[nname]
13709 if not (ninfo.offline or ninfo.drained):
13710 nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = _MakeLegacyNodeInfo(nresult.payload)
13715 for attr in ["memory_total", "memory_free", "memory_dom0",
13716 "vg_size", "vg_free", "cpu_total"]:
13717 if attr not in remote_info:
13718 raise errors.OpExecError("Node '%s' didn't return attribute"
13719 " '%s'" % (nname, attr))
13720 if not isinstance(remote_info[attr], int):
          raise errors.OpExecError("Node '%s' returned invalid value"
                                   " for '%s': %s" %
                                   (nname, attr, remote_info[attr]))
13724 # compute memory used by primary instances
13725 i_p_mem = i_p_up_mem = 0
13726 for iinfo, beinfo in i_list:
13727 if iinfo.primary_node == nname:
13728 i_p_mem += beinfo[constants.BE_MAXMEM]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
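            # The unused part of the instance's maximum memory may be claimed
            # again later, so subtract it from the node's free memory as well.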
13733 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
13734 remote_info["memory_free"] -= max(0, i_mem_diff)
13736 if iinfo.admin_state == constants.ADMINST_UP:
13737 i_p_up_mem += beinfo[constants.BE_MAXMEM]
        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn
13753 return node_results
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MAXMEM],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
13794 def _AddNewInstance(self):
13795 """Add new instance data to allocator structure.
    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.
13800 The checks for the completeness of the opcode must have already been
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }
    return request
13827 def _AddRelocateInstance(self):
13828 """Add relocate instance data to allocator structure.
    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.
13833 The checks for the completeness of the opcode must have already been
13837 instance = self.cfg.GetInstanceInfo(self.name)
13838 if instance is None:
13839 raise errors.ProgrammerError("Unknown instance '%s' passed to"
13840 " IAllocator" % self.name)
13842 if instance.disk_template not in constants.DTS_MIRRORED:
13843 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13844 errors.ECODE_INVAL)
13846 if instance.disk_template in constants.DTS_INT_MIRROR and \
13847 len(instance.secondary_nodes) != 1:
13848 raise errors.OpPrereqError("Instance has not exactly one secondary node",
13849 errors.ECODE_STATE)
    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request
  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }
  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }
13881 def _BuildInputData(self, fn, keydata):
13882 """Build input data structures.
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request
13900 self.in_text = serializer.Dump(self.in_data)
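    # in_text is the serialized request that Run() ships to the iallocator
    # script on the master node.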
13902 _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TListOf(ht.TString)),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TListOf(ht.TString)),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
13955 def Run(self, name, validate=True, call_fn=None):
13956 """Run an instance allocator and return the results.
13959 if call_fn is None:
13960 call_fn = self.rpc.call_iallocator_runner
13962 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
13969 def _ValidateResult(self):
13970 """Process the allocator results.
13972 This will process and if successful save the result in
13973 self.out_data and the other parameters.
13977 rdict = serializer.Load(self.out_text)
13978 except Exception, err:
13979 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13981 if not isinstance(rdict, dict):
13982 raise errors.OpExecError("Can't parse iallocator results: not a dict")
    # TODO: remove backwards compatibility in later versions
13985 if "nodes" in rdict and "result" not in rdict:
13986 rdict["result"] = rdict["nodes"]
13989 for key in "success", "info", "result":
13990 if key not in rdict:
13991 raise errors.OpExecError("Can't parse iallocator results:"
13992 " missing key '%s'" % key)
13993 setattr(self, key, rdict[key])
13995 if not self._result_check(self.result):
13996 raise errors.OpExecError("Iallocator returned invalid result,"
13997 " expected %s, got %s" %
13998 (self._result_check, self.result),
13999 errors.ECODE_INVAL)
14001 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14002 assert self.relocate_from is not None
14003 assert self.required_nodes == 1
14005 node2group = dict((name, ndata["group"])
14006 for (name, ndata) in self.in_data["nodes"].items())
14008 fn = compat.partial(self._NodesToGroups, node2group,
14009 self.in_data["nodegroups"])
14011 instance = self.cfg.GetInstanceInfo(self.name)
14012 request_groups = fn(self.relocate_from + [instance.primary_node])
14013 result_groups = fn(rdict["result"] + [instance.primary_node])
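      # A relocation may only choose nodes from groups the instance already
      # spans; any other result from the iallocator is rejected.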
14015 if self.success and not set(result_groups).issubset(request_groups):
14016 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14017 " differ from original groups (%s)" %
14018 (utils.CommaJoin(result_groups),
14019 utils.CommaJoin(request_groups)))
14021 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14022 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14024 self.out_data = rdict
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
14028 """Returns a list of unique group names for a list of nodes.
    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @param groups: Group information
    @param nodes: Node names

    """
    result = set()
    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        continue
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]
        result.add(group_name)
    return sorted(result)
14060 class LUTestAllocator(NoHooksLU):
14061 """Run allocator tests.
14063 This LU runs the allocator tests
14066 def CheckPrereq(self):
14067 """Check prerequisites.
14069 This checks the opcode parameters depending on the director and mode test.
14072 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14073 for attr in ["memory", "disks", "disk_template",
14074 "os", "tags", "nics", "vcpus"]:
14075 if not hasattr(self.op, attr):
14076 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14077 attr, errors.ECODE_INVAL)
14078 iname = self.cfg.ExpandInstanceName(self.op.name)
14079 if iname is not None:
14080 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14081 iname, errors.ECODE_EXISTS)
14082 if not isinstance(self.op.nics, list):
14083 raise errors.OpPrereqError("Invalid parameter 'nics'",
14084 errors.ECODE_INVAL)
14085 if not isinstance(self.op.disks, list):
14086 raise errors.OpPrereqError("Invalid parameter 'disks'",
14087 errors.ECODE_INVAL)
14088 for row in self.op.disks:
14089 if (not isinstance(row, dict) or
14090 constants.IDISK_SIZE not in row or
14091 not isinstance(row[constants.IDISK_SIZE], int) or
14092 constants.IDISK_MODE not in row or
14093 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14094 raise errors.OpPrereqError("Invalid contents of the 'disks'"
14095 " parameter", errors.ECODE_INVAL)
14096 if self.op.hypervisor is None:
14097 self.op.hypervisor = self.cfg.GetHypervisorType()
14098 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14099 fname = _ExpandInstanceName(self.cfg, self.op.name)
14100 self.op.name = fname
14101 self.relocate_from = \
14102 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14103 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14104 constants.IALLOCATOR_MODE_NODE_EVAC):
14105 if not self.op.instances:
14106 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)
14112 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14113 if self.op.allocator is None:
14114 raise errors.OpPrereqError("Missing allocator name",
14115 errors.ECODE_INVAL)
14116 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14117 raise errors.OpPrereqError("Wrong allocator test '%s'" %
14118 self.op.direction, errors.ECODE_INVAL)
14120 def Exec(self, feedback_fn):
14121 """Run the allocator test.
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)