# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
"""Module implementing the master-side code."""
# pylint: disable-msg=W0201,C0302
# W0201 since most LU attributes are defined in CheckPrereq or similar
# C0302: since we have waaaay too many lines in this module
from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

import ganeti.masterd.instance # pylint: disable-msg=W0611
  """Data container for LU results with jobs.
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  def __init__(self, jobs, **kwargs):
    """Initializes this class.
    Additional return values can be specified as keyword arguments.
    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects
class LogicalUnit(object):
  """Logical Unit base class.
  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  Note that all commands require root permissions.
  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.
    This needs to be overridden in derived classes in order to check op
    self.proc = processor
    self.cfg = context.cfg
    self.glm = context.glm
    self.owned_locks = context.glm.list_owned
    self.context = context
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0
    # Validate opcode parameters and set defaults
    self.op.Validate(True)
    self.CheckArguments()
  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.
    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:
      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possible
    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.
  def ExpandNames(self):
    """Expand names for this LU.
    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.
    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value
    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.
    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.
      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      self.needed_locks = {} # No, you can't leave it to the default value None
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
      self.needed_locks = {} # Exclusive LUs don't need locks.
      raise NotImplementedError
  def DeclareLocks(self, level):
    """Declare LU locking needs for a level
    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.
    This function is only called if you have something already set in
    self.needed_locks for the level.
    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS
  def CheckPrereq(self):
    """Check prerequisites for this LU.
    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.
    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
  def Exec(self, feedback_fn):
    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
      raise NotImplementedError
  def BuildHooksEnv(self):
    """Build hooks environment for this LU.
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
    raise NotImplementedError
  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.
    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. If no nodes are to be returned, an
      empty list should be used (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
    raise NotImplementedError
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.
    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
      L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
    @return: the new Exec result, based on the previous result
    # API must be kept, thus we ignore the unused argument and "could
    # be a function" warnings
    # pylint: disable-msg=W0613,R0201
  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.
    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    if self.needed_locks is None:
      self.needed_locks = {}
    assert locking.LEVEL_INSTANCE not in self.needed_locks, \
      "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
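
  # Illustrative sketch (not part of the original source): an instance-level
  # LU would typically call the helper above from its ExpandNames, e.g.:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     # ... declare any other lock levels this LU needs ...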
  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.
    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].
    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.
    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.
    It should be called in DeclareLocks in a way similar to::
      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()
    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"
    # TODO: check if we've really been called with the instance locks held
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
    del self.recalculate_locks[locking.LEVEL_NODE]
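
  # Illustrative sketch (not part of the original source): combined with
  # _ExpandAndLockInstance above, the usual locking pattern of a hypothetical
  # instance LU looks roughly like this:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()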
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.
  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.
  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.
    This just raises an error.
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.
    raise AssertionError("BuildHooksNodes called for NoHooksLU")

  """Tasklet base class.
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.
  Subclasses must follow these rules:
    - Implement CheckPrereq
  def __init__(self, lu):
  def CheckPrereq(self):
    """Check prerequisites for this tasklet.
    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.
    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.
    This method should also update all parameters to their canonical form if it
    hasn't been done before.
  def Exec(self, feedback_fn):
    """Execute the tasklet.
    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    raise NotImplementedError

  """Base for query utility classes.
  #: Attribute holding field definitions
  def __init__(self, filter_, fields, use_locking):
    """Initializes this class.
    self.use_locking = use_locking
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()
    # Sort only if no names were requested
    self.sort_by_name = not self.names
    self.do_locking = None
  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.
      names = lu.owned_locks(lock_level)
    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)
    # caller specified names and we must keep the same order
    assert not self.do_locking or lu.glm.is_owned(lock_level)
    missing = set(self.wanted).difference(names)
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)
    # Return expanded names
  def ExpandNames(self, lu):
    """Expand names for this query.
    See L{LogicalUnit.ExpandNames}.
    raise NotImplementedError()
  def DeclareLocks(self, lu, level):
    """Declare locks for this query.
    See L{LogicalUnit.DeclareLocks}.
    raise NotImplementedError()
  def _GetQueryData(self, lu):
    """Collects all data for this query.
    @return: Query data object
    raise NotImplementedError()
  def NewStyleQuery(self, lu):
    """Collect data and execute query.
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)
  def OldStyleQuery(self, lu):
    """Collect data and execute query.
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)

  """Returns a dict declaring all lock levels shared.
  return dict.fromkeys(locking.LEVELS, 1)
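
# Illustrative note (not part of the original source): the dict returned
# above is meant to be assigned to an LU's share_locks attribute, e.g. in
# ExpandNames:
#
#   self.share_locks = _ShareAll()
#   self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}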
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.
  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               utils.CommaJoin(inst_groups),
                               utils.CommaJoin(owned_groups)),

def _SupportsOob(cfg, node):
  """Tells if node supports OOB.
  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]

def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.
  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @param nodes: list of node names or None for all nodes
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(lu.cfg.GetNodeList())

def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.
  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())

def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.
  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @return: the new parameter dictionary
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
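
# Illustrative example (not part of the original source), with assumed toy
# values, showing the merge semantics of _GetUpdatedParams:
#
#   old = {"mem": 512, "vcpus": 2}
#   upd = {"mem": constants.VALUE_DEFAULT, "vcpus": 4, "boot_order": "cd"}
#   _GetUpdatedParams(old, upd)
#   --> {"vcpus": 4, "boot_order": "cd"}   # "mem" reverts to its default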
def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.
  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"
  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None
    # Determine which locks to release
    for name in lu.owned_locks(level):
      if should_release(name):
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
    # Release just some locks
    lu.glm.release(level, names=release)
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
    lu.glm.release(level)
    assert not lu.glm.is_owned(level), "No locks should be owned"
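
# Illustrative examples (not part of the original source) of how the helper
# above is typically invoked once an LU no longer needs some of its locks:
#
#   # keep only the lock on the instance's primary node
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[instance.primary_node])
#
#   # drop all node locks still held by this LU
#   _ReleaseLocks(self, locking.LEVEL_NODE)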
def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.
  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
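
# Illustrative note (not part of the original source): for two assumed
# instances "inst1" (LV "xenvg/disk0" on "node1") and "inst2" (LV
# "xenvg/disk0" on "node2"), the mapping built above would be:
#
#   {("node1", "xenvg/disk0"): "inst1",
#    ("node2", "xenvg/disk0"): "inst2"}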
def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable-msg=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)

def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.
  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)

def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.
  This will ensure that instances don't get customised versions of
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.
  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)

def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.
  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,

def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.
  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,

def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.
  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)

def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.
  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,

def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)
  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)
  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

def _ExpandItemName(fn, name, kind):
  """Expand an item name.
  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),

def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")

def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")

def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks
  This builds the hook environment from individual variables.
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @param memory: the memory size of the instance
  @param vcpus: the count of VCPUs the instance has
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @param disks: the list of (size, mode) pairs
  @param bep: the backend parameters for the instance
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @param tags: list of instance tags as strings
  @return: the hook environment for this instance
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  nic_count = len(nics)
  for idx, (ip, mac, mode, link) in enumerate(nics):
    env["INSTANCE_NIC%d_IP" % idx] = ip
    env["INSTANCE_NIC%d_MAC" % idx] = mac
    env["INSTANCE_NIC%d_MODE" % idx] = mode
    env["INSTANCE_NIC%d_LINK" % idx] = link
    if mode == constants.NIC_MODE_BRIDGED:
      env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  env["INSTANCE_NIC_COUNT"] = nic_count
  disk_count = len(disks)
  for idx, (size, mode) in enumerate(disks):
    env["INSTANCE_DISK%d_SIZE" % idx] = size
    env["INSTANCE_DISK%d_MODE" % idx] = mode
  env["INSTANCE_DISK_COUNT"] = disk_count
  env["INSTANCE_TAGS"] = " ".join(tags)
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value
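
# Illustrative note (not part of the original source): for an assumed
# single-NIC, single-disk instance, the code above produces environment keys
# such as INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_NIC_COUNT=1,
# INSTANCE_NIC0_IP, INSTANCE_NIC0_MAC, INSTANCE_DISK_COUNT=1,
# INSTANCE_DISK0_SIZE, INSTANCE_DISK0_MODE, plus one INSTANCE_BE_<param> and
# INSTANCE_HV_<param> entry per backend/hypervisor parameter. The hooks
# runner later prefixes every key with "GANETI_".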
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.
  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples
  cluster = lu.cfg.GetClusterInfo()
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))

def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.
  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
  @type override: dict
  @param override: dictionary with key/values that will override
  @return: the hook environment dictionary
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_up,
    "memory": bep[constants.BE_MEMORY],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142

def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %

def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
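
# Illustrative example (not part of the original source), with assumed
# numbers: if candidate_pool_size is 10 and the cluster currently has
# mc_now == 3 candidates with mc_should == 3, adding this node raises
# mc_should to min(3 + 1, 10) == 4; since 3 < 4, the node decides to promote
# itself to master candidate.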
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)

def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)

def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.
  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @param name: OS name passed by the user, to check for validity
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]

def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)

def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)

def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)

def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:

def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.
  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.
  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)
  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"

def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.
  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @return: Iallocator name
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()
  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  def BuildHooksEnv(self):
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }
  def BuildHooksNodes(self):
    """Build hooks nodes.
    return ([], [self.cfg.GetMasterNode()])
  def Exec(self, feedback_fn):

class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  def BuildHooksEnv(self):
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }
  def BuildHooksNodes(self):
    """Build hooks nodes.
  def CheckPrereq(self):
    """Check prerequisites.
    This checks whether the cluster is empty.
    Any errors are signaled by raising errors.OpPrereqError.
    master = self.cfg.GetMasterNode()
    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
  def Exec(self, feedback_fn):
    """Destroys the cluster.
    master = self.cfg.GetMasterNode()
    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.
  @type filename: string
  @param filename: Path to PEM file
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))
  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)
  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)

def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.
  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
      apply to, and the origin (can be "cluster", "os X", or "instance Y")
  hvp_data = []
  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
      hvp_data.append(("os %s" % os_name, hv_name, full_params))
  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))
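
# Illustrative note (not part of the original source): the list built above
# contains (origin, hypervisor, parameters) tuples, e.g. for an assumed
# cluster:
#
#   [("cluster", "xen-pvm", {...cluster-wide defaults...}),
#    ("os debian-image", "xen-pvm", {...OS-specific overrides...}),
#    ("instance web1.example.com", "xen-pvm", {...fully filled values...})]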
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"
  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.
    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.
    This must be called only from Exec and functions called from Exec.
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    # first complete the msg
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable-msg=E1101
  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.
  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.
    self.bad = False
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying cluster config")
    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
    feedback_fn("* Verifying cluster certificate files")
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
    feedback_fn("* Verifying hypervisor parameters")
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))
    feedback_fn("* Verifying all nodes belong to an existing group")
    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.
    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)
    dangling_instances = {}
    no_node_instances = []
    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)
    pretty_dangling = [
        "%s (%s)" % (node.name,
                     utils.CommaJoin(dangling_instances.get(node.name,
                                                            ["no instances"])))
        for node in dangling_nodes]
    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))
    return (not self.bad, [g.name for g in self.all_group_info.values()])

class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _HOOKS_INDENT_RE = re.compile("^", re.M)
  class NodeImage(object):
    """A class representing the logical and physical status of a node.
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.os_fail = False
  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }
    self.share_locks = _ShareAll()
  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
      all_inst_info = self.cfg.GetAllInstancesInfo()
      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)
      self.needed_locks[locking.LEVEL_NODE] = nodes
  def CheckPrereq(self):
    group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
    unlocked_nodes = \
      group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
    unlocked_instances = \
      group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))
    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)
    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)
    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)
    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()
    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)
    unlocked_lv_nodes = \
      extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes could be locked: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)
  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.
      - check the result data structure is well formed and has all the
      - check ganeti version
    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    # node seems compatible, we can actually try to look into its results
    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)
    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.
    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.
    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.
    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, self.ENODENET, node,
             "did not return valid bridge information")
    _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
             utils.CommaJoin(sorted(missing)))
  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.
    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)
    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])
    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)
1926 def _VerifyInstance(self, instance, instanceconfig, node_image,
1928 """Verify an instance.
1930 This function checks to see if the required block devices are
1931 available on the instance's node.
1934 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1935 node_current = instanceconfig.primary_node
1937 node_vol_should = {}
1938 instanceconfig.MapLVsByNode(node_vol_should)
1940 for node in node_vol_should:
1941 n_img = node_image[node]
1942 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1943 # ignore missing volumes on offline or broken nodes
1945 for volume in node_vol_should[node]:
1946 test = volume not in n_img.volumes
1947 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1948 "volume %s missing on node %s", volume, node)
1950 if instanceconfig.admin_up:
1951 pri_img = node_image[node_current]
1952 test = instance not in pri_img.instances and not pri_img.offline
1953 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1954 "instance not running on its primary node %s",
1957 diskdata = [(nname, success, status, idx)
1958 for (nname, disks) in diskstatus.items()
1959 for idx, (success, status) in enumerate(disks)]
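# For instances marked as running, report disks whose status could not be
# retrieved (unless the node is a ghost or offline) and disks reported as faulty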
1961 for nname, success, bdev_status, idx in diskdata:
1962 # the 'ghost node' construction in Exec() ensures that we have a
1964 snode = node_image[nname]
1965 bad_snode = snode.ghost or snode.offline
1966 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1967 self.EINSTANCEFAULTYDISK, instance,
1968 "couldn't retrieve status for disk/%s on %s: %s",
1969 idx, nname, bdev_status)
1970 _ErrorIf((instanceconfig.admin_up and success and
1971 bdev_status.ldisk_status == constants.LDS_FAULTY),
1972 self.EINSTANCEFAULTYDISK, instance,
1973 "disk/%s on %s is faulty", idx, nname)
1975 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1976 """Verify if there are any unknown volumes in the cluster.
1978 The .os, .swap and backup volumes are ignored. All other volumes are
1979 reported as unknown.
1981 @type reserved: L{ganeti.utils.FieldSet}
1982 @param reserved: a FieldSet of reserved volume names
1985 for node, n_img in node_image.items():
1986 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1987 # skip non-healthy nodes
1989 for volume in n_img.volumes:
1990 test = ((node not in node_vol_should or
1991 volume not in node_vol_should[node]) and
1992 not reserved.Matches(volume))
1993 self._ErrorIf(test, self.ENODEORPHANLV, node,
1994 "volume %s is unknown", volume)
1996 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1997 """Verify N+1 Memory Resilience.
1999 Check that if one single node dies we can still start all the
2000 instances it was primary for.
2003 cluster_info = self.cfg.GetClusterInfo()
2004 for node, n_img in node_image.items():
2005 # This code checks that every node which is now listed as
2006 # secondary has enough memory to host all instances it is
2007 # supposed to should a single other node in the cluster fail.
2008 # FIXME: not ready for failover to an arbitrary node
2009 # FIXME: does not support file-backed instances
2010 # WARNING: we currently take into account down instances as well
2011 # as up ones, considering that even if they're down someone
2012 # might want to start them even in the event of a node failure.
2014 # we're skipping offline nodes from the N+1 warning, since
2015 # most likely we don't have good memory information from them;
2016 # we already list instances living on such nodes, and that's enough warning
2019 for prinode, instances in n_img.sbp.items():
2021 for instance in instances:
2022 bep = cluster_info.FillBE(instance_cfg[instance])
2023 if bep[constants.BE_AUTO_BALANCE]:
2024 needed_mem += bep[constants.BE_MEMORY]
2025 test = n_img.mfree < needed_mem
2026 self._ErrorIf(test, self.ENODEN1, node,
2027 "not enough memory to accomodate instance failovers"
2028 " should node %s fail (%dMiB needed, %dMiB available)",
2029 prinode, needed_mem, n_img.mfree)
2032 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2033 (files_all, files_all_opt, files_mc, files_vm)):
2034 """Verifies file checksums collected from all nodes.
2036 @param errorif: Callback for reporting errors
2037 @param nodeinfo: List of L{objects.Node} objects
2038 @param master_node: Name of master node
2039 @param all_nvinfo: RPC results
2042 node_names = frozenset(node.name for node in nodeinfo if not node.offline)
2044 assert master_node in node_names
2045 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2046 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2047 "Found file listed in more than one file list"
2049 # Define functions determining which nodes to consider for a file
2050 file2nodefn = dict([(filename, fn)
2051 for (files, fn) in [(files_all, None),
2052 (files_all_opt, None),
2053 (files_mc, lambda node: (node.master_candidate or
2054 node.name == master_node)),
2055 (files_vm, lambda node: node.vm_capable)]
2056 for filename in files])
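# fileinfo maps each filename to a dict of checksum -> set of node names reporting it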
2058 fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
2060 for node in nodeinfo:
2064 nresult = all_nvinfo[node.name]
2066 if nresult.fail_msg or not nresult.payload:
2069 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2071 test = not (node_files and isinstance(node_files, dict))
2072 errorif(test, cls.ENODEFILECHECK, node.name,
2073 "Node did not return file checksum data")
2077 for (filename, checksum) in node_files.items():
2078 # Check if the file should be considered for a node
2079 fn = file2nodefn[filename]
2080 if fn is None or fn(node):
2081 fileinfo[filename].setdefault(checksum, set()).add(node.name)
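# With all checksums collected, report files that are missing from nodes or
# present with inconsistent contents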
2083 for (filename, checksums) in fileinfo.items():
2084 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2086 # Nodes having the file
2087 with_file = frozenset(node_name
2088 for nodes in fileinfo[filename].values()
2089 for node_name in nodes)
2091 # Nodes missing file
2092 missing_file = node_names - with_file
2094 if filename in files_all_opt:
2096 errorif(missing_file and missing_file != node_names,
2097 cls.ECLUSTERFILECHECK, None,
2098 "File %s is optional, but it must exist on all or no"
2099 " nodes (not found on %s)",
2100 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2102 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2103 "File %s is missing from node(s) %s", filename,
2104 utils.CommaJoin(utils.NiceSort(missing_file)))
2106 # See if there are multiple versions of the file
2107 test = len(checksums) > 1
2109 variants = ["variant %s on %s" %
2110 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2111 for (idx, (checksum, nodes)) in
2112 enumerate(sorted(checksums.items()))]
2116 errorif(test, cls.ECLUSTERFILECHECK, None,
2117 "File %s found with %s different checksums (%s)",
2118 filename, len(checksums), "; ".join(variants))
2120 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2122 """Verifies and the node DRBD status.
2124 @type ninfo: L{objects.Node}
2125 @param ninfo: the node to check
2126 @param nresult: the remote results for the node
2127 @param instanceinfo: the dict of instances
2128 @param drbd_helper: the configured DRBD usermode helper
2129 @param drbd_map: the DRBD map as returned by
2130 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2134 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2137 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2138 test = (helper_result is None)
2139 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2140 "no drbd usermode helper returned")
2142 status, payload = helper_result
2144 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2145 "drbd usermode helper check unsuccessful: %s", payload)
2146 test = status and (payload != drbd_helper)
2147 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2148 "wrong drbd usermode helper: %s", payload)
2150 # compute the DRBD minors
2152 for minor, instance in drbd_map[node].items():
2153 test = instance not in instanceinfo
2154 _ErrorIf(test, self.ECLUSTERCFG, None,
2155 "ghost instance '%s' in temporary DRBD map", instance)
2156 # ghost instance should not be running, but otherwise we
2157 # don't give double warnings (both ghost instance and
2158 # unallocated minor in use)
2160 node_drbd[minor] = (instance, False)
2162 instance = instanceinfo[instance]
2163 node_drbd[minor] = (instance.name, instance.admin_up)
2165 # and now check them
2166 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2167 test = not isinstance(used_minors, (tuple, list))
2168 _ErrorIf(test, self.ENODEDRBD, node,
2169 "cannot parse drbd status file: %s", str(used_minors))
2171 # we cannot check drbd status
2174 for minor, (iname, must_exist) in node_drbd.items():
2175 test = minor not in used_minors and must_exist
2176 _ErrorIf(test, self.ENODEDRBD, node,
2177 "drbd minor %d of instance %s is not active", minor, iname)
2178 for minor in used_minors:
2179 test = minor not in node_drbd
2180 _ErrorIf(test, self.ENODEDRBD, node,
2181 "unallocated drbd minor %d is in use", minor)
2183 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2184 """Builds the node OS structures.
2186 @type ninfo: L{objects.Node}
2187 @param ninfo: the node to check
2188 @param nresult: the remote results for the node
2189 @param nimg: the node image object
2193 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2195 remote_os = nresult.get(constants.NV_OSLIST, None)
2196 test = (not isinstance(remote_os, list) or
2197 not compat.all(isinstance(v, list) and len(v) == 7
2198 for v in remote_os))
2200 _ErrorIf(test, self.ENODEOS, node,
2201 "node hasn't returned valid OS data")
2210 for (name, os_path, status, diagnose,
2211 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2213 if name not in os_dict:
2216 # parameters is a list of lists instead of list of tuples due to
2217 # JSON lacking a real tuple type, fix it:
2218 parameters = [tuple(v) for v in parameters]
2219 os_dict[name].append((os_path, status, diagnose,
2220 set(variants), set(parameters), set(api_ver)))
2222 nimg.oslist = os_dict
2224 def _VerifyNodeOS(self, ninfo, nimg, base):
2225 """Verifies the node OS list.
2227 @type ninfo: L{objects.Node}
2228 @param ninfo: the node to check
2229 @param nimg: the node image object
2230 @param base: the 'template' node we match against (e.g. from the master)
2234 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2236 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2238 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2239 for os_name, os_data in nimg.oslist.items():
2240 assert os_data, "Empty OS status for OS %s?!" % os_name
2241 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2242 _ErrorIf(not f_status, self.ENODEOS, node,
2243 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2244 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2245 "OS '%s' has multiple entries (first one shadows the rest): %s",
2246 os_name, utils.CommaJoin([v[0] for v in os_data]))
2247 # comparisons with the 'base' image
2248 test = os_name not in base.oslist
2249 _ErrorIf(test, self.ENODEOS, node,
2250 "Extra OS %s not present on reference node (%s)",
2254 assert base.oslist[os_name], "Base node has empty OS status?"
2255 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2257 # base OS is invalid, skipping
2259 for kind, a, b in [("API version", f_api, b_api),
2260 ("variants list", f_var, b_var),
2261 ("parameters", beautify_params(f_param),
2262 beautify_params(b_param))]:
2263 _ErrorIf(a != b, self.ENODEOS, node,
2264 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2265 kind, os_name, base.name,
2266 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2268 # check any missing OSes
2269 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2270 _ErrorIf(missing, self.ENODEOS, node,
2271 "OSes present on reference node %s but missing on this node: %s",
2272 base.name, utils.CommaJoin(missing))
2274 def _VerifyOob(self, ninfo, nresult):
2275 """Verifies out of band functionality of a node.
2277 @type ninfo: L{objects.Node}
2278 @param ninfo: the node to check
2279 @param nresult: the remote results for the node
2283 # We just have to verify the paths on master and/or master candidates
2284 # as the oob helper is invoked on the master
2285 if ((ninfo.master_candidate or ninfo.master_capable) and
2286 constants.NV_OOB_PATHS in nresult):
2287 for path_result in nresult[constants.NV_OOB_PATHS]:
2288 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2290 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2291 """Verifies and updates the node volume data.
2293 This function will update a L{NodeImage}'s internal structures
2294 with data from the remote call.
2296 @type ninfo: L{objects.Node}
2297 @param ninfo: the node to check
2298 @param nresult: the remote results for the node
2299 @param nimg: the node image object
2300 @param vg_name: the configured VG name
2304 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2306 nimg.lvm_fail = True
2307 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2310 elif isinstance(lvdata, basestring):
2311 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2312 utils.SafeEncode(lvdata))
2313 elif not isinstance(lvdata, dict):
2314 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2316 nimg.volumes = lvdata
2317 nimg.lvm_fail = False
2319 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2320 """Verifies and updates the node instance list.
2322 If the listing was successful, then updates this node's instance
2323 list. Otherwise, it marks the RPC call as failed for the instance list.
2326 @type ninfo: L{objects.Node}
2327 @param ninfo: the node to check
2328 @param nresult: the remote results for the node
2329 @param nimg: the node image object
2332 idata = nresult.get(constants.NV_INSTANCELIST, None)
2333 test = not isinstance(idata, list)
2334 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2335 " (instancelist): %s", utils.SafeEncode(str(idata)))
2337 nimg.hyp_fail = True
2339 nimg.instances = idata
2341 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2342 """Verifies and computes a node information map
2344 @type ninfo: L{objects.Node}
2345 @param ninfo: the node to check
2346 @param nresult: the remote results for the node
2347 @param nimg: the node image object
2348 @param vg_name: the configured VG name
2352 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2354 # try to read free memory (from the hypervisor)
2355 hv_info = nresult.get(constants.NV_HVINFO, None)
2356 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2357 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2360 nimg.mfree = int(hv_info["memory_free"])
2361 except (ValueError, TypeError):
2362 _ErrorIf(True, self.ENODERPC, node,
2363 "node returned invalid nodeinfo, check hypervisor")
2365 # FIXME: devise a free space model for file based instances as well
2366 if vg_name is not None:
2367 test = (constants.NV_VGLIST not in nresult or
2368 vg_name not in nresult[constants.NV_VGLIST])
2369 _ErrorIf(test, self.ENODELVM, node,
2370 "node didn't return data for the volume group '%s'"
2371 " - it is either missing or broken", vg_name)
2374 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2375 except (ValueError, TypeError):
2376 _ErrorIf(True, self.ENODERPC, node,
2377 "node returned invalid LVM info, check LVM status")
2379 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2380 """Gets per-disk status information for all instances.
2382 @type nodelist: list of strings
2383 @param nodelist: Node names
2384 @type node_image: dict of (name, L{objects.Node})
2385 @param node_image: Node objects
2386 @type instanceinfo: dict of (name, L{objects.Instance})
2387 @param instanceinfo: Instance objects
2388 @rtype: {instance: {node: [(success, payload)]}}
2389 @return: a dictionary of per-instance dictionaries with nodes as
2390 keys and disk information as values; the disk information is a
2391 list of tuples (success, payload)
2394 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2397 node_disks_devonly = {}
2398 diskless_instances = set()
2399 diskless = constants.DT_DISKLESS
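# For every node, collect the disks of all instances it hosts (as primary or
# secondary); diskless instances are tracked separately and get empty entries later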
2401 for nname in nodelist:
2402 node_instances = list(itertools.chain(node_image[nname].pinst,
2403 node_image[nname].sinst))
2404 diskless_instances.update(inst for inst in node_instances
2405 if instanceinfo[inst].disk_template == diskless)
2406 disks = [(inst, disk)
2407 for inst in node_instances
2408 for disk in instanceinfo[inst].disks]
2411 # No need to collect data
2414 node_disks[nname] = disks
2416 # Creating copies as SetDiskID below will modify the objects and that can
2417 # lead to incorrect data returned from nodes
2418 devonly = [dev.Copy() for (_, dev) in disks]
2421 self.cfg.SetDiskID(dev, nname)
2423 node_disks_devonly[nname] = devonly
2425 assert len(node_disks) == len(node_disks_devonly)
2427 # Collect data from all nodes with disks
2428 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2431 assert len(result) == len(node_disks)
2435 for (nname, nres) in result.items():
2436 disks = node_disks[nname]
2439 # No data from this node
2440 data = len(disks) * [(False, "node offline")]
2443 _ErrorIf(msg, self.ENODERPC, nname,
2444 "while getting disk information: %s", msg)
2446 # No data from this node
2447 data = len(disks) * [(False, msg)]
2450 for idx, i in enumerate(nres.payload):
2451 if isinstance(i, (tuple, list)) and len(i) == 2:
2454 logging.warning("Invalid result from node %s, entry %d: %s", nname, idx, i)
2456 data.append((False, "Invalid result from the remote node"))
2458 for ((inst, _), status) in zip(disks, data):
2459 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2461 # Add empty entries for diskless instances.
2462 for inst in diskless_instances:
2463 assert inst not in instdisk
2466 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2467 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2468 compat.all(isinstance(s, (tuple, list)) and
2469 len(s) == 2 for s in statuses)
2470 for inst, nnames in instdisk.items()
2471 for nname, statuses in nnames.items())
2472 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2476 def BuildHooksEnv(self):
2479 Cluster-Verify hooks are run only in the post phase; if they fail, their
2480 output is logged in the verify output and the verification fails.
2484 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2487 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2488 for node in self.my_node_info.values())
2492 def BuildHooksNodes(self):
2493 """Build hooks nodes.
2496 return ([], self.my_node_names)
2498 def Exec(self, feedback_fn):
2499 """Verify integrity of the node group, performing various test on nodes.
2502 # This method has too many local variables. pylint: disable-msg=R0914
2504 if not self.my_node_names:
2506 feedback_fn("* Empty node group, skipping verification")
2510 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2511 verbose = self.op.verbose
2512 self._feedback_fn = feedback_fn
2514 vg_name = self.cfg.GetVGName()
2515 drbd_helper = self.cfg.GetDRBDHelper()
2516 cluster = self.cfg.GetClusterInfo()
2517 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2518 hypervisors = cluster.enabled_hypervisors
2519 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2521 i_non_redundant = [] # Non redundant instances
2522 i_non_a_balanced = [] # Non auto-balanced instances
2523 n_offline = 0 # Count of offline nodes
2524 n_drained = 0 # Count of nodes being drained
2525 node_vol_should = {}
2527 # FIXME: verify OS list
2530 filemap = _ComputeAncillaryFiles(cluster, False)
2532 # do local checksums
2533 master_node = self.master_node = self.cfg.GetMasterNode()
2534 master_ip = self.cfg.GetMasterIP()
2536 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2538 # We will make nodes contact all nodes in their group, and one node from
2539 # every other group.
2540 # TODO: should it be a *random* node, different every time?
2541 online_nodes = [node.name for node in node_data_list if not node.offline]
2542 other_group_nodes = {}
2544 for name in sorted(self.all_node_info):
2545 node = self.all_node_info[name]
2546 if (node.group not in other_group_nodes
2547 and node.group != self.group_uuid
2548 and not node.offline):
2549 other_group_nodes[node.group] = node.name
2551 node_verify_param = {
2552 constants.NV_FILELIST:
2553 utils.UniqueSequence(filename
2554 for files in filemap
2555 for filename in files),
2556 constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
2557 constants.NV_HYPERVISOR: hypervisors,
2558 constants.NV_HVPARAMS:
2559 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2560 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2561 for node in node_data_list
2562 if not node.offline],
2563 constants.NV_INSTANCELIST: hypervisors,
2564 constants.NV_VERSION: None,
2565 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2566 constants.NV_NODESETUP: None,
2567 constants.NV_TIME: None,
2568 constants.NV_MASTERIP: (master_node, master_ip),
2569 constants.NV_OSLIST: None,
2570 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2573 if vg_name is not None:
2574 node_verify_param[constants.NV_VGLIST] = None
2575 node_verify_param[constants.NV_LVLIST] = vg_name
2576 node_verify_param[constants.NV_PVLIST] = [vg_name]
2577 node_verify_param[constants.NV_DRBDLIST] = None
2580 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2583 # FIXME: this needs to be changed per node-group, not cluster-wide
2585 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2586 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2587 bridges.add(default_nicpp[constants.NIC_LINK])
2588 for instance in self.my_inst_info.values():
2589 for nic in instance.nics:
2590 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2591 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2592 bridges.add(full_nic[constants.NIC_LINK])
2595 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2597 # Build our expected cluster state
2598 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2600 vm_capable=node.vm_capable))
2601 for node in node_data_list)
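# Collect the distinct out-of-band helper paths configured for any node so they
# can be verified (see _VerifyOob)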
2605 for node in self.all_node_info.values():
2606 path = _SupportsOob(self.cfg, node)
2607 if path and path not in oob_paths:
2608 oob_paths.append(path)
2611 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2613 for instance in self.my_inst_names:
2614 inst_config = self.my_inst_info[instance]
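# Make sure every node used by the instance has a node image; nodes that are
# not in the cluster configuration at all are marked as ghosts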
2616 for nname in inst_config.all_nodes:
2617 if nname not in node_image:
2618 gnode = self.NodeImage(name=nname)
2619 gnode.ghost = (nname not in self.all_node_info)
2620 node_image[nname] = gnode
2622 inst_config.MapLVsByNode(node_vol_should)
2624 pnode = inst_config.primary_node
2625 node_image[pnode].pinst.append(instance)
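# For each secondary node, record the instance in its 'secondary by primary'
# (sbp) map, keyed by the instance's primary node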
2627 for snode in inst_config.secondary_nodes:
2628 nimg = node_image[snode]
2629 nimg.sinst.append(instance)
2630 if pnode not in nimg.sbp:
2631 nimg.sbp[pnode] = []
2632 nimg.sbp[pnode].append(instance)
2634 # At this point, we have the in-memory data structures complete,
2635 # except for the runtime information, which we'll gather next
2637 # Due to the way our RPC system works, exact response times cannot be
2638 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2639 # time before and after executing the request, we can at least have a time
2641 nvinfo_starttime = time.time()
2642 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2644 self.cfg.GetClusterName())
2645 nvinfo_endtime = time.time()
2647 if self.extra_lv_nodes and vg_name is not None:
2649 self.rpc.call_node_verify(self.extra_lv_nodes,
2650 {constants.NV_LVLIST: vg_name},
2651 self.cfg.GetClusterName())
2653 extra_lv_nvinfo = {}
2655 all_drbd_map = self.cfg.ComputeDRBDMap()
2657 feedback_fn("* Gathering disk information (%s nodes)" %
2658 len(self.my_node_names))
2659 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2662 feedback_fn("* Verifying configuration file consistency")
2664 # If not all nodes are being checked, we need to make sure the master node
2665 # and a non-checked vm_capable node are in the list.
2666 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2668 vf_nvinfo = all_nvinfo.copy()
2669 vf_node_info = list(self.my_node_info.values())
2670 additional_nodes = []
2671 if master_node not in self.my_node_info:
2672 additional_nodes.append(master_node)
2673 vf_node_info.append(self.all_node_info[master_node])
2674 # Add the first vm_capable node we find which is not included
2675 for node in absent_nodes:
2676 nodeinfo = self.all_node_info[node]
2677 if nodeinfo.vm_capable and not nodeinfo.offline:
2678 additional_nodes.append(node)
2679 vf_node_info.append(self.all_node_info[node])
2681 key = constants.NV_FILELIST
2682 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2683 {key: node_verify_param[key]},
2684 self.cfg.GetClusterName()))
2686 vf_nvinfo = all_nvinfo
2687 vf_node_info = self.my_node_info.values()
2689 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2691 feedback_fn("* Verifying node status")
2695 for node_i in node_data_list:
2697 nimg = node_image[node]
2701 feedback_fn("* Skipping offline node %s" % (node,))
2705 if node == master_node:
2707 elif node_i.master_candidate:
2708 ntype = "master candidate"
2709 elif node_i.drained:
2715 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2717 msg = all_nvinfo[node].fail_msg
2718 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2720 nimg.rpc_fail = True
2723 nresult = all_nvinfo[node].payload
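# Run the individual per-node checks and update the node image with the data
# returned by the node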
2725 nimg.call_ok = self._VerifyNode(node_i, nresult)
2726 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2727 self._VerifyNodeNetwork(node_i, nresult)
2728 self._VerifyOob(node_i, nresult)
2731 self._VerifyNodeLVM(node_i, nresult, vg_name)
2732 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2735 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2736 self._UpdateNodeInstances(node_i, nresult, nimg)
2737 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2738 self._UpdateNodeOS(node_i, nresult, nimg)
2740 if not nimg.os_fail:
2741 if refos_img is None:
2743 self._VerifyNodeOS(node_i, nimg, refos_img)
2744 self._VerifyNodeBridges(node_i, nresult, bridges)
2746 # Check whether all running instances are primary for the node. (This
2747 # can no longer be done from _VerifyInstance below, since some of the
2748 # wrong instances could be from other node groups.)
2749 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2751 for inst in non_primary_inst:
2752 test = inst in self.all_inst_info
2753 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2754 "instance should not run on node %s", node_i.name)
2755 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2756 "node is running unknown instance %s", inst)
2758 for node, result in extra_lv_nvinfo.items():
2759 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2760 node_image[node], vg_name)
2762 feedback_fn("* Verifying instance status")
2763 for instance in self.my_inst_names:
2765 feedback_fn("* Verifying instance %s" % instance)
2766 inst_config = self.my_inst_info[instance]
2767 self._VerifyInstance(instance, inst_config, node_image,
2769 inst_nodes_offline = []
2771 pnode = inst_config.primary_node
2772 pnode_img = node_image[pnode]
2773 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2774 self.ENODERPC, pnode, "instance %s, connection to"
2775 " primary node failed", instance)
2777 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2778 self.EINSTANCEBADNODE, instance,
2779 "instance is marked as running and lives on offline node %s",
2780 inst_config.primary_node)
2782 # If the instance is non-redundant we cannot survive losing its primary
2783 # node, so we are not N+1 compliant. On the other hand we have no disk
2784 # templates with more than one secondary so that situation is not well supported either.
2786 # FIXME: does not support file-backed instances
2787 if not inst_config.secondary_nodes:
2788 i_non_redundant.append(instance)
2790 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2791 instance, "instance has multiple secondary nodes: %s",
2792 utils.CommaJoin(inst_config.secondary_nodes),
2793 code=self.ETYPE_WARNING)
2795 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2796 pnode = inst_config.primary_node
2797 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2798 instance_groups = {}
2800 for node in instance_nodes:
2801 instance_groups.setdefault(self.all_node_info[node].group,
2805 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2806 # Sort so that we always list the primary node first.
2807 for group, nodes in sorted(instance_groups.items(),
2808 key=lambda (_, nodes): pnode in nodes,
2811 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2812 instance, "instance has primary and secondary nodes in"
2813 " different groups: %s", utils.CommaJoin(pretty_list),
2814 code=self.ETYPE_WARNING)
2816 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2817 i_non_a_balanced.append(instance)
2819 for snode in inst_config.secondary_nodes:
2820 s_img = node_image[snode]
2821 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2822 "instance %s, connection to secondary node failed", instance)
2825 inst_nodes_offline.append(snode)
2827 # warn that the instance lives on offline nodes
2828 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2829 "instance has offline secondary node(s) %s",
2830 utils.CommaJoin(inst_nodes_offline))
2831 # ... or ghost/non-vm_capable nodes
2832 for node in inst_config.all_nodes:
2833 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2834 "instance lives on ghost node %s", node)
2835 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2836 instance, "instance lives on non-vm_capable node %s", node)
2838 feedback_fn("* Verifying orphan volumes")
2839 reserved = utils.FieldSet(*cluster.reserved_lvs)
2841 # We will get spurious "unknown volume" warnings if any node of this group
2842 # is secondary for an instance whose primary is in another group. To avoid
2843 # them, we find these instances and add their volumes to node_vol_should.
2844 for inst in self.all_inst_info.values():
2845 for secondary in inst.secondary_nodes:
2846 if (secondary in self.my_node_info
2847 and inst.name not in self.my_inst_info):
2848 inst.MapLVsByNode(node_vol_should)
2851 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2853 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2854 feedback_fn("* Verifying N+1 Memory redundancy")
2855 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2857 feedback_fn("* Other Notes")
2859 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2860 % len(i_non_redundant))
2862 if i_non_a_balanced:
2863 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2864 % len(i_non_a_balanced))
2867 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2870 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2874 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2875 """Analyze the post-hooks' result
2877 This method analyses the hook result, handles it, and sends some
2878 nicely-formatted feedback back to the user.
2880 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2881 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2882 @param hooks_results: the results of the multi-node hooks rpc call
2883 @param feedback_fn: function used to send feedback back to the caller
2884 @param lu_result: previous Exec result
2885 @return: the new Exec result, based on the previous result
2889 # We only really run POST phase hooks, only for non-empty groups,
2890 # and are only interested in their results
2891 if not self.my_node_names:
2894 elif phase == constants.HOOKS_PHASE_POST:
2895 # Used to change hooks' output to proper indentation
2896 feedback_fn("* Hooks Results")
2897 assert hooks_results, "invalid result from hooks"
2899 for node_name in hooks_results:
2900 res = hooks_results[node_name]
2902 test = msg and not res.offline
2903 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2904 "Communication failure in hooks execution: %s", msg)
2905 if res.offline or msg:
2906 # No need to investigate payload if node is offline or gave an error.
2907 # override manually lu_result here as _ErrorIf only
2908 # overrides self.bad
2911 for script, hkr, output in res.payload:
2912 test = hkr == constants.HKR_FAIL
2913 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2914 "Script %s failed, output:", script)
2916 output = self._HOOKS_INDENT_RE.sub(" ", output)
2917 feedback_fn("%s" % output)
2923 class LUClusterVerifyDisks(NoHooksLU):
2924 """Verifies the cluster disks status.
2929 def ExpandNames(self):
2930 self.share_locks = _ShareAll()
2931 self.needed_locks = {
2932 locking.LEVEL_NODEGROUP: locking.ALL_SET,
2935 def Exec(self, feedback_fn):
2936 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
2938 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
2939 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
2940 for group in group_names])
2943 class LUGroupVerifyDisks(NoHooksLU):
2944 """Verifies the status of all disks in a node group.
2949 def ExpandNames(self):
2950 # Raises errors.OpPrereqError on its own if group can't be found
2951 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2953 self.share_locks = _ShareAll()
2954 self.needed_locks = {
2955 locking.LEVEL_INSTANCE: [],
2956 locking.LEVEL_NODEGROUP: [],
2957 locking.LEVEL_NODE: [],
2960 def DeclareLocks(self, level):
2961 if level == locking.LEVEL_INSTANCE:
2962 assert not self.needed_locks[locking.LEVEL_INSTANCE]
2964 # Lock instances optimistically, needs verification once node and group
2965 # locks have been acquired
2966 self.needed_locks[locking.LEVEL_INSTANCE] = \
2967 self.cfg.GetNodeGroupInstances(self.group_uuid)
2969 elif level == locking.LEVEL_NODEGROUP:
2970 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
2972 self.needed_locks[locking.LEVEL_NODEGROUP] = \
2973 set([self.group_uuid] +
2974 # Lock all groups used by instances optimistically; this requires
2975 # going via the node before it's locked, requiring verification
2978 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
2979 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
2981 elif level == locking.LEVEL_NODE:
2982 # This will only lock the nodes in the group to be verified which contain
2984 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
2985 self._LockInstancesNodes()
2987 # Lock all nodes in group to be verified
2988 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2989 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
2990 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
2992 def CheckPrereq(self):
2993 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
2994 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
2995 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
2997 assert self.group_uuid in owned_groups
2999 # Check if locked instances are still correct
3000 wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
3001 if owned_instances != wanted_instances:
3002 raise errors.OpPrereqError("Instances in node group %s changed since"
3003 " locks were acquired, wanted %s, have %s;"
3004 " retry the operation" %
3005 (self.op.group_name,
3006 utils.CommaJoin(wanted_instances),
3007 utils.CommaJoin(owned_instances)),
3010 # Get instance information
3011 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3013 # Check if node groups for locked instances are still correct
3014 for (instance_name, inst) in self.instances.items():
3015 assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
3016 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3017 assert owned_nodes.issuperset(inst.all_nodes), \
3018 "Instance %s's nodes changed while we kept the lock" % instance_name
3020 _CheckInstanceNodeGroups(self.cfg, instance_name, owned_groups)
3022 def Exec(self, feedback_fn):
3023 """Verify integrity of cluster disks.
3025 @rtype: tuple of three items
3026 @return: a tuple of (dict of node-to-node_error, list of instances
3027 which need activate-disks, dict of instance: (node, volume) for missing volumes)
3032 res_instances = set()
3035 nv_dict = _MapInstanceDisksToNodes([inst
3036 for inst in self.instances.values()
3040 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3041 set(self.cfg.GetVmCapableNodeList()))
3043 node_lvs = self.rpc.call_lv_list(nodes, [])
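# Walk each node's LV list: record per-node RPC errors and flag instances whose
# logical volumes are present but not online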
3045 for (node, node_res) in node_lvs.items():
3046 if node_res.offline:
3049 msg = node_res.fail_msg
3051 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3052 res_nodes[node] = msg
3055 for lv_name, (_, _, lv_online) in node_res.payload.items():
3056 inst = nv_dict.pop((node, lv_name), None)
3057 if not (lv_online or inst is None):
3058 res_instances.add(inst)
3060 # any leftover items in nv_dict are missing LVs, let's arrange the data
3062 for key, inst in nv_dict.iteritems():
3063 res_missing.setdefault(inst, []).append(key)
3065 return (res_nodes, list(res_instances), res_missing)
3068 class LUClusterRepairDiskSizes(NoHooksLU):
3069 """Verifies the cluster disks sizes.
3074 def ExpandNames(self):
3075 if self.op.instances:
3076 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3077 self.needed_locks = {
3078 locking.LEVEL_NODE: [],
3079 locking.LEVEL_INSTANCE: self.wanted_names,
3081 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3083 self.wanted_names = None
3084 self.needed_locks = {
3085 locking.LEVEL_NODE: locking.ALL_SET,
3086 locking.LEVEL_INSTANCE: locking.ALL_SET,
3088 self.share_locks = _ShareAll()
3090 def DeclareLocks(self, level):
3091 if level == locking.LEVEL_NODE and self.wanted_names is not None:
3092 self._LockInstancesNodes(primary_only=True)
3094 def CheckPrereq(self):
3095 """Check prerequisites.
3097 This only checks the optional instance list against the existing names.
3100 if self.wanted_names is None:
3101 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3103 self.wanted_instances = \
3104 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3106 def _EnsureChildSizes(self, disk):
3107 """Ensure children of the disk have the needed disk size.
3109 This is valid mainly for DRBD8 and fixes an issue where the
3110 children have a smaller disk size.
3112 @param disk: an L{ganeti.objects.Disk} object
3115 if disk.dev_type == constants.LD_DRBD8:
3116 assert disk.children, "Empty children for DRBD8?"
3117 fchild = disk.children[0]
3118 mismatch = fchild.size < disk.size
3120 self.LogInfo("Child disk has size %d, parent %d, fixing",
3121 fchild.size, disk.size)
3122 fchild.size = disk.size
3124 # and we recurse on this child only, not on the metadev
3125 return self._EnsureChildSizes(fchild) or mismatch
3129 def Exec(self, feedback_fn):
3130 """Verify the size of cluster disks.
3133 # TODO: check child disks too
3134 # TODO: check differences in size between primary/secondary nodes
3136 for instance in self.wanted_instances:
3137 pnode = instance.primary_node
3138 if pnode not in per_node_disks:
3139 per_node_disks[pnode] = []
3140 for idx, disk in enumerate(instance.disks):
3141 per_node_disks[pnode].append((instance, idx, disk))
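# Ask every primary node for the actual block device sizes and correct any
# mismatches found in the configuration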
3144 for node, dskl in per_node_disks.items():
3145 newl = [v[2].Copy() for v in dskl]
3147 self.cfg.SetDiskID(dsk, node)
3148 result = self.rpc.call_blockdev_getsize(node, newl)
3150 self.LogWarning("Failure in blockdev_getsize call to node"
3151 " %s, ignoring", node)
3153 if len(result.payload) != len(dskl):
3154 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3155 " result.payload=%s", node, len(dskl), result.payload)
3156 self.LogWarning("Invalid result from node %s, ignoring node results",
3159 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3161 self.LogWarning("Disk %d of instance %s did not return size"
3162 " information, ignoring", idx, instance.name)
3164 if not isinstance(size, (int, long)):
3165 self.LogWarning("Disk %d of instance %s did not return valid"
3166 " size information, ignoring", idx, instance.name)
3169 if size != disk.size:
3170 self.LogInfo("Disk %d of instance %s has mismatched size,"
3171 " correcting: recorded %d, actual %d", idx,
3172 instance.name, disk.size, size)
3174 self.cfg.Update(instance, feedback_fn)
3175 changed.append((instance.name, idx, size))
3176 if self._EnsureChildSizes(disk):
3177 self.cfg.Update(instance, feedback_fn)
3178 changed.append((instance.name, idx, disk.size))
3182 class LUClusterRename(LogicalUnit):
3183 """Rename the cluster.
3186 HPATH = "cluster-rename"
3187 HTYPE = constants.HTYPE_CLUSTER
3189 def BuildHooksEnv(self):
3194 "OP_TARGET": self.cfg.GetClusterName(),
3195 "NEW_NAME": self.op.name,
3198 def BuildHooksNodes(self):
3199 """Build hooks nodes.
3202 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3204 def CheckPrereq(self):
3205 """Verify that the passed name is a valid one.
3208 hostname = netutils.GetHostname(name=self.op.name,
3209 family=self.cfg.GetPrimaryIPFamily())
3211 new_name = hostname.name
3212 self.ip = new_ip = hostname.ip
3213 old_name = self.cfg.GetClusterName()
3214 old_ip = self.cfg.GetMasterIP()
3215 if new_name == old_name and new_ip == old_ip:
3216 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3217 " cluster has changed",
3219 if new_ip != old_ip:
3220 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3221 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3222 " reachable on the network" %
3223 new_ip, errors.ECODE_NOTUNIQUE)
3225 self.op.name = new_name
3227 def Exec(self, feedback_fn):
3228 """Rename the cluster.
3231 clustername = self.op.name
3234 # shutdown the master IP
3235 master = self.cfg.GetMasterNode()
3236 result = self.rpc.call_node_stop_master(master, False)
3237 result.Raise("Could not disable the master role")
3240 cluster = self.cfg.GetClusterInfo()
3241 cluster.cluster_name = clustername
3242 cluster.master_ip = ip
3243 self.cfg.Update(cluster, feedback_fn)
3245 # update the known hosts file
3246 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3247 node_list = self.cfg.GetOnlineNodeList()
3249 node_list.remove(master)
3252 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3254 result = self.rpc.call_node_start_master(master, False, False)
3255 msg = result.fail_msg
3257 self.LogWarning("Could not re-enable the master role on"
3258 " the master, please restart manually: %s", msg)
3263 class LUClusterSetParams(LogicalUnit):
3264 """Change the parameters of the cluster.
3267 HPATH = "cluster-modify"
3268 HTYPE = constants.HTYPE_CLUSTER
3271 def CheckArguments(self):
3275 if self.op.uid_pool:
3276 uidpool.CheckUidPool(self.op.uid_pool)
3278 if self.op.add_uids:
3279 uidpool.CheckUidPool(self.op.add_uids)
3281 if self.op.remove_uids:
3282 uidpool.CheckUidPool(self.op.remove_uids)
3284 def ExpandNames(self):
3285 # FIXME: in the future maybe other cluster params won't require checking on
3286 # all nodes to be modified.
3287 self.needed_locks = {
3288 locking.LEVEL_NODE: locking.ALL_SET,
3290 self.share_locks[locking.LEVEL_NODE] = 1
3292 def BuildHooksEnv(self):
3297 "OP_TARGET": self.cfg.GetClusterName(),
3298 "NEW_VG_NAME": self.op.vg_name,
3301 def BuildHooksNodes(self):
3302 """Build hooks nodes.
3305 mn = self.cfg.GetMasterNode()
3308 def CheckPrereq(self):
3309 """Check prerequisites.
3311 This checks that the given parameters don't conflict and
3312 that the given volume group is valid.
3315 if self.op.vg_name is not None and not self.op.vg_name:
3316 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3317 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3318 " instances exist", errors.ECODE_INVAL)
3320 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3321 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3322 raise errors.OpPrereqError("Cannot disable drbd helper while"
3323 " drbd-based instances exist",
3326 node_list = self.owned_locks(locking.LEVEL_NODE)
3328 # if vg_name not None, checks given volume group on all nodes
3330 vglist = self.rpc.call_vg_list(node_list)
3331 for node in node_list:
3332 msg = vglist[node].fail_msg
3334 # ignoring down node
3335 self.LogWarning("Error while gathering data on node %s"
3336 " (ignoring node): %s", node, msg)
3338 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3340 constants.MIN_VG_SIZE)
3342 raise errors.OpPrereqError("Error on node '%s': %s" %
3343 (node, vgstatus), errors.ECODE_ENVIRON)
3345 if self.op.drbd_helper:
3346 # checks given drbd helper on all nodes
3347 helpers = self.rpc.call_drbd_helper(node_list)
3348 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3350 self.LogInfo("Not checking drbd helper on offline node %s", node)
3352 msg = helpers[node].fail_msg
3354 raise errors.OpPrereqError("Error checking drbd helper on node"
3355 " '%s': %s" % (node, msg),
3356 errors.ECODE_ENVIRON)
3357 node_helper = helpers[node].payload
3358 if node_helper != self.op.drbd_helper:
3359 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3360 (node, node_helper), errors.ECODE_ENVIRON)
3362 self.cluster = cluster = self.cfg.GetClusterInfo()
3363 # validate params changes
3364 if self.op.beparams:
3365 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3366 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3368 if self.op.ndparams:
3369 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3370 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3372 # TODO: we need a more general way to handle resetting
3373 # cluster-level parameters to default values
3374 if self.new_ndparams["oob_program"] == "":
3375 self.new_ndparams["oob_program"] = \
3376 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3378 if self.op.nicparams:
3379 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3380 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3381 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3384 # check all instances for consistency
3385 for instance in self.cfg.GetAllInstancesInfo().values():
3386 for nic_idx, nic in enumerate(instance.nics):
3387 params_copy = copy.deepcopy(nic.nicparams)
3388 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3390 # check parameter syntax
3392 objects.NIC.CheckParameterSyntax(params_filled)
3393 except errors.ConfigurationError, err:
3394 nic_errors.append("Instance %s, nic/%d: %s" %
3395 (instance.name, nic_idx, err))
3397 # if we're moving instances to routed, check that they have an ip
3398 target_mode = params_filled[constants.NIC_MODE]
3399 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3400 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3401 " address" % (instance.name, nic_idx))
3403 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3404 "\n".join(nic_errors))
3406 # hypervisor list/parameters
3407 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3408 if self.op.hvparams:
3409 for hv_name, hv_dict in self.op.hvparams.items():
3410 if hv_name not in self.new_hvparams:
3411 self.new_hvparams[hv_name] = hv_dict
3413 self.new_hvparams[hv_name].update(hv_dict)
3415 # os hypervisor parameters
3416 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3418 for os_name, hvs in self.op.os_hvp.items():
3419 if os_name not in self.new_os_hvp:
3420 self.new_os_hvp[os_name] = hvs
3422 for hv_name, hv_dict in hvs.items():
3423 if hv_name not in self.new_os_hvp[os_name]:
3424 self.new_os_hvp[os_name][hv_name] = hv_dict
3426 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3429 self.new_osp = objects.FillDict(cluster.osparams, {})
3430 if self.op.osparams:
3431 for os_name, osp in self.op.osparams.items():
3432 if os_name not in self.new_osp:
3433 self.new_osp[os_name] = {}
3435 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3438 if not self.new_osp[os_name]:
3439 # we removed all parameters
3440 del self.new_osp[os_name]
3442 # check the parameter validity (remote check)
3443 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3444 os_name, self.new_osp[os_name])
3446 # changes to the hypervisor list
3447 if self.op.enabled_hypervisors is not None:
3448 self.hv_list = self.op.enabled_hypervisors
3449 for hv in self.hv_list:
3450 # if the hypervisor doesn't already exist in the cluster
3451 # hvparams, we initialize it to empty, and then (in both
3452 # cases) we make sure to fill the defaults, as we might not
3453 # have a complete defaults list if the hypervisor wasn't
3455 if hv not in new_hvp:
3457 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3458 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3460 self.hv_list = cluster.enabled_hypervisors
3462 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3463 # either the enabled list has changed, or the parameters have, validate
3464 for hv_name, hv_params in self.new_hvparams.items():
3465 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3466 (self.op.enabled_hypervisors and
3467 hv_name in self.op.enabled_hypervisors)):
3468 # either this is a new hypervisor, or its parameters have changed
3469 hv_class = hypervisor.GetHypervisor(hv_name)
3470 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3471 hv_class.CheckParameterSyntax(hv_params)
3472 _CheckHVParams(self, node_list, hv_name, hv_params)
3475 # no need to check any newly-enabled hypervisors, since the
3476 # defaults have already been checked in the above code-block
3477 for os_name, os_hvp in self.new_os_hvp.items():
3478 for hv_name, hv_params in os_hvp.items():
3479 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3480 # we need to fill in the new os_hvp on top of the actual hv_p
3481 cluster_defaults = self.new_hvparams.get(hv_name, {})
3482 new_osp = objects.FillDict(cluster_defaults, hv_params)
3483 hv_class = hypervisor.GetHypervisor(hv_name)
3484 hv_class.CheckParameterSyntax(new_osp)
3485 _CheckHVParams(self, node_list, hv_name, new_osp)
3487 if self.op.default_iallocator:
3488 alloc_script = utils.FindFile(self.op.default_iallocator,
3489 constants.IALLOCATOR_SEARCH_PATH,
3491 if alloc_script is None:
3492 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3493 " specified" % self.op.default_iallocator,
3496 def Exec(self, feedback_fn):
3497 """Change the parameters of the cluster.
3500 if self.op.vg_name is not None:
3501 new_volume = self.op.vg_name
3504 if new_volume != self.cfg.GetVGName():
3505 self.cfg.SetVGName(new_volume)
3507 feedback_fn("Cluster LVM configuration already in desired"
3508 " state, not changing")
3509 if self.op.drbd_helper is not None:
3510 new_helper = self.op.drbd_helper
3513 if new_helper != self.cfg.GetDRBDHelper():
3514 self.cfg.SetDRBDHelper(new_helper)
3516 feedback_fn("Cluster DRBD helper already in desired state,"
3518 if self.op.hvparams:
3519 self.cluster.hvparams = self.new_hvparams
3521 self.cluster.os_hvp = self.new_os_hvp
3522 if self.op.enabled_hypervisors is not None:
3523 self.cluster.hvparams = self.new_hvparams
3524 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3525 if self.op.beparams:
3526 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3527 if self.op.nicparams:
3528 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3529 if self.op.osparams:
3530 self.cluster.osparams = self.new_osp
3531 if self.op.ndparams:
3532 self.cluster.ndparams = self.new_ndparams
3534 if self.op.candidate_pool_size is not None:
3535 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3536 # we need to update the pool size here, otherwise the save will fail
3537 _AdjustCandidatePool(self, [])
3539 if self.op.maintain_node_health is not None:
3540 self.cluster.maintain_node_health = self.op.maintain_node_health
3542 if self.op.prealloc_wipe_disks is not None:
3543 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3545 if self.op.add_uids is not None:
3546 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3548 if self.op.remove_uids is not None:
3549 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3551 if self.op.uid_pool is not None:
3552 self.cluster.uid_pool = self.op.uid_pool
3554 if self.op.default_iallocator is not None:
3555 self.cluster.default_iallocator = self.op.default_iallocator
3557 if self.op.reserved_lvs is not None:
3558 self.cluster.reserved_lvs = self.op.reserved_lvs
3560 def helper_os(aname, mods, desc):
3562 lst = getattr(self.cluster, aname)
3563 for key, val in mods:
3564 if key == constants.DDM_ADD:
3566 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3569 elif key == constants.DDM_REMOVE:
3573 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3575 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3577 if self.op.hidden_os:
3578 helper_os("hidden_os", self.op.hidden_os, "hidden")
3580 if self.op.blacklisted_os:
3581 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3583 if self.op.master_netdev:
3584 master = self.cfg.GetMasterNode()
3585 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3586 self.cluster.master_netdev)
3587 result = self.rpc.call_node_stop_master(master, False)
3588 result.Raise("Could not disable the master ip")
3589 feedback_fn("Changing master_netdev from %s to %s" %
3590 (self.cluster.master_netdev, self.op.master_netdev))
3591 self.cluster.master_netdev = self.op.master_netdev
3593 self.cfg.Update(self.cluster, feedback_fn)
3595 if self.op.master_netdev:
3596 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3597 self.op.master_netdev)
3598 result = self.rpc.call_node_start_master(master, False, False)
3600 self.LogWarning("Could not re-enable the master ip on"
3601 " the master, please restart manually: %s",
3605 def _UploadHelper(lu, nodes, fname):
3606 """Helper for uploading a file and showing warnings.
3609 if os.path.exists(fname):
3610 result = lu.rpc.call_upload_file(nodes, fname)
3611 for to_node, to_result in result.items():
3612 msg = to_result.fail_msg
3614 msg = ("Copy of file %s to node %s failed: %s" %
3615 (fname, to_node, msg))
3616 lu.proc.LogWarning(msg)
3619 def _ComputeAncillaryFiles(cluster, redist):
3620 """Compute files external to Ganeti which need to be consistent.
3622 @type redist: boolean
3623 @param redist: Whether to include files which need to be redistributed
3626 # Compute files for all nodes
3628 constants.SSH_KNOWN_HOSTS_FILE,
3629 constants.CONFD_HMAC_KEY,
3630 constants.CLUSTER_DOMAIN_SECRET_FILE,
3634 files_all.update(constants.ALL_CERT_FILES)
3635 files_all.update(ssconf.SimpleStore().GetFileList())
3637 if cluster.modify_etc_hosts:
3638 files_all.add(constants.ETC_HOSTS)
3640 # Files which must either exist on all nodes or on none
3641 files_all_opt = set([
3642 constants.RAPI_USERS_FILE,
3645 # Files which should only be on master candidates
3648 files_mc.add(constants.CLUSTER_CONF_FILE)
3650 # Files which should only be on VM-capable nodes
3651 files_vm = set(filename
3652 for hv_name in cluster.enabled_hypervisors
3653 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3655 # Filenames must be unique
3656 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3657 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3658 "Found file listed in more than one file list"
3660 return (files_all, files_all_opt, files_mc, files_vm)
3663 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3664 """Distribute additional files which are part of the cluster configuration.
3666 ConfigWriter takes care of distributing the config and ssconf files, but
3667 there are more files which should be distributed to all nodes. This function
3668 makes sure those are copied.
3670 @param lu: calling logical unit
3671 @param additional_nodes: list of nodes not in the config to distribute to
3672 @type additional_vm: boolean
3673 @param additional_vm: whether the additional nodes are vm-capable or not
3676 # Gather target nodes
3677 cluster = lu.cfg.GetClusterInfo()
3678 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3680 online_nodes = lu.cfg.GetOnlineNodeList()
3681 vm_nodes = lu.cfg.GetVmCapableNodeList()
3683 if additional_nodes is not None:
3684 online_nodes.extend(additional_nodes)
3686 vm_nodes.extend(additional_nodes)
3688 # Never distribute to master node
3689 for nodelist in [online_nodes, vm_nodes]:
3690 if master_info.name in nodelist:
3691 nodelist.remove(master_info.name)
3694 (files_all, files_all_opt, files_mc, files_vm) = \
3695 _ComputeAncillaryFiles(cluster, True)
3697 # Never re-distribute configuration file from here
3698 assert not (constants.CLUSTER_CONF_FILE in files_all or
3699 constants.CLUSTER_CONF_FILE in files_vm)
3700 assert not files_mc, "Master candidates not handled in this function"
3702 filemap = [
3703 (online_nodes, files_all),
3704 (online_nodes, files_all_opt),
3705 (vm_nodes, files_vm),
3706 ]
3708 # Upload the files
3709 for (node_list, files) in filemap:
3710 for fname in files:
3711 _UploadHelper(lu, node_list, fname)
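# Illustrative sketch (not part of the original module): the redistribution
# above pairs node lists with file sets and uploads every file to every node
# in the matching list, after the master has been dropped from the targets.
# A simplified stand-alone version; all names here are hypothetical:
def _example_build_filemap(online_nodes, vm_nodes, master_name,
                           files_all, files_all_opt, files_vm):
  # Never distribute to the master node itself
  online = [name for name in online_nodes if name != master_name]
  vm = [name for name in vm_nodes if name != master_name]
  return [
    (online, files_all),
    (online, files_all_opt),
    (vm, files_vm),
    ]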
3714 class LUClusterRedistConf(NoHooksLU):
3715 """Force the redistribution of cluster configuration.
3717 This is a very simple LU.
3722 def ExpandNames(self):
3723 self.needed_locks = {
3724 locking.LEVEL_NODE: locking.ALL_SET,
3726 self.share_locks[locking.LEVEL_NODE] = 1
3728 def Exec(self, feedback_fn):
3729 """Redistribute the configuration.
3732 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3733 _RedistributeAncillaryFiles(self)
3736 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3737 """Sleep and poll for an instance's disk to sync.
3740 if not instance.disks or disks is not None and not disks:
3743 disks = _ExpandCheckDisks(instance, disks)
3746 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3748 node = instance.primary_node
3751 lu.cfg.SetDiskID(dev, node)
3753 # TODO: Convert to utils.Retry
3756 degr_retries = 10 # in seconds, as we sleep 1 second each time
3760 cumul_degraded = False
3761 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3762 msg = rstats.fail_msg
3764 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3767 raise errors.RemoteError("Can't contact node %s for mirror data,"
3768 " aborting." % node)
3771 rstats = rstats.payload
3773 for i, mstat in enumerate(rstats):
3775 lu.LogWarning("Can't compute data for node %s/%s",
3776 node, disks[i].iv_name)
3779 cumul_degraded = (cumul_degraded or
3780 (mstat.is_degraded and mstat.sync_percent is None))
3781 if mstat.sync_percent is not None:
3783 if mstat.estimated_time is not None:
3784 rem_time = ("%s remaining (estimated)" %
3785 utils.FormatSeconds(mstat.estimated_time))
3786 max_time = mstat.estimated_time
3788 rem_time = "no time estimate"
3789 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3790 (disks[i].iv_name, mstat.sync_percent, rem_time))
3792 # if we're done but degraded, let's do a few small retries, to
3793 # make sure we see a stable and not transient situation; therefore
3794 # we force restart of the loop
3795 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3796 logging.info("Degraded disks found, %d retries left", degr_retries)
3804 time.sleep(min(60, max_time))
3807 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3808 return not cumul_degraded
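# Illustrative sketch (not part of the original module): _WaitForSync polls
# the mirror status and, when the sync looks finished but still degraded,
# burns a small retry budget before reporting the result.  A generic
# stand-alone skeleton of that pattern; poll_fn and the timings are
# hypothetical, caller-supplied pieces:
def _example_poll_until_clean(poll_fn, degr_retries=10, max_wait=60):
  import time
  while True:
    (done, degraded, eta) = poll_fn()  # caller-supplied status probe
    if done and degraded and degr_retries > 0:
      # Finished but degraded: retry a few times to rule out a transient state
      degr_retries -= 1
      time.sleep(1)
      continue
    if done:
      return not degraded
    time.sleep(min(max_wait, eta or 1))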
3811 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3812 """Check that mirrors are not degraded.
3814 The ldisk parameter, if True, will change the test from the
3815 is_degraded attribute (which represents overall non-ok status for
3816 the device(s)) to the ldisk (representing the local storage status).
3819 lu.cfg.SetDiskID(dev, node)
3823 if on_primary or dev.AssembleOnSecondary():
3824 rstats = lu.rpc.call_blockdev_find(node, dev)
3825 msg = rstats.fail_msg
3827 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3829 elif not rstats.payload:
3830 lu.LogWarning("Can't find disk on node %s", node)
3834 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3836 result = result and not rstats.payload.is_degraded
3839 for child in dev.children:
3840 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
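# Illustrative sketch (not part of the original module): _CheckDiskConsistency
# recurses into dev.children so that a mirrored device is only reported as
# consistent when every component device is.  A plain-Python analogue over a
# hypothetical nested dict structure:
def _example_tree_consistent(dev):
  # dev is assumed to look like {"ok": bool, "children": [dev, ...]}
  result = dev.get("ok", False)
  for child in dev.get("children", []):
    result = result and _example_tree_consistent(child)
  return result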
3845 class LUOobCommand(NoHooksLU):
3846 """Logical unit for OOB handling.
3850 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3852 def ExpandNames(self):
3853 """Gather locks we need.
3856 if self.op.node_names:
3857 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3858 lock_names = self.op.node_names
3860 lock_names = locking.ALL_SET
3862 self.needed_locks = {
3863 locking.LEVEL_NODE: lock_names,
3866 def CheckPrereq(self):
3867 """Check prerequisites.
3870 - the node exists in the configuration
3873 Any errors are signaled by raising errors.OpPrereqError.
3877 self.master_node = self.cfg.GetMasterNode()
3879 assert self.op.power_delay >= 0.0
3881 if self.op.node_names:
3882 if (self.op.command in self._SKIP_MASTER and
3883 self.master_node in self.op.node_names):
3884 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3885 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3887 if master_oob_handler:
3888 additional_text = ("run '%s %s %s' if you want to operate on the"
3889 " master regardless") % (master_oob_handler,
3893 additional_text = "it does not support out-of-band operations"
3895 raise errors.OpPrereqError(("Operating on the master node %s is not"
3896 " allowed for %s; %s") %
3897 (self.master_node, self.op.command,
3898 additional_text), errors.ECODE_INVAL)
3900 self.op.node_names = self.cfg.GetNodeList()
3901 if self.op.command in self._SKIP_MASTER:
3902 self.op.node_names.remove(self.master_node)
3904 if self.op.command in self._SKIP_MASTER:
3905 assert self.master_node not in self.op.node_names
3907 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
3909 raise errors.OpPrereqError("Node %s not found" % node_name,
3912 self.nodes.append(node)
3914 if (not self.op.ignore_status and
3915 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3916 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3917 " not marked offline") % node_name,
3920 def Exec(self, feedback_fn):
3921 """Execute OOB and return result if we expect any.
3924 master_node = self.master_node
3927 for idx, node in enumerate(utils.NiceSort(self.nodes,
3928 key=lambda node: node.name)):
3929 node_entry = [(constants.RS_NORMAL, node.name)]
3930 ret.append(node_entry)
3932 oob_program = _SupportsOob(self.cfg, node)
3935 node_entry.append((constants.RS_UNAVAIL, None))
3938 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3939 self.op.command, oob_program, node.name)
3940 result = self.rpc.call_run_oob(master_node, oob_program,
3941 self.op.command, node.name,
3945 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3946 node.name, result.fail_msg)
3947 node_entry.append((constants.RS_NODATA, None))
3950 self._CheckPayload(result)
3951 except errors.OpExecError, err:
3952 self.LogWarning("Payload returned by node '%s' is not valid: %s",
3954 node_entry.append((constants.RS_NODATA, None))
3956 if self.op.command == constants.OOB_HEALTH:
3957 # For health we should log important events
3958 for item, status in result.payload:
3959 if status in [constants.OOB_STATUS_WARNING,
3960 constants.OOB_STATUS_CRITICAL]:
3961 self.LogWarning("Item '%s' on node '%s' has status '%s'",
3962 item, node.name, status)
3964 if self.op.command == constants.OOB_POWER_ON:
3965 node.powered = True
3966 elif self.op.command == constants.OOB_POWER_OFF:
3967 node.powered = False
3968 elif self.op.command == constants.OOB_POWER_STATUS:
3969 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3970 if powered != node.powered:
3971 logging.warning(("Recorded power state (%s) of node '%s' does not"
3972 " match actual power state (%s)"), node.powered,
3975 # For configuration changing commands we should update the node
3976 if self.op.command in (constants.OOB_POWER_ON,
3977 constants.OOB_POWER_OFF):
3978 self.cfg.Update(node, feedback_fn)
3980 node_entry.append((constants.RS_NORMAL, result.payload))
3982 if (self.op.command == constants.OOB_POWER_ON and
3983 idx < len(self.nodes) - 1):
3984 time.sleep(self.op.power_delay)
3988 def _CheckPayload(self, result):
3989 """Checks if the payload is valid.
3991 @param result: RPC result
3992 @raises errors.OpExecError: If payload is not valid
3996 if self.op.command == constants.OOB_HEALTH:
3997 if not isinstance(result.payload, list):
3998 errs.append("command 'health' is expected to return a list but got %s" %
3999 type(result.payload))
4001 for item, status in result.payload:
4002 if status not in constants.OOB_STATUSES:
4003 errs.append("health item '%s' has invalid status '%s'" %
4004 (item, status))
4006 if self.op.command == constants.OOB_POWER_STATUS:
4007 if not isinstance(result.payload, dict):
4008 errs.append("power-status is expected to return a dict but got %s" %
4009 type(result.payload))
4011 if self.op.command in [
4012 constants.OOB_POWER_ON,
4013 constants.OOB_POWER_OFF,
4014 constants.OOB_POWER_CYCLE,
4016 if result.payload is not None:
4017 errs.append("%s is expected to not return payload but got '%s'" %
4018 (self.op.command, result.payload))
4021 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4022 utils.CommaJoin(errs))
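# Illustrative sketch (not part of the original module): _CheckPayload expects
# a different payload shape per out-of-band command (a list for the health
# command, a dict for power-status, no payload at all for the power commands).
# A stand-alone version of that dispatch; the command strings used here are
# hypothetical placeholders rather than the real constants:
def _example_check_oob_payload(command, payload):
  errs = []
  if command == "health" and not isinstance(payload, list):
    errs.append("health is expected to return a list but got %s" %
                type(payload))
  elif command == "power-status" and not isinstance(payload, dict):
    errs.append("power-status is expected to return a dict but got %s" %
                type(payload))
  elif (command in ("power-on", "power-off", "power-cycle") and
        payload is not None):
    errs.append("%s is expected to not return payload but got '%s'" %
                (command, payload))
  return errs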
4024 class _OsQuery(_QueryBase):
4025 FIELDS = query.OS_FIELDS
4027 def ExpandNames(self, lu):
4028 # Lock all nodes in shared mode
4029 # Temporary removal of locks, should be reverted later
4030 # TODO: reintroduce locks when they are lighter-weight
4031 lu.needed_locks = {}
4032 #self.share_locks[locking.LEVEL_NODE] = 1
4033 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4035 # The following variables interact with _QueryBase._GetNames
4036 if self.names:
4037 self.wanted = self.names
4038 else:
4039 self.wanted = locking.ALL_SET
4041 self.do_locking = self.use_locking
4043 def DeclareLocks(self, lu, level):
4047 def _DiagnoseByOS(rlist):
4048 """Remaps a per-node return list into an a per-os per-node dictionary
4050 @param rlist: a map with node names as keys and OS objects as values
4053 @return: a dictionary with osnames as keys and as value another
4054 map, with nodes as keys and tuples of (path, status, diagnose,
4055 variants, parameters, api_versions) as values, eg::
4057 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4058 (/srv/..., False, "invalid api")],
4059 "node2": [(/srv/..., True, "", [], [])]}
4064 # we build here the list of nodes that didn't fail the RPC (at RPC
4065 # level), so that nodes with a non-responding node daemon don't
4066 # make all OSes invalid
4067 good_nodes = [node_name for node_name in rlist
4068 if not rlist[node_name].fail_msg]
4069 for node_name, nr in rlist.items():
4070 if nr.fail_msg or not nr.payload:
4072 for (name, path, status, diagnose, variants,
4073 params, api_versions) in nr.payload:
4074 if name not in all_os:
4075 # build a list of nodes for this os containing empty lists
4076 # for each node in node_list
4077 all_os[name] = {}
4078 for nname in good_nodes:
4079 all_os[name][nname] = []
4080 # convert params from [name, help] to (name, help)
4081 params = [tuple(v) for v in params]
4082 all_os[name][node_name].append((path, status, diagnose,
4083 variants, params, api_versions))
4084 return all_os
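# Illustrative sketch (not part of the original module): the remapping above
# turns {node: [OS entries]} into {os_name: {node: [entries]}}, pre-seeding an
# empty list for every reachable node so that a missing OS shows up as an
# explicit empty entry.  A simplified stand-alone version over plain dicts;
# the data layout used here is hypothetical:
def _example_remap_by_os(per_node):
  good_nodes = list(per_node.keys())
  per_os = {}
  for node, entries in per_node.items():
    for (name, info) in entries:
      if name not in per_os:
        # Seed every known node with an empty list for this OS
        per_os[name] = dict((n, []) for n in good_nodes)
      per_os[name][node].append(info)
  return per_os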
4086 def _GetQueryData(self, lu):
4087 """Computes the list of nodes and their attributes.
4090 # Locking is not used
4091 assert not (compat.any(lu.glm.is_owned(level)
4092 for level in locking.LEVELS
4093 if level != locking.LEVEL_CLUSTER) or
4094 self.do_locking or self.use_locking)
4096 valid_nodes = [node.name
4097 for node in lu.cfg.GetAllNodesInfo().values()
4098 if not node.offline and node.vm_capable]
4099 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4100 cluster = lu.cfg.GetClusterInfo()
4104 for (os_name, os_data) in pol.items():
4105 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4106 hidden=(os_name in cluster.hidden_os),
4107 blacklisted=(os_name in cluster.blacklisted_os))
4111 api_versions = set()
4113 for idx, osl in enumerate(os_data.values()):
4114 info.valid = bool(info.valid and osl and osl[0][1])
4118 (node_variants, node_params, node_api) = osl[0][3:6]
4119 if idx == 0:
4120 # First entry
4121 variants.update(node_variants)
4122 parameters.update(node_params)
4123 api_versions.update(node_api)
4124 else:
4125 # Filter out inconsistent values
4126 variants.intersection_update(node_variants)
4127 parameters.intersection_update(node_params)
4128 api_versions.intersection_update(node_api)
4130 info.variants = list(variants)
4131 info.parameters = list(parameters)
4132 info.api_versions = list(api_versions)
4134 data[os_name] = info
4136 # Prepare data in requested order
4137 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4138 if name in data]
4141 class LUOsDiagnose(NoHooksLU):
4142 """Logical unit for OS diagnose/query.
4148 def _BuildFilter(fields, names):
4149 """Builds a filter for querying OSes.
4152 name_filter = qlang.MakeSimpleFilter("name", names)
4154 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4155 # respective field is not requested
4156 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4157 for fname in ["hidden", "blacklisted"]
4158 if fname not in fields]
4159 if "valid" not in fields:
4160 status_filter.append([qlang.OP_TRUE, "valid"])
4162 if status_filter:
4163 status_filter.insert(0, qlang.OP_AND)
4164 else:
4165 status_filter = None
4167 if name_filter and status_filter:
4168 return [qlang.OP_AND, name_filter, status_filter]
4169 elif name_filter:
4170 return name_filter
4171 else:
4172 return status_filter
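# Illustrative sketch (not part of the original module): _BuildFilter combines
# an optional name filter with an optional status filter, AND-ing them only
# when both are present.  The same decision table in stand-alone form; the
# "&" operator spelling is a hypothetical placeholder for the real qlang
# constant:
def _example_combine_filters(name_filter, status_filter):
  if name_filter and status_filter:
    return ["&", name_filter, status_filter]
  elif name_filter:
    return name_filter
  else:
    return status_filter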
4174 def CheckArguments(self):
4175 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4176 self.op.output_fields, False)
4178 def ExpandNames(self):
4179 self.oq.ExpandNames(self)
4181 def Exec(self, feedback_fn):
4182 return self.oq.OldStyleQuery(self)
4185 class LUNodeRemove(LogicalUnit):
4186 """Logical unit for removing a node.
4189 HPATH = "node-remove"
4190 HTYPE = constants.HTYPE_NODE
4192 def BuildHooksEnv(self):
4195 This doesn't run on the target node in the pre phase as a failed
4196 node would then be impossible to remove.
4200 "OP_TARGET": self.op.node_name,
4201 "NODE_NAME": self.op.node_name,
4204 def BuildHooksNodes(self):
4205 """Build hooks nodes.
4208 all_nodes = self.cfg.GetNodeList()
4209 try:
4210 all_nodes.remove(self.op.node_name)
4211 except ValueError:
4212 logging.warning("Node '%s', which is about to be removed, was not found"
4213 " in the list of all nodes", self.op.node_name)
4214 return (all_nodes, all_nodes)
4216 def CheckPrereq(self):
4217 """Check prerequisites.
4220 - the node exists in the configuration
4221 - it does not have primary or secondary instances
4222 - it's not the master
4224 Any errors are signaled by raising errors.OpPrereqError.
4227 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4228 node = self.cfg.GetNodeInfo(self.op.node_name)
4229 assert node is not None
4231 masternode = self.cfg.GetMasterNode()
4232 if node.name == masternode:
4233 raise errors.OpPrereqError("Node is the master node, failover to another"
4234 " node is required", errors.ECODE_INVAL)
4236 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4237 if node.name in instance.all_nodes:
4238 raise errors.OpPrereqError("Instance %s is still running on the node,"
4239 " please remove first" % instance_name,
4241 self.op.node_name = node.name
4244 def Exec(self, feedback_fn):
4245 """Removes the node from the cluster.
4249 logging.info("Stopping the node daemon and removing configs from node %s",
4252 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4254 # Promote nodes to master candidate as needed
4255 _AdjustCandidatePool(self, exceptions=[node.name])
4256 self.context.RemoveNode(node.name)
4258 # Run post hooks on the node before it's removed
4259 _RunPostHook(self, node.name)
4261 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4262 msg = result.fail_msg
4264 self.LogWarning("Errors encountered on the remote node while leaving"
4265 " the cluster: %s", msg)
4267 # Remove node from our /etc/hosts
4268 if self.cfg.GetClusterInfo().modify_etc_hosts:
4269 master_node = self.cfg.GetMasterNode()
4270 result = self.rpc.call_etc_hosts_modify(master_node,
4271 constants.ETC_HOSTS_REMOVE,
4273 result.Raise("Can't update hosts file with new host data")
4274 _RedistributeAncillaryFiles(self)
4277 class _NodeQuery(_QueryBase):
4278 FIELDS = query.NODE_FIELDS
4280 def ExpandNames(self, lu):
4281 lu.needed_locks = {}
4282 lu.share_locks[locking.LEVEL_NODE] = 1
4284 if self.names:
4285 self.wanted = _GetWantedNodes(lu, self.names)
4286 else:
4287 self.wanted = locking.ALL_SET
4289 self.do_locking = (self.use_locking and
4290 query.NQ_LIVE in self.requested_data)
4292 if self.do_locking:
4293 # if we don't request only static fields, we need to lock the nodes
4294 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4296 def DeclareLocks(self, lu, level):
4299 def _GetQueryData(self, lu):
4300 """Computes the list of nodes and their attributes.
4303 all_info = lu.cfg.GetAllNodesInfo()
4305 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4307 # Gather data as requested
4308 if query.NQ_LIVE in self.requested_data:
4309 # filter out non-vm_capable nodes
4310 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4312 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4313 lu.cfg.GetHypervisorType())
4314 live_data = dict((name, nresult.payload)
4315 for (name, nresult) in node_data.items()
4316 if not nresult.fail_msg and nresult.payload)
4320 if query.NQ_INST in self.requested_data:
4321 node_to_primary = dict([(name, set()) for name in nodenames])
4322 node_to_secondary = dict([(name, set()) for name in nodenames])
4324 inst_data = lu.cfg.GetAllInstancesInfo()
4326 for inst in inst_data.values():
4327 if inst.primary_node in node_to_primary:
4328 node_to_primary[inst.primary_node].add(inst.name)
4329 for secnode in inst.secondary_nodes:
4330 if secnode in node_to_secondary:
4331 node_to_secondary[secnode].add(inst.name)
4333 node_to_primary = None
4334 node_to_secondary = None
4336 if query.NQ_OOB in self.requested_data:
4337 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4338 for name, node in all_info.iteritems())
4342 if query.NQ_GROUP in self.requested_data:
4343 groups = lu.cfg.GetAllNodeGroupsInfo()
4347 return query.NodeQueryData([all_info[name] for name in nodenames],
4348 live_data, lu.cfg.GetMasterNode(),
4349 node_to_primary, node_to_secondary, groups,
4350 oob_support, lu.cfg.GetClusterInfo())
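# Illustrative sketch (not part of the original module): the NQ_INST branch
# above inverts the instance list into per-node sets of primary and secondary
# instances.  A stand-alone version over simple objects; the attribute names
# mirror the ones used above, but the input type is hypothetical:
def _example_invert_instances(nodenames, instances):
  node_to_primary = dict((name, set()) for name in nodenames)
  node_to_secondary = dict((name, set()) for name in nodenames)
  for inst in instances:
    if inst.primary_node in node_to_primary:
      node_to_primary[inst.primary_node].add(inst.name)
    for secnode in inst.secondary_nodes:
      if secnode in node_to_secondary:
        node_to_secondary[secnode].add(inst.name)
  return (node_to_primary, node_to_secondary)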
4353 class LUNodeQuery(NoHooksLU):
4354 """Logical unit for querying nodes.
4357 # pylint: disable-msg=W0142
4360 def CheckArguments(self):
4361 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4362 self.op.output_fields, self.op.use_locking)
4364 def ExpandNames(self):
4365 self.nq.ExpandNames(self)
4367 def Exec(self, feedback_fn):
4368 return self.nq.OldStyleQuery(self)
4371 class LUNodeQueryvols(NoHooksLU):
4372 """Logical unit for getting volumes on node(s).
4376 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4377 _FIELDS_STATIC = utils.FieldSet("node")
4379 def CheckArguments(self):
4380 _CheckOutputFields(static=self._FIELDS_STATIC,
4381 dynamic=self._FIELDS_DYNAMIC,
4382 selected=self.op.output_fields)
4384 def ExpandNames(self):
4385 self.needed_locks = {}
4386 self.share_locks[locking.LEVEL_NODE] = 1
4387 if not self.op.nodes:
4388 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4390 self.needed_locks[locking.LEVEL_NODE] = \
4391 _GetWantedNodes(self, self.op.nodes)
4393 def Exec(self, feedback_fn):
4394 """Computes the list of nodes and their attributes.
4397 nodenames = self.owned_locks(locking.LEVEL_NODE)
4398 volumes = self.rpc.call_node_volumes(nodenames)
4400 ilist = self.cfg.GetAllInstancesInfo()
4401 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4403 output = []
4404 for node in nodenames:
4405 nresult = volumes[node]
4408 msg = nresult.fail_msg
4410 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4413 node_vols = sorted(nresult.payload,
4414 key=operator.itemgetter("dev"))
4416 for vol in node_vols:
4417 node_output = []
4418 for field in self.op.output_fields:
4419 if field == "node":
4420 val = node
4421 elif field == "phys":
4422 val = vol["dev"]
4423 elif field == "vg":
4424 val = vol["vg"]
4425 elif field == "name":
4426 val = vol["name"]
4427 elif field == "size":
4428 val = int(float(vol["size"]))
4429 elif field == "instance":
4430 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4431 else:
4432 raise errors.ParameterError(field)
4433 node_output.append(str(val))
4435 output.append(node_output)
4437 return output
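# Illustrative sketch (not part of the original module): the loop above builds
# one output row per volume by dispatching on the requested field names.  A
# compact stand-alone variant using a lookup table instead of an if/elif
# chain; the field names and volume keys are hypothetical:
def _example_volume_row(node, vol, output_fields):
  getters = {
    "node": lambda: node,
    "phys": lambda: vol["dev"],
    "vg": lambda: vol["vg"],
    "name": lambda: vol["name"],
    "size": lambda: int(float(vol["size"])),
    }
  row = []
  for field in output_fields:
    if field not in getters:
      raise ValueError("Unknown field: %s" % field)
    row.append(str(getters[field]()))
  return row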
4440 class LUNodeQueryStorage(NoHooksLU):
4441 """Logical unit for getting information on storage units on node(s).
4444 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4447 def CheckArguments(self):
4448 _CheckOutputFields(static=self._FIELDS_STATIC,
4449 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4450 selected=self.op.output_fields)
4452 def ExpandNames(self):
4453 self.needed_locks = {}
4454 self.share_locks[locking.LEVEL_NODE] = 1
4456 if self.op.nodes:
4457 self.needed_locks[locking.LEVEL_NODE] = \
4458 _GetWantedNodes(self, self.op.nodes)
4459 else:
4460 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4462 def Exec(self, feedback_fn):
4463 """Computes the list of nodes and their attributes.
4466 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4468 # Always get name to sort by
4469 if constants.SF_NAME in self.op.output_fields:
4470 fields = self.op.output_fields[:]
4472 fields = [constants.SF_NAME] + self.op.output_fields
4474 # Never ask for node or type as it's only known to the LU
4475 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4476 while extra in fields:
4477 fields.remove(extra)
4479 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4480 name_idx = field_idx[constants.SF_NAME]
4482 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4483 data = self.rpc.call_storage_list(self.nodes,
4484 self.op.storage_type, st_args,
4485 self.op.name, fields)
4487 result = []
4489 for node in utils.NiceSort(self.nodes):
4490 nresult = data[node]
4494 msg = nresult.fail_msg
4496 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4499 rows = dict([(row[name_idx], row) for row in nresult.payload])
4501 for name in utils.NiceSort(rows.keys()):
4502 row = rows[name]
4504 out = []
4506 for field in self.op.output_fields:
4507 if field == constants.SF_NODE:
4508 val = node
4509 elif field == constants.SF_TYPE:
4510 val = self.op.storage_type
4511 elif field in field_idx:
4512 val = row[field_idx[field]]
4513 else:
4514 raise errors.ParameterError(field)
4515 out.append(val)
4517 result.append(out)
4519 return result
4523 class _InstanceQuery(_QueryBase):
4524 FIELDS = query.INSTANCE_FIELDS
4526 def ExpandNames(self, lu):
4527 lu.needed_locks = {}
4528 lu.share_locks = _ShareAll()
4530 if self.names:
4531 self.wanted = _GetWantedInstances(lu, self.names)
4532 else:
4533 self.wanted = locking.ALL_SET
4535 self.do_locking = (self.use_locking and
4536 query.IQ_LIVE in self.requested_data)
4537 if self.do_locking:
4538 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4539 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4540 lu.needed_locks[locking.LEVEL_NODE] = []
4541 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4543 self.do_grouplocks = (self.do_locking and
4544 query.IQ_NODES in self.requested_data)
4546 def DeclareLocks(self, lu, level):
4548 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4549 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4551 # Lock all groups used by instances optimistically; this requires going
4552 # via the node before it's locked, requiring verification later on
4553 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4554 set(group_uuid
4555 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4556 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4557 elif level == locking.LEVEL_NODE:
4558 lu._LockInstancesNodes() # pylint: disable-msg=W0212
4561 def _CheckGroupLocks(lu):
4562 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4563 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4565 # Check if node groups for locked instances are still correct
4566 for instance_name in owned_instances:
4567 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4569 def _GetQueryData(self, lu):
4570 """Computes the list of instances and their attributes.
4573 if self.do_grouplocks:
4574 self._CheckGroupLocks(lu)
4576 cluster = lu.cfg.GetClusterInfo()
4577 all_info = lu.cfg.GetAllInstancesInfo()
4579 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4581 instance_list = [all_info[name] for name in instance_names]
4582 nodes = frozenset(itertools.chain(*(inst.all_nodes
4583 for inst in instance_list)))
4584 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4587 wrongnode_inst = set()
4589 # Gather data as requested
4590 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4592 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4594 result = node_data[name]
4596 # offline nodes will be in both lists
4597 assert result.fail_msg
4598 offline_nodes.append(name)
4600 bad_nodes.append(name)
4601 elif result.payload:
4602 for inst in result.payload:
4603 if inst in all_info:
4604 if all_info[inst].primary_node == name:
4605 live_data.update(result.payload)
4607 wrongnode_inst.add(inst)
4609 # orphan instance; we don't list it here as we don't
4610 # handle this case yet in the output of instance listing
4611 logging.warning("Orphan instance '%s' found on node %s",
4613 # else no instance is alive
4617 if query.IQ_DISKUSAGE in self.requested_data:
4618 disk_usage = dict((inst.name,
4619 _ComputeDiskSize(inst.disk_template,
4620 [{constants.IDISK_SIZE: disk.size}
4621 for disk in inst.disks]))
4622 for inst in instance_list)
4626 if query.IQ_CONSOLE in self.requested_data:
4627 consinfo = {}
4628 for inst in instance_list:
4629 if inst.name in live_data:
4630 # Instance is running
4631 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4632 else:
4633 consinfo[inst.name] = None
4634 assert set(consinfo.keys()) == set(instance_names)
4638 if query.IQ_NODES in self.requested_data:
4639 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4641 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4642 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4643 for uuid in set(map(operator.attrgetter("group"),
4649 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4650 disk_usage, offline_nodes, bad_nodes,
4651 live_data, wrongnode_inst, consinfo,
4655 class LUQuery(NoHooksLU):
4656 """Query for resources/items of a certain kind.
4659 # pylint: disable-msg=W0142
4662 def CheckArguments(self):
4663 qcls = _GetQueryImplementation(self.op.what)
4665 self.impl = qcls(self.op.filter, self.op.fields, False)
4667 def ExpandNames(self):
4668 self.impl.ExpandNames(self)
4670 def DeclareLocks(self, level):
4671 self.impl.DeclareLocks(self, level)
4673 def Exec(self, feedback_fn):
4674 return self.impl.NewStyleQuery(self)
4677 class LUQueryFields(NoHooksLU):
4678 """Query for resources/items of a certain kind.
4681 # pylint: disable-msg=W0142
4684 def CheckArguments(self):
4685 self.qcls = _GetQueryImplementation(self.op.what)
4687 def ExpandNames(self):
4688 self.needed_locks = {}
4690 def Exec(self, feedback_fn):
4691 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4694 class LUNodeModifyStorage(NoHooksLU):
4695 """Logical unit for modifying a storage volume on a node.
4700 def CheckArguments(self):
4701 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4703 storage_type = self.op.storage_type
4705 try:
4706 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4707 except KeyError:
4708 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4709 " modified" % storage_type,
4712 diff = set(self.op.changes.keys()) - modifiable
4713 if diff:
4714 raise errors.OpPrereqError("The following fields can not be modified for"
4715 " storage units of type '%s': %r" %
4716 (storage_type, list(diff)),
4719 def ExpandNames(self):
4720 self.needed_locks = {
4721 locking.LEVEL_NODE: self.op.node_name,
4724 def Exec(self, feedback_fn):
4725 """Computes the list of nodes and their attributes.
4728 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4729 result = self.rpc.call_storage_modify(self.op.node_name,
4730 self.op.storage_type, st_args,
4731 self.op.name, self.op.changes)
4732 result.Raise("Failed to modify storage unit '%s' on %s" %
4733 (self.op.name, self.op.node_name))
4736 class LUNodeAdd(LogicalUnit):
4737 """Logical unit for adding node to the cluster.
4740 HPATH = "node-add"
4741 HTYPE = constants.HTYPE_NODE
4742 _NFLAGS = ["master_capable", "vm_capable"]
4744 def CheckArguments(self):
4745 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4746 # validate/normalize the node name
4747 self.hostname = netutils.GetHostname(name=self.op.node_name,
4748 family=self.primary_ip_family)
4749 self.op.node_name = self.hostname.name
4751 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4752 raise errors.OpPrereqError("Cannot readd the master node",
4755 if self.op.readd and self.op.group:
4756 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4757 " being readded", errors.ECODE_INVAL)
4759 def BuildHooksEnv(self):
4762 This will run on all nodes before, and on all nodes + the new node after.
4766 "OP_TARGET": self.op.node_name,
4767 "NODE_NAME": self.op.node_name,
4768 "NODE_PIP": self.op.primary_ip,
4769 "NODE_SIP": self.op.secondary_ip,
4770 "MASTER_CAPABLE": str(self.op.master_capable),
4771 "VM_CAPABLE": str(self.op.vm_capable),
4774 def BuildHooksNodes(self):
4775 """Build hooks nodes.
4778 # Exclude added node
4779 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4780 post_nodes = pre_nodes + [self.op.node_name, ]
4782 return (pre_nodes, post_nodes)
4784 def CheckPrereq(self):
4785 """Check prerequisites.
4788 - the new node is not already in the config
4790 - its parameters (single/dual homed) match the cluster
4792 Any errors are signaled by raising errors.OpPrereqError.
4796 hostname = self.hostname
4797 node = hostname.name
4798 primary_ip = self.op.primary_ip = hostname.ip
4799 if self.op.secondary_ip is None:
4800 if self.primary_ip_family == netutils.IP6Address.family:
4801 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4802 " IPv4 address must be given as secondary",
4804 self.op.secondary_ip = primary_ip
4806 secondary_ip = self.op.secondary_ip
4807 if not netutils.IP4Address.IsValid(secondary_ip):
4808 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4809 " address" % secondary_ip, errors.ECODE_INVAL)
4811 node_list = cfg.GetNodeList()
4812 if not self.op.readd and node in node_list:
4813 raise errors.OpPrereqError("Node %s is already in the configuration" %
4814 node, errors.ECODE_EXISTS)
4815 elif self.op.readd and node not in node_list:
4816 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4819 self.changed_primary_ip = False
4821 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4822 if self.op.readd and node == existing_node_name:
4823 if existing_node.secondary_ip != secondary_ip:
4824 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4825 " address configuration as before",
4827 if existing_node.primary_ip != primary_ip:
4828 self.changed_primary_ip = True
4832 if (existing_node.primary_ip == primary_ip or
4833 existing_node.secondary_ip == primary_ip or
4834 existing_node.primary_ip == secondary_ip or
4835 existing_node.secondary_ip == secondary_ip):
4836 raise errors.OpPrereqError("New node ip address(es) conflict with"
4837 " existing node %s" % existing_node.name,
4838 errors.ECODE_NOTUNIQUE)
4840 # After this 'if' block, None is no longer a valid value for the
4841 # _capable op attributes
4842 if self.op.readd:
4843 old_node = self.cfg.GetNodeInfo(node)
4844 assert old_node is not None, "Can't retrieve locked node %s" % node
4845 for attr in self._NFLAGS:
4846 if getattr(self.op, attr) is None:
4847 setattr(self.op, attr, getattr(old_node, attr))
4848 else:
4849 for attr in self._NFLAGS:
4850 if getattr(self.op, attr) is None:
4851 setattr(self.op, attr, True)
4853 if self.op.readd and not self.op.vm_capable:
4854 pri, sec = cfg.GetNodeInstances(node)
4855 if pri or sec:
4856 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4857 " flag set to false, but it already holds"
4858 " instances" % node,
4861 # check that the type of the node (single versus dual homed) is the
4862 # same as for the master
4863 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4864 master_singlehomed = myself.secondary_ip == myself.primary_ip
4865 newbie_singlehomed = secondary_ip == primary_ip
4866 if master_singlehomed != newbie_singlehomed:
4867 if master_singlehomed:
4868 raise errors.OpPrereqError("The master has no secondary ip but the"
4869 " new node has one",
4872 raise errors.OpPrereqError("The master has a secondary ip but the"
4873 " new node doesn't have one",
4876 # checks reachability
4877 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4878 raise errors.OpPrereqError("Node not reachable by ping",
4879 errors.ECODE_ENVIRON)
4881 if not newbie_singlehomed:
4882 # check reachability from my secondary ip to newbie's secondary ip
4883 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4884 source=myself.secondary_ip):
4885 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4886 " based ping to node daemon port",
4887 errors.ECODE_ENVIRON)
4894 if self.op.master_capable:
4895 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4896 else:
4897 self.master_candidate = False
4899 if self.op.readd:
4900 self.new_node = old_node
4901 else:
4902 node_group = cfg.LookupNodeGroup(self.op.group)
4903 self.new_node = objects.Node(name=node,
4904 primary_ip=primary_ip,
4905 secondary_ip=secondary_ip,
4906 master_candidate=self.master_candidate,
4907 offline=False, drained=False,
4908 group=node_group)
4910 if self.op.ndparams:
4911 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4913 def Exec(self, feedback_fn):
4914 """Adds the new node to the cluster.
4917 new_node = self.new_node
4918 node = new_node.name
4920 # We are adding a new node, so we assume it's powered
4921 new_node.powered = True
4923 # for re-adds, reset the offline/drained/master-candidate flags;
4924 # we need to reset here, otherwise offline would prevent RPC calls
4925 # later in the procedure; this also means that if the re-add
4926 # fails, we are left with a non-offlined, broken node
4928 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4929 self.LogInfo("Readding a node, the offline/drained flags were reset")
4930 # if we demote the node, we do cleanup later in the procedure
4931 new_node.master_candidate = self.master_candidate
4932 if self.changed_primary_ip:
4933 new_node.primary_ip = self.op.primary_ip
4935 # copy the master/vm_capable flags
4936 for attr in self._NFLAGS:
4937 setattr(new_node, attr, getattr(self.op, attr))
4939 # notify the user about any possible mc promotion
4940 if new_node.master_candidate:
4941 self.LogInfo("Node will be a master candidate")
4943 if self.op.ndparams:
4944 new_node.ndparams = self.op.ndparams
4946 new_node.ndparams = {}
4948 # check connectivity
4949 result = self.rpc.call_version([node])[node]
4950 result.Raise("Can't get version information from node %s" % node)
4951 if constants.PROTOCOL_VERSION == result.payload:
4952 logging.info("Communication to node %s fine, sw version %s match",
4953 node, result.payload)
4955 raise errors.OpExecError("Version mismatch master version %s,"
4956 " node version %s" %
4957 (constants.PROTOCOL_VERSION, result.payload))
4959 # Add node to our /etc/hosts, and add key to known_hosts
4960 if self.cfg.GetClusterInfo().modify_etc_hosts:
4961 master_node = self.cfg.GetMasterNode()
4962 result = self.rpc.call_etc_hosts_modify(master_node,
4963 constants.ETC_HOSTS_ADD,
4966 result.Raise("Can't update hosts file with new host data")
4968 if new_node.secondary_ip != new_node.primary_ip:
4969 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4972 node_verify_list = [self.cfg.GetMasterNode()]
4973 node_verify_param = {
4974 constants.NV_NODELIST: [node],
4975 # TODO: do a node-net-test as well?
4978 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4979 self.cfg.GetClusterName())
4980 for verifier in node_verify_list:
4981 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4982 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4984 for failed in nl_payload:
4985 feedback_fn("ssh/hostname verification failed"
4986 " (checking from %s): %s" %
4987 (verifier, nl_payload[failed]))
4988 raise errors.OpExecError("ssh/hostname verification failed")
4990 if self.op.readd:
4991 _RedistributeAncillaryFiles(self)
4992 self.context.ReaddNode(new_node)
4993 # make sure we redistribute the config
4994 self.cfg.Update(new_node, feedback_fn)
4995 # and make sure the new node will not have old files around
4996 if not new_node.master_candidate:
4997 result = self.rpc.call_node_demote_from_mc(new_node.name)
4998 msg = result.fail_msg
4999 if msg:
5000 self.LogWarning("Node failed to demote itself from master"
5001 " candidate status: %s" % msg)
5002 else:
5003 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5004 additional_vm=self.op.vm_capable)
5005 self.context.AddNode(new_node, self.proc.GetECId())
5008 class LUNodeSetParams(LogicalUnit):
5009 """Modifies the parameters of a node.
5011 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5012 to the node role (as _ROLE_*)
5013 @cvar _R2F: a dictionary from node role to tuples of flags
5014 @cvar _FLAGS: a list of attribute names corresponding to the flags
5017 HPATH = "node-modify"
5018 HTYPE = constants.HTYPE_NODE
5020 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5021 _F2R = {
5022 (True, False, False): _ROLE_CANDIDATE,
5023 (False, True, False): _ROLE_DRAINED,
5024 (False, False, True): _ROLE_OFFLINE,
5025 (False, False, False): _ROLE_REGULAR,
5026 }
5027 _R2F = dict((v, k) for k, v in _F2R.items())
5028 _FLAGS = ["master_candidate", "drained", "offline"]
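# Illustrative sketch (not part of the original module): _F2R maps the
# (master_candidate, drained, offline) flag tuple to a single role and _R2F is
# its inverse, so a node is always in exactly one of the four roles.  A small
# stand-alone demonstration of that round trip, using local copies of the
# tables:
def _example_flags_to_role():
  (ROLE_CANDIDATE, ROLE_DRAINED, ROLE_OFFLINE, ROLE_REGULAR) = range(4)
  f2r = {
    (True, False, False): ROLE_CANDIDATE,
    (False, True, False): ROLE_DRAINED,
    (False, False, True): ROLE_OFFLINE,
    (False, False, False): ROLE_REGULAR,
    }
  r2f = dict((v, k) for k, v in f2r.items())
  # A drained node maps to ROLE_DRAINED and back to its flag tuple
  assert f2r[(False, True, False)] == ROLE_DRAINED
  assert r2f[ROLE_DRAINED] == (False, True, False)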
5030 def CheckArguments(self):
5031 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5032 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5033 self.op.master_capable, self.op.vm_capable,
5034 self.op.secondary_ip, self.op.ndparams]
5035 if all_mods.count(None) == len(all_mods):
5036 raise errors.OpPrereqError("Please pass at least one modification",
5038 if all_mods.count(True) > 1:
5039 raise errors.OpPrereqError("Can't set the node into more than one"
5040 " state at the same time",
5043 # Boolean value that tells us whether we might be demoting from MC
5044 self.might_demote = (self.op.master_candidate == False or
5045 self.op.offline == True or
5046 self.op.drained == True or
5047 self.op.master_capable == False)
5049 if self.op.secondary_ip:
5050 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5051 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5052 " address" % self.op.secondary_ip,
5055 self.lock_all = self.op.auto_promote and self.might_demote
5056 self.lock_instances = self.op.secondary_ip is not None
5058 def ExpandNames(self):
5059 if self.lock_all:
5060 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5061 else:
5062 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5064 if self.lock_instances:
5065 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5067 def DeclareLocks(self, level):
5068 # If we have locked all instances, before waiting to lock nodes, release
5069 # all the ones living on nodes unrelated to the current operation.
5070 if level == locking.LEVEL_NODE and self.lock_instances:
5071 self.affected_instances = []
5072 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5073 instances_keep = []
5075 # Build list of instances to release
5076 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5077 for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5078 if (instance.disk_template in constants.DTS_INT_MIRROR and
5079 self.op.node_name in instance.all_nodes):
5080 instances_keep.append(instance_name)
5081 self.affected_instances.append(instance)
5083 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5085 assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5086 set(instances_keep))
5088 def BuildHooksEnv(self):
5091 This runs on the master node.
5095 "OP_TARGET": self.op.node_name,
5096 "MASTER_CANDIDATE": str(self.op.master_candidate),
5097 "OFFLINE": str(self.op.offline),
5098 "DRAINED": str(self.op.drained),
5099 "MASTER_CAPABLE": str(self.op.master_capable),
5100 "VM_CAPABLE": str(self.op.vm_capable),
5103 def BuildHooksNodes(self):
5104 """Build hooks nodes.
5107 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5110 def CheckPrereq(self):
5111 """Check prerequisites.
5113 This only checks the instance list against the existing names.
5116 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5118 if (self.op.master_candidate is not None or
5119 self.op.drained is not None or
5120 self.op.offline is not None):
5121 # we can't change the master's node flags
5122 if self.op.node_name == self.cfg.GetMasterNode():
5123 raise errors.OpPrereqError("The master role can be changed"
5124 " only via master-failover",
5127 if self.op.master_candidate and not node.master_capable:
5128 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5129 " it a master candidate" % node.name,
5132 if self.op.vm_capable == False:
5133 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5134 if ipri or isec:
5135 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5136 " the vm_capable flag" % node.name,
5139 if node.master_candidate and self.might_demote and not self.lock_all:
5140 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5141 # check if after removing the current node, we're missing master
5142 # candidates
5143 (mc_remaining, mc_should, _) = \
5144 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5145 if mc_remaining < mc_should:
5146 raise errors.OpPrereqError("Not enough master candidates, please"
5147 " pass auto promote option to allow"
5148 " promotion", errors.ECODE_STATE)
5150 self.old_flags = old_flags = (node.master_candidate,
5151 node.drained, node.offline)
5152 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5153 self.old_role = old_role = self._F2R[old_flags]
5155 # Check for ineffective changes
5156 for attr in self._FLAGS:
5157 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5158 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5159 setattr(self.op, attr, None)
5161 # Past this point, any flag change to False means a transition
5162 # away from the respective state, as only real changes are kept
5164 # TODO: We might query the real power state if it supports OOB
5165 if _SupportsOob(self.cfg, node):
5166 if self.op.offline is False and not (node.powered or
5167 self.op.powered == True):
5168 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5169 " offline status can be reset") %
5171 elif self.op.powered is not None:
5172 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5173 " as it does not support out-of-band"
5174 " handling") % self.op.node_name)
5176 # If we're being deofflined/drained, we'll MC ourself if needed
5177 if (self.op.drained == False or self.op.offline == False or
5178 (self.op.master_capable and not node.master_capable)):
5179 if _DecideSelfPromotion(self):
5180 self.op.master_candidate = True
5181 self.LogInfo("Auto-promoting node to master candidate")
5183 # If we're no longer master capable, we'll demote ourselves from MC
5184 if self.op.master_capable == False and node.master_candidate:
5185 self.LogInfo("Demoting from master candidate")
5186 self.op.master_candidate = False
5189 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5190 if self.op.master_candidate:
5191 new_role = self._ROLE_CANDIDATE
5192 elif self.op.drained:
5193 new_role = self._ROLE_DRAINED
5194 elif self.op.offline:
5195 new_role = self._ROLE_OFFLINE
5196 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5197 # False is still in new flags, which means we're un-setting (the
5198 # only) True flag
5199 new_role = self._ROLE_REGULAR
5200 else: # no new flags, nothing, keep old role
5201 new_role = old_role
5203 self.new_role = new_role
5205 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5206 # Trying to transition out of offline status
5207 result = self.rpc.call_version([node.name])[node.name]
5208 if result.fail_msg:
5209 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5210 " to report its version: %s" %
5211 (node.name, result.fail_msg),
5214 self.LogWarning("Transitioning node from offline to online state"
5215 " without using re-add. Please make sure the node"
5218 if self.op.secondary_ip:
5219 # Ok even without locking, because this can't be changed by any LU
5220 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5221 master_singlehomed = master.secondary_ip == master.primary_ip
5222 if master_singlehomed and self.op.secondary_ip:
5223 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5224 " homed cluster", errors.ECODE_INVAL)
5226 if node.offline:
5227 if self.affected_instances:
5228 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5229 " node has instances (%s) configured"
5230 " to use it" % self.affected_instances)
5231 else:
5232 # On online nodes, check that no instances are running, and that
5233 # the node has the new ip and we can reach it.
5234 for instance in self.affected_instances:
5235 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5237 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5238 if master.name != node.name:
5239 # check reachability from master secondary ip to new secondary ip
5240 if not netutils.TcpPing(self.op.secondary_ip,
5241 constants.DEFAULT_NODED_PORT,
5242 source=master.secondary_ip):
5243 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5244 " based ping to node daemon port",
5245 errors.ECODE_ENVIRON)
5247 if self.op.ndparams:
5248 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5249 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5250 self.new_ndparams = new_ndparams
5252 def Exec(self, feedback_fn):
5253 """Modifies a node.
5255 """
5256 node = self.node
5257 old_role = self.old_role
5258 new_role = self.new_role
5260 result = []
5262 if self.op.ndparams:
5263 node.ndparams = self.new_ndparams
5265 if self.op.powered is not None:
5266 node.powered = self.op.powered
5268 for attr in ["master_capable", "vm_capable"]:
5269 val = getattr(self.op, attr)
5270 if val is not None:
5271 setattr(node, attr, val)
5272 result.append((attr, str(val)))
5274 if new_role != old_role:
5275 # Tell the node to demote itself, if no longer MC and not offline
5276 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5277 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5278 if msg:
5279 self.LogWarning("Node failed to demote itself: %s", msg)
5281 new_flags = self._R2F[new_role]
5282 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5283 if of != nf:
5284 result.append((desc, str(nf)))
5285 (node.master_candidate, node.drained, node.offline) = new_flags
5287 # we locked all nodes, we adjust the CP before updating this node
5288 if self.lock_all:
5289 _AdjustCandidatePool(self, [node.name])
5291 if self.op.secondary_ip:
5292 node.secondary_ip = self.op.secondary_ip
5293 result.append(("secondary_ip", self.op.secondary_ip))
5295 # this will trigger configuration file update, if needed
5296 self.cfg.Update(node, feedback_fn)
5298 # this will trigger job queue propagation or cleanup if the mc
5299 # flag changed
5300 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5301 self.context.ReaddNode(node)
5303 return result
5306 class LUNodePowercycle(NoHooksLU):
5307 """Powercycles a node.
5312 def CheckArguments(self):
5313 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5314 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5315 raise errors.OpPrereqError("The node is the master and the force"
5316 " parameter was not set",
5319 def ExpandNames(self):
5320 """Locking for PowercycleNode.
5322 This is a last-resort option and shouldn't block on other
5323 jobs. Therefore, we grab no locks.
5326 self.needed_locks = {}
5328 def Exec(self, feedback_fn):
5332 result = self.rpc.call_node_powercycle(self.op.node_name,
5333 self.cfg.GetHypervisorType())
5334 result.Raise("Failed to schedule the reboot")
5335 return result.payload
5338 class LUClusterQuery(NoHooksLU):
5339 """Query cluster configuration.
5344 def ExpandNames(self):
5345 self.needed_locks = {}
5347 def Exec(self, feedback_fn):
5348 """Return cluster config.
5351 cluster = self.cfg.GetClusterInfo()
5352 os_hvp = {}
5354 # Filter just for enabled hypervisors
5355 for os_name, hv_dict in cluster.os_hvp.items():
5356 os_hvp[os_name] = {}
5357 for hv_name, hv_params in hv_dict.items():
5358 if hv_name in cluster.enabled_hypervisors:
5359 os_hvp[os_name][hv_name] = hv_params
5361 # Convert ip_family to ip_version
5362 primary_ip_version = constants.IP4_VERSION
5363 if cluster.primary_ip_family == netutils.IP6Address.family:
5364 primary_ip_version = constants.IP6_VERSION
5367 "software_version": constants.RELEASE_VERSION,
5368 "protocol_version": constants.PROTOCOL_VERSION,
5369 "config_version": constants.CONFIG_VERSION,
5370 "os_api_version": max(constants.OS_API_VERSIONS),
5371 "export_version": constants.EXPORT_VERSION,
5372 "architecture": (platform.architecture()[0], platform.machine()),
5373 "name": cluster.cluster_name,
5374 "master": cluster.master_node,
5375 "default_hypervisor": cluster.enabled_hypervisors[0],
5376 "enabled_hypervisors": cluster.enabled_hypervisors,
5377 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5378 for hypervisor_name in cluster.enabled_hypervisors]),
5380 "beparams": cluster.beparams,
5381 "osparams": cluster.osparams,
5382 "nicparams": cluster.nicparams,
5383 "ndparams": cluster.ndparams,
5384 "candidate_pool_size": cluster.candidate_pool_size,
5385 "master_netdev": cluster.master_netdev,
5386 "volume_group_name": cluster.volume_group_name,
5387 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5388 "file_storage_dir": cluster.file_storage_dir,
5389 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5390 "maintain_node_health": cluster.maintain_node_health,
5391 "ctime": cluster.ctime,
5392 "mtime": cluster.mtime,
5393 "uuid": cluster.uuid,
5394 "tags": list(cluster.GetTags()),
5395 "uid_pool": cluster.uid_pool,
5396 "default_iallocator": cluster.default_iallocator,
5397 "reserved_lvs": cluster.reserved_lvs,
5398 "primary_ip_version": primary_ip_version,
5399 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5400 "hidden_os": cluster.hidden_os,
5401 "blacklisted_os": cluster.blacklisted_os,
5407 class LUClusterConfigQuery(NoHooksLU):
5408 """Return configuration values.
5412 _FIELDS_DYNAMIC = utils.FieldSet()
5413 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5414 "watcher_pause", "volume_group_name")
5416 def CheckArguments(self):
5417 _CheckOutputFields(static=self._FIELDS_STATIC,
5418 dynamic=self._FIELDS_DYNAMIC,
5419 selected=self.op.output_fields)
5421 def ExpandNames(self):
5422 self.needed_locks = {}
5424 def Exec(self, feedback_fn):
5425 """Dump a representation of the cluster config to the standard output.
5428 values = []
5429 for field in self.op.output_fields:
5430 if field == "cluster_name":
5431 entry = self.cfg.GetClusterName()
5432 elif field == "master_node":
5433 entry = self.cfg.GetMasterNode()
5434 elif field == "drain_flag":
5435 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5436 elif field == "watcher_pause":
5437 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5438 elif field == "volume_group_name":
5439 entry = self.cfg.GetVGName()
5441 raise errors.ParameterError(field)
5442 values.append(entry)
5444 return values
5446 class LUInstanceActivateDisks(NoHooksLU):
5447 """Bring up an instance's disks.
5452 def ExpandNames(self):
5453 self._ExpandAndLockInstance()
5454 self.needed_locks[locking.LEVEL_NODE] = []
5455 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5457 def DeclareLocks(self, level):
5458 if level == locking.LEVEL_NODE:
5459 self._LockInstancesNodes()
5461 def CheckPrereq(self):
5462 """Check prerequisites.
5464 This checks that the instance is in the cluster.
5467 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5468 assert self.instance is not None, \
5469 "Cannot retrieve locked instance %s" % self.op.instance_name
5470 _CheckNodeOnline(self, self.instance.primary_node)
5472 def Exec(self, feedback_fn):
5473 """Activate the disks.
5476 disks_ok, disks_info = \
5477 _AssembleInstanceDisks(self, self.instance,
5478 ignore_size=self.op.ignore_size)
5479 if not disks_ok:
5480 raise errors.OpExecError("Cannot activate block devices")
5482 return disks_info
5485 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5486 ignore_size=False):
5487 """Prepare the block devices for an instance.
5489 This sets up the block devices on all nodes.
5491 @type lu: L{LogicalUnit}
5492 @param lu: the logical unit on whose behalf we execute
5493 @type instance: L{objects.Instance}
5494 @param instance: the instance for whose disks we assemble
5495 @type disks: list of L{objects.Disk} or None
5496 @param disks: which disks to assemble (or all, if None)
5497 @type ignore_secondaries: boolean
5498 @param ignore_secondaries: if true, errors on secondary nodes
5499 won't result in an error return from the function
5500 @type ignore_size: boolean
5501 @param ignore_size: if true, the current known size of the disk
5502 will not be used during the disk activation, useful for cases
5503 when the size is wrong
5504 @return: False if the operation failed, otherwise a list of
5505 (host, instance_visible_name, node_visible_name)
5506 with the mapping from node devices to instance devices
5508 """
5509 device_info = []
5510 disks_ok = True
5511 iname = instance.name
5512 disks = _ExpandCheckDisks(instance, disks)
5514 # With the two-pass mechanism we try to reduce the window of
5515 # opportunity for the race condition of switching DRBD to primary
5516 # before handshaking occurred, but we do not eliminate it
5518 # The proper fix would be to wait (with some limits) until the
5519 # connection has been made and drbd transitions from WFConnection
5520 # into any other network-connected state (Connected, SyncTarget,
5521 # SyncSource, etc.)
5523 # 1st pass, assemble on all nodes in secondary mode
5524 for idx, inst_disk in enumerate(disks):
5525 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5526 if ignore_size:
5527 node_disk = node_disk.Copy()
5528 node_disk.UnsetSize()
5529 lu.cfg.SetDiskID(node_disk, node)
5530 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5531 msg = result.fail_msg
5532 if msg:
5533 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5534 " (is_primary=False, pass=1): %s",
5535 inst_disk.iv_name, node, msg)
5536 if not ignore_secondaries:
5537 disks_ok = False
5539 # FIXME: race condition on drbd migration to primary
5541 # 2nd pass, do only the primary node
5542 for idx, inst_disk in enumerate(disks):
5545 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5546 if node != instance.primary_node:
5547 continue
5548 if ignore_size:
5549 node_disk = node_disk.Copy()
5550 node_disk.UnsetSize()
5551 lu.cfg.SetDiskID(node_disk, node)
5552 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5553 msg = result.fail_msg
5554 if msg:
5555 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5556 " (is_primary=True, pass=2): %s",
5557 inst_disk.iv_name, node, msg)
5558 disks_ok = False
5559 else:
5560 dev_path = result.payload
5562 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5564 # leave the disks configured for the primary node
5565 # this is a workaround that would be fixed better by
5566 # improving the logical/physical id handling
5567 for disk in disks:
5568 lu.cfg.SetDiskID(disk, instance.primary_node)
5570 return disks_ok, device_info
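# Illustrative sketch (not part of the original module): the helper above
# assembles every disk twice, first on all nodes in secondary mode and only
# then on the primary node, to narrow the DRBD primary-before-handshake race.
# A stand-alone skeleton of that ordering; assemble_fn and the disk layout are
# hypothetical, caller-supplied pieces:
def _example_two_pass_assemble(disks, primary_node, assemble_fn):
  ok = True
  # 1st pass: secondary mode on every node holding the disk
  for disk in disks:
    for node in disk["nodes"]:
      ok = assemble_fn(node, disk, as_primary=False) and ok
  # 2nd pass: primary node only, in primary mode
  for disk in disks:
    ok = assemble_fn(primary_node, disk, as_primary=True) and ok
  return ok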
5573 def _StartInstanceDisks(lu, instance, force):
5574 """Start the disks of an instance.
5577 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5578 ignore_secondaries=force)
5579 if not disks_ok:
5580 _ShutdownInstanceDisks(lu, instance)
5581 if force is not None and not force:
5582 lu.proc.LogWarning("", hint="If the message above refers to a"
5583 " secondary node,"
5584 " you can retry the operation using '--force'.")
5585 raise errors.OpExecError("Disk consistency error")
5588 class LUInstanceDeactivateDisks(NoHooksLU):
5589 """Shutdown an instance's disks.
5594 def ExpandNames(self):
5595 self._ExpandAndLockInstance()
5596 self.needed_locks[locking.LEVEL_NODE] = []
5597 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5599 def DeclareLocks(self, level):
5600 if level == locking.LEVEL_NODE:
5601 self._LockInstancesNodes()
5603 def CheckPrereq(self):
5604 """Check prerequisites.
5606 This checks that the instance is in the cluster.
5609 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5610 assert self.instance is not None, \
5611 "Cannot retrieve locked instance %s" % self.op.instance_name
5613 def Exec(self, feedback_fn):
5614 """Deactivate the disks
5617 instance = self.instance
5618 if self.op.force:
5619 _ShutdownInstanceDisks(self, instance)
5620 else:
5621 _SafeShutdownInstanceDisks(self, instance)
5624 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5625 """Shutdown block devices of an instance.
5627 This function checks if an instance is running, before calling
5628 _ShutdownInstanceDisks.
5631 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5632 _ShutdownInstanceDisks(lu, instance, disks=disks)
5635 def _ExpandCheckDisks(instance, disks):
5636 """Return the instance disks selected by the disks list
5638 @type disks: list of L{objects.Disk} or None
5639 @param disks: selected disks
5640 @rtype: list of L{objects.Disk}
5641 @return: selected instance disks to act on
5645 return instance.disks
5647 if not set(disks).issubset(instance.disks):
5648 raise errors.ProgrammerError("Can only act on disks belonging to the"
5653 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5654 """Shutdown block devices of an instance.
5656 This does the shutdown on all nodes of the instance.
5658 If ignore_primary is false, an error on the primary node makes the whole
5659 shutdown report failure; errors on offline nodes are ignored.
5663 disks = _ExpandCheckDisks(instance, disks)
5666 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5667 lu.cfg.SetDiskID(top_disk, node)
5668 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5669 msg = result.fail_msg
5671 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5672 disk.iv_name, node, msg)
5673 if ((node == instance.primary_node and not ignore_primary) or
5674 (node != instance.primary_node and not result.offline)):
5679 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5680 """Checks if a node has enough free memory.
5682 This function checks if a given node has the needed amount of free
5683 memory. In case the node has less memory or we cannot get the
5684 information from the node, this function raises an OpPrereqError
5685 exception.
5687 @type lu: C{LogicalUnit}
5688 @param lu: a logical unit from which we get configuration data
5690 @param node: the node to check
5691 @type reason: C{str}
5692 @param reason: string to use in the error message
5693 @type requested: C{int}
5694 @param requested: the amount of memory in MiB to check for
5695 @type hypervisor_name: C{str}
5696 @param hypervisor_name: the hypervisor to ask for memory stats
5697 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5698 we cannot check the node
5701 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5702 nodeinfo[node].Raise("Can't get data from node %s" % node,
5703 prereq=True, ecode=errors.ECODE_ENVIRON)
5704 free_mem = nodeinfo[node].payload.get("memory_free", None)
5705 if not isinstance(free_mem, int):
5706 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5707 " was '%s'" % (node, free_mem),
5708 errors.ECODE_ENVIRON)
5709 if requested > free_mem:
5710 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5711 " needed %s MiB, available %s MiB" %
5712 (node, reason, requested, free_mem),
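# Illustrative use (sketch, mirroring LUInstanceStartup.CheckPrereq below):
# before starting an instance, its LU checks that the primary node can host
# the configured memory, e.g.:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)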
5716 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5717 """Checks if nodes have enough free disk space in the all VGs.
5719 This function checks if all given nodes have the needed amount of
5720 free disk. In case any node has less disk or we cannot get the
5721 information from the node, this function raises an OpPrereqError
5722 exception.
5724 @type lu: C{LogicalUnit}
5725 @param lu: a logical unit from which we get configuration data
5726 @type nodenames: C{list}
5727 @param nodenames: the list of node names to check
5728 @type req_sizes: C{dict}
5729 @param req_sizes: the hash of vg and corresponding amount of disk in
5730 MiB to check for
5731 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5732 or we cannot check the node
5735 for vg, req_size in req_sizes.items():
5736 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
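# Illustrative req_sizes value (hypothetical volume groups and sizes): a dict
# mapping each VG name to the total MiB requested from it, e.g.
# {"xenvg": 10240, "fastvg": 2048}; every entry is verified on all nodes in
# nodenames via _CheckNodesFreeDiskOnVG below.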
5739 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5740 """Checks if nodes have enough free disk space in the specified VG.
5742 This function checks if all given nodes have the needed amount of
5743 free disk. In case any node has less disk or we cannot get the
5744 information from the node, this function raises an OpPrereqError
5745 exception.
5747 @type lu: C{LogicalUnit}
5748 @param lu: a logical unit from which we get configuration data
5749 @type nodenames: C{list}
5750 @param nodenames: the list of node names to check
5752 @param vg: the volume group to check
5753 @type requested: C{int}
5754 @param requested: the amount of disk in MiB to check for
5755 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5756 or we cannot check the node
5759 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5760 for node in nodenames:
5761 info = nodeinfo[node]
5762 info.Raise("Cannot get current information from node %s" % node,
5763 prereq=True, ecode=errors.ECODE_ENVIRON)
5764 vg_free = info.payload.get("vg_free", None)
5765 if not isinstance(vg_free, int):
5766 raise errors.OpPrereqError("Can't compute free disk space on node"
5767 " %s for vg %s, result was '%s'" %
5768 (node, vg, vg_free), errors.ECODE_ENVIRON)
5769 if requested > vg_free:
5770 raise errors.OpPrereqError("Not enough disk space on target node %s"
5771 " vg %s: required %d MiB, available %d MiB" %
5772 (node, vg, requested, vg_free),
5776 class LUInstanceStartup(LogicalUnit):
5777 """Starts an instance.
5780 HPATH = "instance-start"
5781 HTYPE = constants.HTYPE_INSTANCE
5784 def CheckArguments(self):
5786 if self.op.beparams:
5787 # fill the beparams dict
5788 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5790 def ExpandNames(self):
5791 self._ExpandAndLockInstance()
5793 def BuildHooksEnv(self):
5796 This runs on master, primary and secondary nodes of the instance.
5800 "FORCE": self.op.force,
5803 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5807 def BuildHooksNodes(self):
5808 """Build hooks nodes.
5811 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5814 def CheckPrereq(self):
5815 """Check prerequisites.
5817 This checks that the instance is in the cluster.
5820 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5821 assert self.instance is not None, \
5822 "Cannot retrieve locked instance %s" % self.op.instance_name
5825 if self.op.hvparams:
5826 # check hypervisor parameter syntax (locally)
5827 cluster = self.cfg.GetClusterInfo()
5828 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5829 filled_hvp = cluster.FillHV(instance)
5830 filled_hvp.update(self.op.hvparams)
5831 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5832 hv_type.CheckParameterSyntax(filled_hvp)
5833 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5835 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5837 if self.primary_offline and self.op.ignore_offline_nodes:
5838 self.proc.LogWarning("Ignoring offline primary node")
5840 if self.op.hvparams or self.op.beparams:
5841 self.proc.LogWarning("Overridden parameters are ignored")
5843 _CheckNodeOnline(self, instance.primary_node)
5845 bep = self.cfg.GetClusterInfo().FillBE(instance)
5847 # check bridges existence
5848 _CheckInstanceBridgesExist(self, instance)
5850 remote_info = self.rpc.call_instance_info(instance.primary_node,
5852 instance.hypervisor)
5853 remote_info.Raise("Error checking node %s" % instance.primary_node,
5854 prereq=True, ecode=errors.ECODE_ENVIRON)
5855 if not remote_info.payload: # not running already
5856 _CheckNodeFreeMemory(self, instance.primary_node,
5857 "starting instance %s" % instance.name,
5858 bep[constants.BE_MEMORY], instance.hypervisor)
5860 def Exec(self, feedback_fn):
5861 """Start the instance.
5864 instance = self.instance
5865 force = self.op.force
5867 if not self.op.no_remember:
5868 self.cfg.MarkInstanceUp(instance.name)
5870 if self.primary_offline:
5871 assert self.op.ignore_offline_nodes
5872 self.proc.LogInfo("Primary node offline, marked instance as started")
5874 node_current = instance.primary_node
5876 _StartInstanceDisks(self, instance, force)
5878 result = self.rpc.call_instance_start(node_current, instance,
5879 self.op.hvparams, self.op.beparams,
5880 self.op.startup_paused)
5881 msg = result.fail_msg
5883 _ShutdownInstanceDisks(self, instance)
5884 raise errors.OpExecError("Could not start instance: %s" % msg)
5887 class LUInstanceReboot(LogicalUnit):
5888 """Reboot an instance.
5891 HPATH = "instance-reboot"
5892 HTYPE = constants.HTYPE_INSTANCE
5895 def ExpandNames(self):
5896 self._ExpandAndLockInstance()
5898 def BuildHooksEnv(self):
5901 This runs on master, primary and secondary nodes of the instance.
5905 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5906 "REBOOT_TYPE": self.op.reboot_type,
5907 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5910 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5914 def BuildHooksNodes(self):
5915 """Build hooks nodes.
5918 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5921 def CheckPrereq(self):
5922 """Check prerequisites.
5924 This checks that the instance is in the cluster.
5927 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5928 assert self.instance is not None, \
5929 "Cannot retrieve locked instance %s" % self.op.instance_name
5931 _CheckNodeOnline(self, instance.primary_node)
5933 # check bridges existence
5934 _CheckInstanceBridgesExist(self, instance)
5936 def Exec(self, feedback_fn):
5937 """Reboot the instance.
5940 instance = self.instance
5941 ignore_secondaries = self.op.ignore_secondaries
5942 reboot_type = self.op.reboot_type
5944 remote_info = self.rpc.call_instance_info(instance.primary_node,
5946 instance.hypervisor)
5947 remote_info.Raise("Error checking node %s" % instance.primary_node)
5948 instance_running = bool(remote_info.payload)
5950 node_current = instance.primary_node
5952 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5953 constants.INSTANCE_REBOOT_HARD]:
5954 for disk in instance.disks:
5955 self.cfg.SetDiskID(disk, node_current)
5956 result = self.rpc.call_instance_reboot(node_current, instance,
5958 self.op.shutdown_timeout)
5959 result.Raise("Could not reboot instance")
5961 if instance_running:
5962 result = self.rpc.call_instance_shutdown(node_current, instance,
5963 self.op.shutdown_timeout)
5964 result.Raise("Could not shutdown instance for full reboot")
5965 _ShutdownInstanceDisks(self, instance)
5967 self.LogInfo("Instance %s was already stopped, starting now",
5969 _StartInstanceDisks(self, instance, ignore_secondaries)
5970 result = self.rpc.call_instance_start(node_current, instance,
5972 msg = result.fail_msg
5974 _ShutdownInstanceDisks(self, instance)
5975 raise errors.OpExecError("Could not start instance for"
5976 " full reboot: %s" % msg)
5978 self.cfg.MarkInstanceUp(instance.name)
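# Reboot semantics (descriptive summary of the code above): soft and hard
# reboots are delegated to the hypervisor via call_instance_reboot, while a
# full reboot is emulated by shutting the instance down (including its disks)
# and starting it again; in all cases the instance ends up marked as up in
# the configuration.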
5981 class LUInstanceShutdown(LogicalUnit):
5982 """Shutdown an instance.
5985 HPATH = "instance-stop"
5986 HTYPE = constants.HTYPE_INSTANCE
5989 def ExpandNames(self):
5990 self._ExpandAndLockInstance()
5992 def BuildHooksEnv(self):
5995 This runs on master, primary and secondary nodes of the instance.
5998 env = _BuildInstanceHookEnvByObject(self, self.instance)
5999 env["TIMEOUT"] = self.op.timeout
6002 def BuildHooksNodes(self):
6003 """Build hooks nodes.
6006 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6009 def CheckPrereq(self):
6010 """Check prerequisites.
6012 This checks that the instance is in the cluster.
6015 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6016 assert self.instance is not None, \
6017 "Cannot retrieve locked instance %s" % self.op.instance_name
6019 self.primary_offline = \
6020 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6022 if self.primary_offline and self.op.ignore_offline_nodes:
6023 self.proc.LogWarning("Ignoring offline primary node")
6025 _CheckNodeOnline(self, self.instance.primary_node)
6027 def Exec(self, feedback_fn):
6028 """Shutdown the instance.
6031 instance = self.instance
6032 node_current = instance.primary_node
6033 timeout = self.op.timeout
6035 if not self.op.no_remember:
6036 self.cfg.MarkInstanceDown(instance.name)
6038 if self.primary_offline:
6039 assert self.op.ignore_offline_nodes
6040 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6042 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6043 msg = result.fail_msg
6045 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6047 _ShutdownInstanceDisks(self, instance)
6050 class LUInstanceReinstall(LogicalUnit):
6051 """Reinstall an instance.
6054 HPATH = "instance-reinstall"
6055 HTYPE = constants.HTYPE_INSTANCE
6058 def ExpandNames(self):
6059 self._ExpandAndLockInstance()
6061 def BuildHooksEnv(self):
6064 This runs on master, primary and secondary nodes of the instance.
6067 return _BuildInstanceHookEnvByObject(self, self.instance)
6069 def BuildHooksNodes(self):
6070 """Build hooks nodes.
6073 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6076 def CheckPrereq(self):
6077 """Check prerequisites.
6079 This checks that the instance is in the cluster and is not running.
6082 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6083 assert instance is not None, \
6084 "Cannot retrieve locked instance %s" % self.op.instance_name
6085 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6086 " offline, cannot reinstall")
6087 for node in instance.secondary_nodes:
6088 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6089 " cannot reinstall")
6091 if instance.disk_template == constants.DT_DISKLESS:
6092 raise errors.OpPrereqError("Instance '%s' has no disks" %
6093 self.op.instance_name,
6095 _CheckInstanceDown(self, instance, "cannot reinstall")
6097 if self.op.os_type is not None:
6099 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6100 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6101 instance_os = self.op.os_type
6103 instance_os = instance.os
6105 nodelist = list(instance.all_nodes)
6107 if self.op.osparams:
6108 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6109 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6110 self.os_inst = i_osdict # the new dict (without defaults)
6114 self.instance = instance
6116 def Exec(self, feedback_fn):
6117 """Reinstall the instance.
6120 inst = self.instance
6122 if self.op.os_type is not None:
6123 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6124 inst.os = self.op.os_type
6125 # Write to configuration
6126 self.cfg.Update(inst, feedback_fn)
6128 _StartInstanceDisks(self, inst, None)
6130 feedback_fn("Running the instance OS create scripts...")
6131 # FIXME: pass debug option from opcode to backend
6132 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6133 self.op.debug_level,
6134 osparams=self.os_inst)
6135 result.Raise("Could not install OS for instance %s on node %s" %
6136 (inst.name, inst.primary_node))
6138 _ShutdownInstanceDisks(self, inst)
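# Reinstall flow (descriptive summary): the instance disks are activated with
# _StartInstanceDisks, the OS create scripts are run on the primary node via
# call_instance_os_add (optionally with a new OS type and osparams), and the
# disks are shut down again afterwards; the instance itself is never started.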
6141 class LUInstanceRecreateDisks(LogicalUnit):
6142 """Recreate an instance's missing disks.
6145 HPATH = "instance-recreate-disks"
6146 HTYPE = constants.HTYPE_INSTANCE
6149 def CheckArguments(self):
6150 # normalise the disk list
6151 self.op.disks = sorted(frozenset(self.op.disks))
6153 def ExpandNames(self):
6154 self._ExpandAndLockInstance()
6155 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6157 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6158 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6160 self.needed_locks[locking.LEVEL_NODE] = []
6162 def DeclareLocks(self, level):
6163 if level == locking.LEVEL_NODE:
6164 # if we replace the nodes, we only need to lock the old primary,
6165 # otherwise we need to lock all nodes for disk re-creation
6166 primary_only = bool(self.op.nodes)
6167 self._LockInstancesNodes(primary_only=primary_only)
6169 def BuildHooksEnv(self):
6172 This runs on master, primary and secondary nodes of the instance.
6175 return _BuildInstanceHookEnvByObject(self, self.instance)
6177 def BuildHooksNodes(self):
6178 """Build hooks nodes.
6181 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6184 def CheckPrereq(self):
6185 """Check prerequisites.
6187 This checks that the instance is in the cluster and is not running.
6190 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6191 assert instance is not None, \
6192 "Cannot retrieve locked instance %s" % self.op.instance_name
6194 if len(self.op.nodes) != len(instance.all_nodes):
6195 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6196 " %d replacement nodes were specified" %
6197 (instance.name, len(instance.all_nodes),
6198 len(self.op.nodes)),
6200 assert instance.disk_template != constants.DT_DRBD8 or \
6201 len(self.op.nodes) == 2
6202 assert instance.disk_template != constants.DT_PLAIN or \
6203 len(self.op.nodes) == 1
6204 primary_node = self.op.nodes[0]
6206 primary_node = instance.primary_node
6207 _CheckNodeOnline(self, primary_node)
6209 if instance.disk_template == constants.DT_DISKLESS:
6210 raise errors.OpPrereqError("Instance '%s' has no disks" %
6211 self.op.instance_name, errors.ECODE_INVAL)
6212 # if we replace nodes *and* the old primary is offline, we don't
6213 # check whether the instance is down
6214 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6215 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6216 if not (self.op.nodes and old_pnode.offline):
6217 _CheckInstanceDown(self, instance, "cannot recreate disks")
6219 if not self.op.disks:
6220 self.op.disks = range(len(instance.disks))
6222 for idx in self.op.disks:
6223 if idx >= len(instance.disks):
6224 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6226 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6227 raise errors.OpPrereqError("Can't recreate disks partially and"
6228 " change the nodes at the same time",
6230 self.instance = instance
6232 def Exec(self, feedback_fn):
6233 """Recreate the disks.
6236 instance = self.instance
6239 mods = [] # keeps track of needed logical_id changes
6241 for idx, disk in enumerate(instance.disks):
6242 if idx not in self.op.disks: # disk idx has not been passed in
6245 # update secondaries for disks, if needed
6247 if disk.dev_type == constants.LD_DRBD8:
6248 # need to update the nodes and minors
6249 assert len(self.op.nodes) == 2
6250 assert len(disk.logical_id) == 6 # otherwise disk internals
6252 (_, _, old_port, _, _, old_secret) = disk.logical_id
6253 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6254 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6255 new_minors[0], new_minors[1], old_secret)
6256 assert len(disk.logical_id) == len(new_id)
6257 mods.append((idx, new_id))
6259 # now that we have passed all asserts above, we can apply the mods
6260 # in a single run (to avoid partial changes)
6261 for idx, new_id in mods:
6262 instance.disks[idx].logical_id = new_id
6264 # change primary node, if needed
6266 instance.primary_node = self.op.nodes[0]
6267 self.LogWarning("Changing the instance's nodes, you will have to"
6268 " remove any disks left on the older nodes manually")
6271 self.cfg.Update(instance, feedback_fn)
6273 _CreateDisks(self, instance, to_skip=to_skip)
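# DRBD8 logical_id layout (descriptive note): the 6-tuple unpacked above is
# (node_a, node_b, port, minor_a, minor_b, shared_secret); when recreating
# disks on new nodes, only the nodes and the freshly allocated minors change,
# while the TCP port and the shared secret are reused.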
6276 class LUInstanceRename(LogicalUnit):
6277 """Rename an instance.
6280 HPATH = "instance-rename"
6281 HTYPE = constants.HTYPE_INSTANCE
6283 def CheckArguments(self):
6287 if self.op.ip_check and not self.op.name_check:
6288 # TODO: make the ip check more flexible and not depend on the name check
6289 raise errors.OpPrereqError("IP address check requires a name check",
6292 def BuildHooksEnv(self):
6295 This runs on master, primary and secondary nodes of the instance.
6298 env = _BuildInstanceHookEnvByObject(self, self.instance)
6299 env["INSTANCE_NEW_NAME"] = self.op.new_name
6302 def BuildHooksNodes(self):
6303 """Build hooks nodes.
6306 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6309 def CheckPrereq(self):
6310 """Check prerequisites.
6312 This checks that the instance is in the cluster and is not running.
6315 self.op.instance_name = _ExpandInstanceName(self.cfg,
6316 self.op.instance_name)
6317 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6318 assert instance is not None
6319 _CheckNodeOnline(self, instance.primary_node)
6320 _CheckInstanceDown(self, instance, "cannot rename")
6321 self.instance = instance
6323 new_name = self.op.new_name
6324 if self.op.name_check:
6325 hostname = netutils.GetHostname(name=new_name)
6326 if hostname != new_name:
6327 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6329 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6330 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6331 " same as given hostname '%s'") %
6332 (hostname.name, self.op.new_name),
6334 new_name = self.op.new_name = hostname.name
6335 if (self.op.ip_check and
6336 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6337 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6338 (hostname.ip, new_name),
6339 errors.ECODE_NOTUNIQUE)
6341 instance_list = self.cfg.GetInstanceList()
6342 if new_name in instance_list and new_name != instance.name:
6343 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6344 new_name, errors.ECODE_EXISTS)
6346 def Exec(self, feedback_fn):
6347 """Rename the instance.
6350 inst = self.instance
6351 old_name = inst.name
6353 rename_file_storage = False
6354 if (inst.disk_template in constants.DTS_FILEBASED and
6355 self.op.new_name != inst.name):
6356 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6357 rename_file_storage = True
6359 self.cfg.RenameInstance(inst.name, self.op.new_name)
6360 # Change the instance lock. This is definitely safe while we hold the BGL.
6361 # Otherwise the new lock would have to be added in acquired mode.
6363 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6364 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6366 # re-read the instance from the configuration after rename
6367 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6369 if rename_file_storage:
6370 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6371 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6372 old_file_storage_dir,
6373 new_file_storage_dir)
6374 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6375 " (but the instance has been renamed in Ganeti)" %
6376 (inst.primary_node, old_file_storage_dir,
6377 new_file_storage_dir))
6379 _StartInstanceDisks(self, inst, None)
6381 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6382 old_name, self.op.debug_level)
6383 msg = result.fail_msg
6385 msg = ("Could not run OS rename script for instance %s on node %s"
6386 " (but the instance has been renamed in Ganeti): %s" %
6387 (inst.name, inst.primary_node, msg))
6388 self.proc.LogWarning(msg)
6390 _ShutdownInstanceDisks(self, inst)
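# Rename ordering (descriptive note): the configuration is renamed first, the
# instance lock is then swapped while the BGL is held, file-based storage
# directories are renamed on the primary node if needed, and finally the OS
# rename script is run with the disks temporarily activated; a failure of the
# OS rename script is only reported as a warning, since the instance has
# already been renamed in Ganeti.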
6395 class LUInstanceRemove(LogicalUnit):
6396 """Remove an instance.
6399 HPATH = "instance-remove"
6400 HTYPE = constants.HTYPE_INSTANCE
6403 def ExpandNames(self):
6404 self._ExpandAndLockInstance()
6405 self.needed_locks[locking.LEVEL_NODE] = []
6406 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6408 def DeclareLocks(self, level):
6409 if level == locking.LEVEL_NODE:
6410 self._LockInstancesNodes()
6412 def BuildHooksEnv(self):
6415 This runs on master, primary and secondary nodes of the instance.
6418 env = _BuildInstanceHookEnvByObject(self, self.instance)
6419 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6422 def BuildHooksNodes(self):
6423 """Build hooks nodes.
6426 nl = [self.cfg.GetMasterNode()]
6427 nl_post = list(self.instance.all_nodes) + nl
6428 return (nl, nl_post)
6430 def CheckPrereq(self):
6431 """Check prerequisites.
6433 This checks that the instance is in the cluster.
6436 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6437 assert self.instance is not None, \
6438 "Cannot retrieve locked instance %s" % self.op.instance_name
6440 def Exec(self, feedback_fn):
6441 """Remove the instance.
6444 instance = self.instance
6445 logging.info("Shutting down instance %s on node %s",
6446 instance.name, instance.primary_node)
6448 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6449 self.op.shutdown_timeout)
6450 msg = result.fail_msg
6452 if self.op.ignore_failures:
6453 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6455 raise errors.OpExecError("Could not shutdown instance %s on"
6457 (instance.name, instance.primary_node, msg))
6459 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6462 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6463 """Utility function to remove an instance.
6466 logging.info("Removing block devices for instance %s", instance.name)
6468 if not _RemoveDisks(lu, instance):
6469 if not ignore_failures:
6470 raise errors.OpExecError("Can't remove instance's disks")
6471 feedback_fn("Warning: can't remove instance's disks")
6473 logging.info("Removing instance %s out of cluster config", instance.name)
6475 lu.cfg.RemoveInstance(instance.name)
6477 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6478 "Instance lock removal conflict"
6480 # Remove lock for the instance
6481 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6484 class LUInstanceQuery(NoHooksLU):
6485 """Logical unit for querying instances.
6488 # pylint: disable-msg=W0142
6491 def CheckArguments(self):
6492 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6493 self.op.output_fields, self.op.use_locking)
6495 def ExpandNames(self):
6496 self.iq.ExpandNames(self)
6498 def DeclareLocks(self, level):
6499 self.iq.DeclareLocks(self, level)
6501 def Exec(self, feedback_fn):
6502 return self.iq.OldStyleQuery(self)
6505 class LUInstanceFailover(LogicalUnit):
6506 """Failover an instance.
6509 HPATH = "instance-failover"
6510 HTYPE = constants.HTYPE_INSTANCE
6513 def CheckArguments(self):
6514 """Check the arguments.
6517 self.iallocator = getattr(self.op, "iallocator", None)
6518 self.target_node = getattr(self.op, "target_node", None)
6520 def ExpandNames(self):
6521 self._ExpandAndLockInstance()
6523 if self.op.target_node is not None:
6524 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6526 self.needed_locks[locking.LEVEL_NODE] = []
6527 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6529 ignore_consistency = self.op.ignore_consistency
6530 shutdown_timeout = self.op.shutdown_timeout
6531 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6534 ignore_consistency=ignore_consistency,
6535 shutdown_timeout=shutdown_timeout)
6536 self.tasklets = [self._migrater]
6538 def DeclareLocks(self, level):
6539 if level == locking.LEVEL_NODE:
6540 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6541 if instance.disk_template in constants.DTS_EXT_MIRROR:
6542 if self.op.target_node is None:
6543 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6545 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6546 self.op.target_node]
6547 del self.recalculate_locks[locking.LEVEL_NODE]
6549 self._LockInstancesNodes()
6551 def BuildHooksEnv(self):
6554 This runs on master, primary and secondary nodes of the instance.
6557 instance = self._migrater.instance
6558 source_node = instance.primary_node
6559 target_node = self.op.target_node
6561 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6562 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6563 "OLD_PRIMARY": source_node,
6564 "NEW_PRIMARY": target_node,
6567 if instance.disk_template in constants.DTS_INT_MIRROR:
6568 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6569 env["NEW_SECONDARY"] = source_node
6571 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6573 env.update(_BuildInstanceHookEnvByObject(self, instance))
6577 def BuildHooksNodes(self):
6578 """Build hooks nodes.
6581 instance = self._migrater.instance
6582 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6583 return (nl, nl + [instance.primary_node])
6586 class LUInstanceMigrate(LogicalUnit):
6587 """Migrate an instance.
6589 This is migration without shutting down, compared to the failover,
6590 which is done with shutdown.
6593 HPATH = "instance-migrate"
6594 HTYPE = constants.HTYPE_INSTANCE
6597 def ExpandNames(self):
6598 self._ExpandAndLockInstance()
6600 if self.op.target_node is not None:
6601 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6603 self.needed_locks[locking.LEVEL_NODE] = []
6604 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6606 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6607 cleanup=self.op.cleanup,
6609 fallback=self.op.allow_failover)
6610 self.tasklets = [self._migrater]
6612 def DeclareLocks(self, level):
6613 if level == locking.LEVEL_NODE:
6614 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6615 if instance.disk_template in constants.DTS_EXT_MIRROR:
6616 if self.op.target_node is None:
6617 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6619 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6620 self.op.target_node]
6621 del self.recalculate_locks[locking.LEVEL_NODE]
6623 self._LockInstancesNodes()
6625 def BuildHooksEnv(self):
6628 This runs on master, primary and secondary nodes of the instance.
6631 instance = self._migrater.instance
6632 source_node = instance.primary_node
6633 target_node = self.op.target_node
6634 env = _BuildInstanceHookEnvByObject(self, instance)
6636 "MIGRATE_LIVE": self._migrater.live,
6637 "MIGRATE_CLEANUP": self.op.cleanup,
6638 "OLD_PRIMARY": source_node,
6639 "NEW_PRIMARY": target_node,
6642 if instance.disk_template in constants.DTS_INT_MIRROR:
6643 env["OLD_SECONDARY"] = target_node
6644 env["NEW_SECONDARY"] = source_node
6646 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6650 def BuildHooksNodes(self):
6651 """Build hooks nodes.
6654 instance = self._migrater.instance
6655 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6656 return (nl, nl + [instance.primary_node])
6659 class LUInstanceMove(LogicalUnit):
6660 """Move an instance by data-copying.
6663 HPATH = "instance-move"
6664 HTYPE = constants.HTYPE_INSTANCE
6667 def ExpandNames(self):
6668 self._ExpandAndLockInstance()
6669 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6670 self.op.target_node = target_node
6671 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6672 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6674 def DeclareLocks(self, level):
6675 if level == locking.LEVEL_NODE:
6676 self._LockInstancesNodes(primary_only=True)
6678 def BuildHooksEnv(self):
6681 This runs on master, primary and secondary nodes of the instance.
6685 "TARGET_NODE": self.op.target_node,
6686 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6688 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6691 def BuildHooksNodes(self):
6692 """Build hooks nodes.
6696 self.cfg.GetMasterNode(),
6697 self.instance.primary_node,
6698 self.op.target_node,
6702 def CheckPrereq(self):
6703 """Check prerequisites.
6705 This checks that the instance is in the cluster.
6708 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6709 assert self.instance is not None, \
6710 "Cannot retrieve locked instance %s" % self.op.instance_name
6712 node = self.cfg.GetNodeInfo(self.op.target_node)
6713 assert node is not None, \
6714 "Cannot retrieve locked node %s" % self.op.target_node
6716 self.target_node = target_node = node.name
6718 if target_node == instance.primary_node:
6719 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6720 (instance.name, target_node),
6723 bep = self.cfg.GetClusterInfo().FillBE(instance)
6725 for idx, dsk in enumerate(instance.disks):
6726 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6727 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6728 " cannot copy" % idx, errors.ECODE_STATE)
6730 _CheckNodeOnline(self, target_node)
6731 _CheckNodeNotDrained(self, target_node)
6732 _CheckNodeVmCapable(self, target_node)
6734 if instance.admin_up:
6735 # check memory requirements on the secondary node
6736 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6737 instance.name, bep[constants.BE_MEMORY],
6738 instance.hypervisor)
6740 self.LogInfo("Not checking memory on the secondary node as"
6741 " instance will not be started")
6743 # check bridge existence
6744 _CheckInstanceBridgesExist(self, instance, node=target_node)
6746 def Exec(self, feedback_fn):
6747 """Move an instance.
6749 The move is done by shutting it down on its present node, copying
6750 the data over (slow) and starting it on the new node.
6753 instance = self.instance
6755 source_node = instance.primary_node
6756 target_node = self.target_node
6758 self.LogInfo("Shutting down instance %s on source node %s",
6759 instance.name, source_node)
6761 result = self.rpc.call_instance_shutdown(source_node, instance,
6762 self.op.shutdown_timeout)
6763 msg = result.fail_msg
6765 if self.op.ignore_consistency:
6766 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6767 " Proceeding anyway. Please make sure node"
6768 " %s is down. Error details: %s",
6769 instance.name, source_node, source_node, msg)
6771 raise errors.OpExecError("Could not shutdown instance %s on"
6773 (instance.name, source_node, msg))
6775 # create the target disks
6777 _CreateDisks(self, instance, target_node=target_node)
6778 except errors.OpExecError:
6779 self.LogWarning("Device creation failed, reverting...")
6781 _RemoveDisks(self, instance, target_node=target_node)
6783 self.cfg.ReleaseDRBDMinors(instance.name)
6786 cluster_name = self.cfg.GetClusterInfo().cluster_name
6789 # activate, get path, copy the data over
6790 for idx, disk in enumerate(instance.disks):
6791 self.LogInfo("Copying data for disk %d", idx)
6792 result = self.rpc.call_blockdev_assemble(target_node, disk,
6793 instance.name, True, idx)
6795 self.LogWarning("Can't assemble newly created disk %d: %s",
6796 idx, result.fail_msg)
6797 errs.append(result.fail_msg)
6799 dev_path = result.payload
6800 result = self.rpc.call_blockdev_export(source_node, disk,
6801 target_node, dev_path,
6804 self.LogWarning("Can't copy data over for disk %d: %s",
6805 idx, result.fail_msg)
6806 errs.append(result.fail_msg)
6810 self.LogWarning("Some disks failed to copy, aborting")
6812 _RemoveDisks(self, instance, target_node=target_node)
6814 self.cfg.ReleaseDRBDMinors(instance.name)
6815 raise errors.OpExecError("Errors during disk copy: %s" %
6818 instance.primary_node = target_node
6819 self.cfg.Update(instance, feedback_fn)
6821 self.LogInfo("Removing the disks on the original node")
6822 _RemoveDisks(self, instance, target_node=source_node)
6824 # Only start the instance if it's marked as up
6825 if instance.admin_up:
6826 self.LogInfo("Starting instance %s on node %s",
6827 instance.name, target_node)
6829 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6830 ignore_secondaries=True)
6832 _ShutdownInstanceDisks(self, instance)
6833 raise errors.OpExecError("Can't activate the instance's disks")
6835 result = self.rpc.call_instance_start(target_node, instance,
6837 msg = result.fail_msg
6839 _ShutdownInstanceDisks(self, instance)
6840 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6841 (instance.name, target_node, msg))
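# Move flow (descriptive summary): the instance is shut down on the source
# node, new disks are created on the target, each disk is assembled there and
# filled via call_blockdev_export from the source, the source disks are then
# removed, the primary node is switched in the configuration and, if the
# instance was marked up, it is started on the target node.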
6844 class LUNodeMigrate(LogicalUnit):
6845 """Migrate all instances from a node.
6848 HPATH = "node-migrate"
6849 HTYPE = constants.HTYPE_NODE
6852 def CheckArguments(self):
6855 def ExpandNames(self):
6856 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6858 self.share_locks = _ShareAll()
6859 self.needed_locks = {
6860 locking.LEVEL_NODE: [self.op.node_name],
6863 def BuildHooksEnv(self):
6866 This runs on the master, the primary and all the secondaries.
6870 "NODE_NAME": self.op.node_name,
6873 def BuildHooksNodes(self):
6874 """Build hooks nodes.
6877 nl = [self.cfg.GetMasterNode()]
6880 def CheckPrereq(self):
6883 def Exec(self, feedback_fn):
6884 # Prepare jobs for migrating instances
6886 [opcodes.OpInstanceMigrate(instance_name=inst.name,
6889 iallocator=self.op.iallocator,
6890 target_node=self.op.target_node)]
6891 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6894 # TODO: Run iallocator in this opcode and pass correct placement options to
6895 # OpInstanceMigrate. Since other jobs can modify the cluster between
6896 # running the iallocator and the actual migration, a good consistency model
6897 # will have to be found.
6899 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
6900 frozenset([self.op.node_name]))
6902 return ResultWithJobs(jobs)
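# Descriptive note: one single-opcode job (OpInstanceMigrate) is built per
# primary instance of the node, forwarding the LU's iallocator/target_node
# settings, and the whole list is handed back through ResultWithJobs.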
6905 class TLMigrateInstance(Tasklet):
6906 """Tasklet class for instance migration.
6909 @ivar live: whether the migration will be done live or non-live;
6910 this variable is initialized only after CheckPrereq has run
6911 @type cleanup: boolean
6912 @ivar cleanup: Whether we clean up from a failed migration
6913 @type iallocator: string
6914 @ivar iallocator: The iallocator used to determine target_node
6915 @type target_node: string
6916 @ivar target_node: If given, the target_node to reallocate the instance to
6917 @type failover: boolean
6918 @ivar failover: Whether operation results in failover or migration
6919 @type fallback: boolean
6920 @ivar fallback: Whether fallback to failover is allowed if migration is not
6921 possible
6922 @type ignore_consistency: boolean
6923 @ivar ignore_consistency: Whether we should ignore consistency between source
6924 and target node
6925 @type shutdown_timeout: int
6926 @ivar shutdown_timeout: In case of failover, the timeout to use for the shutdown
6929 def __init__(self, lu, instance_name, cleanup=False,
6930 failover=False, fallback=False,
6931 ignore_consistency=False,
6932 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6933 """Initializes this class.
6936 Tasklet.__init__(self, lu)
6939 self.instance_name = instance_name
6940 self.cleanup = cleanup
6941 self.live = False # will be overridden later
6942 self.failover = failover
6943 self.fallback = fallback
6944 self.ignore_consistency = ignore_consistency
6945 self.shutdown_timeout = shutdown_timeout
6947 def CheckPrereq(self):
6948 """Check prerequisites.
6950 This checks that the instance is in the cluster.
6953 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6954 instance = self.cfg.GetInstanceInfo(instance_name)
6955 assert instance is not None
6956 self.instance = instance
6958 if (not self.cleanup and not instance.admin_up and not self.failover and
6960 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6962 self.failover = True
6964 if instance.disk_template not in constants.DTS_MIRRORED:
6969 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6970 " %s" % (instance.disk_template, text),
6973 if instance.disk_template in constants.DTS_EXT_MIRROR:
6974 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6976 if self.lu.op.iallocator:
6977 self._RunAllocator()
6979 # We set self.target_node as it is required by
6980 # BuildHooksEnv
6981 self.target_node = self.lu.op.target_node
6983 # self.target_node is already populated, either directly or by the
6985 target_node = self.target_node
6986 if self.target_node == instance.primary_node:
6987 raise errors.OpPrereqError("Cannot migrate instance %s"
6988 " to its primary (%s)" %
6989 (instance.name, instance.primary_node))
6991 if len(self.lu.tasklets) == 1:
6992 # It is safe to release locks only when we're the only tasklet
6994 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6995 keep=[instance.primary_node, self.target_node])
6998 secondary_nodes = instance.secondary_nodes
6999 if not secondary_nodes:
7000 raise errors.ConfigurationError("No secondary node but using"
7001 " %s disk template" %
7002 instance.disk_template)
7003 target_node = secondary_nodes[0]
7004 if self.lu.op.iallocator or (self.lu.op.target_node and
7005 self.lu.op.target_node != target_node):
7007 text = "failed over"
7010 raise errors.OpPrereqError("Instances with disk template %s cannot"
7011 " be %s to arbitrary nodes"
7012 " (neither an iallocator nor a target"
7013 " node can be passed)" %
7014 (instance.disk_template, text),
7017 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7019 # check memory requirements on the secondary node
7020 if not self.failover or instance.admin_up:
7021 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7022 instance.name, i_be[constants.BE_MEMORY],
7023 instance.hypervisor)
7025 self.lu.LogInfo("Not checking memory on the secondary node as"
7026 " instance will not be started")
7028 # check bridge existence
7029 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7031 if not self.cleanup:
7032 _CheckNodeNotDrained(self.lu, target_node)
7033 if not self.failover:
7034 result = self.rpc.call_instance_migratable(instance.primary_node,
7036 if result.fail_msg and self.fallback:
7037 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7039 self.failover = True
7041 result.Raise("Can't migrate, please use failover",
7042 prereq=True, ecode=errors.ECODE_STATE)
7044 assert not (self.failover and self.cleanup)
7046 if not self.failover:
7047 if self.lu.op.live is not None and self.lu.op.mode is not None:
7048 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7049 " parameters are accepted",
7051 if self.lu.op.live is not None:
7053 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7055 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7056 # reset the 'live' parameter to None so that repeated
7057 # invocations of CheckPrereq do not raise an exception
7058 self.lu.op.live = None
7059 elif self.lu.op.mode is None:
7060 # read the default value from the hypervisor
7061 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7063 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7065 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7067 # Failover is never live
7070 def _RunAllocator(self):
7071 """Run the allocator based on input opcode.
7074 ial = IAllocator(self.cfg, self.rpc,
7075 mode=constants.IALLOCATOR_MODE_RELOC,
7076 name=self.instance_name,
7077 # TODO See why hail breaks with a single node below
7078 relocate_from=[self.instance.primary_node,
7079 self.instance.primary_node],
7082 ial.Run(self.lu.op.iallocator)
7085 raise errors.OpPrereqError("Can't compute nodes using"
7086 " iallocator '%s': %s" %
7087 (self.lu.op.iallocator, ial.info),
7089 if len(ial.result) != ial.required_nodes:
7090 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7091 " of nodes (%s), required %s" %
7092 (self.lu.op.iallocator, len(ial.result),
7093 ial.required_nodes), errors.ECODE_FAULT)
7094 self.target_node = ial.result[0]
7095 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7096 self.instance_name, self.lu.op.iallocator,
7097 utils.CommaJoin(ial.result))
7099 def _WaitUntilSync(self):
7100 """Poll with custom rpc for disk sync.
7102 This uses our own step-based rpc call.
7105 self.feedback_fn("* wait until resync is done")
7109 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7111 self.instance.disks)
7113 for node, nres in result.items():
7114 nres.Raise("Cannot resync disks on node %s" % node)
7115 node_done, node_percent = nres.payload
7116 all_done = all_done and node_done
7117 if node_percent is not None:
7118 min_percent = min(min_percent, node_percent)
7120 if min_percent < 100:
7121 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7124 def _EnsureSecondary(self, node):
7125 """Demote a node to secondary.
7128 self.feedback_fn("* switching node %s to secondary mode" % node)
7130 for dev in self.instance.disks:
7131 self.cfg.SetDiskID(dev, node)
7133 result = self.rpc.call_blockdev_close(node, self.instance.name,
7134 self.instance.disks)
7135 result.Raise("Cannot change disk to secondary on node %s" % node)
7137 def _GoStandalone(self):
7138 """Disconnect from the network.
7141 self.feedback_fn("* changing into standalone mode")
7142 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7143 self.instance.disks)
7144 for node, nres in result.items():
7145 nres.Raise("Cannot disconnect disks node %s" % node)
7147 def _GoReconnect(self, multimaster):
7148 """Reconnect to the network.
7154 msg = "single-master"
7155 self.feedback_fn("* changing disks into %s mode" % msg)
7156 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7157 self.instance.disks,
7158 self.instance.name, multimaster)
7159 for node, nres in result.items():
7160 nres.Raise("Cannot change disks config on node %s" % node)
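# Typical sequence (descriptive note): during a DRBD migration the helpers
# above are chained as _EnsureSecondary(target_node) -> _GoStandalone() ->
# _GoReconnect(True) -> _WaitUntilSync() to reach dual-master mode, and the
# switch back to single-master uses _GoReconnect(False); see _ExecMigration
# and _ExecCleanup below.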
7162 def _ExecCleanup(self):
7163 """Try to cleanup after a failed migration.
7165 The cleanup is done by:
7166 - check that the instance is running only on one node
7167 (and update the config if needed)
7168 - change disks on its secondary node to secondary
7169 - wait until disks are fully synchronized
7170 - disconnect from the network
7171 - change disks into single-master mode
7172 - wait again until disks are fully synchronized
7175 instance = self.instance
7176 target_node = self.target_node
7177 source_node = self.source_node
7179 # check running on only one node
7180 self.feedback_fn("* checking where the instance actually runs"
7181 " (if this hangs, the hypervisor might be in"
7182 " a bad state)")
7183 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7184 for node, result in ins_l.items():
7185 result.Raise("Can't contact node %s" % node)
7187 runningon_source = instance.name in ins_l[source_node].payload
7188 runningon_target = instance.name in ins_l[target_node].payload
7190 if runningon_source and runningon_target:
7191 raise errors.OpExecError("Instance seems to be running on two nodes,"
7192 " or the hypervisor is confused; you will have"
7193 " to ensure manually that it runs only on one"
7194 " and restart this operation")
7196 if not (runningon_source or runningon_target):
7197 raise errors.OpExecError("Instance does not seem to be running at all;"
7198 " in this case it's safer to repair by"
7199 " running 'gnt-instance stop' to ensure disk"
7200 " shutdown, and then restarting it")
7202 if runningon_target:
7203 # the migration has actually succeeded, we need to update the config
7204 self.feedback_fn("* instance running on secondary node (%s),"
7205 " updating config" % target_node)
7206 instance.primary_node = target_node
7207 self.cfg.Update(instance, self.feedback_fn)
7208 demoted_node = source_node
7210 self.feedback_fn("* instance confirmed to be running on its"
7211 " primary node (%s)" % source_node)
7212 demoted_node = target_node
7214 if instance.disk_template in constants.DTS_INT_MIRROR:
7215 self._EnsureSecondary(demoted_node)
7217 self._WaitUntilSync()
7218 except errors.OpExecError:
7219 # we ignore here errors, since if the device is standalone, it
7220 # won't be able to sync
7222 self._GoStandalone()
7223 self._GoReconnect(False)
7224 self._WaitUntilSync()
7226 self.feedback_fn("* done")
7228 def _RevertDiskStatus(self):
7229 """Try to revert the disk status after a failed migration.
7232 target_node = self.target_node
7233 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7237 self._EnsureSecondary(target_node)
7238 self._GoStandalone()
7239 self._GoReconnect(False)
7240 self._WaitUntilSync()
7241 except errors.OpExecError, err:
7242 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7243 " please try to recover the instance manually;"
7244 " error '%s'" % str(err))
7246 def _AbortMigration(self):
7247 """Call the hypervisor code to abort a started migration.
7250 instance = self.instance
7251 target_node = self.target_node
7252 migration_info = self.migration_info
7254 abort_result = self.rpc.call_finalize_migration(target_node,
7258 abort_msg = abort_result.fail_msg
7260 logging.error("Aborting migration failed on target node %s: %s",
7261 target_node, abort_msg)
7262 # Don't raise an exception here, as we still have to try to revert the
7263 # disk status, even if this step failed.
7265 def _ExecMigration(self):
7266 """Migrate an instance.
7268 The migration is done by:
7269 - change the disks into dual-master mode
7270 - wait until disks are fully synchronized again
7271 - migrate the instance
7272 - change disks on the new secondary node (the old primary) to secondary
7273 - wait until disks are fully synchronized
7274 - change disks into single-master mode
7277 instance = self.instance
7278 target_node = self.target_node
7279 source_node = self.source_node
7281 self.feedback_fn("* checking disk consistency between source and target")
7282 for dev in instance.disks:
7283 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7284 raise errors.OpExecError("Disk %s is degraded or not fully"
7285 " synchronized on target node,"
7286 " aborting migration" % dev.iv_name)
7288 # First get the migration information from the remote node
7289 result = self.rpc.call_migration_info(source_node, instance)
7290 msg = result.fail_msg
7292 log_err = ("Failed fetching source migration information from %s: %s" %
7294 logging.error(log_err)
7295 raise errors.OpExecError(log_err)
7297 self.migration_info = migration_info = result.payload
7299 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7300 # Then switch the disks to master/master mode
7301 self._EnsureSecondary(target_node)
7302 self._GoStandalone()
7303 self._GoReconnect(True)
7304 self._WaitUntilSync()
7306 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7307 result = self.rpc.call_accept_instance(target_node,
7310 self.nodes_ip[target_node])
7312 msg = result.fail_msg
7314 logging.error("Instance pre-migration failed, trying to revert"
7315 " disk status: %s", msg)
7316 self.feedback_fn("Pre-migration failed, aborting")
7317 self._AbortMigration()
7318 self._RevertDiskStatus()
7319 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7320 (instance.name, msg))
7322 self.feedback_fn("* migrating instance to %s" % target_node)
7323 result = self.rpc.call_instance_migrate(source_node, instance,
7324 self.nodes_ip[target_node],
7326 msg = result.fail_msg
7328 logging.error("Instance migration failed, trying to revert"
7329 " disk status: %s", msg)
7330 self.feedback_fn("Migration failed, aborting")
7331 self._AbortMigration()
7332 self._RevertDiskStatus()
7333 raise errors.OpExecError("Could not migrate instance %s: %s" %
7334 (instance.name, msg))
7336 instance.primary_node = target_node
7337 # distribute new instance config to the other nodes
7338 self.cfg.Update(instance, self.feedback_fn)
7340 result = self.rpc.call_finalize_migration(target_node,
7344 msg = result.fail_msg
7346 logging.error("Instance migration succeeded, but finalization failed:"
7348 raise errors.OpExecError("Could not finalize instance migration: %s" %
7351 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7352 self._EnsureSecondary(source_node)
7353 self._WaitUntilSync()
7354 self._GoStandalone()
7355 self._GoReconnect(False)
7356 self._WaitUntilSync()
7358 self.feedback_fn("* done")
7360 def _ExecFailover(self):
7361 """Failover an instance.
7363 The failover is done by shutting it down on its present node and
7364 starting it on the secondary.
7367 instance = self.instance
7368 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7370 source_node = instance.primary_node
7371 target_node = self.target_node
7373 if instance.admin_up:
7374 self.feedback_fn("* checking disk consistency between source and target")
7375 for dev in instance.disks:
7376 # for drbd, these are drbd over lvm
7377 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7378 if primary_node.offline:
7379 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7380 " target node %s" %
7381 (primary_node.name, dev.iv_name, target_node))
7382 elif not self.ignore_consistency:
7383 raise errors.OpExecError("Disk %s is degraded on target node,"
7384 " aborting failover" % dev.iv_name)
7386 self.feedback_fn("* not checking disk consistency as instance is not"
7387 " running")
7389 self.feedback_fn("* shutting down instance on source node")
7390 logging.info("Shutting down instance %s on node %s",
7391 instance.name, source_node)
7393 result = self.rpc.call_instance_shutdown(source_node, instance,
7394 self.shutdown_timeout)
7395 msg = result.fail_msg
7397 if self.ignore_consistency or primary_node.offline:
7398 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7399 " proceeding anyway; please make sure node"
7400 " %s is down; error details: %s",
7401 instance.name, source_node, source_node, msg)
7403 raise errors.OpExecError("Could not shutdown instance %s on"
7405 (instance.name, source_node, msg))
7407 self.feedback_fn("* deactivating the instance's disks on source node")
7408 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7409 raise errors.OpExecError("Can't shut down the instance's disks")
7411 instance.primary_node = target_node
7412 # distribute new instance config to the other nodes
7413 self.cfg.Update(instance, self.feedback_fn)
7415 # Only start the instance if it's marked as up
7416 if instance.admin_up:
7417 self.feedback_fn("* activating the instance's disks on target node %s" %
7419 logging.info("Starting instance %s on node %s",
7420 instance.name, target_node)
7422 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7423 ignore_secondaries=True)
7425 _ShutdownInstanceDisks(self.lu, instance)
7426 raise errors.OpExecError("Can't activate the instance's disks")
7428 self.feedback_fn("* starting the instance on the target node %s" %
7430 result = self.rpc.call_instance_start(target_node, instance, None, None,
7432 msg = result.fail_msg
7434 _ShutdownInstanceDisks(self.lu, instance)
7435 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7436 (instance.name, target_node, msg))
7438 def Exec(self, feedback_fn):
7439 """Perform the migration.
7442 self.feedback_fn = feedback_fn
7443 self.source_node = self.instance.primary_node
7445 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7446 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7447 self.target_node = self.instance.secondary_nodes[0]
7448 # Otherwise self.target_node has been populated either
7449 # directly, or through an iallocator.
7451 self.all_nodes = [self.source_node, self.target_node]
7452 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7453 in self.cfg.GetMultiNodeInfo(self.all_nodes))
7456 feedback_fn("Failover instance %s" % self.instance.name)
7457 self._ExecFailover()
7459 feedback_fn("Migrating instance %s" % self.instance.name)
7462 return self._ExecCleanup()
7464 return self._ExecMigration()
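# Dispatch (descriptive note): Exec selects one of three paths - _ExecFailover
# when self.failover is set, _ExecCleanup when recovering from a previously
# failed migration (self.cleanup), and _ExecMigration for a regular live or
# non-live migration.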
7467 def _CreateBlockDev(lu, node, instance, device, force_create,
7469 """Create a tree of block devices on a given node.
7471 If this device type has to be created on secondaries, create it and
7472 all its children.
7474 If not, just recurse to children keeping the same 'force' value.
7476 @param lu: the lu on whose behalf we execute
7477 @param node: the node on which to create the device
7478 @type instance: L{objects.Instance}
7479 @param instance: the instance which owns the device
7480 @type device: L{objects.Disk}
7481 @param device: the device to create
7482 @type force_create: boolean
7483 @param force_create: whether to force creation of this device; this
7484 will be changed to True whenever we find a device which has the
7485 CreateOnSecondary() attribute
7486 @param info: the extra 'metadata' we should attach to the device
7487 (this will be represented as a LVM tag)
7488 @type force_open: boolean
7489 @param force_open: this parameter will be passed to the
7490 L{backend.BlockdevCreate} function where it specifies
7491 whether we run on primary or not, and it affects both
7492 the child assembly and the device's own Open() execution
7495 if device.CreateOnSecondary():
7499 for child in device.children:
7500 _CreateBlockDev(lu, node, instance, child, force_create,
7503 if not force_create:
7506 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
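# Recursion note (descriptive): force_create is propagated to the children of
# a device and switched to True as soon as a device reports
# CreateOnSecondary(), so an entire sub-tree ends up being created on this
# node once any ancestor requires it; devices for which force_create stays
# False are skipped here.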
7509 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7510 """Create a single block device on a given node.
7512 This will not recurse over children of the device, so they must be
7513 created in advance.
7515 @param lu: the lu on whose behalf we execute
7516 @param node: the node on which to create the device
7517 @type instance: L{objects.Instance}
7518 @param instance: the instance which owns the device
7519 @type device: L{objects.Disk}
7520 @param device: the device to create
7521 @param info: the extra 'metadata' we should attach to the device
7522 (this will be represented as a LVM tag)
7523 @type force_open: boolean
7524 @param force_open: this parameter will be passed to the
7525 L{backend.BlockdevCreate} function where it specifies
7526 whether we run on primary or not, and it affects both
7527 the child assembly and the device's own Open() execution
7530 lu.cfg.SetDiskID(device, node)
7531 result = lu.rpc.call_blockdev_create(node, device, device.size,
7532 instance.name, force_open, info)
7533 result.Raise("Can't create block device %s on"
7534 " node %s for instance %s" % (device, node, instance.name))
7535 if device.physical_id is None:
7536 device.physical_id = result.payload
7539 def _GenerateUniqueNames(lu, exts):
7540 """Generate a suitable LV name.
7542 This will generate a logical volume name for the given instance.
7547 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7548 results.append("%s%s" % (new_id, val))
7552 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7553 iv_name, p_minor, s_minor):
7554 """Generate a drbd8 device complete with its children.
7557 assert len(vgnames) == len(names) == 2
7558 port = lu.cfg.AllocatePort()
7559 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7560 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7561 logical_id=(vgnames[0], names[0]))
7562 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7563 logical_id=(vgnames[1], names[1]))
7564 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7565 logical_id=(primary, secondary, port,
7568 children=[dev_data, dev_meta],
7573 def _GenerateDiskTemplate(lu, template_name,
7574 instance_name, primary_node,
7575 secondary_nodes, disk_info,
7576 file_storage_dir, file_driver,
7577 base_index, feedback_fn):
7578 """Generate the entire disk layout for a given template type.
7581 #TODO: compute space requirements
7583 vgname = lu.cfg.GetVGName()
7584 disk_count = len(disk_info)
7586 if template_name == constants.DT_DISKLESS:
7588 elif template_name == constants.DT_PLAIN:
7589 if len(secondary_nodes) != 0:
7590 raise errors.ProgrammerError("Wrong template configuration")
7592 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7593 for i in range(disk_count)])
7594 for idx, disk in enumerate(disk_info):
7595 disk_index = idx + base_index
7596 vg = disk.get(constants.IDISK_VG, vgname)
7597 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7598 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7599 size=disk[constants.IDISK_SIZE],
7600 logical_id=(vg, names[idx]),
7601 iv_name="disk/%d" % disk_index,
7602 mode=disk[constants.IDISK_MODE])
7603 disks.append(disk_dev)
7604 elif template_name == constants.DT_DRBD8:
7605 if len(secondary_nodes) != 1:
7606 raise errors.ProgrammerError("Wrong template configuration")
7607 remote_node = secondary_nodes[0]
7608 minors = lu.cfg.AllocateDRBDMinor(
7609 [primary_node, remote_node] * len(disk_info), instance_name)
7612 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7613 for i in range(disk_count)]):
7614 names.append(lv_prefix + "_data")
7615 names.append(lv_prefix + "_meta")
7616 for idx, disk in enumerate(disk_info):
7617 disk_index = idx + base_index
7618 data_vg = disk.get(constants.IDISK_VG, vgname)
7619 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7620 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7621 disk[constants.IDISK_SIZE],
7623 names[idx * 2:idx * 2 + 2],
7624 "disk/%d" % disk_index,
7625 minors[idx * 2], minors[idx * 2 + 1])
7626 disk_dev.mode = disk[constants.IDISK_MODE]
7627 disks.append(disk_dev)
7628 elif template_name == constants.DT_FILE:
7629 if len(secondary_nodes) != 0:
7630 raise errors.ProgrammerError("Wrong template configuration")
7632 opcodes.RequireFileStorage()
7634 for idx, disk in enumerate(disk_info):
7635 disk_index = idx + base_index
7636 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7637 size=disk[constants.IDISK_SIZE],
7638 iv_name="disk/%d" % disk_index,
7639 logical_id=(file_driver,
7640 "%s/disk%d" % (file_storage_dir,
7642 mode=disk[constants.IDISK_MODE])
7643 disks.append(disk_dev)
7644 elif template_name == constants.DT_SHARED_FILE:
7645 if len(secondary_nodes) != 0:
7646 raise errors.ProgrammerError("Wrong template configuration")
7648 opcodes.RequireSharedFileStorage()
7650 for idx, disk in enumerate(disk_info):
7651 disk_index = idx + base_index
7652 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7653 size=disk[constants.IDISK_SIZE],
7654 iv_name="disk/%d" % disk_index,
7655 logical_id=(file_driver,
7656 "%s/disk%d" % (file_storage_dir,
7658 mode=disk[constants.IDISK_MODE])
7659 disks.append(disk_dev)
7660 elif template_name == constants.DT_BLOCK:
7661 if len(secondary_nodes) != 0:
7662 raise errors.ProgrammerError("Wrong template configuration")
7664 for idx, disk in enumerate(disk_info):
7665 disk_index = idx + base_index
7666 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7667 size=disk[constants.IDISK_SIZE],
7668 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7669 disk[constants.IDISK_ADOPT]),
7670 iv_name="disk/%d" % disk_index,
7671 mode=disk[constants.IDISK_MODE])
7672 disks.append(disk_dev)
7675 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
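# Editor's note -- an illustrative sketch of the DRBD8 branch above, not part
# of the original module; the UUIDs and counts are hypothetical. For a
# two-disk DRBD8 instance with base_index 0, _GenerateUniqueNames yields two
# prefixes such as "<uuid1>.disk0" and "<uuid2>.disk1", which are expanded to
#   <uuid1>.disk0_data, <uuid1>.disk0_meta,
#   <uuid2>.disk1_data, <uuid2>.disk1_meta
# and AllocateDRBDMinor is asked for four minors (a primary/secondary pair per
# disk), consumed as minors[idx * 2] and minors[idx * 2 + 1] in each
# _GenerateDRBD8Branch call.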
7679 def _GetInstanceInfoText(instance):
7680 """Compute that text that should be added to the disk's metadata.
7683 return "originstname+%s" % instance.name
7686 def _CalcEta(time_taken, written, total_size):
7687 """Calculates the ETA based on size written and total size.
7689 @param time_taken: The time taken so far
7690 @param written: amount written so far
7691 @param total_size: The total size of data to be written
7692 @return: The remaining time in seconds
7695 avg_time = time_taken / float(written)
7696 return (total_size - written) * avg_time
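# Editor's note -- a worked example of the linear extrapolation above (the
# numbers are hypothetical): if 256 MiB of a 1024 MiB disk were written in
# 30 seconds, then
#   avg_time = 30 / float(256)           # 0.1171875 seconds per MiB
#   (1024 - 256) * avg_time              # 90.0 seconds remaining
# so _CalcEta(30, 256, 1024) == 90.0.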
7699 def _WipeDisks(lu, instance):
7700 """Wipes instance disks.
7702 @type lu: L{LogicalUnit}
7703 @param lu: the logical unit on whose behalf we execute
7704 @type instance: L{objects.Instance}
7705 @param instance: the instance whose disks we should wipe
7706 @return: the success of the wipe
7709 node = instance.primary_node
7711 for device in instance.disks:
7712 lu.cfg.SetDiskID(device, node)
7714 logging.info("Pause sync of instance %s disks", instance.name)
7715 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7717 for idx, success in enumerate(result.payload):
7719 logging.warn("pause-sync of instance %s for disks %d failed",
7723 for idx, device in enumerate(instance.disks):
7724 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk, but
7725 # at most MAX_WIPE_CHUNK
7726 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7727 constants.MIN_WIPE_CHUNK_PERCENT)
7728 # we _must_ make this an int, otherwise rounding errors will
7730 wipe_chunk_size = int(wipe_chunk_size)
7732 lu.LogInfo("* Wiping disk %d", idx)
7733 logging.info("Wiping disk %d for instance %s, node %s using"
7734 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7739 start_time = time.time()
7741 while offset < size:
7742 wipe_size = min(wipe_chunk_size, size - offset)
7743 logging.debug("Wiping disk %d, offset %s, chunk %s",
7744 idx, offset, wipe_size)
7745 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7746 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7747 (idx, offset, wipe_size))
7750 if now - last_output >= 60:
7751 eta = _CalcEta(now - start_time, offset, size)
7752 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7753 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7756 logging.info("Resume sync of instance %s disks", instance.name)
7758 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7760 for idx, success in enumerate(result.payload):
7762 lu.LogWarning("Resume sync of disk %d failed, please have a"
7763 " look at the status and troubleshoot the issue", idx)
7764 logging.warn("resume-sync of instance %s for disks %d failed",
7768 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7769 """Create all disks for an instance.
7771 This abstracts away some work from AddInstance.
7773 @type lu: L{LogicalUnit}
7774 @param lu: the logical unit on whose behalf we execute
7775 @type instance: L{objects.Instance}
7776 @param instance: the instance whose disks we should create
7778 @param to_skip: list of indices to skip
7779 @type target_node: string
7780 @param target_node: if passed, overrides the target node for creation
7782 @return: the success of the creation
7785 info = _GetInstanceInfoText(instance)
7786 if target_node is None:
7787 pnode = instance.primary_node
7788 all_nodes = instance.all_nodes
7793 if instance.disk_template in constants.DTS_FILEBASED:
7794 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7795 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7797 result.Raise("Failed to create directory '%s' on"
7798 " node %s" % (file_storage_dir, pnode))
7800 # Note: this needs to be kept in sync with adding of disks in
7801 # LUInstanceSetParams
7802 for idx, device in enumerate(instance.disks):
7803 if to_skip and idx in to_skip:
7805 logging.info("Creating volume %s for instance %s",
7806 device.iv_name, instance.name)
7808 for node in all_nodes:
7809 f_create = node == pnode
7810 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7813 def _RemoveDisks(lu, instance, target_node=None):
7814 """Remove all disks for an instance.
7816 This abstracts away some work from `AddInstance()` and
7817 `RemoveInstance()`. Note that in case some of the devices couldn't
7818 be removed, the removal will continue with the other ones (compare
7819 with `_CreateDisks()`).
7821 @type lu: L{LogicalUnit}
7822 @param lu: the logical unit on whose behalf we execute
7823 @type instance: L{objects.Instance}
7824 @param instance: the instance whose disks we should remove
7825 @type target_node: string
7826 @param target_node: used to override the node on which to remove the disks
7828 @return: the success of the removal
7831 logging.info("Removing block devices for instance %s", instance.name)
7834 for device in instance.disks:
7836 edata = [(target_node, device)]
7838 edata = device.ComputeNodeTree(instance.primary_node)
7839 for node, disk in edata:
7840 lu.cfg.SetDiskID(disk, node)
7841 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7843 lu.LogWarning("Could not remove block device %s on node %s,"
7844 " continuing anyway: %s", device.iv_name, node, msg)
7847 if instance.disk_template == constants.DT_FILE:
7848 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7852 tgt = instance.primary_node
7853 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7855 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7856 file_storage_dir, instance.primary_node, result.fail_msg)
7862 def _ComputeDiskSizePerVG(disk_template, disks):
7863 """Compute disk size requirements in the volume group
7866 def _compute(disks, payload):
7867 """Universal algorithm.
7872 vgs[disk[constants.IDISK_VG]] = \
7873 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
7877 # Required free disk space as a function of disk and swap space
7879 constants.DT_DISKLESS: {},
7880 constants.DT_PLAIN: _compute(disks, 0),
7881 # 128 MB are added for drbd metadata for each disk
7882 constants.DT_DRBD8: _compute(disks, 128),
7883 constants.DT_FILE: {},
7884 constants.DT_SHARED_FILE: {},
7887 if disk_template not in req_size_dict:
7888 raise errors.ProgrammerError("Disk template '%s' size requirement"
7889 " is unknown" % disk_template)
7891 return req_size_dict[disk_template]
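# Editor's example for the helper above (the VG names are hypothetical): with
#   disks = [{constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 1024},
#            {constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 512},
#            {constants.IDISK_VG: "fastvg", constants.IDISK_SIZE: 256}]
# _ComputeDiskSizePerVG(constants.DT_DRBD8, disks) accumulates per volume
# group and returns {"xenvg": 1792, "fastvg": 384}, i.e. each disk contributes
# its size plus 128 MiB of DRBD metadata to its own VG, while the diskless and
# file-based templates return an empty dict.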
7894 def _ComputeDiskSize(disk_template, disks):
7895 """Compute disk size requirements in the volume group
7898 # Required free disk space as a function of disk and swap space
7900 constants.DT_DISKLESS: None,
7901 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7902 # 128 MB are added for drbd metadata for each disk
7903 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7904 constants.DT_FILE: None,
7905 constants.DT_SHARED_FILE: 0,
7906 constants.DT_BLOCK: 0,
7909 if disk_template not in req_size_dict:
7910 raise errors.ProgrammerError("Disk template '%s' size requirement"
7911 " is unknown" % disk_template)
7913 return req_size_dict[disk_template]
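# Editor's example for _ComputeDiskSize (sizes in MiB, chosen only for
# illustration): with disks = [{constants.IDISK_SIZE: 512},
# {constants.IDISK_SIZE: 2048}],
#   _ComputeDiskSize(constants.DT_PLAIN, disks) == 2560
#   _ComputeDiskSize(constants.DT_DRBD8, disks) == 2816   (+128 MiB of DRBD
#                                                           metadata per disk)
# whereas DT_DISKLESS and DT_FILE need no volume group space and return None.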
7916 def _FilterVmNodes(lu, nodenames):
7917 """Filters out non-vm_capable nodes from a list.
7919 @type lu: L{LogicalUnit}
7920 @param lu: the logical unit for which we check
7921 @type nodenames: list
7922 @param nodenames: the list of nodes on which we should check
7924 @return: the list of vm-capable nodes
7927 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7928 return [name for name in nodenames if name not in non_vm_nodes]
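# Editor's sketch of the filtering above (node names are hypothetical): if the
# configuration marks "node2" as the only non-vm_capable node, then
#   _FilterVmNodes(lu, ["node1", "node2", "node3"]) == ["node1", "node3"]
# The input order is preserved; the helper only drops members of the
# non-vm_capable set.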
7931 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7932 """Hypervisor parameter validation.
7934 This function abstracts the hypervisor parameter validation to be
7935 used in both instance create and instance modify.
7937 @type lu: L{LogicalUnit}
7938 @param lu: the logical unit for which we check
7939 @type nodenames: list
7940 @param nodenames: the list of nodes on which we should check
7941 @type hvname: string
7942 @param hvname: the name of the hypervisor we should use
7943 @type hvparams: dict
7944 @param hvparams: the parameters which we need to check
7945 @raise errors.OpPrereqError: if the parameters are not valid
7948 nodenames = _FilterVmNodes(lu, nodenames)
7949 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7952 for node in nodenames:
7956 info.Raise("Hypervisor parameter validation failed on node %s" % node)
7959 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7960 """OS parameters validation.
7962 @type lu: L{LogicalUnit}
7963 @param lu: the logical unit for which we check
7964 @type required: boolean
7965 @param required: whether the validation should fail if the OS is not found
7967 @type nodenames: list
7968 @param nodenames: the list of nodes on which we should check
7969 @type osname: string
7970 @param osname: the name of the OS we should use
7971 @type osparams: dict
7972 @param osparams: the parameters which we need to check
7973 @raise errors.OpPrereqError: if the parameters are not valid
7976 nodenames = _FilterVmNodes(lu, nodenames)
7977 result = lu.rpc.call_os_validate(required, nodenames, osname,
7978 [constants.OS_VALIDATE_PARAMETERS],
7980 for node, nres in result.items():
7981 # we don't check for offline cases since this should be run only
7982 # against the master node and/or an instance's nodes
7983 nres.Raise("OS Parameters validation failed on node %s" % node)
7984 if not nres.payload:
7985 lu.LogInfo("OS %s not found on node %s, validation skipped",
7989 class LUInstanceCreate(LogicalUnit):
7990 """Create an instance.
7993 HPATH = "instance-add"
7994 HTYPE = constants.HTYPE_INSTANCE
7997 def CheckArguments(self):
8001 # do not require name_check to ease forward/backward compatibility
8003 if self.op.no_install and self.op.start:
8004 self.LogInfo("No-installation mode selected, disabling startup")
8005 self.op.start = False
8006 # validate/normalize the instance name
8007 self.op.instance_name = \
8008 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8010 if self.op.ip_check and not self.op.name_check:
8011 # TODO: make the ip check more flexible and not depend on the name check
8012 raise errors.OpPrereqError("Cannot do IP address check without a name"
8013 " check", errors.ECODE_INVAL)
8015 # check nics' parameter names
8016 for nic in self.op.nics:
8017 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8019 # check disks. parameter names and consistent adopt/no-adopt strategy
8020 has_adopt = has_no_adopt = False
8021 for disk in self.op.disks:
8022 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8023 if constants.IDISK_ADOPT in disk:
8027 if has_adopt and has_no_adopt:
8028 raise errors.OpPrereqError("Either all disks are adopted or none is",
8031 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8032 raise errors.OpPrereqError("Disk adoption is not supported for the"
8033 " '%s' disk template" %
8034 self.op.disk_template,
8036 if self.op.iallocator is not None:
8037 raise errors.OpPrereqError("Disk adoption not allowed with an"
8038 " iallocator script", errors.ECODE_INVAL)
8039 if self.op.mode == constants.INSTANCE_IMPORT:
8040 raise errors.OpPrereqError("Disk adoption not allowed for"
8041 " instance import", errors.ECODE_INVAL)
8043 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8044 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8045 " but no 'adopt' parameter given" %
8046 self.op.disk_template,
8049 self.adopt_disks = has_adopt
8051 # instance name verification
8052 if self.op.name_check:
8053 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8054 self.op.instance_name = self.hostname1.name
8055 # used in CheckPrereq for ip ping check
8056 self.check_ip = self.hostname1.ip
8058 self.check_ip = None
8060 # file storage checks
8061 if (self.op.file_driver and
8062 not self.op.file_driver in constants.FILE_DRIVER):
8063 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8064 self.op.file_driver, errors.ECODE_INVAL)
8066 if self.op.disk_template == constants.DT_FILE:
8067 opcodes.RequireFileStorage()
8068 elif self.op.disk_template == constants.DT_SHARED_FILE:
8069 opcodes.RequireSharedFileStorage()
8071 ### Node/iallocator related checks
8072 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8074 if self.op.pnode is not None:
8075 if self.op.disk_template in constants.DTS_INT_MIRROR:
8076 if self.op.snode is None:
8077 raise errors.OpPrereqError("The networked disk templates need"
8078 " a mirror node", errors.ECODE_INVAL)
8080 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8082 self.op.snode = None
8084 self._cds = _GetClusterDomainSecret()
8086 if self.op.mode == constants.INSTANCE_IMPORT:
8087 # On import force_variant must be True, because if we forced it at
8088 # initial install, our only chance when importing it back is that it
8090 self.op.force_variant = True
8092 if self.op.no_install:
8093 self.LogInfo("No-installation mode has no effect during import")
8095 elif self.op.mode == constants.INSTANCE_CREATE:
8096 if self.op.os_type is None:
8097 raise errors.OpPrereqError("No guest OS specified",
8099 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8100 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8101 " installation" % self.op.os_type,
8103 if self.op.disk_template is None:
8104 raise errors.OpPrereqError("No disk template specified",
8107 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8108 # Check handshake to ensure both clusters have the same domain secret
8109 src_handshake = self.op.source_handshake
8110 if not src_handshake:
8111 raise errors.OpPrereqError("Missing source handshake",
8114 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8117 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8120 # Load and check source CA
8121 self.source_x509_ca_pem = self.op.source_x509_ca
8122 if not self.source_x509_ca_pem:
8123 raise errors.OpPrereqError("Missing source X509 CA",
8127 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8129 except OpenSSL.crypto.Error, err:
8130 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8131 (err, ), errors.ECODE_INVAL)
8133 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8134 if errcode is not None:
8135 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8138 self.source_x509_ca = cert
8140 src_instance_name = self.op.source_instance_name
8141 if not src_instance_name:
8142 raise errors.OpPrereqError("Missing source instance name",
8145 self.source_instance_name = \
8146 netutils.GetHostname(name=src_instance_name).name
8149 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8150 self.op.mode, errors.ECODE_INVAL)
8152 def ExpandNames(self):
8153 """ExpandNames for CreateInstance.
8155 Figure out the right locks for instance creation.
8158 self.needed_locks = {}
8160 instance_name = self.op.instance_name
8161 # this is just a preventive check, but someone might still add this
8162 # instance in the meantime, and creation will fail at lock-add time
8163 if instance_name in self.cfg.GetInstanceList():
8164 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8165 instance_name, errors.ECODE_EXISTS)
8167 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8169 if self.op.iallocator:
8170 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8172 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8173 nodelist = [self.op.pnode]
8174 if self.op.snode is not None:
8175 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8176 nodelist.append(self.op.snode)
8177 self.needed_locks[locking.LEVEL_NODE] = nodelist
8179 # in case of import lock the source node too
8180 if self.op.mode == constants.INSTANCE_IMPORT:
8181 src_node = self.op.src_node
8182 src_path = self.op.src_path
8184 if src_path is None:
8185 self.op.src_path = src_path = self.op.instance_name
8187 if src_node is None:
8188 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8189 self.op.src_node = None
8190 if os.path.isabs(src_path):
8191 raise errors.OpPrereqError("Importing an instance from an absolute"
8192 " path requires a source node option",
8195 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8196 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8197 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8198 if not os.path.isabs(src_path):
8199 self.op.src_path = src_path = \
8200 utils.PathJoin(constants.EXPORT_DIR, src_path)
8202 def _RunAllocator(self):
8203 """Run the allocator based on input opcode.
8206 nics = [n.ToDict() for n in self.nics]
8207 ial = IAllocator(self.cfg, self.rpc,
8208 mode=constants.IALLOCATOR_MODE_ALLOC,
8209 name=self.op.instance_name,
8210 disk_template=self.op.disk_template,
8213 vcpus=self.be_full[constants.BE_VCPUS],
8214 memory=self.be_full[constants.BE_MEMORY],
8217 hypervisor=self.op.hypervisor,
8220 ial.Run(self.op.iallocator)
8223 raise errors.OpPrereqError("Can't compute nodes using"
8224 " iallocator '%s': %s" %
8225 (self.op.iallocator, ial.info),
8227 if len(ial.result) != ial.required_nodes:
8228 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8229 " of nodes (%s), required %s" %
8230 (self.op.iallocator, len(ial.result),
8231 ial.required_nodes), errors.ECODE_FAULT)
8232 self.op.pnode = ial.result[0]
8233 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8234 self.op.instance_name, self.op.iallocator,
8235 utils.CommaJoin(ial.result))
8236 if ial.required_nodes == 2:
8237 self.op.snode = ial.result[1]
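# Editor's note (the node names are hypothetical): for a mirrored template
# such as DRBD8 the allocator must return two nodes, e.g.
# ["node2.example.com", "node5.example.com"]; the code above takes the first
# entry as the primary node and, when ial.required_nodes == 2, the second
# entry as the secondary node.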
8239 def BuildHooksEnv(self):
8242 This runs on master, primary and secondary nodes of the instance.
8246 "ADD_MODE": self.op.mode,
8248 if self.op.mode == constants.INSTANCE_IMPORT:
8249 env["SRC_NODE"] = self.op.src_node
8250 env["SRC_PATH"] = self.op.src_path
8251 env["SRC_IMAGES"] = self.src_images
8253 env.update(_BuildInstanceHookEnv(
8254 name=self.op.instance_name,
8255 primary_node=self.op.pnode,
8256 secondary_nodes=self.secondaries,
8257 status=self.op.start,
8258 os_type=self.op.os_type,
8259 memory=self.be_full[constants.BE_MEMORY],
8260 vcpus=self.be_full[constants.BE_VCPUS],
8261 nics=_NICListToTuple(self, self.nics),
8262 disk_template=self.op.disk_template,
8263 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8264 for d in self.disks],
8267 hypervisor_name=self.op.hypervisor,
8273 def BuildHooksNodes(self):
8274 """Build hooks nodes.
8277 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8280 def _ReadExportInfo(self):
8281 """Reads the export information from disk.
8283 It will override the opcode source node and path with the actual
8284 information, if these two were not specified before.
8286 @return: the export information
8289 assert self.op.mode == constants.INSTANCE_IMPORT
8291 src_node = self.op.src_node
8292 src_path = self.op.src_path
8294 if src_node is None:
8295 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8296 exp_list = self.rpc.call_export_list(locked_nodes)
8298 for node in exp_list:
8299 if exp_list[node].fail_msg:
8301 if src_path in exp_list[node].payload:
8303 self.op.src_node = src_node = node
8304 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8308 raise errors.OpPrereqError("No export found for relative path %s" %
8309 src_path, errors.ECODE_INVAL)
8311 _CheckNodeOnline(self, src_node)
8312 result = self.rpc.call_export_info(src_node, src_path)
8313 result.Raise("No export or invalid export found in dir %s" % src_path)
8315 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8316 if not export_info.has_section(constants.INISECT_EXP):
8317 raise errors.ProgrammerError("Corrupted export config",
8318 errors.ECODE_ENVIRON)
8320 ei_version = export_info.get(constants.INISECT_EXP, "version")
8321 if (int(ei_version) != constants.EXPORT_VERSION):
8322 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8323 (ei_version, constants.EXPORT_VERSION),
8324 errors.ECODE_ENVIRON)
8327 def _ReadExportParams(self, einfo):
8328 """Use export parameters as defaults.
8330 In case the opcode doesn't specify (as in override) some instance
8331 parameters, then try to use them from the export information, if the export declares them.
8335 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8337 if self.op.disk_template is None:
8338 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8339 self.op.disk_template = einfo.get(constants.INISECT_INS,
8342 raise errors.OpPrereqError("No disk template specified and the export"
8343 " is missing the disk_template information",
8346 if not self.op.disks:
8347 if einfo.has_option(constants.INISECT_INS, "disk_count"):
8349 # TODO: import the disk iv_name too
8350 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8351 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8352 disks.append({constants.IDISK_SIZE: disk_sz})
8353 self.op.disks = disks
8355 raise errors.OpPrereqError("No disk info specified and the export"
8356 " is missing the disk information",
8359 if (not self.op.nics and
8360 einfo.has_option(constants.INISECT_INS, "nic_count")):
8362 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8364 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8365 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8370 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8371 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8373 if (self.op.hypervisor is None and
8374 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8375 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8377 if einfo.has_section(constants.INISECT_HYP):
8378 # use the export parameters but do not override the ones
8379 # specified by the user
8380 for name, value in einfo.items(constants.INISECT_HYP):
8381 if name not in self.op.hvparams:
8382 self.op.hvparams[name] = value
8384 if einfo.has_section(constants.INISECT_BEP):
8385 # use the parameters, without overriding
8386 for name, value in einfo.items(constants.INISECT_BEP):
8387 if name not in self.op.beparams:
8388 self.op.beparams[name] = value
8390 # try to read the parameters old style, from the main section
8391 for name in constants.BES_PARAMETERS:
8392 if (name not in self.op.beparams and
8393 einfo.has_option(constants.INISECT_INS, name)):
8394 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8396 if einfo.has_section(constants.INISECT_OSP):
8397 # use the parameters, without overriding
8398 for name, value in einfo.items(constants.INISECT_OSP):
8399 if name not in self.op.osparams:
8400 self.op.osparams[name] = value
8402 def _RevertToDefaults(self, cluster):
8403 """Revert the instance parameters to the default values.
8407 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8408 for name in self.op.hvparams.keys():
8409 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8410 del self.op.hvparams[name]
8412 be_defs = cluster.SimpleFillBE({})
8413 for name in self.op.beparams.keys():
8414 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8415 del self.op.beparams[name]
8417 nic_defs = cluster.SimpleFillNIC({})
8418 for nic in self.op.nics:
8419 for name in constants.NICS_PARAMETERS:
8420 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8423 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8424 for name in self.op.osparams.keys():
8425 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8426 del self.op.osparams[name]
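# Editor's sketch of the revert above (the values are hypothetical): if the
# cluster-level default for constants.BE_MEMORY is 128 and the opcode carries
# beparams = {"memory": 128, "vcpus": 4}, only {"vcpus": 4} is kept, so the
# instance keeps following the cluster default for memory from then on; the
# same pruning is applied to hvparams, per-NIC parameters and osparams.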
8428 def _CalculateFileStorageDir(self):
8429 """Calculate final instance file storage dir.
8432 # file storage dir calculation/check
8433 self.instance_file_storage_dir = None
8434 if self.op.disk_template in constants.DTS_FILEBASED:
8435 # build the full file storage dir path
8438 if self.op.disk_template == constants.DT_SHARED_FILE:
8439 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8441 get_fsd_fn = self.cfg.GetFileStorageDir
8443 cfg_storagedir = get_fsd_fn()
8444 if not cfg_storagedir:
8445 raise errors.OpPrereqError("Cluster file storage dir not defined")
8446 joinargs.append(cfg_storagedir)
8448 if self.op.file_storage_dir is not None:
8449 joinargs.append(self.op.file_storage_dir)
8451 joinargs.append(self.op.instance_name)
8453 # pylint: disable-msg=W0142
8454 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
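# Editor's example (the paths are hypothetical): with a cluster file storage
# directory of "/srv/ganeti/file-storage", op.file_storage_dir = "web" and an
# instance named "inst1.example.com", the resulting directory is
#   /srv/ganeti/file-storage/web/inst1.example.com
# If op.file_storage_dir is not set, that middle component is simply omitted.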
8456 def CheckPrereq(self):
8457 """Check prerequisites.
8460 self._CalculateFileStorageDir()
8462 if self.op.mode == constants.INSTANCE_IMPORT:
8463 export_info = self._ReadExportInfo()
8464 self._ReadExportParams(export_info)
8466 if (not self.cfg.GetVGName() and
8467 self.op.disk_template not in constants.DTS_NOT_LVM):
8468 raise errors.OpPrereqError("Cluster does not support lvm-based"
8469 " instances", errors.ECODE_STATE)
8471 if self.op.hypervisor is None:
8472 self.op.hypervisor = self.cfg.GetHypervisorType()
8474 cluster = self.cfg.GetClusterInfo()
8475 enabled_hvs = cluster.enabled_hypervisors
8476 if self.op.hypervisor not in enabled_hvs:
8477 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8478 " cluster (%s)" % (self.op.hypervisor,
8479 ",".join(enabled_hvs)),
8482 # Check tag validity
8483 for tag in self.op.tags:
8484 objects.TaggableObject.ValidateTag(tag)
8486 # check hypervisor parameter syntax (locally)
8487 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8488 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8490 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8491 hv_type.CheckParameterSyntax(filled_hvp)
8492 self.hv_full = filled_hvp
8493 # check that we don't specify global parameters on an instance
8494 _CheckGlobalHvParams(self.op.hvparams)
8496 # fill and remember the beparams dict
8497 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8498 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8500 # build os parameters
8501 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8503 # now that hvp/bep are in final format, let's reset to defaults,
8505 if self.op.identify_defaults:
8506 self._RevertToDefaults(cluster)
8510 for idx, nic in enumerate(self.op.nics):
8511 nic_mode_req = nic.get(constants.INIC_MODE, None)
8512 nic_mode = nic_mode_req
8513 if nic_mode is None:
8514 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8516 # in routed mode, for the first nic, the default ip is 'auto'
8517 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8518 default_ip_mode = constants.VALUE_AUTO
8520 default_ip_mode = constants.VALUE_NONE
8522 # ip validity checks
8523 ip = nic.get(constants.INIC_IP, default_ip_mode)
8524 if ip is None or ip.lower() == constants.VALUE_NONE:
8526 elif ip.lower() == constants.VALUE_AUTO:
8527 if not self.op.name_check:
8528 raise errors.OpPrereqError("IP address set to auto but name checks"
8529 " have been skipped",
8531 nic_ip = self.hostname1.ip
8533 if not netutils.IPAddress.IsValid(ip):
8534 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8538 # TODO: check the ip address for uniqueness
8539 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8540 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8543 # MAC address verification
8544 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8545 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8546 mac = utils.NormalizeAndValidateMac(mac)
8549 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8550 except errors.ReservationError:
8551 raise errors.OpPrereqError("MAC address %s already in use"
8552 " in cluster" % mac,
8553 errors.ECODE_NOTUNIQUE)
8555 # Build nic parameters
8556 link = nic.get(constants.INIC_LINK, None)
8559 nicparams[constants.NIC_MODE] = nic_mode_req
8561 nicparams[constants.NIC_LINK] = link
8563 check_params = cluster.SimpleFillNIC(nicparams)
8564 objects.NIC.CheckParameterSyntax(check_params)
8565 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8567 # disk checks/pre-build
8568 default_vg = self.cfg.GetVGName()
8570 for disk in self.op.disks:
8571 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8572 if mode not in constants.DISK_ACCESS_SET:
8573 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8574 mode, errors.ECODE_INVAL)
8575 size = disk.get(constants.IDISK_SIZE, None)
8577 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8580 except (TypeError, ValueError):
8581 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8584 data_vg = disk.get(constants.IDISK_VG, default_vg)
8586 constants.IDISK_SIZE: size,
8587 constants.IDISK_MODE: mode,
8588 constants.IDISK_VG: data_vg,
8589 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8591 if constants.IDISK_ADOPT in disk:
8592 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8593 self.disks.append(new_disk)
8595 if self.op.mode == constants.INSTANCE_IMPORT:
8597 # Check that the new instance doesn't have less disks than the export
8598 instance_disks = len(self.disks)
8599 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8600 if instance_disks < export_disks:
8601 raise errors.OpPrereqError("Not enough disks to import."
8602 " (instance: %d, export: %d)" %
8603 (instance_disks, export_disks),
8607 for idx in range(export_disks):
8608 option = "disk%d_dump" % idx
8609 if export_info.has_option(constants.INISECT_INS, option):
8610 # FIXME: are the old os-es, disk sizes, etc. useful?
8611 export_name = export_info.get(constants.INISECT_INS, option)
8612 image = utils.PathJoin(self.op.src_path, export_name)
8613 disk_images.append(image)
8615 disk_images.append(False)
8617 self.src_images = disk_images
8619 old_name = export_info.get(constants.INISECT_INS, "name")
8621 exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8622 except (TypeError, ValueError), err:
8623 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8624 " an integer: %s" % str(err),
8626 if self.op.instance_name == old_name:
8627 for idx, nic in enumerate(self.nics):
8628 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8629 nic_mac_ini = "nic%d_mac" % idx
8630 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8632 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8634 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8635 if self.op.ip_check:
8636 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8637 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8638 (self.check_ip, self.op.instance_name),
8639 errors.ECODE_NOTUNIQUE)
8641 #### mac address generation
8642 # By generating here the mac address both the allocator and the hooks get
8643 # the real final mac address rather than the 'auto' or 'generate' value.
8644 # There is a race condition between the generation and the instance object
8645 # creation, which means that we know the mac is valid now, but we're not
8646 # sure it will be when we actually add the instance. If things go bad
8647 # adding the instance will abort because of a duplicate mac, and the
8648 # creation job will fail.
8649 for nic in self.nics:
8650 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8651 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8655 if self.op.iallocator is not None:
8656 self._RunAllocator()
8658 #### node related checks
8660 # check primary node
8661 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8662 assert self.pnode is not None, \
8663 "Cannot retrieve locked node %s" % self.op.pnode
8665 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8666 pnode.name, errors.ECODE_STATE)
8668 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8669 pnode.name, errors.ECODE_STATE)
8670 if not pnode.vm_capable:
8671 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8672 " '%s'" % pnode.name, errors.ECODE_STATE)
8674 self.secondaries = []
8676 # mirror node verification
8677 if self.op.disk_template in constants.DTS_INT_MIRROR:
8678 if self.op.snode == pnode.name:
8679 raise errors.OpPrereqError("The secondary node cannot be the"
8680 " primary node", errors.ECODE_INVAL)
8681 _CheckNodeOnline(self, self.op.snode)
8682 _CheckNodeNotDrained(self, self.op.snode)
8683 _CheckNodeVmCapable(self, self.op.snode)
8684 self.secondaries.append(self.op.snode)
8686 nodenames = [pnode.name] + self.secondaries
8688 if not self.adopt_disks:
8689 # Check lv size requirements, if not adopting
8690 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8691 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8693 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8694 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8695 disk[constants.IDISK_ADOPT])
8696 for disk in self.disks])
8697 if len(all_lvs) != len(self.disks):
8698 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8700 for lv_name in all_lvs:
8702 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
8703 # to ReserveLV use the same syntax
8704 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8705 except errors.ReservationError:
8706 raise errors.OpPrereqError("LV named %s used by another instance" %
8707 lv_name, errors.ECODE_NOTUNIQUE)
8709 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8710 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8712 node_lvs = self.rpc.call_lv_list([pnode.name],
8713 vg_names.payload.keys())[pnode.name]
8714 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8715 node_lvs = node_lvs.payload
8717 delta = all_lvs.difference(node_lvs.keys())
8719 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8720 utils.CommaJoin(delta),
8722 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8724 raise errors.OpPrereqError("Online logical volumes found, cannot"
8725 " adopt: %s" % utils.CommaJoin(online_lvs),
8727 # update the size of disk based on what is found
8728 for dsk in self.disks:
8729 dsk[constants.IDISK_SIZE] = \
8730 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8731 dsk[constants.IDISK_ADOPT])][0]))
8733 elif self.op.disk_template == constants.DT_BLOCK:
8734 # Normalize and de-duplicate device paths
8735 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8736 for disk in self.disks])
8737 if len(all_disks) != len(self.disks):
8738 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8740 baddisks = [d for d in all_disks
8741 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8743 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8744 " cannot be adopted" %
8745 (", ".join(baddisks),
8746 constants.ADOPTABLE_BLOCKDEV_ROOT),
8749 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8750 list(all_disks))[pnode.name]
8751 node_disks.Raise("Cannot get block device information from node %s" %
8753 node_disks = node_disks.payload
8754 delta = all_disks.difference(node_disks.keys())
8756 raise errors.OpPrereqError("Missing block device(s): %s" %
8757 utils.CommaJoin(delta),
8759 for dsk in self.disks:
8760 dsk[constants.IDISK_SIZE] = \
8761 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8763 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8765 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8766 # check OS parameters (remotely)
8767 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8769 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8771 # memory check on primary node
8773 _CheckNodeFreeMemory(self, self.pnode.name,
8774 "creating instance %s" % self.op.instance_name,
8775 self.be_full[constants.BE_MEMORY],
8778 self.dry_run_result = list(nodenames)
8780 def Exec(self, feedback_fn):
8781 """Create and add the instance to the cluster.
8784 instance = self.op.instance_name
8785 pnode_name = self.pnode.name
8787 ht_kind = self.op.hypervisor
8788 if ht_kind in constants.HTS_REQ_PORT:
8789 network_port = self.cfg.AllocatePort()
8793 disks = _GenerateDiskTemplate(self,
8794 self.op.disk_template,
8795 instance, pnode_name,
8798 self.instance_file_storage_dir,
8799 self.op.file_driver,
8803 iobj = objects.Instance(name=instance, os=self.op.os_type,
8804 primary_node=pnode_name,
8805 nics=self.nics, disks=disks,
8806 disk_template=self.op.disk_template,
8808 network_port=network_port,
8809 beparams=self.op.beparams,
8810 hvparams=self.op.hvparams,
8811 hypervisor=self.op.hypervisor,
8812 osparams=self.op.osparams,
8816 for tag in self.op.tags:
8819 if self.adopt_disks:
8820 if self.op.disk_template == constants.DT_PLAIN:
8821 # rename LVs to the newly-generated names; we need to construct
8822 # 'fake' LV disks with the old data, plus the new unique_id
8823 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8825 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8826 rename_to.append(t_dsk.logical_id)
8827 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8828 self.cfg.SetDiskID(t_dsk, pnode_name)
8829 result = self.rpc.call_blockdev_rename(pnode_name,
8830 zip(tmp_disks, rename_to))
8831 result.Raise("Failed to rename adoped LVs")
8833 feedback_fn("* creating instance disks...")
8835 _CreateDisks(self, iobj)
8836 except errors.OpExecError:
8837 self.LogWarning("Device creation failed, reverting...")
8839 _RemoveDisks(self, iobj)
8841 self.cfg.ReleaseDRBDMinors(instance)
8844 feedback_fn("adding instance %s to cluster config" % instance)
8846 self.cfg.AddInstance(iobj, self.proc.GetECId())
8848 # Declare that we don't want to remove the instance lock anymore, as we've
8849 # added the instance to the config
8850 del self.remove_locks[locking.LEVEL_INSTANCE]
8852 if self.op.mode == constants.INSTANCE_IMPORT:
8853 # Release unused nodes
8854 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8857 _ReleaseLocks(self, locking.LEVEL_NODE)
8860 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8861 feedback_fn("* wiping instance disks...")
8863 _WipeDisks(self, iobj)
8864 except errors.OpExecError, err:
8865 logging.exception("Wiping disks failed")
8866 self.LogWarning("Wiping instance disks failed (%s)", err)
8870 # Something is already wrong with the disks, don't do anything else
8872 elif self.op.wait_for_sync:
8873 disk_abort = not _WaitForSync(self, iobj)
8874 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8875 # make sure the disks are not degraded (still sync-ing is ok)
8876 feedback_fn("* checking mirrors status")
8877 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8882 _RemoveDisks(self, iobj)
8883 self.cfg.RemoveInstance(iobj.name)
8884 # Make sure the instance lock gets removed
8885 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8886 raise errors.OpExecError("There are some degraded disks for"
8889 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8890 if self.op.mode == constants.INSTANCE_CREATE:
8891 if not self.op.no_install:
8892 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
8893 not self.op.wait_for_sync)
8895 feedback_fn("* pausing disk sync to install instance OS")
8896 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
8898 for idx, success in enumerate(result.payload):
8900 logging.warn("pause-sync of instance %s for disk %d failed",
8903 feedback_fn("* running the instance OS create scripts...")
8904 # FIXME: pass debug option from opcode to backend
8905 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8906 self.op.debug_level)
8908 feedback_fn("* resuming disk sync")
8909 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
8911 for idx, success in enumerate(result.payload):
8913 logging.warn("resume-sync of instance %s for disk %d failed",
8916 result.Raise("Could not add os for instance %s"
8917 " on node %s" % (instance, pnode_name))
8919 elif self.op.mode == constants.INSTANCE_IMPORT:
8920 feedback_fn("* running the instance OS import scripts...")
8924 for idx, image in enumerate(self.src_images):
8928 # FIXME: pass debug option from opcode to backend
8929 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8930 constants.IEIO_FILE, (image, ),
8931 constants.IEIO_SCRIPT,
8932 (iobj.disks[idx], idx),
8934 transfers.append(dt)
8937 masterd.instance.TransferInstanceData(self, feedback_fn,
8938 self.op.src_node, pnode_name,
8939 self.pnode.secondary_ip,
8941 if not compat.all(import_result):
8942 self.LogWarning("Some disks for instance %s on node %s were not"
8943 " imported successfully" % (instance, pnode_name))
8945 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8946 feedback_fn("* preparing remote import...")
8947 # The source cluster will stop the instance before attempting to make a
8948 # connection. In some cases stopping an instance can take a long time,
8949 # hence the shutdown timeout is added to the connection timeout.
8950 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8951 self.op.source_shutdown_timeout)
8952 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8954 assert iobj.primary_node == self.pnode.name
8956 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8957 self.source_x509_ca,
8958 self._cds, timeouts)
8959 if not compat.all(disk_results):
8960 # TODO: Should the instance still be started, even if some disks
8961 # failed to import (valid for local imports, too)?
8962 self.LogWarning("Some disks for instance %s on node %s were not"
8963 " imported successfully" % (instance, pnode_name))
8965 # Run rename script on newly imported instance
8966 assert iobj.name == instance
8967 feedback_fn("Running rename script for %s" % instance)
8968 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8969 self.source_instance_name,
8970 self.op.debug_level)
8972 self.LogWarning("Failed to run rename script for %s on node"
8973 " %s: %s" % (instance, pnode_name, result.fail_msg))
8976 # also checked in the prereq part
8977 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8981 iobj.admin_up = True
8982 self.cfg.Update(iobj, feedback_fn)
8983 logging.info("Starting instance %s on node %s", instance, pnode_name)
8984 feedback_fn("* starting instance...")
8985 result = self.rpc.call_instance_start(pnode_name, iobj,
8987 result.Raise("Could not start instance")
8989 return list(iobj.all_nodes)
8992 class LUInstanceConsole(NoHooksLU):
8993 """Connect to an instance's console.
8995 This is somewhat special in that it returns the command line that
8996 you need to run on the master node in order to connect to the console.
9002 def ExpandNames(self):
9003 self._ExpandAndLockInstance()
9005 def CheckPrereq(self):
9006 """Check prerequisites.
9008 This checks that the instance is in the cluster.
9011 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9012 assert self.instance is not None, \
9013 "Cannot retrieve locked instance %s" % self.op.instance_name
9014 _CheckNodeOnline(self, self.instance.primary_node)
9016 def Exec(self, feedback_fn):
9017 """Connect to the console of an instance
9020 instance = self.instance
9021 node = instance.primary_node
9023 node_insts = self.rpc.call_instance_list([node],
9024 [instance.hypervisor])[node]
9025 node_insts.Raise("Can't get node information from %s" % node)
9027 if instance.name not in node_insts.payload:
9028 if instance.admin_up:
9029 state = constants.INSTST_ERRORDOWN
9031 state = constants.INSTST_ADMINDOWN
9032 raise errors.OpExecError("Instance %s is not running (state %s)" %
9033 (instance.name, state))
9035 logging.debug("Connecting to console of %s on %s", instance.name, node)
9037 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9040 def _GetInstanceConsole(cluster, instance):
9041 """Returns console information for an instance.
9043 @type cluster: L{objects.Cluster}
9044 @type instance: L{objects.Instance}
9048 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9049 # beparams and hvparams are passed separately, to avoid editing the
9050 # instance and then saving the defaults in the instance itself.
9051 hvparams = cluster.FillHV(instance)
9052 beparams = cluster.FillBE(instance)
9053 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9055 assert console.instance == instance.name
9056 assert console.Validate()
9058 return console.ToDict()
9061 class LUInstanceReplaceDisks(LogicalUnit):
9062 """Replace the disks of an instance.
9065 HPATH = "mirrors-replace"
9066 HTYPE = constants.HTYPE_INSTANCE
9069 def CheckArguments(self):
9070 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9073 def ExpandNames(self):
9074 self._ExpandAndLockInstance()
9076 assert locking.LEVEL_NODE not in self.needed_locks
9077 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9079 assert self.op.iallocator is None or self.op.remote_node is None, \
9080 "Conflicting options"
9082 if self.op.remote_node is not None:
9083 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9085 # Warning: do not remove the locking of the new secondary here
9086 # unless DRBD8.AddChildren is changed to work in parallel;
9087 # currently it doesn't since parallel invocations of
9088 # FindUnusedMinor will conflict
9089 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9090 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9092 self.needed_locks[locking.LEVEL_NODE] = []
9093 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9095 if self.op.iallocator is not None:
9096 # iallocator will select a new node in the same group
9097 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9099 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9100 self.op.iallocator, self.op.remote_node,
9101 self.op.disks, False, self.op.early_release)
9103 self.tasklets = [self.replacer]
9105 def DeclareLocks(self, level):
9106 if level == locking.LEVEL_NODEGROUP:
9107 assert self.op.remote_node is None
9108 assert self.op.iallocator is not None
9109 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9111 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9112 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9113 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9115 elif level == locking.LEVEL_NODE:
9116 if self.op.iallocator is not None:
9117 assert self.op.remote_node is None
9118 assert not self.needed_locks[locking.LEVEL_NODE]
9120 # Lock member nodes of all locked groups
9121 self.needed_locks[locking.LEVEL_NODE] = [node_name
9122 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9123 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9125 self._LockInstancesNodes()
9127 def BuildHooksEnv(self):
9130 This runs on the master, the primary and all the secondaries.
9133 instance = self.replacer.instance
9135 "MODE": self.op.mode,
9136 "NEW_SECONDARY": self.op.remote_node,
9137 "OLD_SECONDARY": instance.secondary_nodes[0],
9139 env.update(_BuildInstanceHookEnvByObject(self, instance))
9142 def BuildHooksNodes(self):
9143 """Build hooks nodes.
9146 instance = self.replacer.instance
9148 self.cfg.GetMasterNode(),
9149 instance.primary_node,
9151 if self.op.remote_node is not None:
9152 nl.append(self.op.remote_node)
9155 def CheckPrereq(self):
9156 """Check prerequisites.
9159 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9160 self.op.iallocator is None)
9162 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9164 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9166 return LogicalUnit.CheckPrereq(self)
9169 class TLReplaceDisks(Tasklet):
9170 """Replaces disks for an instance.
9172 Note: Locking is not within the scope of this class.
9175 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9176 disks, delay_iallocator, early_release):
9177 """Initializes this class.
9180 Tasklet.__init__(self, lu)
9183 self.instance_name = instance_name
9185 self.iallocator_name = iallocator_name
9186 self.remote_node = remote_node
9188 self.delay_iallocator = delay_iallocator
9189 self.early_release = early_release
9192 self.instance = None
9193 self.new_node = None
9194 self.target_node = None
9195 self.other_node = None
9196 self.remote_node_info = None
9197 self.node_secondary_ip = None
9200 def CheckArguments(mode, remote_node, iallocator):
9201 """Helper function for users of this class.
9204 # check for valid parameter combination
9205 if mode == constants.REPLACE_DISK_CHG:
9206 if remote_node is None and iallocator is None:
9207 raise errors.OpPrereqError("When changing the secondary either an"
9208 " iallocator script must be used or the"
9209 " new node given", errors.ECODE_INVAL)
9211 if remote_node is not None and iallocator is not None:
9212 raise errors.OpPrereqError("Give either the iallocator or the new"
9213 " secondary, not both", errors.ECODE_INVAL)
9215 elif remote_node is not None or iallocator is not None:
9216 # Not replacing the secondary
9217 raise errors.OpPrereqError("The iallocator and new node options can"
9218 " only be used when changing the"
9219 " secondary node", errors.ECODE_INVAL)
9222 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9223 """Compute a new secondary node using an IAllocator.
9226 ial = IAllocator(lu.cfg, lu.rpc,
9227 mode=constants.IALLOCATOR_MODE_RELOC,
9229 relocate_from=list(relocate_from))
9231 ial.Run(iallocator_name)
9234 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9235 " %s" % (iallocator_name, ial.info),
9238 if len(ial.result) != ial.required_nodes:
9239 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9240 " of nodes (%s), required %s" %
9242 len(ial.result), ial.required_nodes),
9245 remote_node_name = ial.result[0]
9247 lu.LogInfo("Selected new secondary for instance '%s': %s",
9248 instance_name, remote_node_name)
9250 return remote_node_name
9252 def _FindFaultyDisks(self, node_name):
9253 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9256 def _CheckDisksActivated(self, instance):
9257 """Checks if the instance disks are activated.
9259 @param instance: The instance to check disks
9260 @return: True if they are activated, False otherwise
9263 nodes = instance.all_nodes
9265 for idx, dev in enumerate(instance.disks):
9267 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9268 self.cfg.SetDiskID(dev, node)
9270 result = self.rpc.call_blockdev_find(node, dev)
9274 elif result.fail_msg or not result.payload:
9279 def CheckPrereq(self):
9280 """Check prerequisites.
9282 This checks that the instance is in the cluster.
9285 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9286 assert instance is not None, \
9287 "Cannot retrieve locked instance %s" % self.instance_name
9289 if instance.disk_template != constants.DT_DRBD8:
9290 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9291 " instances", errors.ECODE_INVAL)
9293 if len(instance.secondary_nodes) != 1:
9294 raise errors.OpPrereqError("The instance has a strange layout,"
9295 " expected one secondary but found %d" %
9296 len(instance.secondary_nodes),
9299 if not self.delay_iallocator:
9300 self._CheckPrereq2()
9302 def _CheckPrereq2(self):
9303 """Check prerequisites, second part.
9305 This function should always be part of CheckPrereq. It was separated and is
9306 now called from Exec because during node evacuation iallocator was only
9307 called with an unmodified cluster model, not taking planned changes into account.
9311 instance = self.instance
9312 secondary_node = instance.secondary_nodes[0]
9314 if self.iallocator_name is None:
9315 remote_node = self.remote_node
9317 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9318 instance.name, instance.secondary_nodes)
9320 if remote_node is None:
9321 self.remote_node_info = None
9323 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9324 "Remote node '%s' is not locked" % remote_node
9326 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9327 assert self.remote_node_info is not None, \
9328 "Cannot retrieve locked node %s" % remote_node
9330 if remote_node == self.instance.primary_node:
9331 raise errors.OpPrereqError("The specified node is the primary node of"
9332 " the instance", errors.ECODE_INVAL)
9334 if remote_node == secondary_node:
9335 raise errors.OpPrereqError("The specified node is already the"
9336 " secondary node of the instance",
9339 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9340 constants.REPLACE_DISK_CHG):
9341 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9344 if self.mode == constants.REPLACE_DISK_AUTO:
9345 if not self._CheckDisksActivated(instance):
9346 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9347 " first" % self.instance_name,
9349 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9350 faulty_secondary = self._FindFaultyDisks(secondary_node)
9352 if faulty_primary and faulty_secondary:
9353 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9354 " one node and can not be repaired"
9355 " automatically" % self.instance_name,
9359 self.disks = faulty_primary
9360 self.target_node = instance.primary_node
9361 self.other_node = secondary_node
9362 check_nodes = [self.target_node, self.other_node]
9363 elif faulty_secondary:
9364 self.disks = faulty_secondary
9365 self.target_node = secondary_node
9366 self.other_node = instance.primary_node
9367 check_nodes = [self.target_node, self.other_node]
9368 else:
9369 self.disks = []
9370 check_nodes = []
9372 else:
9373 # Non-automatic modes
9374 if self.mode == constants.REPLACE_DISK_PRI:
9375 self.target_node = instance.primary_node
9376 self.other_node = secondary_node
9377 check_nodes = [self.target_node, self.other_node]
9379 elif self.mode == constants.REPLACE_DISK_SEC:
9380 self.target_node = secondary_node
9381 self.other_node = instance.primary_node
9382 check_nodes = [self.target_node, self.other_node]
9384 elif self.mode == constants.REPLACE_DISK_CHG:
9385 self.new_node = remote_node
9386 self.other_node = instance.primary_node
9387 self.target_node = secondary_node
9388 check_nodes = [self.new_node, self.other_node]
9390 _CheckNodeNotDrained(self.lu, remote_node)
9391 _CheckNodeVmCapable(self.lu, remote_node)
9393 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9394 assert old_node_info is not None
9395 if old_node_info.offline and not self.early_release:
9396 # doesn't make sense to delay the release
9397 self.early_release = True
9398 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9399 " early-release mode", secondary_node)
9401 else:
9402 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9403 self.mode)
9405 # If not specified, all disks should be replaced
9406 if not self.disks:
9407 self.disks = range(len(self.instance.disks))
9409 for node in check_nodes:
9410 _CheckNodeOnline(self.lu, node)
9412 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9415 if node_name is not None)
9417 # Release unneeded node locks
9418 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9420 # Release any owned node group
9421 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9422 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9424 # Check whether disks are valid
9425 for disk_idx in self.disks:
9426 instance.FindDisk(disk_idx)
9428 # Get secondary node IP addresses
9429 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9430 in self.cfg.GetMultiNodeInfo(touched_nodes))
9432 def Exec(self, feedback_fn):
9433 """Execute disk replacement.
9435 This dispatches the disk replacement to the appropriate handler.
9438 if self.delay_iallocator:
9439 self._CheckPrereq2()
9442 # Verify owned locks before starting operation
9443 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9444 assert set(owned_nodes) == set(self.node_secondary_ip), \
9445 ("Incorrect node locks, owning %s, expected %s" %
9446 (owned_nodes, self.node_secondary_ip.keys()))
9448 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9449 assert list(owned_instances) == [self.instance_name], \
9450 "Instance '%s' not locked" % self.instance_name
9452 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9453 "Should not own any node group lock at this point"
9456 feedback_fn("No disks need replacement")
9459 feedback_fn("Replacing disk(s) %s for %s" %
9460 (utils.CommaJoin(self.disks), self.instance.name))
9462 activate_disks = (not self.instance.admin_up)
9464 # Activate the instance disks if we're replacing them on a down instance
9465 if activate_disks:
9466 _StartInstanceDisks(self.lu, self.instance, True)
9468 try:
9469 # Should we replace the secondary node?
9470 if self.new_node is not None:
9471 fn = self._ExecDrbd8Secondary
9472 else:
9473 fn = self._ExecDrbd8DiskOnly
9475 result = fn(feedback_fn)
9476 finally:
9477 # Deactivate the instance disks if we're replacing them on a
9478 # down instance
9479 if activate_disks:
9480 _SafeShutdownInstanceDisks(self.lu, self.instance)
9483 # Verify owned locks
9484 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9485 nodes = frozenset(self.node_secondary_ip)
9486 assert ((self.early_release and not owned_nodes) or
9487 (not self.early_release and not (set(owned_nodes) - nodes))), \
9488 ("Not owning the correct locks, early_release=%s, owned=%r,"
9489 " nodes=%r" % (self.early_release, owned_nodes, nodes))
9493 def _CheckVolumeGroup(self, nodes):
9494 self.lu.LogInfo("Checking volume groups")
9496 vgname = self.cfg.GetVGName()
9498 # Make sure volume group exists on all involved nodes
9499 results = self.rpc.call_vg_list(nodes)
9500 if not results:
9501 raise errors.OpExecError("Can't list volume groups on the nodes")
9503 for node in nodes:
9504 res = results[node]
9505 res.Raise("Error checking node %s" % node)
9506 if vgname not in res.payload:
9507 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9510 def _CheckDisksExistence(self, nodes):
9511 # Check disk existence
9512 for idx, dev in enumerate(self.instance.disks):
9513 if idx not in self.disks:
9514 continue
9516 for node in nodes:
9517 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9518 self.cfg.SetDiskID(dev, node)
9520 result = self.rpc.call_blockdev_find(node, dev)
9522 msg = result.fail_msg
9523 if msg or not result.payload:
9524 if not msg:
9525 msg = "disk not found"
9526 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9527 (idx, node, msg))
9529 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9530 for idx, dev in enumerate(self.instance.disks):
9531 if idx not in self.disks:
9534 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9537 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9539 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9540 " replace disks for instance %s" %
9541 (node_name, self.instance.name))
9543 def _CreateNewStorage(self, node_name):
9544 """Create new storage on the primary or secondary node.
9546 This is only used for same-node replaces, not for changing the
9547 secondary node, hence we don't want to modify the existing disk.
9550 iv_names = {}
9552 for idx, dev in enumerate(self.instance.disks):
9553 if idx not in self.disks:
9554 continue
9556 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9558 self.cfg.SetDiskID(dev, node_name)
9560 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9561 names = _GenerateUniqueNames(self.lu, lv_names)
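# Note: _GenerateUniqueNames prefixes each suffix above with a cluster-unique ID,
# so the resulting LV names typically look like "<unique-id>.disk0_data" and
# "<unique-id>.disk0_meta" (sketch, exact prefix depends on the config generator).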
9563 vg_data = dev.children[0].logical_id[0]
9564 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9565 logical_id=(vg_data, names[0]))
9566 vg_meta = dev.children[1].logical_id[0]
9567 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9568 logical_id=(vg_meta, names[1]))
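# The metadata LV is created with a fixed 128 MiB size, the same size this module
# uses when generating DRBD8 disks elsewhere.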
9570 new_lvs = [lv_data, lv_meta]
9571 old_lvs = [child.Copy() for child in dev.children]
9572 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
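# iv_names maps the instance-visible disk name (e.g. "disk/0") to a tuple of
# (DRBD device, old backing LVs, new backing LVs); the detach/rename/attach loop
# and the final old-storage cleanup below both iterate over it.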
9574 # we pass force_create=True to force the LVM creation
9575 for new_lv in new_lvs:
9576 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9577 _GetInstanceInfoText(self.instance), False)
9579 return iv_names
9581 def _CheckDevices(self, node_name, iv_names):
9582 for name, (dev, _, _) in iv_names.iteritems():
9583 self.cfg.SetDiskID(dev, node_name)
9585 result = self.rpc.call_blockdev_find(node_name, dev)
9587 msg = result.fail_msg
9588 if msg or not result.payload:
9590 msg = "disk not found"
9591 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9594 if result.payload.is_degraded:
9595 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9597 def _RemoveOldStorage(self, node_name, iv_names):
9598 for name, (_, old_lvs, _) in iv_names.iteritems():
9599 self.lu.LogInfo("Remove logical volumes for %s" % name)
9602 self.cfg.SetDiskID(lv, node_name)
9604 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9606 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9607 hint="remove unused LVs manually")
9609 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
9610 """Replace a disk on the primary or secondary for DRBD 8.
9612 The algorithm for replace is quite complicated:
9614 1. for each disk to be replaced:
9616 1. create new LVs on the target node with unique names
9617 1. detach old LVs from the drbd device
9618 1. rename old LVs to name_replaced.<time_t>
9619 1. rename new LVs to old LVs
9620 1. attach the new LVs (with the old names now) to the drbd device
9622 1. wait for sync across all devices
9624 1. for each modified disk:
9626 1. remove old LVs (which have the name name_replaced.<time_t>)
9628 Failures are not very well handled.
9633 # Step: check device activation
9634 self.lu.LogStep(1, steps_total, "Check device existence")
9635 self._CheckDisksExistence([self.other_node, self.target_node])
9636 self._CheckVolumeGroup([self.target_node, self.other_node])
9638 # Step: check other node consistency
9639 self.lu.LogStep(2, steps_total, "Check peer consistency")
9640 self._CheckDisksConsistency(self.other_node,
9641 self.other_node == self.instance.primary_node,
9644 # Step: create new storage
9645 self.lu.LogStep(3, steps_total, "Allocate new storage")
9646 iv_names = self._CreateNewStorage(self.target_node)
9648 # Step: for each lv, detach+rename*2+attach
9649 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9650 for dev, old_lvs, new_lvs in iv_names.itervalues():
9651 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9653 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9655 result.Raise("Can't detach drbd from local storage on node"
9656 " %s for device %s" % (self.target_node, dev.iv_name))
9658 #cfg.Update(instance)
9660 # ok, we created the new LVs, so now we know we have the needed
9661 # storage; as such, we proceed on the target node to rename
9662 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9663 # using the assumption that logical_id == physical_id (which in
9664 # turn is the unique_id on that node)
9666 # FIXME(iustin): use a better name for the replaced LVs
9667 temp_suffix = int(time.time())
9668 ren_fn = lambda d, suff: (d.physical_id[0],
9669 d.physical_id[1] + "_replaced-%s" % suff)
9671 # Build the rename list based on what LVs exist on the node
9672 rename_old_to_new = []
9673 for to_ren in old_lvs:
9674 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9675 if not result.fail_msg and result.payload:
9677 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9679 self.lu.LogInfo("Renaming the old LVs on the target node")
9680 result = self.rpc.call_blockdev_rename(self.target_node,
9682 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9684 # Now we rename the new LVs to the old LVs
9685 self.lu.LogInfo("Renaming the new LVs on the target node")
9686 rename_new_to_old = [(new, old.physical_id)
9687 for old, new in zip(old_lvs, new_lvs)]
9688 result = self.rpc.call_blockdev_rename(self.target_node,
9690 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9692 # Intermediate steps of in memory modifications
9693 for old, new in zip(old_lvs, new_lvs):
9694 new.logical_id = old.logical_id
9695 self.cfg.SetDiskID(new, self.target_node)
9697 # We need to modify old_lvs so that removal later removes the
9698 # right LVs, not the newly added ones; note that old_lvs is a copy here
9700 for disk in old_lvs:
9701 disk.logical_id = ren_fn(disk, temp_suffix)
9702 self.cfg.SetDiskID(disk, self.target_node)
9704 # Now that the new lvs have the old name, we can add them to the device
9705 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9706 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9708 msg = result.fail_msg
9710 for new_lv in new_lvs:
9711 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9714 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9715 hint=("cleanup manually the unused logical"
9717 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9719 cstep = 5
9720 if self.early_release:
9721 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9722 cstep += 1
9723 self._RemoveOldStorage(self.target_node, iv_names)
9724 # WARNING: we release both node locks here, do not do other RPCs
9725 # than WaitForSync to the primary node
9726 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9727 names=[self.target_node, self.other_node])
9730 # This can fail as the old devices are degraded and _WaitForSync
9731 # does a combined result over all disks, so we don't check its return value
9732 self.lu.LogStep(cstep, steps_total, "Sync devices")
9734 _WaitForSync(self.lu, self.instance)
9736 # Check all devices manually
9737 self._CheckDevices(self.instance.primary_node, iv_names)
9739 # Step: remove old storage
9740 if not self.early_release:
9741 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9743 self._RemoveOldStorage(self.target_node, iv_names)
9745 def _ExecDrbd8Secondary(self, feedback_fn):
9746 """Replace the secondary node for DRBD 8.
9748 The algorithm for replace is quite complicated:
9749 - for all disks of the instance:
9750 - create new LVs on the new node with same names
9751 - shutdown the drbd device on the old secondary
9752 - disconnect the drbd network on the primary
9753 - create the drbd device on the new secondary
9754 - network attach the drbd on the primary, using an artifice:
9755 the drbd code for Attach() will connect to the network if it
9756 finds a device which is connected to the good local disks but not network enabled
9758 - wait for sync across all devices
9759 - remove all disks from the old secondary
9761 Failures are not very well handled.
9766 # Step: check device activation
9767 self.lu.LogStep(1, steps_total, "Check device existence")
9768 self._CheckDisksExistence([self.instance.primary_node])
9769 self._CheckVolumeGroup([self.instance.primary_node])
9771 # Step: check other node consistency
9772 self.lu.LogStep(2, steps_total, "Check peer consistency")
9773 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9775 # Step: create new storage
9776 self.lu.LogStep(3, steps_total, "Allocate new storage")
9777 for idx, dev in enumerate(self.instance.disks):
9778 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9779 (self.new_node, idx))
9780 # we pass force_create=True to force LVM creation
9781 for new_lv in dev.children:
9782 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9783 _GetInstanceInfoText(self.instance), False)
9785 # Step 4: drbd minors and drbd setup changes
9786 # after this, we must manually remove the drbd minors on both the
9787 # error and the success paths
9788 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9789 minors = self.cfg.AllocateDRBDMinor([self.new_node
9790 for dev in self.instance.disks],
9792 logging.debug("Allocated minors %r", minors)
9795 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9796 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9797 (self.new_node, idx))
9798 # create new devices on new_node; note that we create two IDs:
9799 # one without port, so the drbd will be activated without
9800 # networking information on the new node at this stage, and one
9801 # with network, for the latter activation in step 4
9802 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9803 if self.instance.primary_node == o_node1:
9806 assert self.instance.primary_node == o_node2, "Three-node instance?"
9809 new_alone_id = (self.instance.primary_node, self.new_node, None,
9810 p_minor, new_minor, o_secret)
9811 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9812 p_minor, new_minor, o_secret)
9814 iv_names[idx] = (dev, dev.children, new_net_id)
9815 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9817 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9818 logical_id=new_alone_id,
9819 children=dev.children,
9820 size=dev.size)
9821 try:
9822 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9823 _GetInstanceInfoText(self.instance), False)
9824 except errors.GenericError:
9825 self.cfg.ReleaseDRBDMinors(self.instance.name)
9828 # We have new devices, shutdown the drbd on the old secondary
9829 for idx, dev in enumerate(self.instance.disks):
9830 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9831 self.cfg.SetDiskID(dev, self.target_node)
9832 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9834 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9835 "node: %s" % (idx, msg),
9836 hint=("Please cleanup this device manually as"
9837 " soon as possible"))
9839 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9840 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9841 self.node_secondary_ip,
9842 self.instance.disks)\
9843 [self.instance.primary_node]
9845 msg = result.fail_msg
9847 # detaches didn't succeed (unlikely)
9848 self.cfg.ReleaseDRBDMinors(self.instance.name)
9849 raise errors.OpExecError("Can't detach the disks from the network on"
9850 " old node: %s" % (msg,))
9852 # if we managed to detach at least one, we update all the disks of
9853 # the instance to point to the new secondary
9854 self.lu.LogInfo("Updating instance configuration")
9855 for dev, _, new_logical_id in iv_names.itervalues():
9856 dev.logical_id = new_logical_id
9857 self.cfg.SetDiskID(dev, self.instance.primary_node)
9859 self.cfg.Update(self.instance, feedback_fn)
9861 # and now perform the drbd attach
9862 self.lu.LogInfo("Attaching primary drbds to new secondary"
9863 " (standalone => connected)")
9864 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9865 self.new_node],
9866 self.node_secondary_ip,
9867 self.instance.disks,
9868 self.instance.name,
9869 False)
9870 for to_node, to_result in result.items():
9871 msg = to_result.fail_msg
9873 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9875 hint=("please do a gnt-instance info to see the"
9876 " status of disks"))
9878 if self.early_release:
9879 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9881 self._RemoveOldStorage(self.target_node, iv_names)
9882 # WARNING: we release all node locks here, do not do other RPCs
9883 # than WaitForSync to the primary node
9884 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9885 names=[self.instance.primary_node,
9890 # This can fail as the old devices are degraded and _WaitForSync
9891 # does a combined result over all disks, so we don't check its return value
9892 self.lu.LogStep(cstep, steps_total, "Sync devices")
9894 _WaitForSync(self.lu, self.instance)
9896 # Check all devices manually
9897 self._CheckDevices(self.instance.primary_node, iv_names)
9899 # Step: remove old storage
9900 if not self.early_release:
9901 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9902 self._RemoveOldStorage(self.target_node, iv_names)
9905 class LURepairNodeStorage(NoHooksLU):
9906 """Repairs the volume group on a node.
9911 def CheckArguments(self):
9912 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9914 storage_type = self.op.storage_type
9916 if (constants.SO_FIX_CONSISTENCY not in
9917 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9918 raise errors.OpPrereqError("Storage units of type '%s' can not be"
9919 " repaired" % storage_type,
9922 def ExpandNames(self):
9923 self.needed_locks = {
9924 locking.LEVEL_NODE: [self.op.node_name],
9927 def _CheckFaultyDisks(self, instance, node_name):
9928 """Ensure faulty disks abort the opcode or at least warn."""
9929 try:
9930 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9931 node_name, True):
9932 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9933 " node '%s'" % (instance.name, node_name),
9934 errors.ECODE_STATE)
9935 except errors.OpPrereqError, err:
9936 if self.op.ignore_consistency:
9937 self.proc.LogWarning(str(err.args[0]))
9938 else:
9939 raise
9941 def CheckPrereq(self):
9942 """Check prerequisites.
9945 # Check whether any instance on this node has faulty disks
9946 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9947 if not inst.admin_up:
9948 continue
9949 check_nodes = set(inst.all_nodes)
9950 check_nodes.discard(self.op.node_name)
9951 for inst_node_name in check_nodes:
9952 self._CheckFaultyDisks(inst, inst_node_name)
9954 def Exec(self, feedback_fn):
9955 feedback_fn("Repairing storage unit '%s' on %s ..." %
9956 (self.op.name, self.op.node_name))
9958 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9959 result = self.rpc.call_storage_execute(self.op.node_name,
9960 self.op.storage_type, st_args,
9962 constants.SO_FIX_CONSISTENCY)
9963 result.Raise("Failed to repair storage unit '%s' on %s" %
9964 (self.op.name, self.op.node_name))
9967 class LUNodeEvacuate(NoHooksLU):
9968 """Evacuates instances off a list of nodes.
9973 def CheckArguments(self):
9974 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9976 def ExpandNames(self):
9977 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9979 if self.op.remote_node is not None:
9980 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9981 assert self.op.remote_node
9983 if self.op.remote_node == self.op.node_name:
9984 raise errors.OpPrereqError("Can not use evacuated node as a new"
9985 " secondary node", errors.ECODE_INVAL)
9987 if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
9988 raise errors.OpPrereqError("Without the use of an iallocator only"
9989 " secondary instances can be evacuated",
9993 self.share_locks = _ShareAll()
9994 self.needed_locks = {
9995 locking.LEVEL_INSTANCE: [],
9996 locking.LEVEL_NODEGROUP: [],
9997 locking.LEVEL_NODE: [],
10000 if self.op.remote_node is None:
10001 # Iallocator will choose any node(s) in the same group
10002 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10003 else:
10004 group_nodes = frozenset([self.op.remote_node])
10006 # Determine nodes to be locked
10007 self.lock_nodes = set([self.op.node_name]) | group_nodes
10009 def _DetermineInstances(self):
10010 """Builds list of instances to operate on.
10013 assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10015 if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10016 # Primary instances only
10017 inst_fn = _GetNodePrimaryInstances
10018 assert self.op.remote_node is None, \
10019 "Evacuating primary instances requires iallocator"
10020 elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10021 # Secondary instances only
10022 inst_fn = _GetNodeSecondaryInstances
10025 assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10026 inst_fn = _GetNodeInstances
10028 return inst_fn(self.cfg, self.op.node_name)
10030 def DeclareLocks(self, level):
10031 if level == locking.LEVEL_INSTANCE:
10032 # Lock instances optimistically, needs verification once node and group
10033 # locks have been acquired
10034 self.needed_locks[locking.LEVEL_INSTANCE] = \
10035 set(i.name for i in self._DetermineInstances())
10037 elif level == locking.LEVEL_NODEGROUP:
10038 # Lock node groups optimistically, needs verification once nodes have been acquired
10040 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10041 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10043 elif level == locking.LEVEL_NODE:
10044 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10046 def CheckPrereq(self):
10048 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10049 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10050 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10052 assert owned_nodes == self.lock_nodes
10054 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10055 if owned_groups != wanted_groups:
10056 raise errors.OpExecError("Node groups changed since locks were acquired,"
10057 " current groups are '%s', used to be '%s'" %
10058 (utils.CommaJoin(wanted_groups),
10059 utils.CommaJoin(owned_groups)))
10061 # Determine affected instances
10062 self.instances = self._DetermineInstances()
10063 self.instance_names = [i.name for i in self.instances]
10065 if set(self.instance_names) != owned_instances:
10066 raise errors.OpExecError("Instances on node '%s' changed since locks"
10067 " were acquired, current instances are '%s',"
10068 " used to be '%s'" %
10069 (self.op.node_name,
10070 utils.CommaJoin(self.instance_names),
10071 utils.CommaJoin(owned_instances)))
10073 if self.instance_names:
10074 self.LogInfo("Evacuating instances from node '%s': %s",
10076 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10078 self.LogInfo("No instances to evacuate from node '%s'",
10081 if self.op.remote_node is not None:
10082 for i in self.instances:
10083 if i.primary_node == self.op.remote_node:
10084 raise errors.OpPrereqError("Node %s is the primary node of"
10085 " instance %s, cannot use it as"
10087 (self.op.remote_node, i.name),
10088 errors.ECODE_INVAL)
10090 def Exec(self, feedback_fn):
10091 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10093 if not self.instance_names:
10094 # No instances to evacuate
10097 elif self.op.iallocator is not None:
10098 # TODO: Implement relocation to other group
10099 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10100 evac_mode=self.op.mode,
10101 instances=list(self.instance_names))
10103 ial.Run(self.op.iallocator)
10105 if not ial.success:
10106 raise errors.OpPrereqError("Can't compute node evacuation using"
10107 " iallocator '%s': %s" %
10108 (self.op.iallocator, ial.info),
10109 errors.ECODE_NORES)
10111 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10113 elif self.op.remote_node is not None:
10114 assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10115 jobs = [
10116 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10117 remote_node=self.op.remote_node,
10118 disks=[],
10119 mode=constants.REPLACE_DISK_CHG,
10120 early_release=self.op.early_release)]
10121 for instance_name in self.instance_names
10122 ]
10124 else:
10125 raise errors.ProgrammerError("No iallocator or remote node")
10127 return ResultWithJobs(jobs)
10130 def _SetOpEarlyRelease(early_release, op):
10131 """Sets C{early_release} flag on opcodes if available.
10134 try:
10135 op.early_release = early_release
10136 except AttributeError:
10137 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10139 return op
10142 def _NodeEvacDest(use_nodes, group, nodes):
10143 """Returns group or nodes depending on caller's choice.
10147 return utils.CommaJoin(nodes)
10152 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10153 """Unpacks the result of change-group and node-evacuate iallocator requests.
10155 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10156 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10158 @type lu: L{LogicalUnit}
10159 @param lu: Logical unit instance
10160 @type alloc_result: tuple/list
10161 @param alloc_result: Result from iallocator
10162 @type early_release: bool
10163 @param early_release: Whether to release locks early if possible
10164 @type use_nodes: bool
10165 @param use_nodes: Whether to display node names instead of groups
10168 (moved, failed, jobs) = alloc_result
10171 lu.LogWarning("Unable to evacuate instances %s",
10172 utils.CommaJoin("%s (%s)" % (name, reason)
10173 for (name, reason) in failed))
10176 lu.LogInfo("Instances to be moved: %s",
10177 utils.CommaJoin("%s (to %s)" %
10178 (name, _NodeEvacDest(use_nodes, group, nodes))
10179 for (name, group, nodes) in moved))
10181 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10182 map(opcodes.OpCode.LoadOpCode, ops))
10183 for ops in jobs]
10186 class LUInstanceGrowDisk(LogicalUnit):
10187 """Grow a disk of an instance.
10190 HPATH = "disk-grow"
10191 HTYPE = constants.HTYPE_INSTANCE
10194 def ExpandNames(self):
10195 self._ExpandAndLockInstance()
10196 self.needed_locks[locking.LEVEL_NODE] = []
10197 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10199 def DeclareLocks(self, level):
10200 if level == locking.LEVEL_NODE:
10201 self._LockInstancesNodes()
10203 def BuildHooksEnv(self):
10204 """Build hooks env.
10206 This runs on the master, the primary and all the secondaries.
10210 "DISK": self.op.disk,
10211 "AMOUNT": self.op.amount,
10213 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10216 def BuildHooksNodes(self):
10217 """Build hooks nodes.
10220 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10221 return (nl, nl)
10223 def CheckPrereq(self):
10224 """Check prerequisites.
10226 This checks that the instance is in the cluster.
10229 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10230 assert instance is not None, \
10231 "Cannot retrieve locked instance %s" % self.op.instance_name
10232 nodenames = list(instance.all_nodes)
10233 for node in nodenames:
10234 _CheckNodeOnline(self, node)
10236 self.instance = instance
10238 if instance.disk_template not in constants.DTS_GROWABLE:
10239 raise errors.OpPrereqError("Instance's disk layout does not support"
10240 " growing", errors.ECODE_INVAL)
10242 self.disk = instance.FindDisk(self.op.disk)
10244 if instance.disk_template not in (constants.DT_FILE,
10245 constants.DT_SHARED_FILE):
10246 # TODO: check the free disk space for file, when that feature will be supported
10248 _CheckNodesFreeDiskPerVG(self, nodenames,
10249 self.disk.ComputeGrowth(self.op.amount))
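# ComputeGrowth expresses the requirement per volume group, roughly {"xenvg": 1024}
# when growing a plain LV by 1024 MiB (sketch), which is the per-VG dict format
# _CheckNodesFreeDiskPerVG expects.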
10251 def Exec(self, feedback_fn):
10252 """Execute disk grow.
10255 instance = self.instance
10256 disk = self.disk
10258 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10259 if not disks_ok:
10260 raise errors.OpExecError("Cannot activate block device to grow")
10262 # First run all grow ops in dry-run mode
10263 for node in instance.all_nodes:
10264 self.cfg.SetDiskID(disk, node)
10265 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10266 result.Raise("Grow request failed to node %s" % node)
10268 # We know that (as far as we can test) operations across different
10269 # nodes will succeed, time to run it for real
10270 for node in instance.all_nodes:
10271 self.cfg.SetDiskID(disk, node)
10272 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10273 result.Raise("Grow request failed to node %s" % node)
10275 # TODO: Rewrite code to work properly
10276 # DRBD goes into sync mode for a short amount of time after executing the
10277 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10278 # calling "resize" in sync mode fails. Sleeping for a short amount of
10279 # time is a work-around.
10280 time.sleep(5)
10282 disk.RecordGrow(self.op.amount)
10283 self.cfg.Update(instance, feedback_fn)
10284 if self.op.wait_for_sync:
10285 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10287 self.proc.LogWarning("Disk sync-ing has not returned a good"
10288 " status; please check the instance")
10289 if not instance.admin_up:
10290 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10291 elif not instance.admin_up:
10292 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10293 " not supposed to be running because no wait for"
10294 " sync mode was requested")
10297 class LUInstanceQueryData(NoHooksLU):
10298 """Query runtime instance data.
10303 def ExpandNames(self):
10304 self.needed_locks = {}
10306 # Use locking if requested or when non-static information is wanted
10307 if not (self.op.static or self.op.use_locking):
10308 self.LogWarning("Non-static data requested, locks need to be acquired")
10309 self.op.use_locking = True
10311 if self.op.instances or not self.op.use_locking:
10312 # Expand instance names right here
10313 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10314 else:
10315 # Will use acquired locks
10316 self.wanted_names = None
10318 if self.op.use_locking:
10319 self.share_locks = _ShareAll()
10321 if self.wanted_names is None:
10322 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10323 else:
10324 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10326 self.needed_locks[locking.LEVEL_NODE] = []
10327 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10329 def DeclareLocks(self, level):
10330 if self.op.use_locking and level == locking.LEVEL_NODE:
10331 self._LockInstancesNodes()
10333 def CheckPrereq(self):
10334 """Check prerequisites.
10336 This only checks the optional instance list against the existing names.
10339 if self.wanted_names is None:
10340 assert self.op.use_locking, "Locking was not used"
10341 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10343 self.wanted_instances = \
10344 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10346 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10347 """Returns the status of a block device
10350 if self.op.static or not node:
10353 self.cfg.SetDiskID(dev, node)
10355 result = self.rpc.call_blockdev_find(node, dev)
10359 result.Raise("Can't compute disk status for %s" % instance_name)
10361 status = result.payload
10365 return (status.dev_path, status.major, status.minor,
10366 status.sync_percent, status.estimated_time,
10367 status.is_degraded, status.ldisk_status)
10369 def _ComputeDiskStatus(self, instance, snode, dev):
10370 """Compute block device status.
10373 if dev.dev_type in constants.LDS_DRBD:
10374 # we change the snode then (otherwise we use the one passed in)
10375 if dev.logical_id[0] == instance.primary_node:
10376 snode = dev.logical_id[1]
10378 snode = dev.logical_id[0]
10380 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10381 instance.name, dev)
10382 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10384 if dev.children:
10385 dev_children = map(compat.partial(self._ComputeDiskStatus,
10386 instance, snode),
10387 dev.children)
10388 else:
10389 dev_children = []
10392 "iv_name": dev.iv_name,
10393 "dev_type": dev.dev_type,
10394 "logical_id": dev.logical_id,
10395 "physical_id": dev.physical_id,
10396 "pstatus": dev_pstatus,
10397 "sstatus": dev_sstatus,
10398 "children": dev_children,
10403 def Exec(self, feedback_fn):
10404 """Gather and return data"""
10407 cluster = self.cfg.GetClusterInfo()
10409 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10410 for i in self.wanted_instances)
10411 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10412 if self.op.static or pnode.offline:
10413 remote_state = None
10415 self.LogWarning("Primary node %s is marked offline, returning static"
10416 " information only for instance %s" %
10417 (pnode.name, instance.name))
10419 remote_info = self.rpc.call_instance_info(instance.primary_node,
10421 instance.hypervisor)
10422 remote_info.Raise("Error checking node %s" % instance.primary_node)
10423 remote_info = remote_info.payload
10424 if remote_info and "state" in remote_info:
10425 remote_state = "up"
10427 remote_state = "down"
10429 if instance.admin_up:
10430 config_state = "up"
10432 config_state = "down"
10434 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10437 result[instance.name] = {
10438 "name": instance.name,
10439 "config_state": config_state,
10440 "run_state": remote_state,
10441 "pnode": instance.primary_node,
10442 "snodes": instance.secondary_nodes,
10444 # this happens to be the same format used for hooks
10445 "nics": _NICListToTuple(self, instance.nics),
10446 "disk_template": instance.disk_template,
10448 "hypervisor": instance.hypervisor,
10449 "network_port": instance.network_port,
10450 "hv_instance": instance.hvparams,
10451 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10452 "be_instance": instance.beparams,
10453 "be_actual": cluster.FillBE(instance),
10454 "os_instance": instance.osparams,
10455 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10456 "serial_no": instance.serial_no,
10457 "mtime": instance.mtime,
10458 "ctime": instance.ctime,
10459 "uuid": instance.uuid,
10465 class LUInstanceSetParams(LogicalUnit):
10466 """Modifies an instances's parameters.
10469 HPATH = "instance-modify"
10470 HTYPE = constants.HTYPE_INSTANCE
10473 def CheckArguments(self):
10474 if not (self.op.nics or self.op.disks or self.op.disk_template or
10475 self.op.hvparams or self.op.beparams or self.op.os_name):
10476 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10478 if self.op.hvparams:
10479 _CheckGlobalHvParams(self.op.hvparams)
10483 for disk_op, disk_dict in self.op.disks:
10484 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10485 if disk_op == constants.DDM_REMOVE:
10486 disk_addremove += 1
10488 elif disk_op == constants.DDM_ADD:
10489 disk_addremove += 1
10491 if not isinstance(disk_op, int):
10492 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10493 if not isinstance(disk_dict, dict):
10494 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10495 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10497 if disk_op == constants.DDM_ADD:
10498 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10499 if mode not in constants.DISK_ACCESS_SET:
10500 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10501 errors.ECODE_INVAL)
10502 size = disk_dict.get(constants.IDISK_SIZE, None)
10504 raise errors.OpPrereqError("Required disk parameter size missing",
10505 errors.ECODE_INVAL)
10508 except (TypeError, ValueError), err:
10509 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10510 str(err), errors.ECODE_INVAL)
10511 disk_dict[constants.IDISK_SIZE] = size
10513 # modification of disk
10514 if constants.IDISK_SIZE in disk_dict:
10515 raise errors.OpPrereqError("Disk size change not possible, use"
10516 " grow-disk", errors.ECODE_INVAL)
10518 if disk_addremove > 1:
10519 raise errors.OpPrereqError("Only one disk add or remove operation"
10520 " supported at a time", errors.ECODE_INVAL)
10522 if self.op.disks and self.op.disk_template is not None:
10523 raise errors.OpPrereqError("Disk template conversion and other disk"
10524 " changes not supported at the same time",
10525 errors.ECODE_INVAL)
10527 if (self.op.disk_template and
10528 self.op.disk_template in constants.DTS_INT_MIRROR and
10529 self.op.remote_node is None):
10530 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10531 " one requires specifying a secondary node",
10532 errors.ECODE_INVAL)
10536 for nic_op, nic_dict in self.op.nics:
10537 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10538 if nic_op == constants.DDM_REMOVE:
10541 elif nic_op == constants.DDM_ADD:
10544 if not isinstance(nic_op, int):
10545 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10546 if not isinstance(nic_dict, dict):
10547 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10548 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10550 # nic_dict should be a dict
10551 nic_ip = nic_dict.get(constants.INIC_IP, None)
10552 if nic_ip is not None:
10553 if nic_ip.lower() == constants.VALUE_NONE:
10554 nic_dict[constants.INIC_IP] = None
10556 if not netutils.IPAddress.IsValid(nic_ip):
10557 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10558 errors.ECODE_INVAL)
10560 nic_bridge = nic_dict.get("bridge", None)
10561 nic_link = nic_dict.get(constants.INIC_LINK, None)
10562 if nic_bridge and nic_link:
10563 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10564 " at the same time", errors.ECODE_INVAL)
10565 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10566 nic_dict["bridge"] = None
10567 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10568 nic_dict[constants.INIC_LINK] = None
10570 if nic_op == constants.DDM_ADD:
10571 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10572 if nic_mac is None:
10573 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10575 if constants.INIC_MAC in nic_dict:
10576 nic_mac = nic_dict[constants.INIC_MAC]
10577 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10578 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10580 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10581 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10582 " modifying an existing nic",
10583 errors.ECODE_INVAL)
10585 if nic_addremove > 1:
10586 raise errors.OpPrereqError("Only one NIC add or remove operation"
10587 " supported at a time", errors.ECODE_INVAL)
10589 def ExpandNames(self):
10590 self._ExpandAndLockInstance()
10591 self.needed_locks[locking.LEVEL_NODE] = []
10592 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10594 def DeclareLocks(self, level):
10595 if level == locking.LEVEL_NODE:
10596 self._LockInstancesNodes()
10597 if self.op.disk_template and self.op.remote_node:
10598 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10599 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10601 def BuildHooksEnv(self):
10602 """Build hooks env.
10604 This runs on the master, primary and secondaries.
10607 args = dict()
10608 if constants.BE_MEMORY in self.be_new:
10609 args["memory"] = self.be_new[constants.BE_MEMORY]
10610 if constants.BE_VCPUS in self.be_new:
10611 args["vcpus"] = self.be_new[constants.BE_VCPUS]
10612 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10613 # information at all.
10616 nic_override = dict(self.op.nics)
10617 for idx, nic in enumerate(self.instance.nics):
10618 if idx in nic_override:
10619 this_nic_override = nic_override[idx]
10621 this_nic_override = {}
10622 if constants.INIC_IP in this_nic_override:
10623 ip = this_nic_override[constants.INIC_IP]
10626 if constants.INIC_MAC in this_nic_override:
10627 mac = this_nic_override[constants.INIC_MAC]
10630 if idx in self.nic_pnew:
10631 nicparams = self.nic_pnew[idx]
10633 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10634 mode = nicparams[constants.NIC_MODE]
10635 link = nicparams[constants.NIC_LINK]
10636 args["nics"].append((ip, mac, mode, link))
10637 if constants.DDM_ADD in nic_override:
10638 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10639 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10640 nicparams = self.nic_pnew[constants.DDM_ADD]
10641 mode = nicparams[constants.NIC_MODE]
10642 link = nicparams[constants.NIC_LINK]
10643 args["nics"].append((ip, mac, mode, link))
10644 elif constants.DDM_REMOVE in nic_override:
10645 del args["nics"][-1]
10647 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10648 if self.op.disk_template:
10649 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10653 def BuildHooksNodes(self):
10654 """Build hooks nodes.
10657 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10660 def CheckPrereq(self):
10661 """Check prerequisites.
10663 This only checks the instance list against the existing names.
10666 # checking the new params on the primary/secondary nodes
10668 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10669 cluster = self.cluster = self.cfg.GetClusterInfo()
10670 assert self.instance is not None, \
10671 "Cannot retrieve locked instance %s" % self.op.instance_name
10672 pnode = instance.primary_node
10673 nodelist = list(instance.all_nodes)
10676 if self.op.os_name and not self.op.force:
10677 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10678 self.op.force_variant)
10679 instance_os = self.op.os_name
10681 instance_os = instance.os
10683 if self.op.disk_template:
10684 if instance.disk_template == self.op.disk_template:
10685 raise errors.OpPrereqError("Instance already has disk template %s" %
10686 instance.disk_template, errors.ECODE_INVAL)
10688 if (instance.disk_template,
10689 self.op.disk_template) not in self._DISK_CONVERSIONS:
10690 raise errors.OpPrereqError("Unsupported disk template conversion from"
10691 " %s to %s" % (instance.disk_template,
10692 self.op.disk_template),
10693 errors.ECODE_INVAL)
10694 _CheckInstanceDown(self, instance, "cannot change disk template")
10695 if self.op.disk_template in constants.DTS_INT_MIRROR:
10696 if self.op.remote_node == pnode:
10697 raise errors.OpPrereqError("Given new secondary node %s is the same"
10698 " as the primary node of the instance" %
10699 self.op.remote_node, errors.ECODE_STATE)
10700 _CheckNodeOnline(self, self.op.remote_node)
10701 _CheckNodeNotDrained(self, self.op.remote_node)
10702 # FIXME: here we assume that the old instance type is DT_PLAIN
10703 assert instance.disk_template == constants.DT_PLAIN
10704 disks = [{constants.IDISK_SIZE: d.size,
10705 constants.IDISK_VG: d.logical_id[0]}
10706 for d in instance.disks]
10707 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10708 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10710 # hvparams processing
10711 if self.op.hvparams:
10712 hv_type = instance.hypervisor
10713 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10714 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10715 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10718 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10719 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10720 self.hv_new = hv_new # the new actual values
10721 self.hv_inst = i_hvdict # the new dict (without defaults)
10723 self.hv_new = self.hv_inst = {}
10725 # beparams processing
10726 if self.op.beparams:
10727 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10729 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10730 be_new = cluster.SimpleFillBE(i_bedict)
10731 self.be_new = be_new # the new actual values
10732 self.be_inst = i_bedict # the new dict (without defaults)
10734 self.be_new = self.be_inst = {}
10735 be_old = cluster.FillBE(instance)
10737 # osparams processing
10738 if self.op.osparams:
10739 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10740 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10741 self.os_inst = i_osdict # the new dict (without defaults)
10742 else:
10743 self.os_inst = {}
10745 self.warn = []
10747 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10748 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10749 mem_check_list = [pnode]
10750 if be_new[constants.BE_AUTO_BALANCE]:
10751 # either we changed auto_balance to yes or it was from before
10752 mem_check_list.extend(instance.secondary_nodes)
10753 instance_info = self.rpc.call_instance_info(pnode, instance.name,
10754 instance.hypervisor)
10755 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10756 instance.hypervisor)
10757 pninfo = nodeinfo[pnode]
10758 msg = pninfo.fail_msg
10760 # Assume the primary node is unreachable and go ahead
10761 self.warn.append("Can't get info from primary node %s: %s" %
10763 elif not isinstance(pninfo.payload.get("memory_free", None), int):
10764 self.warn.append("Node data from primary node %s doesn't contain"
10765 " free memory information" % pnode)
10766 elif instance_info.fail_msg:
10767 self.warn.append("Can't get instance runtime information: %s" %
10768 instance_info.fail_msg)
10770 if instance_info.payload:
10771 current_mem = int(instance_info.payload["memory"])
10773 # Assume instance not running
10774 # (there is a slight race condition here, but it's not very probable,
10775 # and we have no other way to check)
10777 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10778 pninfo.payload["memory_free"])
10780 raise errors.OpPrereqError("This change will prevent the instance"
10781 " from starting, due to %d MB of memory"
10782 " missing on its primary node" % miss_mem,
10783 errors.ECODE_NORES)
10785 if be_new[constants.BE_AUTO_BALANCE]:
10786 for node, nres in nodeinfo.items():
10787 if node not in instance.secondary_nodes:
10789 nres.Raise("Can't get info from secondary node %s" % node,
10790 prereq=True, ecode=errors.ECODE_STATE)
10791 if not isinstance(nres.payload.get("memory_free", None), int):
10792 raise errors.OpPrereqError("Secondary node %s didn't return free"
10793 " memory information" % node,
10794 errors.ECODE_STATE)
10795 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10796 raise errors.OpPrereqError("This change will prevent the instance"
10797 " from failover to its secondary node"
10798 " %s, due to not enough memory" % node,
10799 errors.ECODE_STATE)
10803 self.nic_pinst = {}
10804 for nic_op, nic_dict in self.op.nics:
10805 if nic_op == constants.DDM_REMOVE:
10806 if not instance.nics:
10807 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10808 errors.ECODE_INVAL)
10810 if nic_op != constants.DDM_ADD:
10812 if not instance.nics:
10813 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10814 " no NICs" % nic_op,
10815 errors.ECODE_INVAL)
10816 if nic_op < 0 or nic_op >= len(instance.nics):
10817 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10819 (nic_op, len(instance.nics) - 1),
10820 errors.ECODE_INVAL)
10821 old_nic_params = instance.nics[nic_op].nicparams
10822 old_nic_ip = instance.nics[nic_op].ip
10824 old_nic_params = {}
10827 update_params_dict = dict([(key, nic_dict[key])
10828 for key in constants.NICS_PARAMETERS
10829 if key in nic_dict])
10831 if "bridge" in nic_dict:
10832 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10834 new_nic_params = _GetUpdatedParams(old_nic_params,
10835 update_params_dict)
10836 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10837 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10838 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10839 self.nic_pinst[nic_op] = new_nic_params
10840 self.nic_pnew[nic_op] = new_filled_nic_params
10841 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10843 if new_nic_mode == constants.NIC_MODE_BRIDGED:
10844 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10845 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10847 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10849 self.warn.append(msg)
10851 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10852 if new_nic_mode == constants.NIC_MODE_ROUTED:
10853 if constants.INIC_IP in nic_dict:
10854 nic_ip = nic_dict[constants.INIC_IP]
10856 nic_ip = old_nic_ip
10858 raise errors.OpPrereqError("Cannot set the nic ip to None"
10859 " on a routed nic", errors.ECODE_INVAL)
10860 if constants.INIC_MAC in nic_dict:
10861 nic_mac = nic_dict[constants.INIC_MAC]
10862 if nic_mac is None:
10863 raise errors.OpPrereqError("Cannot set the nic mac to None",
10864 errors.ECODE_INVAL)
10865 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10866 # otherwise generate the mac
10867 nic_dict[constants.INIC_MAC] = \
10868 self.cfg.GenerateMAC(self.proc.GetECId())
10870 # or validate/reserve the current one
10872 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10873 except errors.ReservationError:
10874 raise errors.OpPrereqError("MAC address %s already in use"
10875 " in cluster" % nic_mac,
10876 errors.ECODE_NOTUNIQUE)
10879 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10880 raise errors.OpPrereqError("Disk operations not supported for"
10881 " diskless instances",
10882 errors.ECODE_INVAL)
10883 for disk_op, _ in self.op.disks:
10884 if disk_op == constants.DDM_REMOVE:
10885 if len(instance.disks) == 1:
10886 raise errors.OpPrereqError("Cannot remove the last disk of"
10887 " an instance", errors.ECODE_INVAL)
10888 _CheckInstanceDown(self, instance, "cannot remove disks")
10890 if (disk_op == constants.DDM_ADD and
10891 len(instance.disks) >= constants.MAX_DISKS):
10892 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10893 " add more" % constants.MAX_DISKS,
10894 errors.ECODE_STATE)
10895 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10897 if disk_op < 0 or disk_op >= len(instance.disks):
10898 raise errors.OpPrereqError("Invalid disk index %s, valid values"
10900 (disk_op, len(instance.disks)),
10901 errors.ECODE_INVAL)
10905 def _ConvertPlainToDrbd(self, feedback_fn):
10906 """Converts an instance from plain to drbd.
10909 feedback_fn("Converting template to drbd")
10910 instance = self.instance
10911 pnode = instance.primary_node
10912 snode = self.op.remote_node
10914 # create a fake disk info for _GenerateDiskTemplate
10915 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10916 constants.IDISK_VG: d.logical_id[0]}
10917 for d in instance.disks]
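# With the usual constant values this yields entries of the form (sketch)
# {"size": <MiB>, "mode": "rw", "vg": "xenvg"}, one per existing plain disk,
# matching the disk_info argument _GenerateDiskTemplate expects.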
10918 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10919 instance.name, pnode, [snode],
10920 disk_info, None, None, 0, feedback_fn)
10921 info = _GetInstanceInfoText(instance)
10922 feedback_fn("Creating aditional volumes...")
10923 # first, create the missing data and meta devices
10924 for disk in new_disks:
10925 # unfortunately this is... not too nice
10926 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10928 for child in disk.children:
10929 _CreateSingleBlockDev(self, snode, instance, child, info, True)
10930 # at this stage, all new LVs have been created, we can rename the old ones
10932 feedback_fn("Renaming original volumes...")
10933 rename_list = [(o, n.children[0].logical_id)
10934 for (o, n) in zip(instance.disks, new_disks)]
10935 result = self.rpc.call_blockdev_rename(pnode, rename_list)
10936 result.Raise("Failed to rename original LVs")
10938 feedback_fn("Initializing DRBD devices...")
10939 # all child devices are in place, we can now create the DRBD devices
10940 for disk in new_disks:
10941 for node in [pnode, snode]:
10942 f_create = node == pnode
10943 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10945 # at this point, the instance has been modified
10946 instance.disk_template = constants.DT_DRBD8
10947 instance.disks = new_disks
10948 self.cfg.Update(instance, feedback_fn)
10950 # disks are created, waiting for sync
10951 disk_abort = not _WaitForSync(self, instance,
10952 oneshot=not self.op.wait_for_sync)
10954 raise errors.OpExecError("There are some degraded disks for"
10955 " this instance, please cleanup manually")
10957 def _ConvertDrbdToPlain(self, feedback_fn):
10958 """Converts an instance from drbd to plain.
10961 instance = self.instance
10962 assert len(instance.secondary_nodes) == 1
10963 pnode = instance.primary_node
10964 snode = instance.secondary_nodes[0]
10965 feedback_fn("Converting template to plain")
10967 old_disks = instance.disks
10968 new_disks = [d.children[0] for d in old_disks]
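# For a DRBD8 disk, children[0] is the data LV and children[1] the metadata LV,
# so keeping only children[0] turns each disk into a plain LV.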
10970 # copy over size and mode
10971 for parent, child in zip(old_disks, new_disks):
10972 child.size = parent.size
10973 child.mode = parent.mode
10975 # update instance structure
10976 instance.disks = new_disks
10977 instance.disk_template = constants.DT_PLAIN
10978 self.cfg.Update(instance, feedback_fn)
10980 feedback_fn("Removing volumes on the secondary node...")
10981 for disk in old_disks:
10982 self.cfg.SetDiskID(disk, snode)
10983 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10985 self.LogWarning("Could not remove block device %s on node %s,"
10986 " continuing anyway: %s", disk.iv_name, snode, msg)
10988 feedback_fn("Removing unneeded volumes on the primary node...")
10989 for idx, disk in enumerate(old_disks):
10990 meta = disk.children[1]
10991 self.cfg.SetDiskID(meta, pnode)
10992 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10994 self.LogWarning("Could not remove metadata for disk %d on node %s,"
10995 " continuing anyway: %s", idx, pnode, msg)
10997 def Exec(self, feedback_fn):
10998 """Modifies an instance.
11000 All parameters take effect only at the next restart of the instance.
11003 # Process here the warnings from CheckPrereq, as we don't have a
11004 # feedback_fn there.
11005 for warn in self.warn:
11006 feedback_fn("WARNING: %s" % warn)
11008 result = []
11009 instance = self.instance
11011 for disk_op, disk_dict in self.op.disks:
11012 if disk_op == constants.DDM_REMOVE:
11013 # remove the last disk
11014 device = instance.disks.pop()
11015 device_idx = len(instance.disks)
11016 for node, disk in device.ComputeNodeTree(instance.primary_node):
11017 self.cfg.SetDiskID(disk, node)
11018 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11020 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11021 " continuing anyway", device_idx, node, msg)
11022 result.append(("disk/%d" % device_idx, "remove"))
11023 elif disk_op == constants.DDM_ADD:
11025 if instance.disk_template in (constants.DT_FILE,
11026 constants.DT_SHARED_FILE):
11027 file_driver, file_path = instance.disks[0].logical_id
11028 file_path = os.path.dirname(file_path)
11030 file_driver = file_path = None
11031 disk_idx_base = len(instance.disks)
11032 new_disk = _GenerateDiskTemplate(self,
11033 instance.disk_template,
11034 instance.name, instance.primary_node,
11035 instance.secondary_nodes,
11039 disk_idx_base, feedback_fn)[0]
11040 instance.disks.append(new_disk)
11041 info = _GetInstanceInfoText(instance)
11043 logging.info("Creating volume %s for instance %s",
11044 new_disk.iv_name, instance.name)
11045 # Note: this needs to be kept in sync with _CreateDisks
11047 for node in instance.all_nodes:
11048 f_create = node == instance.primary_node
11050 _CreateBlockDev(self, node, instance, new_disk,
11051 f_create, info, f_create)
11052 except errors.OpExecError, err:
11053 self.LogWarning("Failed to create volume %s (%s) on"
11055 new_disk.iv_name, new_disk, node, err)
11056 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11057 (new_disk.size, new_disk.mode)))
11059 # change a given disk
11060 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11061 result.append(("disk.mode/%d" % disk_op,
11062 disk_dict[constants.IDISK_MODE]))
11064 if self.op.disk_template:
11065 r_shut = _ShutdownInstanceDisks(self, instance)
11067 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11068 " proceed with disk template conversion")
11069 mode = (instance.disk_template, self.op.disk_template)
11071 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11073 self.cfg.ReleaseDRBDMinors(instance.name)
11075 result.append(("disk_template", self.op.disk_template))
11078 for nic_op, nic_dict in self.op.nics:
11079 if nic_op == constants.DDM_REMOVE:
11080 # remove the last nic
11081 del instance.nics[-1]
11082 result.append(("nic.%d" % len(instance.nics), "remove"))
11083 elif nic_op == constants.DDM_ADD:
11084 # mac and bridge should be set by now
11085 mac = nic_dict[constants.INIC_MAC]
11086 ip = nic_dict.get(constants.INIC_IP, None)
11087 nicparams = self.nic_pinst[constants.DDM_ADD]
11088 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11089 instance.nics.append(new_nic)
11090 result.append(("nic.%d" % (len(instance.nics) - 1),
11091 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11092 (new_nic.mac, new_nic.ip,
11093 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11094 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11097 for key in (constants.INIC_MAC, constants.INIC_IP):
11098 if key in nic_dict:
11099 setattr(instance.nics[nic_op], key, nic_dict[key])
11100 if nic_op in self.nic_pinst:
11101 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11102 for key, val in nic_dict.iteritems():
11103 result.append(("nic.%s/%d" % (key, nic_op), val))
11106 if self.op.hvparams:
11107 instance.hvparams = self.hv_inst
11108 for key, val in self.op.hvparams.iteritems():
11109 result.append(("hv/%s" % key, val))
11112 if self.op.beparams:
11113 instance.beparams = self.be_inst
11114 for key, val in self.op.beparams.iteritems():
11115 result.append(("be/%s" % key, val))
11118 if self.op.os_name:
11119 instance.os = self.op.os_name
11122 if self.op.osparams:
11123 instance.osparams = self.os_inst
11124 for key, val in self.op.osparams.iteritems():
11125 result.append(("os/%s" % key, val))
11127 self.cfg.Update(instance, feedback_fn)
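# Illustrative sketch (not part of the LU): Exec() reports its changes as a
# list of (parameter, new value) pairs; a run that added a disk, changed the
# memory and converted the disk template might return something shaped like
# the hypothetical list below (all values invented).
_EXAMPLE_SETPARAMS_RESULT = [
  ("disk/1", "add:size=1024,mode=rw"),
  ("be/memory", 512),
  ("disk_template", "drbd"),
]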
11131 _DISK_CONVERSIONS = {
11132 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11133 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11137 class LUInstanceChangeGroup(LogicalUnit):
11138 HPATH = "instance-change-group"
11139 HTYPE = constants.HTYPE_INSTANCE
11142 def ExpandNames(self):
11143 self.share_locks = _ShareAll()
11144 self.needed_locks = {
11145 locking.LEVEL_NODEGROUP: [],
11146 locking.LEVEL_NODE: [],
11149 self._ExpandAndLockInstance()
11151 if self.op.target_groups:
11152 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11153 self.op.target_groups)
11155 self.req_target_uuids = None
11157 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11159 def DeclareLocks(self, level):
11160 if level == locking.LEVEL_NODEGROUP:
11161 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11163 if self.req_target_uuids:
11164 lock_groups = set(self.req_target_uuids)
11166 # Lock all groups used by instance optimistically; this requires going
11167 # via the node before it's locked, requiring verification later on
11168 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11169 lock_groups.update(instance_groups)
11171 # No target groups, need to lock all of them
11172 lock_groups = locking.ALL_SET
11174 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11176 elif level == locking.LEVEL_NODE:
11177 if self.req_target_uuids:
11178 # Lock all nodes used by instances
11179 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11180 self._LockInstancesNodes()
11182 # Lock all nodes in all potential target groups
11183 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11184 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11185 member_nodes = [node_name
11186 for group in lock_groups
11187 for node_name in self.cfg.GetNodeGroup(group).members]
11188 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11190 # Lock all nodes as all groups are potential targets
11191 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11193 def CheckPrereq(self):
11194 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11195 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11196 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11198 assert (self.req_target_uuids is None or
11199 owned_groups.issuperset(self.req_target_uuids))
11200 assert owned_instances == set([self.op.instance_name])
11202 # Get instance information
11203 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11205 # Check if node groups for locked instance are still correct
11206 assert owned_nodes.issuperset(self.instance.all_nodes), \
11207 ("Instance %s's nodes changed while we kept the lock" %
11208 self.op.instance_name)
11210 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11213 if self.req_target_uuids:
11214 # User requested specific target groups
11215 self.target_uuids = self.req_target_uuids
11217 # All groups except those used by the instance are potential targets
11218 self.target_uuids = owned_groups - inst_groups
11220 conflicting_groups = self.target_uuids & inst_groups
11221 if conflicting_groups:
11222 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11223 " used by the instance '%s'" %
11224 (utils.CommaJoin(conflicting_groups),
11225 self.op.instance_name),
11226 errors.ECODE_INVAL)
11228 if not self.target_uuids:
11229 raise errors.OpPrereqError("There are no possible target groups",
11230 errors.ECODE_INVAL)
11232 def BuildHooksEnv(self):
11233 """Build hooks env.
11236 assert self.target_uuids
11239 "TARGET_GROUPS": " ".join(self.target_uuids),
11242 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11246 def BuildHooksNodes(self):
11247 """Build hooks nodes.
11250 mn = self.cfg.GetMasterNode()
11251 return ([mn], [mn])
11253 def Exec(self, feedback_fn):
11254 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11256 assert instances == [self.op.instance_name], "Instance not locked"
11258 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11259 instances=instances, target_groups=list(self.target_uuids))
11261 ial.Run(self.op.iallocator)
11263 if not ial.success:
11264 raise errors.OpPrereqError("Can't compute solution for changing group of"
11265 " instance '%s' using iallocator '%s': %s" %
11266 (self.op.instance_name, self.op.iallocator,
11268 errors.ECODE_NORES)
11270 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11272 self.LogInfo("Iallocator returned %s job(s) for changing group of"
11273 " instance '%s'", len(jobs), self.op.instance_name)
11275 return ResultWithJobs(jobs)
11278 class LUBackupQuery(NoHooksLU):
11279 """Query the exports list
11284 def ExpandNames(self):
11285 self.needed_locks = {}
11286 self.share_locks[locking.LEVEL_NODE] = 1
11287 if not self.op.nodes:
11288 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11290 self.needed_locks[locking.LEVEL_NODE] = \
11291 _GetWantedNodes(self, self.op.nodes)
11293 def Exec(self, feedback_fn):
11294 """Compute the list of all the exported system images.
11297 @return: a dictionary with the structure node->(export-list)
11298 where export-list is a list of the instances exported on that node
11302 self.nodes = self.owned_locks(locking.LEVEL_NODE)
11303 rpcresult = self.rpc.call_export_list(self.nodes)
11305 for node in rpcresult:
11306 if rpcresult[node].fail_msg:
11307 result[node] = False
11309 result[node] = rpcresult[node].payload
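# Illustrative sketch (hypothetical node/instance names) of the mapping
# built above: nodes whose RPC failed map to False, the others map to the
# list of instance names whose exports they currently hold.
_EXAMPLE_EXPORT_LIST = {
  "node1.example.com": ["inst1.example.com", "inst2.example.com"],
  "node2.example.com": False,
}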
11314 class LUBackupPrepare(NoHooksLU):
11315 """Prepares an instance for an export and returns useful information.
11320 def ExpandNames(self):
11321 self._ExpandAndLockInstance()
11323 def CheckPrereq(self):
11324 """Check prerequisites.
11327 instance_name = self.op.instance_name
11329 self.instance = self.cfg.GetInstanceInfo(instance_name)
11330 assert self.instance is not None, \
11331 "Cannot retrieve locked instance %s" % self.op.instance_name
11332 _CheckNodeOnline(self, self.instance.primary_node)
11334 self._cds = _GetClusterDomainSecret()
11336 def Exec(self, feedback_fn):
11337 """Prepares an instance for an export.
11340 instance = self.instance
11342 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11343 salt = utils.GenerateSecret(8)
11345 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11346 result = self.rpc.call_x509_cert_create(instance.primary_node,
11347 constants.RIE_CERT_VALIDITY)
11348 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11350 (name, cert_pem) = result.payload
11352 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11356 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11357 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11359 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11365 class LUBackupExport(LogicalUnit):
11366 """Export an instance to an image in the cluster.
11369 HPATH = "instance-export"
11370 HTYPE = constants.HTYPE_INSTANCE
11373 def CheckArguments(self):
11374 """Check the arguments.
11377 self.x509_key_name = self.op.x509_key_name
11378 self.dest_x509_ca_pem = self.op.destination_x509_ca
11380 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11381 if not self.x509_key_name:
11382 raise errors.OpPrereqError("Missing X509 key name for encryption",
11383 errors.ECODE_INVAL)
11385 if not self.dest_x509_ca_pem:
11386 raise errors.OpPrereqError("Missing destination X509 CA",
11387 errors.ECODE_INVAL)
11389 def ExpandNames(self):
11390 self._ExpandAndLockInstance()
11392 # Lock all nodes for local exports
11393 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11394 # FIXME: lock only instance primary and destination node
11396 # Sad but true, for now we have to lock all nodes, as we don't know where
11397 # the previous export might be, and in this LU we search for it and
11398 # remove it from its current node. In the future we could fix this by:
11399 # - making a tasklet to search (share-lock all), then create the
11400 # new one, then one to remove, after
11401 # - removing the removal operation altogether
11402 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11404 def DeclareLocks(self, level):
11405 """Last minute lock declaration."""
11406 # All nodes are locked anyway, so nothing to do here.
11408 def BuildHooksEnv(self):
11409 """Build hooks env.
11411 This will run on the master, primary node and target node.
11415 "EXPORT_MODE": self.op.mode,
11416 "EXPORT_NODE": self.op.target_node,
11417 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11418 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11419 # TODO: Generic function for boolean env variables
11420 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11423 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11427 def BuildHooksNodes(self):
11428 """Build hooks nodes.
11431 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11433 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11434 nl.append(self.op.target_node)
11438 def CheckPrereq(self):
11439 """Check prerequisites.
11441 This checks that the instance and node names are valid.
11444 instance_name = self.op.instance_name
11446 self.instance = self.cfg.GetInstanceInfo(instance_name)
11447 assert self.instance is not None, \
11448 "Cannot retrieve locked instance %s" % self.op.instance_name
11449 _CheckNodeOnline(self, self.instance.primary_node)
11451 if (self.op.remove_instance and self.instance.admin_up and
11452 not self.op.shutdown):
11453 raise errors.OpPrereqError("Can not remove instance without shutting it"
11456 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11457 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11458 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11459 assert self.dst_node is not None
11461 _CheckNodeOnline(self, self.dst_node.name)
11462 _CheckNodeNotDrained(self, self.dst_node.name)
11465 self.dest_disk_info = None
11466 self.dest_x509_ca = None
11468 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11469 self.dst_node = None
11471 if len(self.op.target_node) != len(self.instance.disks):
11472 raise errors.OpPrereqError(("Received destination information for %s"
11473 " disks, but instance %s has %s disks") %
11474 (len(self.op.target_node), instance_name,
11475 len(self.instance.disks)),
11476 errors.ECODE_INVAL)
11478 cds = _GetClusterDomainSecret()
11480 # Check X509 key name
11482 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11483 except (TypeError, ValueError), err:
11484 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11486 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11487 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11488 errors.ECODE_INVAL)
11490 # Load and verify CA
11492 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11493 except OpenSSL.crypto.Error, err:
11494 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11495 (err, ), errors.ECODE_INVAL)
11497 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11498 if errcode is not None:
11499 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11500 (msg, ), errors.ECODE_INVAL)
11502 self.dest_x509_ca = cert
11504 # Verify target information
11506 for idx, disk_data in enumerate(self.op.target_node):
11508 (host, port, magic) = \
11509 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11510 except errors.GenericError, err:
11511 raise errors.OpPrereqError("Target info for disk %s: %s" %
11512 (idx, err), errors.ECODE_INVAL)
11514 disk_info.append((host, port, magic))
11516 assert len(disk_info) == len(self.op.target_node)
11517 self.dest_disk_info = disk_info
11520 raise errors.ProgrammerError("Unhandled export mode %r" %
11523 # instance disk type verification
11524 # TODO: Implement export support for file-based disks
11525 for disk in self.instance.disks:
11526 if disk.dev_type == constants.LD_FILE:
11527 raise errors.OpPrereqError("Export not supported for instances with"
11528 " file-based disks", errors.ECODE_INVAL)
11530 def _CleanupExports(self, feedback_fn):
11531 """Removes exports of current instance from all other nodes.
11533 If an instance in a cluster with nodes A..D was exported to node C, its
11534 exports will be removed from the nodes A, B and D.
11537 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11539 nodelist = self.cfg.GetNodeList()
11540 nodelist.remove(self.dst_node.name)
11542 # on one-node clusters nodelist will be empty after the removal
11543 # if we proceed the backup would be removed because OpBackupQuery
11544 # substitutes an empty list with the full cluster node list.
11545 iname = self.instance.name
11547 feedback_fn("Removing old exports for instance %s" % iname)
11548 exportlist = self.rpc.call_export_list(nodelist)
11549 for node in exportlist:
11550 if exportlist[node].fail_msg:
11552 if iname in exportlist[node].payload:
11553 msg = self.rpc.call_export_remove(node, iname).fail_msg
11555 self.LogWarning("Could not remove older export for instance %s"
11556 " on node %s: %s", iname, node, msg)
11558 def Exec(self, feedback_fn):
11559 """Export an instance to an image in the cluster.
11562 assert self.op.mode in constants.EXPORT_MODES
11564 instance = self.instance
11565 src_node = instance.primary_node
11567 if self.op.shutdown:
11568 # shutdown the instance, but not the disks
11569 feedback_fn("Shutting down instance %s" % instance.name)
11570 result = self.rpc.call_instance_shutdown(src_node, instance,
11571 self.op.shutdown_timeout)
11572 # TODO: Maybe ignore failures if ignore_remove_failures is set
11573 result.Raise("Could not shutdown instance %s on"
11574 " node %s" % (instance.name, src_node))
11576 # set the disks ID correctly since call_instance_start needs the
11577 # correct drbd minor to create the symlinks
11578 for disk in instance.disks:
11579 self.cfg.SetDiskID(disk, src_node)
11581 activate_disks = (not instance.admin_up)
11584 # Activate the instance disks if we're exporting a stopped instance
11585 feedback_fn("Activating disks for %s" % instance.name)
11586 _StartInstanceDisks(self, instance, None)
11589 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11592 helper.CreateSnapshots()
11594 if (self.op.shutdown and instance.admin_up and
11595 not self.op.remove_instance):
11596 assert not activate_disks
11597 feedback_fn("Starting instance %s" % instance.name)
11598 result = self.rpc.call_instance_start(src_node, instance,
11600 msg = result.fail_msg
11602 feedback_fn("Failed to start instance: %s" % msg)
11603 _ShutdownInstanceDisks(self, instance)
11604 raise errors.OpExecError("Could not start instance: %s" % msg)
11606 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11607 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11608 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11609 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11610 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11612 (key_name, _, _) = self.x509_key_name
11615 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11618 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11619 key_name, dest_ca_pem,
11624 # Check for backwards compatibility
11625 assert len(dresults) == len(instance.disks)
11626 assert compat.all(isinstance(i, bool) for i in dresults), \
11627 "Not all results are boolean: %r" % dresults
11631 feedback_fn("Deactivating disks for %s" % instance.name)
11632 _ShutdownInstanceDisks(self, instance)
11634 if not (compat.all(dresults) and fin_resu):
11637 failures.append("export finalization")
11638 if not compat.all(dresults):
11639 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11641 failures.append("disk export: disk(s) %s" % fdsk)
11643 raise errors.OpExecError("Export failed, errors in %s" %
11644 utils.CommaJoin(failures))
11646 # At this point, the export was successful, we can cleanup/finish
11648 # Remove instance if requested
11649 if self.op.remove_instance:
11650 feedback_fn("Removing instance %s" % instance.name)
11651 _RemoveInstance(self, feedback_fn, instance,
11652 self.op.ignore_remove_failures)
11654 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11655 self._CleanupExports(feedback_fn)
11657 return fin_resu, dresults
11660 class LUBackupRemove(NoHooksLU):
11661 """Remove exports related to the named instance.
11666 def ExpandNames(self):
11667 self.needed_locks = {}
11668 # We need all nodes to be locked in order for RemoveExport to work, but we
11669 # don't need to lock the instance itself, as nothing will happen to it (and
11670 # we can remove exports also for a removed instance)
11671 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11673 def Exec(self, feedback_fn):
11674 """Remove any export.
11677 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11678 # If the instance was not found we'll try with the name that was passed in.
11679 # This will only work if it was an FQDN, though.
11681 if not instance_name:
11683 instance_name = self.op.instance_name
11685 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11686 exportlist = self.rpc.call_export_list(locked_nodes)
11688 for node in exportlist:
11689 msg = exportlist[node].fail_msg
11691 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11693 if instance_name in exportlist[node].payload:
11695 result = self.rpc.call_export_remove(node, instance_name)
11696 msg = result.fail_msg
11698 logging.error("Could not remove export for instance %s"
11699 " on node %s: %s", instance_name, node, msg)
11701 if fqdn_warn and not found:
11702 feedback_fn("Export not found. If trying to remove an export belonging"
11703 " to a deleted instance please use its Fully Qualified"
11707 class LUGroupAdd(LogicalUnit):
11708 """Logical unit for creating node groups.
11711 HPATH = "group-add"
11712 HTYPE = constants.HTYPE_GROUP
11715 def ExpandNames(self):
11716 # We need the new group's UUID here so that we can create and acquire the
11717 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11718 # that it should not check whether the UUID exists in the configuration.
11719 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11720 self.needed_locks = {}
11721 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11723 def CheckPrereq(self):
11724 """Check prerequisites.
11726 This checks that the given group name is not an existing node group
11731 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11732 except errors.OpPrereqError:
11735 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11736 " node group (UUID: %s)" %
11737 (self.op.group_name, existing_uuid),
11738 errors.ECODE_EXISTS)
11740 if self.op.ndparams:
11741 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11743 def BuildHooksEnv(self):
11744 """Build hooks env.
11748 "GROUP_NAME": self.op.group_name,
11751 def BuildHooksNodes(self):
11752 """Build hooks nodes.
11755 mn = self.cfg.GetMasterNode()
11756 return ([mn], [mn])
11758 def Exec(self, feedback_fn):
11759 """Add the node group to the cluster.
11762 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11763 uuid=self.group_uuid,
11764 alloc_policy=self.op.alloc_policy,
11765 ndparams=self.op.ndparams)
11767 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11768 del self.remove_locks[locking.LEVEL_NODEGROUP]
11771 class LUGroupAssignNodes(NoHooksLU):
11772 """Logical unit for assigning nodes to groups.
11777 def ExpandNames(self):
11778 # These raise errors.OpPrereqError on their own:
11779 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11780 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11782 # We want to lock all the affected nodes and groups. We have readily
11783 # available the list of nodes, and the *destination* group. To gather the
11784 # list of "source" groups, we need to fetch node information later on.
11785 self.needed_locks = {
11786 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11787 locking.LEVEL_NODE: self.op.nodes,
11790 def DeclareLocks(self, level):
11791 if level == locking.LEVEL_NODEGROUP:
11792 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11794 # Try to get all affected nodes' groups without having the group or node
11795 # lock yet. Needs verification later in the code flow.
11796 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11798 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11800 def CheckPrereq(self):
11801 """Check prerequisites.
11804 assert self.needed_locks[locking.LEVEL_NODEGROUP]
11805 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11806 frozenset(self.op.nodes))
11808 expected_locks = (set([self.group_uuid]) |
11809 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11810 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11811 if actual_locks != expected_locks:
11812 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11813 " current groups are '%s', used to be '%s'" %
11814 (utils.CommaJoin(expected_locks),
11815 utils.CommaJoin(actual_locks)))
11817 self.node_data = self.cfg.GetAllNodesInfo()
11818 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11819 instance_data = self.cfg.GetAllInstancesInfo()
11821 if self.group is None:
11822 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11823 (self.op.group_name, self.group_uuid))
11825 (new_splits, previous_splits) = \
11826 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11827 for node in self.op.nodes],
11828 self.node_data, instance_data)
11831 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11833 if not self.op.force:
11834 raise errors.OpExecError("The following instances get split by this"
11835 " change and --force was not given: %s" %
11838 self.LogWarning("This operation will split the following instances: %s",
11841 if previous_splits:
11842 self.LogWarning("In addition, these already-split instances continue"
11843 " to be split across groups: %s",
11844 utils.CommaJoin(utils.NiceSort(previous_splits)))
11846 def Exec(self, feedback_fn):
11847 """Assign nodes to a new group.
11850 for node in self.op.nodes:
11851 self.node_data[node].group = self.group_uuid
11853 # FIXME: Depends on side-effects of modifying the result of
11854 # C{cfg.GetAllNodesInfo}
11856 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11859 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11860 """Check for split instances after a node assignment.
11862 This method considers a series of node assignments as an atomic operation,
11863 and returns information about split instances after applying the set of changes.
11866 In particular, it returns information about newly split instances, and about
11867 instances that were already split and remain so after the change.
11869 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
11872 @type changes: list of (node_name, new_group_uuid) pairs.
11873 @param changes: list of node assignments to consider.
11874 @param node_data: a dict with data for all nodes
11875 @param instance_data: a dict with all instances to consider
11876 @rtype: a two-tuple
11877 @return: a list of instances that were previously healthy and end up split as a
11878 consequence of this change, and a list of instances that were previously
11879 split and that this change does not fix.
11882 changed_nodes = dict((node, group) for node, group in changes
11883 if node_data[node].group != group)
11885 all_split_instances = set()
11886 previously_split_instances = set()
11888 def InstanceNodes(instance):
11889 return [instance.primary_node] + list(instance.secondary_nodes)
11891 for inst in instance_data.values():
11892 if inst.disk_template not in constants.DTS_INT_MIRROR:
11895 instance_nodes = InstanceNodes(inst)
11897 if len(set(node_data[node].group for node in instance_nodes)) > 1:
11898 previously_split_instances.add(inst.name)
11900 if len(set(changed_nodes.get(node, node_data[node].group)
11901 for node in instance_nodes)) > 1:
11902 all_split_instances.add(inst.name)
11904 return (list(all_split_instances - previously_split_instances),
11905 list(previously_split_instances & all_split_instances))
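# Worked example (standalone sketch with simplified stand-in objects, not
# ganeti ones): moving node "n2" into group "g2" splits a mirrored instance
# whose nodes are n1/n2, because its nodes then span two groups.
import collections

_Node = collections.namedtuple("_Node", ["group"])
_Inst = collections.namedtuple("_Inst", ["primary_node", "secondary_nodes"])

_node_data = {"n1": _Node("g1"), "n2": _Node("g1")}
_inst = _Inst("n1", ["n2"])
_changes = [("n2", "g2")]

_changed_nodes = dict((n, g) for (n, g) in _changes
                      if _node_data[n].group != g)
_groups_after = set(_changed_nodes.get(n, _node_data[n].group)
                    for n in [_inst.primary_node] + list(_inst.secondary_nodes))
assert len(_groups_after) > 1  # the instance would become split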
11908 class _GroupQuery(_QueryBase):
11909 FIELDS = query.GROUP_FIELDS
11911 def ExpandNames(self, lu):
11912 lu.needed_locks = {}
11914 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11915 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11918 self.wanted = [name_to_uuid[name]
11919 for name in utils.NiceSort(name_to_uuid.keys())]
11921 # Accept entries that are either group names or UUIDs.
11924 all_uuid = frozenset(self._all_groups.keys())
11926 for name in self.names:
11927 if name in all_uuid:
11928 self.wanted.append(name)
11929 elif name in name_to_uuid:
11930 self.wanted.append(name_to_uuid[name])
11932 missing.append(name)
11935 raise errors.OpPrereqError("Some groups do not exist: %s" %
11936 utils.CommaJoin(missing),
11937 errors.ECODE_NOENT)
11939 def DeclareLocks(self, lu, level):
11942 def _GetQueryData(self, lu):
11943 """Computes the list of node groups and their attributes.
11946 do_nodes = query.GQ_NODE in self.requested_data
11947 do_instances = query.GQ_INST in self.requested_data
11949 group_to_nodes = None
11950 group_to_instances = None
11952 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11953 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11954 # latter GetAllInstancesInfo() is not enough, for we have to go through
11955 # instance->node. Hence, we will need to process nodes even if we only need
11956 # instance information.
11957 if do_nodes or do_instances:
11958 all_nodes = lu.cfg.GetAllNodesInfo()
11959 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11962 for node in all_nodes.values():
11963 if node.group in group_to_nodes:
11964 group_to_nodes[node.group].append(node.name)
11965 node_to_group[node.name] = node.group
11968 all_instances = lu.cfg.GetAllInstancesInfo()
11969 group_to_instances = dict((uuid, []) for uuid in self.wanted)
11971 for instance in all_instances.values():
11972 node = instance.primary_node
11973 if node in node_to_group:
11974 group_to_instances[node_to_group[node]].append(instance.name)
11977 # Do not pass on node information if it was not requested.
11978 group_to_nodes = None
11980 return query.GroupQueryData([self._all_groups[uuid]
11981 for uuid in self.wanted],
11982 group_to_nodes, group_to_instances)
11985 class LUGroupQuery(NoHooksLU):
11986 """Logical unit for querying node groups.
11991 def CheckArguments(self):
11992 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
11993 self.op.output_fields, False)
11995 def ExpandNames(self):
11996 self.gq.ExpandNames(self)
11998 def Exec(self, feedback_fn):
11999 return self.gq.OldStyleQuery(self)
12002 class LUGroupSetParams(LogicalUnit):
12003 """Modifies the parameters of a node group.
12006 HPATH = "group-modify"
12007 HTYPE = constants.HTYPE_GROUP
12010 def CheckArguments(self):
12013 self.op.alloc_policy,
12016 if all_changes.count(None) == len(all_changes):
12017 raise errors.OpPrereqError("Please pass at least one modification",
12018 errors.ECODE_INVAL)
12020 def ExpandNames(self):
12021 # This raises errors.OpPrereqError on its own:
12022 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12024 self.needed_locks = {
12025 locking.LEVEL_NODEGROUP: [self.group_uuid],
12028 def CheckPrereq(self):
12029 """Check prerequisites.
12032 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12034 if self.group is None:
12035 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12036 (self.op.group_name, self.group_uuid))
12038 if self.op.ndparams:
12039 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12040 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12041 self.new_ndparams = new_ndparams
12043 def BuildHooksEnv(self):
12044 """Build hooks env.
12048 "GROUP_NAME": self.op.group_name,
12049 "NEW_ALLOC_POLICY": self.op.alloc_policy,
12052 def BuildHooksNodes(self):
12053 """Build hooks nodes.
12056 mn = self.cfg.GetMasterNode()
12057 return ([mn], [mn])
12059 def Exec(self, feedback_fn):
12060 """Modifies the node group.
12065 if self.op.ndparams:
12066 self.group.ndparams = self.new_ndparams
12067 result.append(("ndparams", str(self.group.ndparams)))
12069 if self.op.alloc_policy:
12070 self.group.alloc_policy = self.op.alloc_policy
12072 self.cfg.Update(self.group, feedback_fn)
12077 class LUGroupRemove(LogicalUnit):
12078 HPATH = "group-remove"
12079 HTYPE = constants.HTYPE_GROUP
12082 def ExpandNames(self):
12083 # This will raise errors.OpPrereqError on its own:
12084 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12085 self.needed_locks = {
12086 locking.LEVEL_NODEGROUP: [self.group_uuid],
12089 def CheckPrereq(self):
12090 """Check prerequisites.
12092 This checks that the given group name exists as a node group, that it is
12093 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
12097 # Verify that the group is empty.
12098 group_nodes = [node.name
12099 for node in self.cfg.GetAllNodesInfo().values()
12100 if node.group == self.group_uuid]
12103 raise errors.OpPrereqError("Group '%s' not empty, has the following"
12105 (self.op.group_name,
12106 utils.CommaJoin(utils.NiceSort(group_nodes))),
12107 errors.ECODE_STATE)
12109 # Verify the cluster would not be left group-less.
12110 if len(self.cfg.GetNodeGroupList()) == 1:
12111 raise errors.OpPrereqError("Group '%s' is the only group,"
12112 " cannot be removed" %
12113 self.op.group_name,
12114 errors.ECODE_STATE)
12116 def BuildHooksEnv(self):
12117 """Build hooks env.
12121 "GROUP_NAME": self.op.group_name,
12124 def BuildHooksNodes(self):
12125 """Build hooks nodes.
12128 mn = self.cfg.GetMasterNode()
12129 return ([mn], [mn])
12131 def Exec(self, feedback_fn):
12132 """Remove the node group.
12136 self.cfg.RemoveNodeGroup(self.group_uuid)
12137 except errors.ConfigurationError:
12138 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12139 (self.op.group_name, self.group_uuid))
12141 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12144 class LUGroupRename(LogicalUnit):
12145 HPATH = "group-rename"
12146 HTYPE = constants.HTYPE_GROUP
12149 def ExpandNames(self):
12150 # This raises errors.OpPrereqError on its own:
12151 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12153 self.needed_locks = {
12154 locking.LEVEL_NODEGROUP: [self.group_uuid],
12157 def CheckPrereq(self):
12158 """Check prerequisites.
12160 Ensures requested new name is not yet used.
12164 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12165 except errors.OpPrereqError:
12168 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12169 " node group (UUID: %s)" %
12170 (self.op.new_name, new_name_uuid),
12171 errors.ECODE_EXISTS)
12173 def BuildHooksEnv(self):
12174 """Build hooks env.
12178 "OLD_NAME": self.op.group_name,
12179 "NEW_NAME": self.op.new_name,
12182 def BuildHooksNodes(self):
12183 """Build hooks nodes.
12186 mn = self.cfg.GetMasterNode()
12188 all_nodes = self.cfg.GetAllNodesInfo()
12189 all_nodes.pop(mn, None)
12192 run_nodes.extend(node.name for node in all_nodes.values()
12193 if node.group == self.group_uuid)
12195 return (run_nodes, run_nodes)
12197 def Exec(self, feedback_fn):
12198 """Rename the node group.
12201 group = self.cfg.GetNodeGroup(self.group_uuid)
12204 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12205 (self.op.group_name, self.group_uuid))
12207 group.name = self.op.new_name
12208 self.cfg.Update(group, feedback_fn)
12210 return self.op.new_name
12213 class LUGroupEvacuate(LogicalUnit):
12214 HPATH = "group-evacuate"
12215 HTYPE = constants.HTYPE_GROUP
12218 def ExpandNames(self):
12219 # This raises errors.OpPrereqError on its own:
12220 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12222 if self.op.target_groups:
12223 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12224 self.op.target_groups)
12226 self.req_target_uuids = []
12228 if self.group_uuid in self.req_target_uuids:
12229 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12230 " as a target group (targets are %s)" %
12232 utils.CommaJoin(self.req_target_uuids)),
12233 errors.ECODE_INVAL)
12235 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12237 self.share_locks = _ShareAll()
12238 self.needed_locks = {
12239 locking.LEVEL_INSTANCE: [],
12240 locking.LEVEL_NODEGROUP: [],
12241 locking.LEVEL_NODE: [],
12244 def DeclareLocks(self, level):
12245 if level == locking.LEVEL_INSTANCE:
12246 assert not self.needed_locks[locking.LEVEL_INSTANCE]
12248 # Lock instances optimistically, needs verification once node and group
12249 # locks have been acquired
12250 self.needed_locks[locking.LEVEL_INSTANCE] = \
12251 self.cfg.GetNodeGroupInstances(self.group_uuid)
12253 elif level == locking.LEVEL_NODEGROUP:
12254 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12256 if self.req_target_uuids:
12257 lock_groups = set([self.group_uuid] + self.req_target_uuids)
12259 # Lock all groups used by instances optimistically; this requires going
12260 # via the node before it's locked, requiring verification later on
12261 lock_groups.update(group_uuid
12262 for instance_name in
12263 self.owned_locks(locking.LEVEL_INSTANCE)
12265 self.cfg.GetInstanceNodeGroups(instance_name))
12267 # No target groups, need to lock all of them
12268 lock_groups = locking.ALL_SET
12270 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12272 elif level == locking.LEVEL_NODE:
12273 # This will only lock the nodes in the group to be evacuated which
12274 # contain actual instances
12275 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12276 self._LockInstancesNodes()
12278 # Lock all nodes in group to be evacuated and target groups
12279 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12280 assert self.group_uuid in owned_groups
12281 member_nodes = [node_name
12282 for group in owned_groups
12283 for node_name in self.cfg.GetNodeGroup(group).members]
12284 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12286 def CheckPrereq(self):
12287 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12288 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12289 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12291 assert owned_groups.issuperset(self.req_target_uuids)
12292 assert self.group_uuid in owned_groups
12294 # Check if locked instances are still correct
12295 wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
12296 if owned_instances != wanted_instances:
12297 raise errors.OpPrereqError("Instances in node group to be evacuated (%s)"
12298 " changed since locks were acquired, wanted"
12299 " %s, have %s; retry the operation" %
12301 utils.CommaJoin(wanted_instances),
12302 utils.CommaJoin(owned_instances)),
12303 errors.ECODE_STATE)
12305 # Get instance information
12306 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12308 # Check if node groups for locked instances are still correct
12309 for instance_name in owned_instances:
12310 inst = self.instances[instance_name]
12311 assert owned_nodes.issuperset(inst.all_nodes), \
12312 "Instance %s's nodes changed while we kept the lock" % instance_name
12314 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12317 assert self.group_uuid in inst_groups, \
12318 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12320 if self.req_target_uuids:
12321 # User requested specific target groups
12322 self.target_uuids = self.req_target_uuids
12324 # All groups except the one to be evacuated are potential targets
12325 self.target_uuids = [group_uuid for group_uuid in owned_groups
12326 if group_uuid != self.group_uuid]
12328 if not self.target_uuids:
12329 raise errors.OpPrereqError("There are no possible target groups",
12330 errors.ECODE_INVAL)
12332 def BuildHooksEnv(self):
12333 """Build hooks env.
12337 "GROUP_NAME": self.op.group_name,
12338 "TARGET_GROUPS": " ".join(self.target_uuids),
12341 def BuildHooksNodes(self):
12342 """Build hooks nodes.
12345 mn = self.cfg.GetMasterNode()
12347 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12349 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12351 return (run_nodes, run_nodes)
12353 def Exec(self, feedback_fn):
12354 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12356 assert self.group_uuid not in self.target_uuids
12358 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12359 instances=instances, target_groups=self.target_uuids)
12361 ial.Run(self.op.iallocator)
12363 if not ial.success:
12364 raise errors.OpPrereqError("Can't compute group evacuation using"
12365 " iallocator '%s': %s" %
12366 (self.op.iallocator, ial.info),
12367 errors.ECODE_NORES)
12369 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12371 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12372 len(jobs), self.op.group_name)
12374 return ResultWithJobs(jobs)
12377 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
12378 """Generic tags LU.
12380 This is an abstract class which is the parent of all the other tags LUs.
12383 def ExpandNames(self):
12384 self.group_uuid = None
12385 self.needed_locks = {}
12386 if self.op.kind == constants.TAG_NODE:
12387 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12388 self.needed_locks[locking.LEVEL_NODE] = self.op.name
12389 elif self.op.kind == constants.TAG_INSTANCE:
12390 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12391 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12392 elif self.op.kind == constants.TAG_NODEGROUP:
12393 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12395 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12396 # not possible to acquire the BGL based on opcode parameters)
12398 def CheckPrereq(self):
12399 """Check prerequisites.
12402 if self.op.kind == constants.TAG_CLUSTER:
12403 self.target = self.cfg.GetClusterInfo()
12404 elif self.op.kind == constants.TAG_NODE:
12405 self.target = self.cfg.GetNodeInfo(self.op.name)
12406 elif self.op.kind == constants.TAG_INSTANCE:
12407 self.target = self.cfg.GetInstanceInfo(self.op.name)
12408 elif self.op.kind == constants.TAG_NODEGROUP:
12409 self.target = self.cfg.GetNodeGroup(self.group_uuid)
12411 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12412 str(self.op.kind), errors.ECODE_INVAL)
12415 class LUTagsGet(TagsLU):
12416 """Returns the tags of a given object.
12421 def ExpandNames(self):
12422 TagsLU.ExpandNames(self)
12424 # Share locks as this is only a read operation
12425 self.share_locks = _ShareAll()
12427 def Exec(self, feedback_fn):
12428 """Returns the tag list.
12431 return list(self.target.GetTags())
12434 class LUTagsSearch(NoHooksLU):
12435 """Searches the tags for a given pattern.
12440 def ExpandNames(self):
12441 self.needed_locks = {}
12443 def CheckPrereq(self):
12444 """Check prerequisites.
12446 This checks the pattern passed for validity by compiling it.
12450 self.re = re.compile(self.op.pattern)
12451 except re.error, err:
12452 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12453 (self.op.pattern, err), errors.ECODE_INVAL)
12455 def Exec(self, feedback_fn):
12456 """Returns the tag list.
12460 tgts = [("/cluster", cfg.GetClusterInfo())]
12461 ilist = cfg.GetAllInstancesInfo().values()
12462 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12463 nlist = cfg.GetAllNodesInfo().values()
12464 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12465 tgts.extend(("/nodegroup/%s" % n.name, n)
12466 for n in cfg.GetAllNodeGroupsInfo().values())
12468 for path, target in tgts:
12469 for tag in target.GetTags():
12470 if self.re.search(tag):
12471 results.append((path, tag))
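# Illustrative sketch (hypothetical tags) of the (path, tag) pairs collected
# above: every tag of every object is matched against the compiled pattern.
import re

_example_targets = [("/cluster", ["production"]),
                    ("/instances/web1.example.com", ["prod-web", "lvm"])]
_example_re = re.compile("^prod")
_example_matches = [(path, tag)
                    for (path, tags) in _example_targets
                    for tag in tags if _example_re.search(tag)]
assert _example_matches == [("/cluster", "production"),
                            ("/instances/web1.example.com", "prod-web")]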
12475 class LUTagsSet(TagsLU):
12476 """Sets a tag on a given object.
12481 def CheckPrereq(self):
12482 """Check prerequisites.
12484 This checks the type and length of the tag name and value.
12487 TagsLU.CheckPrereq(self)
12488 for tag in self.op.tags:
12489 objects.TaggableObject.ValidateTag(tag)
12491 def Exec(self, feedback_fn):
12496 for tag in self.op.tags:
12497 self.target.AddTag(tag)
12498 except errors.TagError, err:
12499 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12500 self.cfg.Update(self.target, feedback_fn)
12503 class LUTagsDel(TagsLU):
12504 """Delete a list of tags from a given object.
12509 def CheckPrereq(self):
12510 """Check prerequisites.
12512 This checks that we have the given tag.
12515 TagsLU.CheckPrereq(self)
12516 for tag in self.op.tags:
12517 objects.TaggableObject.ValidateTag(tag)
12518 del_tags = frozenset(self.op.tags)
12519 cur_tags = self.target.GetTags()
12521 diff_tags = del_tags - cur_tags
12523 diff_names = ("'%s'" % i for i in sorted(diff_tags))
12524 raise errors.OpPrereqError("Tag(s) %s not found" %
12525 (utils.CommaJoin(diff_names), ),
12526 errors.ECODE_NOENT)
12528 def Exec(self, feedback_fn):
12529 """Remove the tag from the object.
12532 for tag in self.op.tags:
12533 self.target.RemoveTag(tag)
12534 self.cfg.Update(self.target, feedback_fn)
12537 class LUTestDelay(NoHooksLU):
12538 """Sleep for a specified amount of time.
12540 This LU sleeps on the master and/or nodes for a specified amount of time.
12546 def ExpandNames(self):
12547 """Expand names and set required locks.
12549 This expands the node list, if any.
12552 self.needed_locks = {}
12553 if self.op.on_nodes:
12554 # _GetWantedNodes can be used here, but is not always appropriate to use
12555 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12556 # more information.
12557 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12558 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12560 def _TestDelay(self):
12561 """Do the actual sleep.
12564 if self.op.on_master:
12565 if not utils.TestDelay(self.op.duration):
12566 raise errors.OpExecError("Error during master delay test")
12567 if self.op.on_nodes:
12568 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12569 for node, node_result in result.items():
12570 node_result.Raise("Failure during rpc call to node %s" % node)
12572 def Exec(self, feedback_fn):
12573 """Execute the test delay opcode, with the wanted repetitions.
12576 if self.op.repeat == 0:
12579 top_value = self.op.repeat - 1
12580 for i in range(self.op.repeat):
12581 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12585 class LUTestJqueue(NoHooksLU):
12586 """Utility LU to test some aspects of the job queue.
12591 # Must be lower than default timeout for WaitForJobChange to see whether it
12592 # notices changed jobs
12593 _CLIENT_CONNECT_TIMEOUT = 20.0
12594 _CLIENT_CONFIRM_TIMEOUT = 60.0
12597 def _NotifyUsingSocket(cls, cb, errcls):
12598 """Opens a Unix socket and waits for another program to connect.
12601 @param cb: Callback to send socket name to client
12602 @type errcls: class
12603 @param errcls: Exception class to use for errors
12606 # Using a temporary directory as there's no easy way to create temporary
12607 # sockets without writing a custom loop around tempfile.mktemp and socket.bind
12609 tmpdir = tempfile.mkdtemp()
12611 tmpsock = utils.PathJoin(tmpdir, "sock")
12613 logging.debug("Creating temporary socket at %s", tmpsock)
12614 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12619 # Send details to client
12622 # Wait for client to connect before continuing
12623 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12625 (conn, _) = sock.accept()
12626 except socket.error, err:
12627 raise errcls("Client didn't connect in time (%s)" % err)
12631 # Remove as soon as client is connected
12632 shutil.rmtree(tmpdir)
12634 # Wait for client to close
12637 # pylint: disable-msg=E1101
12638 # Instance of '_socketobject' has no ... member
12639 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12641 except socket.error, err:
12642 raise errcls("Client failed to confirm notification (%s)" % err)
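# Sketch of the peer side of this handshake (an assumption about what the
# test client does, not code from this module): the client connects to the
# advertised socket path, which unblocks sock.accept() above, and closes the
# connection once it has processed the notification, ending the confirmation
# wait.
import socket

def _ExampleAcknowledgeNotification(sockname):
  client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  client.connect(sockname)
  client.close()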
12646 def _SendNotification(self, test, arg, sockname):
12647 """Sends a notification to the client.
12650 @param test: Test name
12651 @param arg: Test argument (depends on test)
12652 @type sockname: string
12653 @param sockname: Socket path
12656 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12658 def _Notify(self, prereq, test, arg):
12659 """Notifies the client of a test.
12662 @param prereq: Whether this is a prereq-phase test
12664 @param test: Test name
12665 @param arg: Test argument (depends on test)
12669 errcls = errors.OpPrereqError
12671 errcls = errors.OpExecError
12673 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12677 def CheckArguments(self):
12678 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12679 self.expandnames_calls = 0
12681 def ExpandNames(self):
12682 checkargs_calls = getattr(self, "checkargs_calls", 0)
12683 if checkargs_calls < 1:
12684 raise errors.ProgrammerError("CheckArguments was not called")
12686 self.expandnames_calls += 1
12688 if self.op.notify_waitlock:
12689 self._Notify(True, constants.JQT_EXPANDNAMES, None)
12691 self.LogInfo("Expanding names")
12693 # Get lock on master node (just to get a lock, not for a particular reason)
12694 self.needed_locks = {
12695 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12698 def Exec(self, feedback_fn):
12699 if self.expandnames_calls < 1:
12700 raise errors.ProgrammerError("ExpandNames was not called")
12702 if self.op.notify_exec:
12703 self._Notify(False, constants.JQT_EXEC, None)
12705 self.LogInfo("Executing")
12707 if self.op.log_messages:
12708 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12709 for idx, msg in enumerate(self.op.log_messages):
12710 self.LogInfo("Sending log message %s", idx + 1)
12711 feedback_fn(constants.JQT_MSGPREFIX + msg)
12712 # Report how many test messages have been sent
12713 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12716 raise errors.OpExecError("Opcode failure was requested")
12721 class IAllocator(object):
12722 """IAllocator framework.
12724 An IAllocator instance has four sets of attributes:
12725 - cfg that is needed to query the cluster
12726 - input data (all members of the _KEYS class attribute are required)
12727 - four buffer attributes (in|out_data|text), that represent the
12728 input (to the external script) in text and data structure format,
12729 and the output from it, again in two formats
12730 - the result variables from the script (success, info, nodes) for
12734 # pylint: disable-msg=R0902
12735 # lots of instance attributes
12737 def __init__(self, cfg, rpc, mode, **kwargs):
12740 # init buffer variables
12741 self.in_text = self.out_text = self.in_data = self.out_data = None
12742 # init all input fields so that pylint is happy
12744 self.memory = self.disks = self.disk_template = None
12745 self.os = self.tags = self.nics = self.vcpus = None
12746 self.hypervisor = None
12747 self.relocate_from = None
12749 self.evac_nodes = None
12750 self.instances = None
12751 self.evac_mode = None
12752 self.target_groups = []
12754 self.required_nodes = None
12755 # init result fields
12756 self.success = self.info = self.result = None
12759 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12761 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12762 " IAllocator" % self.mode)
12764 keyset = [n for (n, _) in keydata]
12767 if key not in keyset:
12768 raise errors.ProgrammerError("Invalid input parameter '%s' to"
12769 " IAllocator" % key)
12770 setattr(self, key, kwargs[key])
12773 if key not in kwargs:
12774 raise errors.ProgrammerError("Missing input parameter '%s' to"
12775 " IAllocator" % key)
12776 self._BuildInputData(compat.partial(fn, self), keydata)
12778 def _ComputeClusterData(self):
12779 """Compute the generic allocator input data.
12781 This is the data that is independent of the actual operation.
12785 cluster_info = cfg.GetClusterInfo()
12788 "version": constants.IALLOCATOR_VERSION,
12789 "cluster_name": cfg.GetClusterName(),
12790 "cluster_tags": list(cluster_info.GetTags()),
12791 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12792 # we don't have job IDs
12794 ninfo = cfg.GetAllNodesInfo()
12795 iinfo = cfg.GetAllInstancesInfo().values()
12796 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12799 node_list = [n.name for n in ninfo.values() if n.vm_capable]
12801 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12802 hypervisor_name = self.hypervisor
12803 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12804 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12806 hypervisor_name = cluster_info.enabled_hypervisors[0]
12808 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12811 self.rpc.call_all_instances_info(node_list,
12812 cluster_info.enabled_hypervisors)
12814 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12816 config_ndata = self._ComputeBasicNodeData(ninfo)
12817 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12818 i_list, config_ndata)
12819 assert len(data["nodes"]) == len(ninfo), \
12820 "Incomplete node data computed"
12822 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12824 self.in_data = data
12827 def _ComputeNodeGroupData(cfg):
12828 """Compute node groups data.
12831 ng = dict((guuid, {
12832 "name": gdata.name,
12833 "alloc_policy": gdata.alloc_policy,
12835 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
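# Illustrative sketch (invented UUID and values, other keys omitted) of the
# mapping built above and later attached as data["nodegroups"]:
_EXAMPLE_NODEGROUPS = {
  "uuid-group-1": {
    "name": "default",
    "alloc_policy": "preferred",
  },
}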
12840 def _ComputeBasicNodeData(node_cfg):
12841 """Compute global node data.
12844 @returns: a dict of name: (node dict, node config)
12847 # fill in static (config-based) values
12848 node_results = dict((ninfo.name, {
12849 "tags": list(ninfo.GetTags()),
12850 "primary_ip": ninfo.primary_ip,
12851 "secondary_ip": ninfo.secondary_ip,
12852 "offline": ninfo.offline,
12853 "drained": ninfo.drained,
12854 "master_candidate": ninfo.master_candidate,
12855 "group": ninfo.group,
12856 "master_capable": ninfo.master_capable,
12857 "vm_capable": ninfo.vm_capable,
12859 for ninfo in node_cfg.values())
12861 return node_results
12864 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
12866 """Compute global node data.
12868 @param node_results: the basic node structures as filled from the config
12871 # make a copy of the current dict
12872 node_results = dict(node_results)
12873 for nname, nresult in node_data.items():
12874 assert nname in node_results, "Missing basic data for node %s" % nname
12875 ninfo = node_cfg[nname]
12877 if not (ninfo.offline or ninfo.drained):
12878 nresult.Raise("Can't get data for node %s" % nname)
12879 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
12881 remote_info = nresult.payload
12883 for attr in ["memory_total", "memory_free", "memory_dom0",
12884 "vg_size", "vg_free", "cpu_total"]:
12885 if attr not in remote_info:
12886 raise errors.OpExecError("Node '%s' didn't return attribute"
12887 " '%s'" % (nname, attr))
12888 if not isinstance(remote_info[attr], int):
12889 raise errors.OpExecError("Node '%s' returned invalid value"
12891 (nname, attr, remote_info[attr]))
12892 # compute memory used by primary instances
12893 i_p_mem = i_p_up_mem = 0
12894 for iinfo, beinfo in i_list:
12895 if iinfo.primary_node == nname:
12896 i_p_mem += beinfo[constants.BE_MEMORY]
12897 if iinfo.name not in node_iinfo[nname].payload:
12900 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
12901 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
12902 remote_info["memory_free"] -= max(0, i_mem_diff)
12905 i_p_up_mem += beinfo[constants.BE_MEMORY]
12907 # compute memory used by instances
12909 "total_memory": remote_info["memory_total"],
12910 "reserved_memory": remote_info["memory_dom0"],
12911 "free_memory": remote_info["memory_free"],
12912 "total_disk": remote_info["vg_size"],
12913 "free_disk": remote_info["vg_free"],
12914 "total_cpus": remote_info["cpu_total"],
12915 "i_pri_memory": i_p_mem,
12916 "i_pri_up_memory": i_p_up_mem,
12918 pnr_dyn.update(node_results[nname])
12919 node_results[nname] = pnr_dyn
12921 return node_results
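# Worked example of the free-memory correction above (invented numbers): an
# instance configured with 512 MiB (BE_MEMORY) that the hypervisor reports
# as currently using only 384 MiB has the unused 128 MiB subtracted from the
# node's free memory, so the allocator still treats that memory as reserved.
_example_be_memory = 512
_example_used = 384
_example_memory_free = 2048
_example_memory_free -= max(0, _example_be_memory - _example_used)
assert _example_memory_free == 1920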
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
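
  # Illustrative sketch only: a single entry of the returned instance dict,
  # with invented values, showing the per-instance data handed to the
  # allocator (memory and disk sizes in MiB):
  #
  #   "web1.example.com": {
  #     "tags": ["service:web"],
  #     "admin_up": True,
  #     "vcpus": 2,
  #     "memory": 2048,
  #     "os": "debian-image",
  #     "nodes": ["node1.example.com", "node2.example.com"],
  #     "nics": [{"mac": "aa:00:00:12:34:56", "ip": None,
  #               "mode": "bridged", "link": "xen-br0", "bridge": "xen-br0"}],
  #     "disks": [{"size": 10240, "mode": "rw"}],
  #     "disk_template": "drbd",
  #     "hypervisor": "xen-pvm",
  #     "disk_space_total": 10368,
  #   }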
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _AllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }
    return request
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _IAllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    return {
      "evac_nodes": self.evac_nodes,
      }

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }
  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
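
  # Illustrative sketch only: after _BuildInputData the serialized input
  # handed to the allocator script is a JSON document of roughly this shape
  # (cluster-wide data heavily abbreviated, key names and values invented
  # for illustration):
  #
  #   {
  #     "cluster_tags": [...],
  #     "nodegroups": {...},
  #     "nodes": {...},           # as built by the node data methods above
  #     "instances": {...},       # as built by _ComputeInstanceData above
  #     "request": {
  #       "type": "allocate",     # the mode, added above
  #       "name": "web1.example.com",
  #       # ...mode-specific keys, validated against keydata...
  #     },
  #   }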
  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable-msg=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance, [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_MEVAC:
      (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
       ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
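
  # Minimal usage sketch (not executed here): a logical unit typically drives
  # the allocator roughly as follows; "hail" is only an example allocator
  # script name and the keyword arguments depend on the chosen mode.
  #
  #   ial = IAllocator(self.cfg, self.rpc,
  #                    mode=constants.IALLOCATOR_MODE_RELOC,
  #                    name=instance_name,
  #                    relocate_from=list(secondary_nodes))
  #   ial.Run("hail")
  #   if not ial.success:
  #     raise errors.OpPrereqError("Can't compute nodes using iallocator:"
  #                                " %s" % ial.info, errors.ECODE_NORES)
  #   new_nodes = ial.result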
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode in (constants.IALLOCATOR_MODE_RELOC,
                     constants.IALLOCATOR_MODE_MEVAC):
      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      if self.mode == constants.IALLOCATOR_MODE_RELOC:
        assert self.relocate_from is not None
        assert self.required_nodes == 1

        request_groups = fn(self.relocate_from)
        result_groups = fn(rdict["result"])

        if result_groups != request_groups:
          raise errors.OpExecError("Groups of nodes returned by iallocator"
                                   " (%s) differ from original groups (%s)" %
                                   (utils.CommaJoin(result_groups),
                                    utils.CommaJoin(request_groups)))
      elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
        request_groups = fn(self.evac_nodes)
        for (instance_name, secnode) in self.result:
          result_groups = fn([secnode])
          if result_groups != request_groups:
            raise errors.OpExecError("Iallocator returned new secondary node"
                                     " '%s' (group '%s') for instance '%s'"
                                     " which is not in original group '%s'" %
                                     (secnode, utils.CommaJoin(result_groups),
                                      instance_name,
                                      utils.CommaJoin(request_groups)))
      else:
        raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
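
  # Illustrative sketch only: a well-formed allocator reply, as validated
  # above, is a JSON object with at least these three keys (values invented;
  # the exact shape of "result" depends on the mode's result check):
  #
  #   {
  #     "success": true,
  #     "info": "allocation successful",
  #     "result": ["node1.example.com", "node2.example.com"]
  #   }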
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @param groups: Group information
    @param nodes: Node names

    """
    result = set()
    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        continue
      try:
        group = groups[group_uuid]
      except KeyError:
        # Can't find group, let's use UUID
        group_name = group_uuid
      else:
        group_name = group["name"]
      result.add(group_name)

    return sorted(result)
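
  # Minimal usage sketch (invented data): unknown nodes are skipped and
  # missing groups fall back to their UUID.
  #
  #   node2group = {"node1": "uuid-a", "node2": "uuid-b", "node3": "uuid-c"}
  #   groups = {"uuid-a": {"name": "default"}, "uuid-b": {"name": "ssd"}}
  #   IAllocator._NodesToGroups(node2group, groups,
  #                             ["node1", "node2", "node3", "ghost"])
  #   # => ["default", "ssd", "uuid-c"]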
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
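
# Minimal usage sketch (not executed here): look up the query implementation
# class for a resource type; unknown names raise OpPrereqError.
#
#   impl_cls = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#   _GetQueryImplementation("bogus")   # raises OpPrereqError (ECODE_INVAL)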