4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
63 import ganeti.masterd.instance # pylint: disable-msg=W0611
66 def _SupportsOob(cfg, node):
67 """Tells if node supports OOB.
69 @type cfg: L{config.ConfigWriter}
70 @param cfg: The cluster configuration
71 @type node: L{objects.Node}
73 @return: The OOB script if supported or an empty string otherwise
76 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
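# Illustrative usage sketch (not from the original module; the node object
# and the error handling are hypothetical): callers typically treat a false
# return value as "no OOB support":
#
#   oob_program = _SupportsOob(self.cfg, node)
#   if not oob_program:
#     raise errors.OpPrereqError("OOB is not supported for node %s" %
#                                node.name, errors.ECODE_STATE)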
80 """Data container for LU results with jobs.
82 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
83 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
84 contained in the C{jobs} attribute and include the job IDs in the opcode
88 def __init__(self, jobs, **kwargs):
89 """Initializes this class.
91 Additional return values can be specified as keyword arguments.
93 @type jobs: list of lists of L{opcodes.OpCode}
94 @param jobs: A list of lists of opcode objects
101 class LogicalUnit(object):
102 """Logical Unit base class.
104 Subclasses must follow these rules:
105 - implement ExpandNames
106 - implement CheckPrereq (except when tasklets are used)
107 - implement Exec (except when tasklets are used)
108 - implement BuildHooksEnv
109 - implement BuildHooksNodes
110 - redefine HPATH and HTYPE
111 - optionally redefine their run requirements:
112 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
114 Note that all commands require root permissions.
116 @ivar dry_run_result: the value (if any) that will be returned to the caller
117 in dry-run mode (signalled by opcode dry_run parameter)
124 def __init__(self, processor, op, context, rpc):
125 """Constructor for LogicalUnit.
127 This needs to be overridden in derived classes in order to check op
131 self.proc = processor
133 self.cfg = context.cfg
134 self.glm = context.glm
135 self.context = context
137 # Dicts used to declare locking needs to mcpu
138 self.needed_locks = None
139 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
141 self.remove_locks = {}
142 # Used to force good behavior when calling helper functions
143 self.recalculate_locks = {}
145 self.Log = processor.Log # pylint: disable-msg=C0103
146 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
147 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
148 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
149 # support for dry-run
150 self.dry_run_result = None
151 # support for generic debug attribute
152 if (not hasattr(self.op, "debug_level") or
153 not isinstance(self.op.debug_level, int)):
154 self.op.debug_level = 0
159 # Validate opcode parameters and set defaults
160 self.op.Validate(True)
162 self.CheckArguments()
164 def CheckArguments(self):
165 """Check syntactic validity for the opcode arguments.
167 This method is for doing a simple syntactic check and ensuring the
168 validity of opcode parameters, without any cluster-related
169 checks. While the same can be accomplished in ExpandNames and/or
170 CheckPrereq, doing these separately is better because:
172 - ExpandNames is left as purely a lock-related function
173 - CheckPrereq is run after we have acquired locks (and possible
176 The function is allowed to change the self.op attribute so that
177 later methods need no longer worry about missing parameters.
182 def ExpandNames(self):
183 """Expand names for this LU.
185 This method is called before starting to execute the opcode, and it should
186 update all the parameters of the opcode to their canonical form (e.g. a
187 short node name must be fully expanded after this method has successfully
188 completed). This way locking, hooks, logging, etc. can work correctly.
190 LUs which implement this method must also populate the self.needed_locks
191 member, as a dict with lock levels as keys, and a list of needed lock names
194 - use an empty dict if you don't need any lock
195 - if you don't need any lock at a particular level, omit that level
196 - don't put anything for the BGL level
197 - if you want all locks at a level use locking.ALL_SET as a value
199 If you need to share locks (rather than acquire them exclusively) at one
200 level you can modify self.share_locks, setting a true value (usually 1) for
201 that level. By default locks are not shared.
203 This function can also define a list of tasklets, which then will be
204 executed in order instead of the usual LU-level CheckPrereq and Exec
205 functions, if those are not defined by the LU.
209 # Acquire all nodes and one instance
210 self.needed_locks = {
211 locking.LEVEL_NODE: locking.ALL_SET,
212 locking.LEVEL_INSTANCE: ['instance1.example.com'],
214 # Acquire just two nodes
215 self.needed_locks = {
216 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
219 self.needed_locks = {} # No, you can't leave it to the default value None
222 # The implementation of this method is mandatory only if the new LU is
223 # concurrent, so that old LUs don't need to be changed all at the same
226 self.needed_locks = {} # Exclusive LUs don't need locks.
228 raise NotImplementedError
230 def DeclareLocks(self, level):
231 """Declare LU locking needs for a level
233 While most LUs can just declare their locking needs at ExpandNames time,
234 sometimes there's the need to calculate some locks after having acquired
235 the ones before. This function is called just before acquiring locks at a
236 particular level, but after acquiring the ones at lower levels, and permits
237 such calculations. It can be used to modify self.needed_locks, and by
238 default it does nothing.
240 This function is only called if you have something already set in
241 self.needed_locks for the level.
243 @param level: Locking level which is going to be locked
244 @type level: member of ganeti.locking.LEVELS
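A minimal override sketch (assuming an LU that locked instances in
ExpandNames and now needs their nodes; see also _LockInstancesNodes)::

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()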
248 def CheckPrereq(self):
249 """Check prerequisites for this LU.
251 This method should check that the prerequisites for the execution
252 of this LU are fulfilled. It can do internode communication, but
253 it should be idempotent - no cluster or system changes are
256 The method should raise errors.OpPrereqError in case something is
257 not fulfilled. Its return value is ignored.
259 This method should also update all the parameters of the opcode to
260 their canonical form if it hasn't been done by ExpandNames before.
263 if self.tasklets is not None:
264 for (idx, tl) in enumerate(self.tasklets):
265 logging.debug("Checking prerequisites for tasklet %s/%s",
266 idx + 1, len(self.tasklets))
271 def Exec(self, feedback_fn):
274 This method should implement the actual work. It should raise
275 errors.OpExecError for failures that are somewhat dealt with in
279 if self.tasklets is not None:
280 for (idx, tl) in enumerate(self.tasklets):
281 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
284 raise NotImplementedError
286 def BuildHooksEnv(self):
287 """Build hooks environment for this LU.
290 @return: Dictionary containing the environment that will be used for
291 running the hooks for this LU. The keys of the dict must not be prefixed
292 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
293 will extend the environment with additional variables. If no environment
294 should be defined, an empty dictionary should be returned (not C{None}).
295 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
299 raise NotImplementedError
301 def BuildHooksNodes(self):
302 """Build list of nodes to run LU's hooks.
304 @rtype: tuple; (list, list)
305 @return: Tuple containing a list of node names on which the hook
306 should run before the execution and a list of node names on which the
307 hook should run after the execution. "No nodes" should be returned as an
308 empty list (and not None).
309 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
313 raise NotImplementedError
315 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
316 """Notify the LU about the results of its hooks.
318 This method is called every time a hooks phase is executed, and notifies
319 the Logical Unit about the hooks' result. The LU can then use it to alter
320 its result based on the hooks. By default the method does nothing and the
321 previous result is passed back unchanged, but any LU can override it if it
322 wants to use the local cluster hook-scripts somehow.
324 @param phase: one of L{constants.HOOKS_PHASE_POST} or
325 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
326 @param hook_results: the results of the multi-node hooks rpc call
327 @param feedback_fn: function used to send feedback back to the caller
328 @param lu_result: the previous Exec result this LU had, or None
330 @return: the new Exec result, based on the previous result
334 # API must be kept, thus we ignore the unused-argument and
335 # could-be-a-function warnings
336 # pylint: disable-msg=W0613,R0201
339 def _ExpandAndLockInstance(self):
340 """Helper function to expand and lock an instance.
342 Many LUs that work on an instance take its name in self.op.instance_name
343 and need to expand it and then declare the expanded name for locking. This
344 function does it, and then updates self.op.instance_name to the expanded
345 name. It also initializes needed_locks as a dict, if this hasn't been done
349 if self.needed_locks is None:
350 self.needed_locks = {}
352 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
353 "_ExpandAndLockInstance called with instance-level locks set"
354 self.op.instance_name = _ExpandInstanceName(self.cfg,
355 self.op.instance_name)
356 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
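# Illustrative sketch (the LU is hypothetical) of how instance-level LUs
# normally use the helper above from their ExpandNames:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()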
358 def _LockInstancesNodes(self, primary_only=False):
359 """Helper function to declare instances' nodes for locking.
361 This function should be called after locking one or more instances to lock
362 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
363 with all primary or secondary nodes for instances already locked and
364 present in self.needed_locks[locking.LEVEL_INSTANCE].
366 It should be called from DeclareLocks, and for safety only works if
367 self.recalculate_locks[locking.LEVEL_NODE] is set.
369 In the future it may grow parameters to just lock some instance's nodes, or
370 to just lock primaries or secondary nodes, if needed.
372 It should be called in DeclareLocks in a way similar to::
374 if level == locking.LEVEL_NODE:
375 self._LockInstancesNodes()
377 @type primary_only: boolean
378 @param primary_only: only lock primary nodes of locked instances
381 assert locking.LEVEL_NODE in self.recalculate_locks, \
382 "_LockInstancesNodes helper function called with no nodes to recalculate"
384 # TODO: check if we've really been called with the instance locks held
386 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
387 # future we might want to have different behaviors depending on the value
388 # of self.recalculate_locks[locking.LEVEL_NODE]
390 for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
391 instance = self.context.cfg.GetInstanceInfo(instance_name)
392 wanted_nodes.append(instance.primary_node)
394 wanted_nodes.extend(instance.secondary_nodes)
396 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
397 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
398 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
399 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
401 del self.recalculate_locks[locking.LEVEL_NODE]
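# Illustrative sketch of the ExpandNames/DeclareLocks pair this helper
# expects (the LU itself is hypothetical; constants.LOCKS_REPLACE is the
# mode handled above):
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()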
404 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
405 """Simple LU which runs no hooks.
407 This LU is intended as a parent for other LogicalUnits which will
408 run no hooks, in order to reduce duplicate code.
414 def BuildHooksEnv(self):
415 """Empty BuildHooksEnv for NoHooksLu.
417 This just raises an error.
420 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
422 def BuildHooksNodes(self):
423 """Empty BuildHooksNodes for NoHooksLU.
426 raise AssertionError("BuildHooksNodes called for NoHooksLU")
430 """Tasklet base class.
432 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
433 they can mix legacy code with tasklets. Locking needs to be done in the LU;
434 tasklets know nothing about locks.
436 Subclasses must follow these rules:
437 - Implement CheckPrereq
441 def __init__(self, lu):
448 def CheckPrereq(self):
449 """Check prerequisites for this tasklets.
451 This method should check whether the prerequisites for the execution of
452 this tasklet are fulfilled. It can do internode communication, but it
453 should be idempotent - no cluster or system changes are allowed.
455 The method should raise errors.OpPrereqError in case something is not
456 fulfilled. Its return value is ignored.
458 This method should also update all parameters to their canonical form if it
459 hasn't been done before.
464 def Exec(self, feedback_fn):
465 """Execute the tasklet.
467 This method should implement the actual work. It should raise
468 errors.OpExecError for failures that are somewhat dealt with in code, or
472 raise NotImplementedError
476 """Base for query utility classes.
479 #: Attribute holding field definitions
482 def __init__(self, filter_, fields, use_locking):
483 """Initializes this class.
486 self.use_locking = use_locking
488 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
490 self.requested_data = self.query.RequestedData()
491 self.names = self.query.RequestedNames()
493 # Sort only if no names were requested
494 self.sort_by_name = not self.names
496 self.do_locking = None
499 def _GetNames(self, lu, all_names, lock_level):
500 """Helper function to determine names asked for in the query.
504 names = lu.glm.list_owned(lock_level)
508 if self.wanted == locking.ALL_SET:
509 assert not self.names
510 # caller didn't specify names, so ordering is not important
511 return utils.NiceSort(names)
513 # caller specified names and we must keep the same order
515 assert not self.do_locking or lu.glm.is_owned(lock_level)
517 missing = set(self.wanted).difference(names)
519 raise errors.OpExecError("Some items were removed before retrieving"
520 " their data: %s" % missing)
522 # Return expanded names
525 def ExpandNames(self, lu):
526 """Expand names for this query.
528 See L{LogicalUnit.ExpandNames}.
531 raise NotImplementedError()
533 def DeclareLocks(self, lu, level):
534 """Declare locks for this query.
536 See L{LogicalUnit.DeclareLocks}.
539 raise NotImplementedError()
541 def _GetQueryData(self, lu):
542 """Collects all data for this query.
544 @return: Query data object
547 raise NotImplementedError()
549 def NewStyleQuery(self, lu):
550 """Collect data and execute query.
553 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
554 sort_by_name=self.sort_by_name)
556 def OldStyleQuery(self, lu):
557 """Collect data and execute query.
560 return self.query.OldStyleQuery(self._GetQueryData(lu),
561 sort_by_name=self.sort_by_name)
564 def _GetWantedNodes(lu, nodes):
565 """Returns list of checked and expanded node names.
567 @type lu: L{LogicalUnit}
568 @param lu: the logical unit on whose behalf we execute
570 @param nodes: list of node names or None for all nodes
572 @return: the list of nodes, sorted
573 @raise errors.ProgrammerError: if the nodes parameter is wrong type
577 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
579 return utils.NiceSort(lu.cfg.GetNodeList())
582 def _GetWantedInstances(lu, instances):
583 """Returns list of checked and expanded instance names.
585 @type lu: L{LogicalUnit}
586 @param lu: the logical unit on whose behalf we execute
587 @type instances: list
588 @param instances: list of instance names or None for all instances
590 @return: the list of instances, sorted
591 @raise errors.OpPrereqError: if the instances parameter is wrong type
592 @raise errors.OpPrereqError: if any of the passed instances is not found
596 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
598 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
602 def _GetUpdatedParams(old_params, update_dict,
603 use_default=True, use_none=False):
604 """Return the new version of a parameter dictionary.
606 @type old_params: dict
607 @param old_params: old parameters
608 @type update_dict: dict
609 @param update_dict: dict containing new parameter values, or
610 constants.VALUE_DEFAULT to reset the parameter to its default
612 @type use_default: boolean
613 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
614 values as 'to be deleted' values
615 @type use_none: boolean
616 @param use_none: whether to recognise C{None} values as 'to be
619 @return: the new parameter dictionary
622 params_copy = copy.deepcopy(old_params)
623 for key, val in update_dict.iteritems():
624 if ((use_default and val == constants.VALUE_DEFAULT) or
625 (use_none and val is None)):
631 params_copy[key] = val
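# Behaviour sketch for the helper above (parameter names and values are
# made up); with the defaults, constants.VALUE_DEFAULT removes a key and
# any other value overrides it:
#
#   _GetUpdatedParams({"a": 1, "b": 2},
#                     {"a": constants.VALUE_DEFAULT, "c": 3})
#   --> {"b": 2, "c": 3}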
635 def _ReleaseLocks(lu, level, names=None, keep=None):
636 """Releases locks owned by an LU.
638 @type lu: L{LogicalUnit}
639 @param level: Lock level
640 @type names: list or None
641 @param names: Names of locks to release
642 @type keep: list or None
643 @param keep: Names of locks to retain
646 assert not (keep is not None and names is not None), \
647 "Only one of the 'names' and the 'keep' parameters can be given"
649 if names is not None:
650 should_release = names.__contains__
652 should_release = lambda name: name not in keep
654 should_release = None
660 # Determine which locks to release
661 for name in lu.glm.list_owned(level):
662 if should_release(name):
667 assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
669 # Release just some locks
670 lu.glm.release(level, names=release)
672 assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
675 lu.glm.release(level)
677 assert not lu.glm.is_owned(level), "No locks should be owned"
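# Illustrative usage sketch (hypothetical opcode attribute): keep only the
# node locks still needed and release the rest:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])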
680 def _MapInstanceDisksToNodes(instances):
681 """Creates a map from (node, volume) to instance name.
683 @type instances: list of L{objects.Instance}
684 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
687 return dict(((node, vol), inst.name)
688 for inst in instances
689 for (node, vols) in inst.MapLVsByNode().items()
693 def _RunPostHook(lu, node_name):
694 """Runs the post-hook for an opcode on a single node.
697 hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
699 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
701 # pylint: disable-msg=W0702
702 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
705 def _CheckOutputFields(static, dynamic, selected):
706 """Checks whether all selected fields are valid.
708 @type static: L{utils.FieldSet}
709 @param static: static fields set
710 @type dynamic: L{utils.FieldSet}
711 @param dynamic: dynamic fields set
718 delta = f.NonMatching(selected)
720 raise errors.OpPrereqError("Unknown output fields selected: %s"
721 % ",".join(delta), errors.ECODE_INVAL)
724 def _CheckGlobalHvParams(params):
725 """Validates that given hypervisor params are not global ones.
727 This will ensure that instances don't get customised versions of
731 used_globals = constants.HVC_GLOBALS.intersection(params)
733 msg = ("The following hypervisor parameters are global and cannot"
734 " be customized at instance level, please modify them at"
735 " cluster level: %s" % utils.CommaJoin(used_globals))
736 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
739 def _CheckNodeOnline(lu, node, msg=None):
740 """Ensure that a given node is online.
742 @param lu: the LU on behalf of which we make the check
743 @param node: the node to check
744 @param msg: if passed, should be a message to replace the default one
745 @raise errors.OpPrereqError: if the node is offline
749 msg = "Can't use offline node"
750 if lu.cfg.GetNodeInfo(node).offline:
751 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
754 def _CheckNodeNotDrained(lu, node):
755 """Ensure that a given node is not drained.
757 @param lu: the LU on behalf of which we make the check
758 @param node: the node to check
759 @raise errors.OpPrereqError: if the node is drained
762 if lu.cfg.GetNodeInfo(node).drained:
763 raise errors.OpPrereqError("Can't use drained node %s" % node,
767 def _CheckNodeVmCapable(lu, node):
768 """Ensure that a given node is vm capable.
770 @param lu: the LU on behalf of which we make the check
771 @param node: the node to check
772 @raise errors.OpPrereqError: if the node is not vm capable
775 if not lu.cfg.GetNodeInfo(node).vm_capable:
776 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
780 def _CheckNodeHasOS(lu, node, os_name, force_variant):
781 """Ensure that a node supports a given OS.
783 @param lu: the LU on behalf of which we make the check
784 @param node: the node to check
785 @param os_name: the OS to query about
786 @param force_variant: whether to ignore variant errors
787 @raise errors.OpPrereqError: if the node does not support the OS
790 result = lu.rpc.call_os_get(node, os_name)
791 result.Raise("OS '%s' not in supported OS list for node %s" %
793 prereq=True, ecode=errors.ECODE_INVAL)
794 if not force_variant:
795 _CheckOSVariant(result.payload, os_name)
798 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
799 """Ensure that a node has the given secondary ip.
801 @type lu: L{LogicalUnit}
802 @param lu: the LU on behalf of which we make the check
804 @param node: the node to check
805 @type secondary_ip: string
806 @param secondary_ip: the ip to check
807 @type prereq: boolean
808 @param prereq: whether to throw a prerequisite or an execute error
809 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
810 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
813 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
814 result.Raise("Failure checking secondary ip on node %s" % node,
815 prereq=prereq, ecode=errors.ECODE_ENVIRON)
816 if not result.payload:
817 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
818 " please fix and re-run this command" % secondary_ip)
820 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
822 raise errors.OpExecError(msg)
825 def _GetClusterDomainSecret():
826 """Reads the cluster domain secret.
829 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
833 def _CheckInstanceDown(lu, instance, reason):
834 """Ensure that an instance is not running."""
835 if instance.admin_up:
836 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
837 (instance.name, reason), errors.ECODE_STATE)
839 pnode = instance.primary_node
840 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
841 ins_l.Raise("Can't contact node %s for instance information" % pnode,
842 prereq=True, ecode=errors.ECODE_ENVIRON)
844 if instance.name in ins_l.payload:
845 raise errors.OpPrereqError("Instance %s is running, %s" %
846 (instance.name, reason), errors.ECODE_STATE)
849 def _ExpandItemName(fn, name, kind):
850 """Expand an item name.
852 @param fn: the function to use for expansion
853 @param name: requested item name
854 @param kind: text description ('Node' or 'Instance')
855 @return: the resolved (full) name
856 @raise errors.OpPrereqError: if the item is not found
860 if full_name is None:
861 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
866 def _ExpandNodeName(cfg, name):
867 """Wrapper over L{_ExpandItemName} for nodes."""
868 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
871 def _ExpandInstanceName(cfg, name):
872 """Wrapper over L{_ExpandItemName} for instance."""
873 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
876 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
877 memory, vcpus, nics, disk_template, disks,
878 bep, hvp, hypervisor_name, tags):
879 """Builds instance related env variables for hooks
881 This builds the hook environment from individual variables.
884 @param name: the name of the instance
885 @type primary_node: string
886 @param primary_node: the name of the instance's primary node
887 @type secondary_nodes: list
888 @param secondary_nodes: list of secondary nodes as strings
889 @type os_type: string
890 @param os_type: the name of the instance's OS
891 @type status: boolean
892 @param status: the should_run status of the instance
894 @param memory: the memory size of the instance
896 @param vcpus: the count of VCPUs the instance has
898 @param nics: list of tuples (ip, mac, mode, link) representing
899 the NICs the instance has
900 @type disk_template: string
901 @param disk_template: the disk template of the instance
903 @param disks: the list of (size, mode) pairs
905 @param bep: the backend parameters for the instance
907 @param hvp: the hypervisor parameters for the instance
908 @type hypervisor_name: string
909 @param hypervisor_name: the hypervisor for the instance
911 @param tags: list of instance tags as strings
913 @return: the hook environment for this instance
922 "INSTANCE_NAME": name,
923 "INSTANCE_PRIMARY": primary_node,
924 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
925 "INSTANCE_OS_TYPE": os_type,
926 "INSTANCE_STATUS": str_status,
927 "INSTANCE_MEMORY": memory,
928 "INSTANCE_VCPUS": vcpus,
929 "INSTANCE_DISK_TEMPLATE": disk_template,
930 "INSTANCE_HYPERVISOR": hypervisor_name,
934 nic_count = len(nics)
935 for idx, (ip, mac, mode, link) in enumerate(nics):
938 env["INSTANCE_NIC%d_IP" % idx] = ip
939 env["INSTANCE_NIC%d_MAC" % idx] = mac
940 env["INSTANCE_NIC%d_MODE" % idx] = mode
941 env["INSTANCE_NIC%d_LINK" % idx] = link
942 if mode == constants.NIC_MODE_BRIDGED:
943 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
947 env["INSTANCE_NIC_COUNT"] = nic_count
950 disk_count = len(disks)
951 for idx, (size, mode) in enumerate(disks):
952 env["INSTANCE_DISK%d_SIZE" % idx] = size
953 env["INSTANCE_DISK%d_MODE" % idx] = mode
957 env["INSTANCE_DISK_COUNT"] = disk_count
962 env["INSTANCE_TAGS"] = " ".join(tags)
964 for source, kind in [(bep, "BE"), (hvp, "HV")]:
965 for key, value in source.items():
966 env["INSTANCE_%s_%s" % (kind, key)] = value
971 def _NICListToTuple(lu, nics):
972 """Build a list of nic information tuples.
974 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
975 value in LUInstanceQueryData.
977 @type lu: L{LogicalUnit}
978 @param lu: the logical unit on whose behalf we execute
979 @type nics: list of L{objects.NIC}
980 @param nics: list of nics to convert to hooks tuples
984 cluster = lu.cfg.GetClusterInfo()
988 filled_params = cluster.SimpleFillNIC(nic.nicparams)
989 mode = filled_params[constants.NIC_MODE]
990 link = filled_params[constants.NIC_LINK]
991 hooks_nics.append((ip, mac, mode, link))
995 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
996 """Builds instance related env variables for hooks from an object.
998 @type lu: L{LogicalUnit}
999 @param lu: the logical unit on whose behalf we execute
1000 @type instance: L{objects.Instance}
1001 @param instance: the instance for which we should build the
1003 @type override: dict
1004 @param override: dictionary with key/values that will override
1007 @return: the hook environment dictionary
1010 cluster = lu.cfg.GetClusterInfo()
1011 bep = cluster.FillBE(instance)
1012 hvp = cluster.FillHV(instance)
1014 "name": instance.name,
1015 "primary_node": instance.primary_node,
1016 "secondary_nodes": instance.secondary_nodes,
1017 "os_type": instance.os,
1018 "status": instance.admin_up,
1019 "memory": bep[constants.BE_MEMORY],
1020 "vcpus": bep[constants.BE_VCPUS],
1021 "nics": _NICListToTuple(lu, instance.nics),
1022 "disk_template": instance.disk_template,
1023 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1026 "hypervisor_name": instance.hypervisor,
1027 "tags": instance.tags,
1030 args.update(override)
1031 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1034 def _AdjustCandidatePool(lu, exceptions):
1035 """Adjust the candidate pool after node operations.
1038 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1040 lu.LogInfo("Promoted nodes to master candidate role: %s",
1041 utils.CommaJoin(node.name for node in mod_list))
1042 for name in mod_list:
1043 lu.context.ReaddNode(name)
1044 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1046 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1050 def _DecideSelfPromotion(lu, exceptions=None):
1051 """Decide whether I should promote myself as a master candidate.
1054 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1055 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1056 # the new node will increase mc_max with one, so:
1057 mc_should = min(mc_should + 1, cp_size)
1058 return mc_now < mc_should
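# Worked example for the check above (numbers are made up): with
# cp_size = 10, mc_now = 3 and mc_should = 3, adding the new node gives
# mc_should = min(3 + 1, 10) = 4, so 3 < 4 and the node promotes itself.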
1061 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1062 """Check that the brigdes needed by a list of nics exist.
1065 cluster = lu.cfg.GetClusterInfo()
1066 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1067 brlist = [params[constants.NIC_LINK] for params in paramslist
1068 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1070 result = lu.rpc.call_bridges_exist(target_node, brlist)
1071 result.Raise("Error checking bridges on destination node '%s'" %
1072 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1075 def _CheckInstanceBridgesExist(lu, instance, node=None):
1076 """Check that the brigdes needed by an instance exist.
1080 node = instance.primary_node
1081 _CheckNicsBridgesExist(lu, instance.nics, node)
1084 def _CheckOSVariant(os_obj, name):
1085 """Check whether an OS name conforms to the os variants specification.
1087 @type os_obj: L{objects.OS}
1088 @param os_obj: OS object to check
1090 @param name: OS name passed by the user, to check for validity
1093 if not os_obj.supported_variants:
1095 variant = objects.OS.GetVariant(name)
1097 raise errors.OpPrereqError("OS name must include a variant",
1100 if variant not in os_obj.supported_variants:
1101 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1104 def _GetNodeInstancesInner(cfg, fn):
1105 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1108 def _GetNodeInstances(cfg, node_name):
1109 """Returns a list of all primary and secondary instances on a node.
1113 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1116 def _GetNodePrimaryInstances(cfg, node_name):
1117 """Returns primary instances on a node.
1120 return _GetNodeInstancesInner(cfg,
1121 lambda inst: node_name == inst.primary_node)
1124 def _GetNodeSecondaryInstances(cfg, node_name):
1125 """Returns secondary instances on a node.
1128 return _GetNodeInstancesInner(cfg,
1129 lambda inst: node_name in inst.secondary_nodes)
1132 def _GetStorageTypeArgs(cfg, storage_type):
1133 """Returns the arguments for a storage type.
1136 # Special case for file storage
1137 if storage_type == constants.ST_FILE:
1138 # storage.FileStorage wants a list of storage directories
1139 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1144 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1147 for dev in instance.disks:
1148 cfg.SetDiskID(dev, node_name)
1150 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1151 result.Raise("Failed to get disk status from node %s" % node_name,
1152 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1154 for idx, bdev_status in enumerate(result.payload):
1155 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1161 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1162 """Check the sanity of iallocator and node arguments and use the
1163 cluster-wide iallocator if appropriate.
1165 Check that at most one of (iallocator, node) is specified. If none is
1166 specified, then the LU's opcode's iallocator slot is filled with the
1167 cluster-wide default iallocator.
1169 @type iallocator_slot: string
1170 @param iallocator_slot: the name of the opcode iallocator slot
1171 @type node_slot: string
1172 @param node_slot: the name of the opcode target node slot
1175 node = getattr(lu.op, node_slot, None)
1176 iallocator = getattr(lu.op, iallocator_slot, None)
1178 if node is not None and iallocator is not None:
1179 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1181 elif node is None and iallocator is None:
1182 default_iallocator = lu.cfg.GetDefaultIAllocator()
1183 if default_iallocator:
1184 setattr(lu.op, iallocator_slot, default_iallocator)
1186 raise errors.OpPrereqError("No iallocator or node given and no"
1187 " cluster-wide default iallocator found;"
1188 " please specify either an iallocator or a"
1189 " node, or set a cluster-wide default"
1193 class LUClusterPostInit(LogicalUnit):
1194 """Logical unit for running hooks after cluster initialization.
1197 HPATH = "cluster-init"
1198 HTYPE = constants.HTYPE_CLUSTER
1200 def BuildHooksEnv(self):
1205 "OP_TARGET": self.cfg.GetClusterName(),
1208 def BuildHooksNodes(self):
1209 """Build hooks nodes.
1212 return ([], [self.cfg.GetMasterNode()])
1214 def Exec(self, feedback_fn):
1221 class LUClusterDestroy(LogicalUnit):
1222 """Logical unit for destroying the cluster.
1225 HPATH = "cluster-destroy"
1226 HTYPE = constants.HTYPE_CLUSTER
1228 def BuildHooksEnv(self):
1233 "OP_TARGET": self.cfg.GetClusterName(),
1236 def BuildHooksNodes(self):
1237 """Build hooks nodes.
1242 def CheckPrereq(self):
1243 """Check prerequisites.
1245 This checks whether the cluster is empty.
1247 Any errors are signaled by raising errors.OpPrereqError.
1250 master = self.cfg.GetMasterNode()
1252 nodelist = self.cfg.GetNodeList()
1253 if len(nodelist) != 1 or nodelist[0] != master:
1254 raise errors.OpPrereqError("There are still %d node(s) in"
1255 " this cluster." % (len(nodelist) - 1),
1257 instancelist = self.cfg.GetInstanceList()
1259 raise errors.OpPrereqError("There are still %d instance(s) in"
1260 " this cluster." % len(instancelist),
1263 def Exec(self, feedback_fn):
1264 """Destroys the cluster.
1267 master = self.cfg.GetMasterNode()
1269 # Run post hooks on master node before it's removed
1270 _RunPostHook(self, master)
1272 result = self.rpc.call_node_stop_master(master, False)
1273 result.Raise("Could not disable the master role")
1278 def _VerifyCertificate(filename):
1279 """Verifies a certificate for L{LUClusterVerifyConfig}.
1281 @type filename: string
1282 @param filename: Path to PEM file
1286 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1287 utils.ReadFile(filename))
1288 except Exception, err: # pylint: disable-msg=W0703
1289 return (LUClusterVerifyConfig.ETYPE_ERROR,
1290 "Failed to load X509 certificate %s: %s" % (filename, err))
1293 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1294 constants.SSL_CERT_EXPIRATION_ERROR)
1297 fnamemsg = "While verifying %s: %s" % (filename, msg)
1302 return (None, fnamemsg)
1303 elif errcode == utils.CERT_WARNING:
1304 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1305 elif errcode == utils.CERT_ERROR:
1306 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1308 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1311 def _GetAllHypervisorParameters(cluster, instances):
1312 """Compute the set of all hypervisor parameters.
1314 @type cluster: L{objects.Cluster}
1315 @param cluster: the cluster object
1316 @type instances: list of L{objects.Instance}
1317 @param instances: additional instances from which to obtain parameters
1318 @rtype: list of (origin, hypervisor, parameters)
1319 @return: a list with all parameters found, indicating the hypervisor they
1320 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1325 for hv_name in cluster.enabled_hypervisors:
1326 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1328 for os_name, os_hvp in cluster.os_hvp.items():
1329 for hv_name, hv_params in os_hvp.items():
1331 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1332 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1334 # TODO: collapse identical parameter values in a single one
1335 for instance in instances:
1336 if instance.hvparams:
1337 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1338 cluster.FillHV(instance)))
1343 class _VerifyErrors(object):
1344 """Mix-in for cluster/group verify LUs.
1346 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1347 self.op and self._feedback_fn to be available.)
1350 TCLUSTER = "cluster"
1352 TINSTANCE = "instance"
1354 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1355 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1356 ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1357 ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1358 ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1359 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1360 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1361 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1362 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1363 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1364 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1365 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1366 ENODEDRBD = (TNODE, "ENODEDRBD")
1367 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1368 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1369 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1370 ENODEHV = (TNODE, "ENODEHV")
1371 ENODELVM = (TNODE, "ENODELVM")
1372 ENODEN1 = (TNODE, "ENODEN1")
1373 ENODENET = (TNODE, "ENODENET")
1374 ENODEOS = (TNODE, "ENODEOS")
1375 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1376 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1377 ENODERPC = (TNODE, "ENODERPC")
1378 ENODESSH = (TNODE, "ENODESSH")
1379 ENODEVERSION = (TNODE, "ENODEVERSION")
1380 ENODESETUP = (TNODE, "ENODESETUP")
1381 ENODETIME = (TNODE, "ENODETIME")
1382 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1384 ETYPE_FIELD = "code"
1385 ETYPE_ERROR = "ERROR"
1386 ETYPE_WARNING = "WARNING"
1388 def _Error(self, ecode, item, msg, *args, **kwargs):
1389 """Format an error message.
1391 Based on the opcode's error_codes parameter, either format a
1392 parseable error code, or a simpler error string.
1394 This must be called only from Exec and functions called from Exec.
1397 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1399 # first complete the msg
1402 # then format the whole message
1403 if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
1404 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1410 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1411 # and finally report it via the feedback_fn
1412 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable-msg=E1101
1414 def _ErrorIf(self, cond, *args, **kwargs):
1415 """Log an error message if the passed condition is True.
1419 or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
1421 self._Error(*args, **kwargs)
1422 # do not mark the operation as failed for WARN-only cases
1423 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1424 self.bad = self.bad or cond
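# Illustrative sketch of how verify code reports problems via the mix-in
# above (the condition, node name and message are hypothetical):
#
#   self._ErrorIf(test, self.ENODERPC, node,
#                 "unable to verify node: no data returned")
#   self._ErrorIf(expired, self.ECLUSTERCERT, None,
#                 "certificate %s is about to expire", filename,
#                 code=self.ETYPE_WARNING)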
1427 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1428 """Verifies the cluster config.
1433 def _VerifyHVP(self, hvp_data):
1434 """Verifies locally the syntax of the hypervisor parameters.
1437 for item, hv_name, hv_params in hvp_data:
1438 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1441 hv_class = hypervisor.GetHypervisor(hv_name)
1442 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1443 hv_class.CheckParameterSyntax(hv_params)
1444 except errors.GenericError, err:
1445 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1447 def ExpandNames(self):
1448 # Information can be safely retrieved as the BGL is acquired in exclusive
1450 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1451 self.all_node_info = self.cfg.GetAllNodesInfo()
1452 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1453 self.needed_locks = {}
1455 def Exec(self, feedback_fn):
1456 """Verify integrity of cluster, performing various test on nodes.
1460 self._feedback_fn = feedback_fn
1462 feedback_fn("* Verifying cluster config")
1464 for msg in self.cfg.VerifyConfig():
1465 self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1467 feedback_fn("* Verifying cluster certificate files")
1469 for cert_filename in constants.ALL_CERT_FILES:
1470 (errcode, msg) = _VerifyCertificate(cert_filename)
1471 self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1473 feedback_fn("* Verifying hypervisor parameters")
1475 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1476 self.all_inst_info.values()))
1478 feedback_fn("* Verifying all nodes belong to an existing group")
1480 # We do this verification here because, should this bogus circumstance
1481 # occur, it would never be caught by VerifyGroup, which only acts on
1482 # nodes/instances reachable from existing node groups.
1484 dangling_nodes = set(node.name for node in self.all_node_info.values()
1485 if node.group not in self.all_group_info)
1487 dangling_instances = {}
1488 no_node_instances = []
1490 for inst in self.all_inst_info.values():
1491 if inst.primary_node in dangling_nodes:
1492 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1493 elif inst.primary_node not in self.all_node_info:
1494 no_node_instances.append(inst.name)
1499 utils.CommaJoin(dangling_instances.get(node.name,
1501 for node in dangling_nodes]
1503 self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1504 "the following nodes (and their instances) belong to a non"
1505 " existing group: %s", utils.CommaJoin(pretty_dangling))
1507 self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1508 "the following instances have a non-existing primary-node:"
1509 " %s", utils.CommaJoin(no_node_instances))
1511 return (not self.bad, [g.name for g in self.all_group_info.values()])
1514 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1515 """Verifies the status of a node group.
1518 HPATH = "cluster-verify"
1519 HTYPE = constants.HTYPE_CLUSTER
1522 _HOOKS_INDENT_RE = re.compile("^", re.M)
1524 class NodeImage(object):
1525 """A class representing the logical and physical status of a node.
1528 @ivar name: the node name to which this object refers
1529 @ivar volumes: a structure as returned from
1530 L{ganeti.backend.GetVolumeList} (runtime)
1531 @ivar instances: a list of running instances (runtime)
1532 @ivar pinst: list of configured primary instances (config)
1533 @ivar sinst: list of configured secondary instances (config)
1534 @ivar sbp: dictionary of {primary-node: list of instances} for all
1535 instances for which this node is secondary (config)
1536 @ivar mfree: free memory, as reported by hypervisor (runtime)
1537 @ivar dfree: free disk, as reported by the node (runtime)
1538 @ivar offline: the offline status (config)
1539 @type rpc_fail: boolean
1540 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1541 not whether the individual keys were correct) (runtime)
1542 @type lvm_fail: boolean
1543 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1544 @type hyp_fail: boolean
1545 @ivar hyp_fail: whether the RPC call didn't return the instance list
1546 @type ghost: boolean
1547 @ivar ghost: whether this is a known node or not (config)
1548 @type os_fail: boolean
1549 @ivar os_fail: whether the RPC call didn't return valid OS data
1551 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1552 @type vm_capable: boolean
1553 @ivar vm_capable: whether the node can host instances
1556 def __init__(self, offline=False, name=None, vm_capable=True):
1565 self.offline = offline
1566 self.vm_capable = vm_capable
1567 self.rpc_fail = False
1568 self.lvm_fail = False
1569 self.hyp_fail = False
1571 self.os_fail = False
1574 def ExpandNames(self):
1575 # This raises errors.OpPrereqError on its own:
1576 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1578 # Get instances in node group; this is unsafe and needs verification later
1579 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1581 self.needed_locks = {
1582 locking.LEVEL_INSTANCE: inst_names,
1583 locking.LEVEL_NODEGROUP: [self.group_uuid],
1584 locking.LEVEL_NODE: [],
1587 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1589 def DeclareLocks(self, level):
1590 if level == locking.LEVEL_NODE:
1591 # Get members of node group; this is unsafe and needs verification later
1592 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1594 all_inst_info = self.cfg.GetAllInstancesInfo()
1596 # In Exec(), we warn about mirrored instances that have primary and
1597 # secondary living in separate node groups. To fully verify that
1598 # volumes for these instances are healthy, we will need to do an
1599 # extra call to their secondaries. We ensure here those nodes will
1601 for inst in self.glm.list_owned(locking.LEVEL_INSTANCE):
1602 # Important: access only the instances whose lock is owned
1603 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1604 nodes.update(all_inst_info[inst].secondary_nodes)
1606 self.needed_locks[locking.LEVEL_NODE] = nodes
1608 def CheckPrereq(self):
1609 group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1610 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1613 group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1615 unlocked_instances = \
1616 group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))
1619 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1620 utils.CommaJoin(unlocked_nodes))
1622 if unlocked_instances:
1623 raise errors.OpPrereqError("Missing lock for instances: %s" %
1624 utils.CommaJoin(unlocked_instances))
1626 self.all_node_info = self.cfg.GetAllNodesInfo()
1627 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1629 self.my_node_names = utils.NiceSort(group_nodes)
1630 self.my_inst_names = utils.NiceSort(group_instances)
1632 self.my_node_info = dict((name, self.all_node_info[name])
1633 for name in self.my_node_names)
1635 self.my_inst_info = dict((name, self.all_inst_info[name])
1636 for name in self.my_inst_names)
1638 # We detect here the nodes that will need the extra RPC calls for verifying
1639 # split LV volumes; they should be locked.
1640 extra_lv_nodes = set()
1642 for inst in self.my_inst_info.values():
1643 if inst.disk_template in constants.DTS_INT_MIRROR:
1644 group = self.my_node_info[inst.primary_node].group
1645 for nname in inst.secondary_nodes:
1646 if self.all_node_info[nname].group != group:
1647 extra_lv_nodes.add(nname)
1649 unlocked_lv_nodes = \
1650 extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1652 if unlocked_lv_nodes:
1653 raise errors.OpPrereqError("these nodes could be locked: %s" %
1654 utils.CommaJoin(unlocked_lv_nodes))
1655 self.extra_lv_nodes = list(extra_lv_nodes)
1657 def _VerifyNode(self, ninfo, nresult):
1658 """Perform some basic validation on data returned from a node.
1660 - check the result data structure is well formed and has all the
1662 - check ganeti version
1664 @type ninfo: L{objects.Node}
1665 @param ninfo: the node to check
1666 @param nresult: the results from the node
1668 @return: whether overall this call was successful (and we can expect
1669 reasonable values in the response)
1673 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1675 # main result, nresult should be a non-empty dict
1676 test = not nresult or not isinstance(nresult, dict)
1677 _ErrorIf(test, self.ENODERPC, node,
1678 "unable to verify node: no data returned")
1682 # compares ganeti version
1683 local_version = constants.PROTOCOL_VERSION
1684 remote_version = nresult.get("version", None)
1685 test = not (remote_version and
1686 isinstance(remote_version, (list, tuple)) and
1687 len(remote_version) == 2)
1688 _ErrorIf(test, self.ENODERPC, node,
1689 "connection to node returned invalid data")
1693 test = local_version != remote_version[0]
1694 _ErrorIf(test, self.ENODEVERSION, node,
1695 "incompatible protocol versions: master %s,"
1696 " node %s", local_version, remote_version[0])
1700 # node seems compatible, we can actually try to look into its results
1702 # full package version
1703 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1704 self.ENODEVERSION, node,
1705 "software version mismatch: master %s, node %s",
1706 constants.RELEASE_VERSION, remote_version[1],
1707 code=self.ETYPE_WARNING)
1709 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1710 if ninfo.vm_capable and isinstance(hyp_result, dict):
1711 for hv_name, hv_result in hyp_result.iteritems():
1712 test = hv_result is not None
1713 _ErrorIf(test, self.ENODEHV, node,
1714 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1716 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1717 if ninfo.vm_capable and isinstance(hvp_result, list):
1718 for item, hv_name, hv_result in hvp_result:
1719 _ErrorIf(True, self.ENODEHV, node,
1720 "hypervisor %s parameter verify failure (source %s): %s",
1721 hv_name, item, hv_result)
1723 test = nresult.get(constants.NV_NODESETUP,
1724 ["Missing NODESETUP results"])
1725 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1730 def _VerifyNodeTime(self, ninfo, nresult,
1731 nvinfo_starttime, nvinfo_endtime):
1732 """Check the node time.
1734 @type ninfo: L{objects.Node}
1735 @param ninfo: the node to check
1736 @param nresult: the remote results for the node
1737 @param nvinfo_starttime: the start time of the RPC call
1738 @param nvinfo_endtime: the end time of the RPC call
1742 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1744 ntime = nresult.get(constants.NV_TIME, None)
1746 ntime_merged = utils.MergeTime(ntime)
1747 except (ValueError, TypeError):
1748 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1751 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1752 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1753 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1754 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1758 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1759 "Node time diverges by at least %s from master node time",
1762 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1763 """Check the node LVM results.
1765 @type ninfo: L{objects.Node}
1766 @param ninfo: the node to check
1767 @param nresult: the remote results for the node
1768 @param vg_name: the configured VG name
1775 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1777 # checks vg existence and size > 20G
1778 vglist = nresult.get(constants.NV_VGLIST, None)
1780 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1782 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1783 constants.MIN_VG_SIZE)
1784 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1787 pvlist = nresult.get(constants.NV_PVLIST, None)
1788 test = pvlist is None
1789 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1791 # check that ':' is not present in PV names, since it's a
1792 # special character for lvcreate (denotes the range of PEs to
1794 for _, pvname, owner_vg in pvlist:
1795 test = ":" in pvname
1796 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1797 " '%s' of VG '%s'", pvname, owner_vg)
1799 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1800 """Check the node bridges.
1802 @type ninfo: L{objects.Node}
1803 @param ninfo: the node to check
1804 @param nresult: the remote results for the node
1805 @param bridges: the expected list of bridges
1812 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1814 missing = nresult.get(constants.NV_BRIDGES, None)
1815 test = not isinstance(missing, list)
1816 _ErrorIf(test, self.ENODENET, node,
1817 "did not return valid bridge information")
1819 _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1820 utils.CommaJoin(sorted(missing)))
1822 def _VerifyNodeNetwork(self, ninfo, nresult):
1823 """Check the node network connectivity results.
1825 @type ninfo: L{objects.Node}
1826 @param ninfo: the node to check
1827 @param nresult: the remote results for the node
1831 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1833 test = constants.NV_NODELIST not in nresult
1834 _ErrorIf(test, self.ENODESSH, node,
1835 "node hasn't returned node ssh connectivity data")
1837 if nresult[constants.NV_NODELIST]:
1838 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1839 _ErrorIf(True, self.ENODESSH, node,
1840 "ssh communication with node '%s': %s", a_node, a_msg)
1842 test = constants.NV_NODENETTEST not in nresult
1843 _ErrorIf(test, self.ENODENET, node,
1844 "node hasn't returned node tcp connectivity data")
1846 if nresult[constants.NV_NODENETTEST]:
1847 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1849 _ErrorIf(True, self.ENODENET, node,
1850 "tcp communication with node '%s': %s",
1851 anode, nresult[constants.NV_NODENETTEST][anode])
1853 test = constants.NV_MASTERIP not in nresult
1854 _ErrorIf(test, self.ENODENET, node,
1855 "node hasn't returned node master IP reachability data")
1857 if not nresult[constants.NV_MASTERIP]:
1858 if node == self.master_node:
1859 msg = "the master node cannot reach the master IP (not configured?)"
1861 msg = "cannot reach the master IP"
1862 _ErrorIf(True, self.ENODENET, node, msg)
1864 def _VerifyInstance(self, instance, instanceconfig, node_image,
1866 """Verify an instance.
1868 This function checks to see if the required block devices are
1869 available on the instance's node.
1872 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1873 node_current = instanceconfig.primary_node
1875 node_vol_should = {}
1876 instanceconfig.MapLVsByNode(node_vol_should)
1878 for node in node_vol_should:
1879 n_img = node_image[node]
1880 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1881 # ignore missing volumes on offline or broken nodes
1883 for volume in node_vol_should[node]:
1884 test = volume not in n_img.volumes
1885 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1886 "volume %s missing on node %s", volume, node)
1888 if instanceconfig.admin_up:
1889 pri_img = node_image[node_current]
1890 test = instance not in pri_img.instances and not pri_img.offline
1891 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1892 "instance not running on its primary node %s",
1895 diskdata = [(nname, success, status, idx)
1896 for (nname, disks) in diskstatus.items()
1897 for idx, (success, status) in enumerate(disks)]
1899 for nname, success, bdev_status, idx in diskdata:
1900 # the 'ghost node' construction in Exec() ensures that we have a
1902 snode = node_image[nname]
1903 bad_snode = snode.ghost or snode.offline
1904 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1905 self.EINSTANCEFAULTYDISK, instance,
1906 "couldn't retrieve status for disk/%s on %s: %s",
1907 idx, nname, bdev_status)
1908 _ErrorIf((instanceconfig.admin_up and success and
1909 bdev_status.ldisk_status == constants.LDS_FAULTY),
1910 self.EINSTANCEFAULTYDISK, instance,
1911 "disk/%s on %s is faulty", idx, nname)
1913 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1914 """Verify if there are any unknown volumes in the cluster.
1916 The .os, .swap and backup volumes are ignored. All other volumes are
1917 reported as unknown.
1919 @type reserved: L{ganeti.utils.FieldSet}
1920 @param reserved: a FieldSet of reserved volume names
1923 for node, n_img in node_image.items():
1924 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1925 # skip non-healthy nodes
1926 continue
1927 for volume in n_img.volumes:
1928 test = ((node not in node_vol_should or
1929 volume not in node_vol_should[node]) and
1930 not reserved.Matches(volume))
1931 self._ErrorIf(test, self.ENODEORPHANLV, node,
1932 "volume %s is unknown", volume)
1934 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1935 """Verify N+1 Memory Resilience.
1937 Check that if one single node dies we can still start all the
1938 instances it was primary for.
1941 cluster_info = self.cfg.GetClusterInfo()
1942 for node, n_img in node_image.items():
1943 # This code checks that every node which is now listed as
1944 # secondary has enough memory to host all instances it is
1945 # supposed to, should a single other node in the cluster fail.
1946 # FIXME: not ready for failover to an arbitrary node
1947 # FIXME: does not support file-backed instances
1948 # WARNING: we currently take into account down instances as well
1949 # as up ones, considering that even if they're down someone
1950 # might want to start them even in the event of a node failure.
1952 # we're skipping offline nodes from the N+1 warning, since
1953 # most likely we don't have good memory information from them;
1954 # we already list instances living on such nodes, and that's
1957 for prinode, instances in n_img.sbp.items():
1959 for instance in instances:
1960 bep = cluster_info.FillBE(instance_cfg[instance])
1961 if bep[constants.BE_AUTO_BALANCE]:
1962 needed_mem += bep[constants.BE_MEMORY]
1963 test = n_img.mfree < needed_mem
1964 self._ErrorIf(test, self.ENODEN1, node,
1965 "not enough memory to accomodate instance failovers"
1966 " should node %s fail (%dMiB needed, %dMiB available)",
1967 prinode, needed_mem, n_img.mfree)
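# Worked example (editor's sketch, hypothetical numbers): if node A is
# secondary for two auto-balanced instances whose primary is node B, each
# with BE_MEMORY = 2048 MiB, then needed_mem = 4096 MiB; with
# n_img.mfree = 3072 MiB the test above is true and an ENODEN1 error is
# reported for node A.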
1970 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1971 (files_all, files_all_opt, files_mc, files_vm)):
1972 """Verifies file checksums collected from all nodes.
1974 @param errorif: Callback for reporting errors
1975 @param nodeinfo: List of L{objects.Node} objects
1976 @param master_node: Name of master node
1977 @param all_nvinfo: RPC results
1980 node_names = frozenset(node.name for node in nodeinfo)
1982 assert master_node in node_names
1983 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1984 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1985 "Found file listed in more than one file list"
1987 # Define functions determining which nodes to consider for a file
1988 file2nodefn = dict([(filename, fn)
1989 for (files, fn) in [(files_all, None),
1990 (files_all_opt, None),
1991 (files_mc, lambda node: (node.master_candidate or
1992 node.name == master_node)),
1993 (files_vm, lambda node: node.vm_capable)]
1994 for filename in files])
1996 fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1998 for node in nodeinfo:
1999 nresult = all_nvinfo[node.name]
2001 if nresult.fail_msg or not nresult.payload:
2004 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2006 test = not (node_files and isinstance(node_files, dict))
2007 errorif(test, cls.ENODEFILECHECK, node.name,
2008 "Node did not return file checksum data")
2012 for (filename, checksum) in node_files.items():
2013 # Check if the file should be considered for a node
2014 fn = file2nodefn[filename]
2015 if fn is None or fn(node):
2016 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2018 for (filename, checksums) in fileinfo.items():
2019 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2021 # Nodes having the file
2022 with_file = frozenset(node_name
2023 for nodes in fileinfo[filename].values()
2024 for node_name in nodes)
2026 # Nodes missing file
2027 missing_file = node_names - with_file
2029 if filename in files_all_opt:
2031 errorif(missing_file and missing_file != node_names,
2032 cls.ECLUSTERFILECHECK, None,
2033 "File %s is optional, but it must exist on all or no"
2034 " nodes (not found on %s)",
2035 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2037 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2038 "File %s is missing from node(s) %s", filename,
2039 utils.CommaJoin(utils.NiceSort(missing_file)))
2041 # See if there are multiple versions of the file
2042 test = len(checksums) > 1
2044 variants = ["variant %s on %s" %
2045 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2046 for (idx, (checksum, nodes)) in
2047 enumerate(sorted(checksums.items()))]
2051 errorif(test, cls.ECLUSTERFILECHECK, None,
2052 "File %s found with %s different checksums (%s)",
2053 filename, len(checksums), "; ".join(variants))
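# Illustrative shape of the structure checked above (editor's sketch,
# path and checksum hypothetical): fileinfo maps every tracked filename to
# a dict of checksum -> set of node names reporting that checksum, e.g.
#   {"/var/lib/ganeti/known_hosts": {"a1b2c3...": set(["node1", "node2"])}}
# A filename with more than one checksum key therefore has divergent copies
# on the cluster.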
2055 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2057 """Verifies and the node DRBD status.
2059 @type ninfo: L{objects.Node}
2060 @param ninfo: the node to check
2061 @param nresult: the remote results for the node
2062 @param instanceinfo: the dict of instances
2063 @param drbd_helper: the configured DRBD usermode helper
2064 @param drbd_map: the DRBD map as returned by
2065 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2069 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2072 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2073 test = (helper_result == None)
2074 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2075 "no drbd usermode helper returned")
2077 status, payload = helper_result
2078 test = not status
2079 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2080 "drbd usermode helper check unsuccessful: %s", payload)
2081 test = status and (payload != drbd_helper)
2082 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2083 "wrong drbd usermode helper: %s", payload)
2085 # compute the DRBD minors
2087 for minor, instance in drbd_map[node].items():
2088 test = instance not in instanceinfo
2089 _ErrorIf(test, self.ECLUSTERCFG, None,
2090 "ghost instance '%s' in temporary DRBD map", instance)
2091 # ghost instance should not be running, but otherwise we
2092 # don't give double warnings (both ghost instance and
2093 # unallocated minor in use)
2095 node_drbd[minor] = (instance, False)
2097 instance = instanceinfo[instance]
2098 node_drbd[minor] = (instance.name, instance.admin_up)
2100 # and now check them
2101 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2102 test = not isinstance(used_minors, (tuple, list))
2103 _ErrorIf(test, self.ENODEDRBD, node,
2104 "cannot parse drbd status file: %s", str(used_minors))
2106 # we cannot check drbd status
2109 for minor, (iname, must_exist) in node_drbd.items():
2110 test = minor not in used_minors and must_exist
2111 _ErrorIf(test, self.ENODEDRBD, node,
2112 "drbd minor %d of instance %s is not active", minor, iname)
2113 for minor in used_minors:
2114 test = minor not in node_drbd
2115 _ErrorIf(test, self.ENODEDRBD, node,
2116 "unallocated drbd minor %d is in use", minor)
2118 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2119 """Builds the node OS structures.
2121 @type ninfo: L{objects.Node}
2122 @param ninfo: the node to check
2123 @param nresult: the remote results for the node
2124 @param nimg: the node image object
2128 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2130 remote_os = nresult.get(constants.NV_OSLIST, None)
2131 test = (not isinstance(remote_os, list) or
2132 not compat.all(isinstance(v, list) and len(v) == 7
2133 for v in remote_os))
2135 _ErrorIf(test, self.ENODEOS, node,
2136 "node hasn't returned valid OS data")
2145 for (name, os_path, status, diagnose,
2146 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2148 if name not in os_dict:
2151 # parameters is a list of lists instead of list of tuples due to
2152 # JSON lacking a real tuple type, fix it:
2153 parameters = [tuple(v) for v in parameters]
2154 os_dict[name].append((os_path, status, diagnose,
2155 set(variants), set(parameters), set(api_ver)))
2157 nimg.oslist = os_dict
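# Illustrative shape of nimg.oslist (editor's sketch, values hypothetical):
#   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                     set(["default"]), set(), set([20]))]}
# i.e. OS name -> list of (path, status, diagnose, variants, parameters,
# api_versions) tuples; more than one entry for a name means the OS exists
# in several search-path directories, the first one shadowing the rest.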
2159 def _VerifyNodeOS(self, ninfo, nimg, base):
2160 """Verifies the node OS list.
2162 @type ninfo: L{objects.Node}
2163 @param ninfo: the node to check
2164 @param nimg: the node image object
2165 @param base: the 'template' node we match against (e.g. from the master)
2169 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2171 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2173 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2174 for os_name, os_data in nimg.oslist.items():
2175 assert os_data, "Empty OS status for OS %s?!" % os_name
2176 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2177 _ErrorIf(not f_status, self.ENODEOS, node,
2178 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2179 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2180 "OS '%s' has multiple entries (first one shadows the rest): %s",
2181 os_name, utils.CommaJoin([v[0] for v in os_data]))
2182 # this will be caught in the backend too
2183 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
2184 and not f_var, self.ENODEOS, node,
2185 "OS %s with API at least %d does not declare any variant",
2186 os_name, constants.OS_API_V15)
2187 # comparisons with the 'base' image
2188 test = os_name not in base.oslist
2189 _ErrorIf(test, self.ENODEOS, node,
2190 "Extra OS %s not present on reference node (%s)",
2194 assert base.oslist[os_name], "Base node has empty OS status?"
2195 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2197 # base OS is invalid, skipping
2199 for kind, a, b in [("API version", f_api, b_api),
2200 ("variants list", f_var, b_var),
2201 ("parameters", beautify_params(f_param),
2202 beautify_params(b_param))]:
2203 _ErrorIf(a != b, self.ENODEOS, node,
2204 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2205 kind, os_name, base.name,
2206 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2208 # check any missing OSes
2209 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2210 _ErrorIf(missing, self.ENODEOS, node,
2211 "OSes present on reference node %s but missing on this node: %s",
2212 base.name, utils.CommaJoin(missing))
2214 def _VerifyOob(self, ninfo, nresult):
2215 """Verifies out of band functionality of a node.
2217 @type ninfo: L{objects.Node}
2218 @param ninfo: the node to check
2219 @param nresult: the remote results for the node
2223 # We just have to verify the paths on master and/or master candidates
2224 # as the oob helper is invoked on the master
2225 if ((ninfo.master_candidate or ninfo.master_capable) and
2226 constants.NV_OOB_PATHS in nresult):
2227 for path_result in nresult[constants.NV_OOB_PATHS]:
2228 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2230 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2231 """Verifies and updates the node volume data.
2233 This function will update a L{NodeImage}'s internal structures
2234 with data from the remote call.
2236 @type ninfo: L{objects.Node}
2237 @param ninfo: the node to check
2238 @param nresult: the remote results for the node
2239 @param nimg: the node image object
2240 @param vg_name: the configured VG name
2244 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2246 nimg.lvm_fail = True
2247 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2250 elif isinstance(lvdata, basestring):
2251 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2252 utils.SafeEncode(lvdata))
2253 elif not isinstance(lvdata, dict):
2254 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2256 nimg.volumes = lvdata
2257 nimg.lvm_fail = False
2259 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2260 """Verifies and updates the node instance list.
2262 If the listing was successful, then updates this node's instance
2263 list. Otherwise, it marks the RPC call as failed for the instance
2266 @type ninfo: L{objects.Node}
2267 @param ninfo: the node to check
2268 @param nresult: the remote results for the node
2269 @param nimg: the node image object
2272 idata = nresult.get(constants.NV_INSTANCELIST, None)
2273 test = not isinstance(idata, list)
2274 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2275 " (instancelist): %s", utils.SafeEncode(str(idata)))
2277 nimg.hyp_fail = True
2279 nimg.instances = idata
2281 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2282 """Verifies and computes a node information map
2284 @type ninfo: L{objects.Node}
2285 @param ninfo: the node to check
2286 @param nresult: the remote results for the node
2287 @param nimg: the node image object
2288 @param vg_name: the configured VG name
2292 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2294 # try to read free memory (from the hypervisor)
2295 hv_info = nresult.get(constants.NV_HVINFO, None)
2296 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2297 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2300 nimg.mfree = int(hv_info["memory_free"])
2301 except (ValueError, TypeError):
2302 _ErrorIf(True, self.ENODERPC, node,
2303 "node returned invalid nodeinfo, check hypervisor")
2305 # FIXME: devise a free space model for file based instances as well
2306 if vg_name is not None:
2307 test = (constants.NV_VGLIST not in nresult or
2308 vg_name not in nresult[constants.NV_VGLIST])
2309 _ErrorIf(test, self.ENODELVM, node,
2310 "node didn't return data for the volume group '%s'"
2311 " - it is either missing or broken", vg_name)
2314 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2315 except (ValueError, TypeError):
2316 _ErrorIf(True, self.ENODERPC, node,
2317 "node returned invalid LVM info, check LVM status")
2319 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2320 """Gets per-disk status information for all instances.
2322 @type nodelist: list of strings
2323 @param nodelist: Node names
2324 @type node_image: dict of (name, L{objects.Node})
2325 @param node_image: Node objects
2326 @type instanceinfo: dict of (name, L{objects.Instance})
2327 @param instanceinfo: Instance objects
2328 @rtype: {instance: {node: [(success, payload)]}}
2329 @return: a dictionary of per-instance dictionaries with nodes as
2330 keys and disk information as values; the disk information is a
2331 list of tuples (success, payload)
2334 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2337 node_disks_devonly = {}
2338 diskless_instances = set()
2339 diskless = constants.DT_DISKLESS
2341 for nname in nodelist:
2342 node_instances = list(itertools.chain(node_image[nname].pinst,
2343 node_image[nname].sinst))
2344 diskless_instances.update(inst for inst in node_instances
2345 if instanceinfo[inst].disk_template == diskless)
2346 disks = [(inst, disk)
2347 for inst in node_instances
2348 for disk in instanceinfo[inst].disks]
2351 # No need to collect data
2354 node_disks[nname] = disks
2356 # Creating copies as SetDiskID below will modify the objects and that can
2357 # lead to incorrect data returned from nodes
2358 devonly = [dev.Copy() for (_, dev) in disks]
2361 self.cfg.SetDiskID(dev, nname)
2363 node_disks_devonly[nname] = devonly
2365 assert len(node_disks) == len(node_disks_devonly)
2367 # Collect data from all nodes with disks
2368 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2371 assert len(result) == len(node_disks)
2375 for (nname, nres) in result.items():
2376 disks = node_disks[nname]
2379 # No data from this node
2380 data = len(disks) * [(False, "node offline")]
2383 _ErrorIf(msg, self.ENODERPC, nname,
2384 "while getting disk information: %s", msg)
2386 # No data from this node
2387 data = len(disks) * [(False, msg)]
2390 for idx, i in enumerate(nres.payload):
2391 if isinstance(i, (tuple, list)) and len(i) == 2:
2394 logging.warning("Invalid result from node %s, entry %d: %s",
2396 data.append((False, "Invalid result from the remote node"))
2398 for ((inst, _), status) in zip(disks, data):
2399 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2401 # Add empty entries for diskless instances.
2402 for inst in diskless_instances:
2403 assert inst not in instdisk
2406 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2407 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2408 compat.all(isinstance(s, (tuple, list)) and
2409 len(s) == 2 for s in statuses)
2410 for inst, nnames in instdisk.items()
2411 for nname, statuses in nnames.items())
2412 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
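# Illustrative shape of instdisk as verified by the asserts above (editor's
# sketch, names hypothetical):
#   {"inst1": {"node1": [(True, status0), (True, status1)],
#              "node2": [(True, status0), (True, status1)]},
#    "diskless-inst": {}}
# i.e. instance -> node -> list of (success, payload) tuples, one entry per
# disk, with diskless instances mapped to empty dicts.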
2416 def BuildHooksEnv(self):
2419 Cluster-Verify hooks run only in the post phase; if they fail, their
2420 output is logged in the verify output and the verification fails.
2424 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2427 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2428 for node in self.my_node_info.values())
2432 def BuildHooksNodes(self):
2433 """Build hooks nodes.
2436 return ([], self.my_node_names)
2438 def Exec(self, feedback_fn):
2439 """Verify integrity of the node group, performing various test on nodes.
2442 # This method has too many local variables. pylint: disable-msg=R0914
2444 if not self.my_node_names:
2446 feedback_fn("* Empty node group, skipping verification")
2450 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2451 verbose = self.op.verbose
2452 self._feedback_fn = feedback_fn
2454 vg_name = self.cfg.GetVGName()
2455 drbd_helper = self.cfg.GetDRBDHelper()
2456 cluster = self.cfg.GetClusterInfo()
2457 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2458 hypervisors = cluster.enabled_hypervisors
2459 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2461 i_non_redundant = [] # Non redundant instances
2462 i_non_a_balanced = [] # Non auto-balanced instances
2463 n_offline = 0 # Count of offline nodes
2464 n_drained = 0 # Count of nodes being drained
2465 node_vol_should = {}
2467 # FIXME: verify OS list
2470 filemap = _ComputeAncillaryFiles(cluster, False)
2472 # do local checksums
2473 master_node = self.master_node = self.cfg.GetMasterNode()
2474 master_ip = self.cfg.GetMasterIP()
2476 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2478 # We will make nodes contact all nodes in their group, and one node from
2479 # every other group.
2480 # TODO: should it be a *random* node, different every time?
2481 online_nodes = [node.name for node in node_data_list if not node.offline]
2482 other_group_nodes = {}
2484 for name in sorted(self.all_node_info):
2485 node = self.all_node_info[name]
2486 if (node.group not in other_group_nodes
2487 and node.group != self.group_uuid
2488 and not node.offline):
2489 other_group_nodes[node.group] = node.name
2491 node_verify_param = {
2492 constants.NV_FILELIST:
2493 utils.UniqueSequence(filename
2494 for files in filemap
2495 for filename in files),
2496 constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
2497 constants.NV_HYPERVISOR: hypervisors,
2498 constants.NV_HVPARAMS:
2499 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2500 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2501 for node in node_data_list
2502 if not node.offline],
2503 constants.NV_INSTANCELIST: hypervisors,
2504 constants.NV_VERSION: None,
2505 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2506 constants.NV_NODESETUP: None,
2507 constants.NV_TIME: None,
2508 constants.NV_MASTERIP: (master_node, master_ip),
2509 constants.NV_OSLIST: None,
2510 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2513 if vg_name is not None:
2514 node_verify_param[constants.NV_VGLIST] = None
2515 node_verify_param[constants.NV_LVLIST] = vg_name
2516 node_verify_param[constants.NV_PVLIST] = [vg_name]
2517 node_verify_param[constants.NV_DRBDLIST] = None
2520 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2523 # FIXME: this needs to be changed per node-group, not cluster-wide
2525 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2526 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2527 bridges.add(default_nicpp[constants.NIC_LINK])
2528 for instance in self.my_inst_info.values():
2529 for nic in instance.nics:
2530 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2531 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2532 bridges.add(full_nic[constants.NIC_LINK])
2535 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2537 # Build our expected cluster state
2538 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2540 vm_capable=node.vm_capable))
2541 for node in node_data_list)
2545 for node in self.all_node_info.values():
2546 path = _SupportsOob(self.cfg, node)
2547 if path and path not in oob_paths:
2548 oob_paths.append(path)
2551 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2553 for instance in self.my_inst_names:
2554 inst_config = self.my_inst_info[instance]
2556 for nname in inst_config.all_nodes:
2557 if nname not in node_image:
2558 gnode = self.NodeImage(name=nname)
2559 gnode.ghost = (nname not in self.all_node_info)
2560 node_image[nname] = gnode
2562 inst_config.MapLVsByNode(node_vol_should)
2564 pnode = inst_config.primary_node
2565 node_image[pnode].pinst.append(instance)
2567 for snode in inst_config.secondary_nodes:
2568 nimg = node_image[snode]
2569 nimg.sinst.append(instance)
2570 if pnode not in nimg.sbp:
2571 nimg.sbp[pnode] = []
2572 nimg.sbp[pnode].append(instance)
2574 # At this point, we have the in-memory data structures complete,
2575 # except for the runtime information, which we'll gather next
2577 # Due to the way our RPC system works, exact response times cannot be
2578 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2579 # time before and after executing the request, we can at least have a time
2581 nvinfo_starttime = time.time()
2582 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2584 self.cfg.GetClusterName())
2585 nvinfo_endtime = time.time()
2587 if self.extra_lv_nodes and vg_name is not None:
2589 self.rpc.call_node_verify(self.extra_lv_nodes,
2590 {constants.NV_LVLIST: vg_name},
2591 self.cfg.GetClusterName())
2593 extra_lv_nvinfo = {}
2595 all_drbd_map = self.cfg.ComputeDRBDMap()
2597 feedback_fn("* Gathering disk information (%s nodes)" %
2598 len(self.my_node_names))
2599 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2602 feedback_fn("* Verifying configuration file consistency")
2604 # If not all nodes are being checked, we need to make sure the master node
2605 # and a non-checked vm_capable node are in the list.
2606 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2608 vf_nvinfo = all_nvinfo.copy()
2609 vf_node_info = list(self.my_node_info.values())
2610 additional_nodes = []
2611 if master_node not in self.my_node_info:
2612 additional_nodes.append(master_node)
2613 vf_node_info.append(self.all_node_info[master_node])
2614 # Add the first vm_capable node we find which is not included
2615 for node in absent_nodes:
2616 nodeinfo = self.all_node_info[node]
2617 if nodeinfo.vm_capable and not nodeinfo.offline:
2618 additional_nodes.append(node)
2619 vf_node_info.append(self.all_node_info[node])
2621 key = constants.NV_FILELIST
2622 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2623 {key: node_verify_param[key]},
2624 self.cfg.GetClusterName()))
2626 vf_nvinfo = all_nvinfo
2627 vf_node_info = self.my_node_info.values()
2629 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2631 feedback_fn("* Verifying node status")
2635 for node_i in node_data_list:
2637 nimg = node_image[node]
2641 feedback_fn("* Skipping offline node %s" % (node,))
2645 if node == master_node:
2646 ntype = "master"
2647 elif node_i.master_candidate:
2648 ntype = "master candidate"
2649 elif node_i.drained:
2655 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2657 msg = all_nvinfo[node].fail_msg
2658 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2660 nimg.rpc_fail = True
2663 nresult = all_nvinfo[node].payload
2665 nimg.call_ok = self._VerifyNode(node_i, nresult)
2666 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2667 self._VerifyNodeNetwork(node_i, nresult)
2668 self._VerifyOob(node_i, nresult)
2671 self._VerifyNodeLVM(node_i, nresult, vg_name)
2672 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2675 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2676 self._UpdateNodeInstances(node_i, nresult, nimg)
2677 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2678 self._UpdateNodeOS(node_i, nresult, nimg)
2680 if not nimg.os_fail:
2681 if refos_img is None:
2683 self._VerifyNodeOS(node_i, nimg, refos_img)
2684 self._VerifyNodeBridges(node_i, nresult, bridges)
2686 # Check whether all running instances are primary for the node. (This
2687 # can no longer be done from _VerifyInstance below, since some of the
2688 # wrong instances could be from other node groups.)
2689 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2691 for inst in non_primary_inst:
2692 test = inst in self.all_inst_info
2693 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2694 "instance should not run on node %s", node_i.name)
2695 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2696 "node is running unknown instance %s", inst)
2698 for node, result in extra_lv_nvinfo.items():
2699 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2700 node_image[node], vg_name)
2702 feedback_fn("* Verifying instance status")
2703 for instance in self.my_inst_names:
2705 feedback_fn("* Verifying instance %s" % instance)
2706 inst_config = self.my_inst_info[instance]
2707 self._VerifyInstance(instance, inst_config, node_image,
2709 inst_nodes_offline = []
2711 pnode = inst_config.primary_node
2712 pnode_img = node_image[pnode]
2713 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2714 self.ENODERPC, pnode, "instance %s, connection to"
2715 " primary node failed", instance)
2717 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2718 self.EINSTANCEBADNODE, instance,
2719 "instance is marked as running and lives on offline node %s",
2720 inst_config.primary_node)
2722 # If the instance is non-redundant we cannot survive losing its primary
2723 # node, so we are not N+1 compliant. On the other hand we have no disk
2724 # templates with more than one secondary so that situation is not well
2725 # handled either.
2726 # FIXME: does not support file-backed instances
2727 if not inst_config.secondary_nodes:
2728 i_non_redundant.append(instance)
2730 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2731 instance, "instance has multiple secondary nodes: %s",
2732 utils.CommaJoin(inst_config.secondary_nodes),
2733 code=self.ETYPE_WARNING)
2735 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2736 pnode = inst_config.primary_node
2737 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2738 instance_groups = {}
2740 for node in instance_nodes:
2741 instance_groups.setdefault(self.all_node_info[node].group,
2745 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2746 # Sort so that we always list the primary node first.
2747 for group, nodes in sorted(instance_groups.items(),
2748 key=lambda (_, nodes): pnode in nodes,
2751 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2752 instance, "instance has primary and secondary nodes in"
2753 " different groups: %s", utils.CommaJoin(pretty_list),
2754 code=self.ETYPE_WARNING)
2756 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2757 i_non_a_balanced.append(instance)
2759 for snode in inst_config.secondary_nodes:
2760 s_img = node_image[snode]
2761 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2762 "instance %s, connection to secondary node failed", instance)
2765 inst_nodes_offline.append(snode)
2767 # warn that the instance lives on offline nodes
2768 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2769 "instance has offline secondary node(s) %s",
2770 utils.CommaJoin(inst_nodes_offline))
2771 # ... or ghost/non-vm_capable nodes
2772 for node in inst_config.all_nodes:
2773 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2774 "instance lives on ghost node %s", node)
2775 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2776 instance, "instance lives on non-vm_capable node %s", node)
2778 feedback_fn("* Verifying orphan volumes")
2779 reserved = utils.FieldSet(*cluster.reserved_lvs)
2781 # We will get spurious "unknown volume" warnings if any node of this group
2782 # is secondary for an instance whose primary is in another group. To avoid
2783 # them, we find these instances and add their volumes to node_vol_should.
2784 for inst in self.all_inst_info.values():
2785 for secondary in inst.secondary_nodes:
2786 if (secondary in self.my_node_info
2787 and inst.name not in self.my_inst_info):
2788 inst.MapLVsByNode(node_vol_should)
2791 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2793 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2794 feedback_fn("* Verifying N+1 Memory redundancy")
2795 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2797 feedback_fn("* Other Notes")
2799 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2800 % len(i_non_redundant))
2802 if i_non_a_balanced:
2803 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2804 % len(i_non_a_balanced))
2807 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2810 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2814 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2815 """Analyze the post-hooks' result
2817 This method analyses the hook result, handles it, and sends some
2818 nicely-formatted feedback back to the user.
2820 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2821 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2822 @param hooks_results: the results of the multi-node hooks rpc call
2823 @param feedback_fn: function used to send feedback back to the caller
2824 @param lu_result: previous Exec result
2825 @return: the new Exec result, based on the previous result
2829 # We only really run POST phase hooks, only for non-empty groups,
2830 # and are only interested in their results
2831 if not self.my_node_names:
2834 elif phase == constants.HOOKS_PHASE_POST:
2835 # Used to change hooks' output to proper indentation
2836 feedback_fn("* Hooks Results")
2837 assert hooks_results, "invalid result from hooks"
2839 for node_name in hooks_results:
2840 res = hooks_results[node_name]
2842 test = msg and not res.offline
2843 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2844 "Communication failure in hooks execution: %s", msg)
2845 if res.offline or msg:
2846 # No need to investigate payload if node is offline or gave an error.
2847 # manually override lu_result here, as _ErrorIf only
2848 # overrides self.bad
2851 for script, hkr, output in res.payload:
2852 test = hkr == constants.HKR_FAIL
2853 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2854 "Script %s failed, output:", script)
2856 output = self._HOOKS_INDENT_RE.sub(" ", output)
2857 feedback_fn("%s" % output)
2863 class LUClusterVerifyDisks(NoHooksLU):
2864 """Verifies the cluster disks status.
2869 def ExpandNames(self):
2870 self.needed_locks = {
2871 locking.LEVEL_NODE: locking.ALL_SET,
2872 locking.LEVEL_INSTANCE: locking.ALL_SET,
2874 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2876 def Exec(self, feedback_fn):
2877 """Verify integrity of cluster disks.
2879 @rtype: tuple of three items
2880 @return: a tuple of (dict of node-to-node_error, list of instances
2881 which need activate-disks, dict of instance: (node, volume) for
2885 result = res_nodes, res_instances, res_missing = {}, [], {}
2887 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2888 instances = self.cfg.GetAllInstancesInfo().values()
2891 for inst in instances:
2892 inst_lvs = {}
2893 if not inst.admin_up:
2894 continue
2895 inst.MapLVsByNode(inst_lvs)
2896 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2897 for node, vol_list in inst_lvs.iteritems():
2898 for vol in vol_list:
2899 nv_dict[(node, vol)] = inst
2904 node_lvs = self.rpc.call_lv_list(nodes, [])
2905 for node, node_res in node_lvs.items():
2906 if node_res.offline:
2907 continue
2908 msg = node_res.fail_msg
2910 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2911 res_nodes[node] = msg
2914 lvs = node_res.payload
2915 for lv_name, (_, _, lv_online) in lvs.items():
2916 inst = nv_dict.pop((node, lv_name), None)
2917 if (not lv_online and inst is not None
2918 and inst.name not in res_instances):
2919 res_instances.append(inst.name)
2921 # any leftover items in nv_dict are missing LVs, let's arrange the
2922 # data per instance
2923 for key, inst in nv_dict.iteritems():
2924 if inst.name not in res_missing:
2925 res_missing[inst.name] = []
2926 res_missing[inst.name].append(key)
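# Illustrative return value of this LU (editor's sketch, names hypothetical):
#   ({"node3": "rpc error text"},            # node -> LV listing error
#    ["inst1"],                              # instances needing activate-disks
#    {"inst2": [("node2", "xenvg/disk0")]})  # instance -> missing (node, LV)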
2931 class LUClusterRepairDiskSizes(NoHooksLU):
2932 """Verifies the cluster disks sizes.
2937 def ExpandNames(self):
2938 if self.op.instances:
2939 self.wanted_names = _GetWantedInstances(self, self.op.instances)
2940 self.needed_locks = {
2941 locking.LEVEL_NODE: [],
2942 locking.LEVEL_INSTANCE: self.wanted_names,
2944 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2946 self.wanted_names = None
2947 self.needed_locks = {
2948 locking.LEVEL_NODE: locking.ALL_SET,
2949 locking.LEVEL_INSTANCE: locking.ALL_SET,
2951 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2953 def DeclareLocks(self, level):
2954 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2955 self._LockInstancesNodes(primary_only=True)
2957 def CheckPrereq(self):
2958 """Check prerequisites.
2960 This only checks the optional instance list against the existing names.
2963 if self.wanted_names is None:
2964 self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
2966 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2967 in self.wanted_names]
2969 def _EnsureChildSizes(self, disk):
2970 """Ensure children of the disk have the needed disk size.
2972 This is valid mainly for DRBD8 and fixes an issue where the
2973 children have smaller disk size.
2975 @param disk: an L{ganeti.objects.Disk} object
2978 if disk.dev_type == constants.LD_DRBD8:
2979 assert disk.children, "Empty children for DRBD8?"
2980 fchild = disk.children[0]
2981 mismatch = fchild.size < disk.size
2983 self.LogInfo("Child disk has size %d, parent %d, fixing",
2984 fchild.size, disk.size)
2985 fchild.size = disk.size
2987 # and we recurse on this child only, not on the metadev
2988 return self._EnsureChildSizes(fchild) or mismatch
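# Illustrative example (editor's sketch, hypothetical sizes): for a DRBD8
# disk of size 10240 whose first (data) child LV is recorded as 10236, the
# child's recorded size is bumped to 10240 and True is returned, so the
# caller knows the instance configuration must be written back.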
2992 def Exec(self, feedback_fn):
2993 """Verify the size of cluster disks.
2996 # TODO: check child disks too
2997 # TODO: check differences in size between primary/secondary nodes
2999 for instance in self.wanted_instances:
3000 pnode = instance.primary_node
3001 if pnode not in per_node_disks:
3002 per_node_disks[pnode] = []
3003 for idx, disk in enumerate(instance.disks):
3004 per_node_disks[pnode].append((instance, idx, disk))
3007 for node, dskl in per_node_disks.items():
3008 newl = [v[2].Copy() for v in dskl]
3010 self.cfg.SetDiskID(dsk, node)
3011 result = self.rpc.call_blockdev_getsize(node, newl)
3013 self.LogWarning("Failure in blockdev_getsize call to node"
3014 " %s, ignoring", node)
3016 if len(result.payload) != len(dskl):
3017 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3018 " result.payload=%s", node, len(dskl), result.payload)
3019 self.LogWarning("Invalid result from node %s, ignoring node results",
3022 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3024 self.LogWarning("Disk %d of instance %s did not return size"
3025 " information, ignoring", idx, instance.name)
3027 if not isinstance(size, (int, long)):
3028 self.LogWarning("Disk %d of instance %s did not return valid"
3029 " size information, ignoring", idx, instance.name)
3032 if size != disk.size:
3033 self.LogInfo("Disk %d of instance %s has mismatched size,"
3034 " correcting: recorded %d, actual %d", idx,
3035 instance.name, disk.size, size)
3037 self.cfg.Update(instance, feedback_fn)
3038 changed.append((instance.name, idx, size))
3039 if self._EnsureChildSizes(disk):
3040 self.cfg.Update(instance, feedback_fn)
3041 changed.append((instance.name, idx, disk.size))
3045 class LUClusterRename(LogicalUnit):
3046 """Rename the cluster.
3049 HPATH = "cluster-rename"
3050 HTYPE = constants.HTYPE_CLUSTER
3052 def BuildHooksEnv(self):
3057 "OP_TARGET": self.cfg.GetClusterName(),
3058 "NEW_NAME": self.op.name,
3061 def BuildHooksNodes(self):
3062 """Build hooks nodes.
3065 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3067 def CheckPrereq(self):
3068 """Verify that the passed name is a valid one.
3071 hostname = netutils.GetHostname(name=self.op.name,
3072 family=self.cfg.GetPrimaryIPFamily())
3074 new_name = hostname.name
3075 self.ip = new_ip = hostname.ip
3076 old_name = self.cfg.GetClusterName()
3077 old_ip = self.cfg.GetMasterIP()
3078 if new_name == old_name and new_ip == old_ip:
3079 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3080 " cluster has changed",
3082 if new_ip != old_ip:
3083 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3084 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3085 " reachable on the network" %
3086 new_ip, errors.ECODE_NOTUNIQUE)
3088 self.op.name = new_name
3090 def Exec(self, feedback_fn):
3091 """Rename the cluster.
3094 clustername = self.op.name
3095 ip = self.ip
3097 # shutdown the master IP
3098 master = self.cfg.GetMasterNode()
3099 result = self.rpc.call_node_stop_master(master, False)
3100 result.Raise("Could not disable the master role")
3103 cluster = self.cfg.GetClusterInfo()
3104 cluster.cluster_name = clustername
3105 cluster.master_ip = ip
3106 self.cfg.Update(cluster, feedback_fn)
3108 # update the known hosts file
3109 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3110 node_list = self.cfg.GetOnlineNodeList()
3112 node_list.remove(master)
3115 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3117 result = self.rpc.call_node_start_master(master, False, False)
3118 msg = result.fail_msg
3120 self.LogWarning("Could not re-enable the master role on"
3121 " the master, please restart manually: %s", msg)
3126 class LUClusterSetParams(LogicalUnit):
3127 """Change the parameters of the cluster.
3130 HPATH = "cluster-modify"
3131 HTYPE = constants.HTYPE_CLUSTER
3134 def CheckArguments(self):
3138 if self.op.uid_pool:
3139 uidpool.CheckUidPool(self.op.uid_pool)
3141 if self.op.add_uids:
3142 uidpool.CheckUidPool(self.op.add_uids)
3144 if self.op.remove_uids:
3145 uidpool.CheckUidPool(self.op.remove_uids)
3147 def ExpandNames(self):
3148 # FIXME: in the future maybe other cluster params won't require checking on
3149 # all nodes to be modified.
3150 self.needed_locks = {
3151 locking.LEVEL_NODE: locking.ALL_SET,
3153 self.share_locks[locking.LEVEL_NODE] = 1
3155 def BuildHooksEnv(self):
3160 "OP_TARGET": self.cfg.GetClusterName(),
3161 "NEW_VG_NAME": self.op.vg_name,
3164 def BuildHooksNodes(self):
3165 """Build hooks nodes.
3168 mn = self.cfg.GetMasterNode()
3171 def CheckPrereq(self):
3172 """Check prerequisites.
3174 This checks whether the given params don't conflict and
3175 if the given volume group is valid.
3178 if self.op.vg_name is not None and not self.op.vg_name:
3179 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3180 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3181 " instances exist", errors.ECODE_INVAL)
3183 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3184 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3185 raise errors.OpPrereqError("Cannot disable drbd helper while"
3186 " drbd-based instances exist",
3189 node_list = self.glm.list_owned(locking.LEVEL_NODE)
3191 # if vg_name not None, checks given volume group on all nodes
3193 vglist = self.rpc.call_vg_list(node_list)
3194 for node in node_list:
3195 msg = vglist[node].fail_msg
3197 # ignoring down node
3198 self.LogWarning("Error while gathering data on node %s"
3199 " (ignoring node): %s", node, msg)
3201 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3203 constants.MIN_VG_SIZE)
3205 raise errors.OpPrereqError("Error on node '%s': %s" %
3206 (node, vgstatus), errors.ECODE_ENVIRON)
3208 if self.op.drbd_helper:
3209 # checks given drbd helper on all nodes
3210 helpers = self.rpc.call_drbd_helper(node_list)
3211 for node in node_list:
3212 ninfo = self.cfg.GetNodeInfo(node)
3214 self.LogInfo("Not checking drbd helper on offline node %s", node)
3216 msg = helpers[node].fail_msg
3218 raise errors.OpPrereqError("Error checking drbd helper on node"
3219 " '%s': %s" % (node, msg),
3220 errors.ECODE_ENVIRON)
3221 node_helper = helpers[node].payload
3222 if node_helper != self.op.drbd_helper:
3223 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3224 (node, node_helper), errors.ECODE_ENVIRON)
3226 self.cluster = cluster = self.cfg.GetClusterInfo()
3227 # validate params changes
3228 if self.op.beparams:
3229 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3230 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3232 if self.op.ndparams:
3233 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3234 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3236 # TODO: we need a more general way to handle resetting
3237 # cluster-level parameters to default values
3238 if self.new_ndparams["oob_program"] == "":
3239 self.new_ndparams["oob_program"] = \
3240 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3242 if self.op.nicparams:
3243 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3244 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3245 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3248 # check all instances for consistency
3249 for instance in self.cfg.GetAllInstancesInfo().values():
3250 for nic_idx, nic in enumerate(instance.nics):
3251 params_copy = copy.deepcopy(nic.nicparams)
3252 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3254 # check parameter syntax
3256 objects.NIC.CheckParameterSyntax(params_filled)
3257 except errors.ConfigurationError, err:
3258 nic_errors.append("Instance %s, nic/%d: %s" %
3259 (instance.name, nic_idx, err))
3261 # if we're moving instances to routed, check that they have an ip
3262 target_mode = params_filled[constants.NIC_MODE]
3263 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3264 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3265 " address" % (instance.name, nic_idx))
3267 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3268 "\n".join(nic_errors))
3270 # hypervisor list/parameters
3271 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3272 if self.op.hvparams:
3273 for hv_name, hv_dict in self.op.hvparams.items():
3274 if hv_name not in self.new_hvparams:
3275 self.new_hvparams[hv_name] = hv_dict
3277 self.new_hvparams[hv_name].update(hv_dict)
3279 # os hypervisor parameters
3280 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3282 for os_name, hvs in self.op.os_hvp.items():
3283 if os_name not in self.new_os_hvp:
3284 self.new_os_hvp[os_name] = hvs
3286 for hv_name, hv_dict in hvs.items():
3287 if hv_name not in self.new_os_hvp[os_name]:
3288 self.new_os_hvp[os_name][hv_name] = hv_dict
3290 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3293 self.new_osp = objects.FillDict(cluster.osparams, {})
3294 if self.op.osparams:
3295 for os_name, osp in self.op.osparams.items():
3296 if os_name not in self.new_osp:
3297 self.new_osp[os_name] = {}
3299 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3302 if not self.new_osp[os_name]:
3303 # we removed all parameters
3304 del self.new_osp[os_name]
3306 # check the parameter validity (remote check)
3307 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3308 os_name, self.new_osp[os_name])
3310 # changes to the hypervisor list
3311 if self.op.enabled_hypervisors is not None:
3312 self.hv_list = self.op.enabled_hypervisors
3313 for hv in self.hv_list:
3314 # if the hypervisor doesn't already exist in the cluster
3315 # hvparams, we initialize it to empty, and then (in both
3316 # cases) we make sure to fill the defaults, as we might not
3317 # have a complete defaults list if the hypervisor wasn't
3318 # enabled before
3319 if hv not in new_hvp:
3320 new_hvp[hv] = {}
3321 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3322 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3324 self.hv_list = cluster.enabled_hypervisors
3326 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3327 # either the enabled list has changed, or the parameters have, validate
3328 for hv_name, hv_params in self.new_hvparams.items():
3329 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3330 (self.op.enabled_hypervisors and
3331 hv_name in self.op.enabled_hypervisors)):
3332 # either this is a new hypervisor, or its parameters have changed
3333 hv_class = hypervisor.GetHypervisor(hv_name)
3334 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3335 hv_class.CheckParameterSyntax(hv_params)
3336 _CheckHVParams(self, node_list, hv_name, hv_params)
3339 # no need to check any newly-enabled hypervisors, since the
3340 # defaults have already been checked in the above code-block
3341 for os_name, os_hvp in self.new_os_hvp.items():
3342 for hv_name, hv_params in os_hvp.items():
3343 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3344 # we need to fill in the new os_hvp on top of the actual hv_p
3345 cluster_defaults = self.new_hvparams.get(hv_name, {})
3346 new_osp = objects.FillDict(cluster_defaults, hv_params)
3347 hv_class = hypervisor.GetHypervisor(hv_name)
3348 hv_class.CheckParameterSyntax(new_osp)
3349 _CheckHVParams(self, node_list, hv_name, new_osp)
3351 if self.op.default_iallocator:
3352 alloc_script = utils.FindFile(self.op.default_iallocator,
3353 constants.IALLOCATOR_SEARCH_PATH,
3355 if alloc_script is None:
3356 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3357 " specified" % self.op.default_iallocator,
3360 def Exec(self, feedback_fn):
3361 """Change the parameters of the cluster.
3364 if self.op.vg_name is not None:
3365 new_volume = self.op.vg_name
3368 if new_volume != self.cfg.GetVGName():
3369 self.cfg.SetVGName(new_volume)
3371 feedback_fn("Cluster LVM configuration already in desired"
3372 " state, not changing")
3373 if self.op.drbd_helper is not None:
3374 new_helper = self.op.drbd_helper
3377 if new_helper != self.cfg.GetDRBDHelper():
3378 self.cfg.SetDRBDHelper(new_helper)
3380 feedback_fn("Cluster DRBD helper already in desired state,"
3382 if self.op.hvparams:
3383 self.cluster.hvparams = self.new_hvparams
3385 self.cluster.os_hvp = self.new_os_hvp
3386 if self.op.enabled_hypervisors is not None:
3387 self.cluster.hvparams = self.new_hvparams
3388 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3389 if self.op.beparams:
3390 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3391 if self.op.nicparams:
3392 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3393 if self.op.osparams:
3394 self.cluster.osparams = self.new_osp
3395 if self.op.ndparams:
3396 self.cluster.ndparams = self.new_ndparams
3398 if self.op.candidate_pool_size is not None:
3399 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3400 # we need to update the pool size here, otherwise the save will fail
3401 _AdjustCandidatePool(self, [])
3403 if self.op.maintain_node_health is not None:
3404 self.cluster.maintain_node_health = self.op.maintain_node_health
3406 if self.op.prealloc_wipe_disks is not None:
3407 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3409 if self.op.add_uids is not None:
3410 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3412 if self.op.remove_uids is not None:
3413 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3415 if self.op.uid_pool is not None:
3416 self.cluster.uid_pool = self.op.uid_pool
3418 if self.op.default_iallocator is not None:
3419 self.cluster.default_iallocator = self.op.default_iallocator
3421 if self.op.reserved_lvs is not None:
3422 self.cluster.reserved_lvs = self.op.reserved_lvs
3424 def helper_os(aname, mods, desc):
3426 lst = getattr(self.cluster, aname)
3427 for key, val in mods:
3428 if key == constants.DDM_ADD:
3430 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3433 elif key == constants.DDM_REMOVE:
3437 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3439 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3441 if self.op.hidden_os:
3442 helper_os("hidden_os", self.op.hidden_os, "hidden")
3444 if self.op.blacklisted_os:
3445 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3447 if self.op.master_netdev:
3448 master = self.cfg.GetMasterNode()
3449 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3450 self.cluster.master_netdev)
3451 result = self.rpc.call_node_stop_master(master, False)
3452 result.Raise("Could not disable the master ip")
3453 feedback_fn("Changing master_netdev from %s to %s" %
3454 (self.cluster.master_netdev, self.op.master_netdev))
3455 self.cluster.master_netdev = self.op.master_netdev
3457 self.cfg.Update(self.cluster, feedback_fn)
3459 if self.op.master_netdev:
3460 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3461 self.op.master_netdev)
3462 result = self.rpc.call_node_start_master(master, False, False)
3464 self.LogWarning("Could not re-enable the master ip on"
3465 " the master, please restart manually: %s",
3469 def _UploadHelper(lu, nodes, fname):
3470 """Helper for uploading a file and showing warnings.
3473 if os.path.exists(fname):
3474 result = lu.rpc.call_upload_file(nodes, fname)
3475 for to_node, to_result in result.items():
3476 msg = to_result.fail_msg
3478 msg = ("Copy of file %s to node %s failed: %s" %
3479 (fname, to_node, msg))
3480 lu.proc.LogWarning(msg)
3483 def _ComputeAncillaryFiles(cluster, redist):
3484 """Compute files external to Ganeti which need to be consistent.
3486 @type redist: boolean
3487 @param redist: Whether to include files which need to be redistributed
3490 # Compute files for all nodes
3492 constants.SSH_KNOWN_HOSTS_FILE,
3493 constants.CONFD_HMAC_KEY,
3494 constants.CLUSTER_DOMAIN_SECRET_FILE,
3498 files_all.update(constants.ALL_CERT_FILES)
3499 files_all.update(ssconf.SimpleStore().GetFileList())
3501 if cluster.modify_etc_hosts:
3502 files_all.add(constants.ETC_HOSTS)
3504 # Files which must either exist on all nodes or on none
3505 files_all_opt = set([
3506 constants.RAPI_USERS_FILE,
3509 # Files which should only be on master candidates
3512 files_mc.add(constants.CLUSTER_CONF_FILE)
3514 # Files which should only be on VM-capable nodes
3515 files_vm = set(filename
3516 for hv_name in cluster.enabled_hypervisors
3517 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3519 # Filenames must be unique
3520 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3521 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3522 "Found file listed in more than one file list"
3524 return (files_all, files_all_opt, files_mc, files_vm)
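# Illustrative breakdown of the returned tuple (editor's sketch):
#   files_all     - e.g. the ssh known_hosts file, the confd HMAC key and the
#                   cluster domain secret, required on every node
#   files_all_opt - e.g. the RAPI users file, allowed to be absent everywhere
#   files_mc      - e.g. the cluster configuration file (master candidates only)
#   files_vm      - per-hypervisor ancillary files, VM-capable nodes only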
3527 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3528 """Distribute additional files which are part of the cluster configuration.
3530 ConfigWriter takes care of distributing the config and ssconf files, but
3531 there are more files which should be distributed to all nodes. This function
3532 makes sure those are copied.
3534 @param lu: calling logical unit
3535 @param additional_nodes: list of nodes not in the config to distribute to
3536 @type additional_vm: boolean
3537 @param additional_vm: whether the additional nodes are vm-capable or not
3540 # Gather target nodes
3541 cluster = lu.cfg.GetClusterInfo()
3542 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3544 online_nodes = lu.cfg.GetOnlineNodeList()
3545 vm_nodes = lu.cfg.GetVmCapableNodeList()
3547 if additional_nodes is not None:
3548 online_nodes.extend(additional_nodes)
3550 vm_nodes.extend(additional_nodes)
3552 # Never distribute to master node
3553 for nodelist in [online_nodes, vm_nodes]:
3554 if master_info.name in nodelist:
3555 nodelist.remove(master_info.name)
3558 (files_all, files_all_opt, files_mc, files_vm) = \
3559 _ComputeAncillaryFiles(cluster, True)
3561 # Never re-distribute configuration file from here
3562 assert not (constants.CLUSTER_CONF_FILE in files_all or
3563 constants.CLUSTER_CONF_FILE in files_vm)
3564 assert not files_mc, "Master candidates not handled in this function"
3567 (online_nodes, files_all),
3568 (online_nodes, files_all_opt),
3569 (vm_nodes, files_vm),
3573 for (node_list, files) in filemap:
3575 _UploadHelper(lu, node_list, fname)
3578 class LUClusterRedistConf(NoHooksLU):
3579 """Force the redistribution of cluster configuration.
3581 This is a very simple LU.
3586 def ExpandNames(self):
3587 self.needed_locks = {
3588 locking.LEVEL_NODE: locking.ALL_SET,
3590 self.share_locks[locking.LEVEL_NODE] = 1
3592 def Exec(self, feedback_fn):
3593 """Redistribute the configuration.
3596 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3597 _RedistributeAncillaryFiles(self)
3600 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3601 """Sleep and poll for an instance's disk to sync.
3604 if not instance.disks or disks is not None and not disks:
3607 disks = _ExpandCheckDisks(instance, disks)
3610 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3612 node = instance.primary_node
3615 lu.cfg.SetDiskID(dev, node)
3617 # TODO: Convert to utils.Retry
3620 degr_retries = 10 # in seconds, as we sleep 1 second each time
3624 cumul_degraded = False
3625 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3626 msg = rstats.fail_msg
3628 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3631 raise errors.RemoteError("Can't contact node %s for mirror data,"
3632 " aborting." % node)
3635 rstats = rstats.payload
3637 for i, mstat in enumerate(rstats):
3639 lu.LogWarning("Can't compute data for node %s/%s",
3640 node, disks[i].iv_name)
3643 cumul_degraded = (cumul_degraded or
3644 (mstat.is_degraded and mstat.sync_percent is None))
3645 if mstat.sync_percent is not None:
3647 if mstat.estimated_time is not None:
3648 rem_time = ("%s remaining (estimated)" %
3649 utils.FormatSeconds(mstat.estimated_time))
3650 max_time = mstat.estimated_time
3652 rem_time = "no time estimate"
3653 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3654 (disks[i].iv_name, mstat.sync_percent, rem_time))
3656 # if we're done but degraded, let's do a few small retries, to
3657 # make sure we see a stable and not transient situation; therefore
3658 # we force restart of the loop
3659 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3660 logging.info("Degraded disks found, %d retries left", degr_retries)
3668 time.sleep(min(60, max_time))
3671 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3672 return not cumul_degraded
3675 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3676 """Check that mirrors are not degraded.
3678 The ldisk parameter, if True, will change the test from the
3679 is_degraded attribute (which represents overall non-ok status for
3680 the device(s)) to the ldisk (representing the local storage status).
3683 lu.cfg.SetDiskID(dev, node)
3687 if on_primary or dev.AssembleOnSecondary():
3688 rstats = lu.rpc.call_blockdev_find(node, dev)
3689 msg = rstats.fail_msg
3691 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3693 elif not rstats.payload:
3694 lu.LogWarning("Can't find disk on node %s", node)
3698 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3700 result = result and not rstats.payload.is_degraded
3703 for child in dev.children:
3704 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
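# Illustrative usage (editor's sketch): calling _CheckDiskConsistency(lu, dev,
# node, True) asks "is the mirror healthy overall?" (is_degraded), while
# passing ldisk=True instead checks only that the local storage backing the
# device on that node is in LDS_OKAY state, e.g. before deciding which side
# of a DRBD pair can be trusted during a disk replacement.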
3709 class LUOobCommand(NoHooksLU):
3710 """Logical unit for OOB handling.
3714 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3716 def ExpandNames(self):
3717 """Gather locks we need.
3720 if self.op.node_names:
3721 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3722 lock_names = self.op.node_names
3724 lock_names = locking.ALL_SET
3726 self.needed_locks = {
3727 locking.LEVEL_NODE: lock_names,
3730 def CheckPrereq(self):
3731 """Check prerequisites.
3734 - the node exists in the configuration
3737 Any errors are signaled by raising errors.OpPrereqError.
3741 self.master_node = self.cfg.GetMasterNode()
3743 assert self.op.power_delay >= 0.0
3745 if self.op.node_names:
3746 if (self.op.command in self._SKIP_MASTER and
3747 self.master_node in self.op.node_names):
3748 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3749 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3751 if master_oob_handler:
3752 additional_text = ("run '%s %s %s' if you want to operate on the"
3753 " master regardless") % (master_oob_handler,
3757 additional_text = "it does not support out-of-band operations"
3759 raise errors.OpPrereqError(("Operating on the master node %s is not"
3760 " allowed for %s; %s") %
3761 (self.master_node, self.op.command,
3762 additional_text), errors.ECODE_INVAL)
3764 self.op.node_names = self.cfg.GetNodeList()
3765 if self.op.command in self._SKIP_MASTER:
3766 self.op.node_names.remove(self.master_node)
3768 if self.op.command in self._SKIP_MASTER:
3769 assert self.master_node not in self.op.node_names
3771 for node_name in self.op.node_names:
3772 node = self.cfg.GetNodeInfo(node_name)
3775 raise errors.OpPrereqError("Node %s not found" % node_name,
3778 self.nodes.append(node)
3780 if (not self.op.ignore_status and
3781 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3782 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3783 " not marked offline") % node_name,
3786 def Exec(self, feedback_fn):
3787 """Execute OOB and return result if we expect any.
3790 master_node = self.master_node
3793 for idx, node in enumerate(utils.NiceSort(self.nodes,
3794 key=lambda node: node.name)):
3795 node_entry = [(constants.RS_NORMAL, node.name)]
3796 ret.append(node_entry)
3798 oob_program = _SupportsOob(self.cfg, node)
3801 node_entry.append((constants.RS_UNAVAIL, None))
3804 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3805 self.op.command, oob_program, node.name)
3806 result = self.rpc.call_run_oob(master_node, oob_program,
3807 self.op.command, node.name,
3811 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3812 node.name, result.fail_msg)
3813 node_entry.append((constants.RS_NODATA, None))
3816 self._CheckPayload(result)
3817 except errors.OpExecError, err:
3818 self.LogWarning("Payload returned by node '%s' is not valid: %s",
3820 node_entry.append((constants.RS_NODATA, None))
3822 if self.op.command == constants.OOB_HEALTH:
3823 # For health we should log important events
3824 for item, status in result.payload:
3825 if status in [constants.OOB_STATUS_WARNING,
3826 constants.OOB_STATUS_CRITICAL]:
3827 self.LogWarning("Item '%s' on node '%s' has status '%s'",
3828 item, node.name, status)
3830 if self.op.command == constants.OOB_POWER_ON:
3832 elif self.op.command == constants.OOB_POWER_OFF:
3833 node.powered = False
3834 elif self.op.command == constants.OOB_POWER_STATUS:
3835 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3836 if powered != node.powered:
3837 logging.warning(("Recorded power state (%s) of node '%s' does not"
3838 " match actual power state (%s)"), node.powered,
3841 # For configuration-changing commands we should update the node
3842 if self.op.command in (constants.OOB_POWER_ON,
3843 constants.OOB_POWER_OFF):
3844 self.cfg.Update(node, feedback_fn)
3846 node_entry.append((constants.RS_NORMAL, result.payload))
3848 if (self.op.command == constants.OOB_POWER_ON and
3849 idx < len(self.nodes) - 1):
3850 time.sleep(self.op.power_delay)
3854 def _CheckPayload(self, result):
3855 """Checks if the payload is valid.
3857 @param result: RPC result
3858 @raises errors.OpExecError: If payload is not valid
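For example, a valid payload for the "health" command is a list of
(item, status) pairs such as C{[("PSU0", constants.OOB_STATUS_WARNING)]}
(the item name here is hypothetical), "power-status" is expected to return
a dict like C{{constants.OOB_POWER_STATUS_POWERED: True}}, and the
power-on/off/cycle commands must return no payload at all.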
3862 if self.op.command == constants.OOB_HEALTH:
3863 if not isinstance(result.payload, list):
3864 errs.append("command 'health' is expected to return a list but got %s" %
3865 type(result.payload))
3867 for item, status in result.payload:
3868 if status not in constants.OOB_STATUSES:
3869 errs.append("health item '%s' has invalid status '%s'" %
3872 if self.op.command == constants.OOB_POWER_STATUS:
3873 if not isinstance(result.payload, dict):
3874 errs.append("power-status is expected to return a dict but got %s" %
3875 type(result.payload))
3877 if self.op.command in [
3878 constants.OOB_POWER_ON,
3879 constants.OOB_POWER_OFF,
3880 constants.OOB_POWER_CYCLE,
3882 if result.payload is not None:
3883 errs.append("%s is expected to not return payload but got '%s'" %
3884 (self.op.command, result.payload))
3887 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3888 utils.CommaJoin(errs))
3890 class _OsQuery(_QueryBase):
3891 FIELDS = query.OS_FIELDS
3893 def ExpandNames(self, lu):
3894 # Lock all nodes in shared mode
3895 # Temporary removal of locks, should be reverted later
3896 # TODO: reintroduce locks when they are lighter-weight
3897 lu.needed_locks = {}
3898 #self.share_locks[locking.LEVEL_NODE] = 1
3899 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3901 # The following variables interact with _QueryBase._GetNames
3903 self.wanted = self.names
3905 self.wanted = locking.ALL_SET
3907 self.do_locking = self.use_locking
3909 def DeclareLocks(self, lu, level):
3913 def _DiagnoseByOS(rlist):
3914 """Remaps a per-node return list into an a per-os per-node dictionary
3916 @param rlist: a map with node names as keys and OS objects as values
3919 @return: a dictionary with osnames as keys and as value another
3920 map, with nodes as keys and tuples of (path, status, diagnose,
3921 variants, parameters, api_versions) as values, eg::
3923 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3924 (/srv/..., False, "invalid api")],
3925 "node2": [(/srv/..., True, "", [], [])]}
3930 # we build here the list of nodes that didn't fail the RPC (at RPC
3931 # level), so that nodes with a non-responding node daemon don't
3932 # make all OSes invalid
3933 good_nodes = [node_name for node_name in rlist
3934 if not rlist[node_name].fail_msg]
3935 for node_name, nr in rlist.items():
3936 if nr.fail_msg or not nr.payload:
3938 for (name, path, status, diagnose, variants,
3939 params, api_versions) in nr.payload:
3940 if name not in all_os:
3941 # build a list of nodes for this os containing empty lists
3942 # for each node in node_list
3944 for nname in good_nodes:
3945 all_os[name][nname] = []
3946 # convert params from [name, help] to (name, help)
3947 params = [tuple(v) for v in params]
3948 all_os[name][node_name].append((path, status, diagnose,
3949 variants, params, api_versions))
3952 def _GetQueryData(self, lu):
3953 """Computes the list of nodes and their attributes.
3956 # Locking is not used
3957 assert not (compat.any(lu.glm.is_owned(level)
3958 for level in locking.LEVELS
3959 if level != locking.LEVEL_CLUSTER) or
3960 self.do_locking or self.use_locking)
3962 valid_nodes = [node.name
3963 for node in lu.cfg.GetAllNodesInfo().values()
3964 if not node.offline and node.vm_capable]
3965 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3966 cluster = lu.cfg.GetClusterInfo()
3970 for (os_name, os_data) in pol.items():
3971 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3972 hidden=(os_name in cluster.hidden_os),
3973 blacklisted=(os_name in cluster.blacklisted_os))
3977 api_versions = set()
3979 for idx, osl in enumerate(os_data.values()):
3980 info.valid = bool(info.valid and osl and osl[0][1])
3984 (node_variants, node_params, node_api) = osl[0][3:6]
3987 variants.update(node_variants)
3988 parameters.update(node_params)
3989 api_versions.update(node_api)
3991 # Filter out inconsistent values
3992 variants.intersection_update(node_variants)
3993 parameters.intersection_update(node_params)
3994 api_versions.intersection_update(node_api)
3996 info.variants = list(variants)
3997 info.parameters = list(parameters)
3998 info.api_versions = list(api_versions)
4000 data[os_name] = info
4002 # Prepare data in requested order
4003 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4007 class LUOsDiagnose(NoHooksLU):
4008 """Logical unit for OS diagnose/query.
4014 def _BuildFilter(fields, names):
4015 """Builds a filter for querying OSes.
4018 name_filter = qlang.MakeSimpleFilter("name", names)
4020 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4021 # respective field is not requested
4022 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4023 for fname in ["hidden", "blacklisted"]
4024 if fname not in fields]
4025 if "valid" not in fields:
4026 status_filter.append([qlang.OP_TRUE, "valid"])
4029 status_filter.insert(0, qlang.OP_AND)
4031 status_filter = None
4033 if name_filter and status_filter:
4034 return [qlang.OP_AND, name_filter, status_filter]
4038 return status_filter
4040 def CheckArguments(self):
4041 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4042 self.op.output_fields, False)
4044 def ExpandNames(self):
4045 self.oq.ExpandNames(self)
4047 def Exec(self, feedback_fn):
4048 return self.oq.OldStyleQuery(self)
4051 class LUNodeRemove(LogicalUnit):
4052 """Logical unit for removing a node.
4055 HPATH = "node-remove"
4056 HTYPE = constants.HTYPE_NODE
4058 def BuildHooksEnv(self):
4061 This doesn't run on the target node in the pre phase as a failed
4062 node would then be impossible to remove.
4066 "OP_TARGET": self.op.node_name,
4067 "NODE_NAME": self.op.node_name,
4070 def BuildHooksNodes(self):
4071 """Build hooks nodes.
4074 all_nodes = self.cfg.GetNodeList()
4076 all_nodes.remove(self.op.node_name)
4078 logging.warning("Node '%s', which is about to be removed, was not found"
4079 " in the list of all nodes", self.op.node_name)
4080 return (all_nodes, all_nodes)
4082 def CheckPrereq(self):
4083 """Check prerequisites.
4086 - the node exists in the configuration
4087 - it does not have primary or secondary instances
4088 - it's not the master
4090 Any errors are signaled by raising errors.OpPrereqError.
4093 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4094 node = self.cfg.GetNodeInfo(self.op.node_name)
4095 assert node is not None
4097 instance_list = self.cfg.GetInstanceList()
4099 masternode = self.cfg.GetMasterNode()
4100 if node.name == masternode:
4101 raise errors.OpPrereqError("Node is the master node, failover to another"
4102 " node is required", errors.ECODE_INVAL)
4104 for instance_name in instance_list:
4105 instance = self.cfg.GetInstanceInfo(instance_name)
4106 if node.name in instance.all_nodes:
4107 raise errors.OpPrereqError("Instance %s is still running on the node,"
4108 " please remove first" % instance_name,
4110 self.op.node_name = node.name
4113 def Exec(self, feedback_fn):
4114 """Removes the node from the cluster.
4118 logging.info("Stopping the node daemon and removing configs from node %s",
4121 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4123 # Promote nodes to master candidate as needed
4124 _AdjustCandidatePool(self, exceptions=[node.name])
4125 self.context.RemoveNode(node.name)
4127 # Run post hooks on the node before it's removed
4128 _RunPostHook(self, node.name)
4130 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4131 msg = result.fail_msg
4133 self.LogWarning("Errors encountered on the remote node while leaving"
4134 " the cluster: %s", msg)
4136 # Remove node from our /etc/hosts
4137 if self.cfg.GetClusterInfo().modify_etc_hosts:
4138 master_node = self.cfg.GetMasterNode()
4139 result = self.rpc.call_etc_hosts_modify(master_node,
4140 constants.ETC_HOSTS_REMOVE,
4142 result.Raise("Can't update hosts file with new host data")
4143 _RedistributeAncillaryFiles(self)
4146 class _NodeQuery(_QueryBase):
4147 FIELDS = query.NODE_FIELDS
4149 def ExpandNames(self, lu):
4150 lu.needed_locks = {}
4151 lu.share_locks[locking.LEVEL_NODE] = 1
4154 self.wanted = _GetWantedNodes(lu, self.names)
4156 self.wanted = locking.ALL_SET
4158 self.do_locking = (self.use_locking and
4159 query.NQ_LIVE in self.requested_data)
4162 # if we don't request only static fields, we need to lock the nodes
4163 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4165 def DeclareLocks(self, lu, level):
4168 def _GetQueryData(self, lu):
4169 """Computes the list of nodes and their attributes.
4172 all_info = lu.cfg.GetAllNodesInfo()
4174 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4176 # Gather data as requested
4177 if query.NQ_LIVE in self.requested_data:
4178 # filter out non-vm_capable nodes
4179 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4181 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4182 lu.cfg.GetHypervisorType())
4183 live_data = dict((name, nresult.payload)
4184 for (name, nresult) in node_data.items()
4185 if not nresult.fail_msg and nresult.payload)
4189 if query.NQ_INST in self.requested_data:
4190 node_to_primary = dict([(name, set()) for name in nodenames])
4191 node_to_secondary = dict([(name, set()) for name in nodenames])
4193 inst_data = lu.cfg.GetAllInstancesInfo()
4195 for inst in inst_data.values():
4196 if inst.primary_node in node_to_primary:
4197 node_to_primary[inst.primary_node].add(inst.name)
4198 for secnode in inst.secondary_nodes:
4199 if secnode in node_to_secondary:
4200 node_to_secondary[secnode].add(inst.name)
4202 node_to_primary = None
4203 node_to_secondary = None
4205 if query.NQ_OOB in self.requested_data:
4206 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4207 for name, node in all_info.iteritems())
4211 if query.NQ_GROUP in self.requested_data:
4212 groups = lu.cfg.GetAllNodeGroupsInfo()
4216 return query.NodeQueryData([all_info[name] for name in nodenames],
4217 live_data, lu.cfg.GetMasterNode(),
4218 node_to_primary, node_to_secondary, groups,
4219 oob_support, lu.cfg.GetClusterInfo())
4222 class LUNodeQuery(NoHooksLU):
4223 """Logical unit for querying nodes.
4226 # pylint: disable-msg=W0142
4229 def CheckArguments(self):
4230 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4231 self.op.output_fields, self.op.use_locking)
4233 def ExpandNames(self):
4234 self.nq.ExpandNames(self)
4236 def Exec(self, feedback_fn):
4237 return self.nq.OldStyleQuery(self)
4240 class LUNodeQueryvols(NoHooksLU):
4241 """Logical unit for getting volumes on node(s).
4245 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4246 _FIELDS_STATIC = utils.FieldSet("node")
4248 def CheckArguments(self):
4249 _CheckOutputFields(static=self._FIELDS_STATIC,
4250 dynamic=self._FIELDS_DYNAMIC,
4251 selected=self.op.output_fields)
4253 def ExpandNames(self):
4254 self.needed_locks = {}
4255 self.share_locks[locking.LEVEL_NODE] = 1
4256 if not self.op.nodes:
4257 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4259 self.needed_locks[locking.LEVEL_NODE] = \
4260 _GetWantedNodes(self, self.op.nodes)
4262 def Exec(self, feedback_fn):
4263 """Computes the list of nodes and their attributes.
4266 nodenames = self.glm.list_owned(locking.LEVEL_NODE)
4267 volumes = self.rpc.call_node_volumes(nodenames)
4269 ilist = self.cfg.GetAllInstancesInfo()
4270 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4273 for node in nodenames:
4274 nresult = volumes[node]
4277 msg = nresult.fail_msg
4279 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4282 node_vols = sorted(nresult.payload,
4283 key=operator.itemgetter("dev"))
4285 for vol in node_vols:
4287 for field in self.op.output_fields:
4290 elif field == "phys":
4294 elif field == "name":
4296 elif field == "size":
4297 val = int(float(vol["size"]))
4298 elif field == "instance":
4299 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4301 raise errors.ParameterError(field)
4302 node_output.append(str(val))
4304 output.append(node_output)
4309 class LUNodeQueryStorage(NoHooksLU):
4310 """Logical unit for getting information on storage units on node(s).
4313 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4316 def CheckArguments(self):
4317 _CheckOutputFields(static=self._FIELDS_STATIC,
4318 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4319 selected=self.op.output_fields)
4321 def ExpandNames(self):
4322 self.needed_locks = {}
4323 self.share_locks[locking.LEVEL_NODE] = 1
4326 self.needed_locks[locking.LEVEL_NODE] = \
4327 _GetWantedNodes(self, self.op.nodes)
4329 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4331 def Exec(self, feedback_fn):
4332 """Computes the list of nodes and their attributes.
4335 self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
4337 # Always get name to sort by
4338 if constants.SF_NAME in self.op.output_fields:
4339 fields = self.op.output_fields[:]
4341 fields = [constants.SF_NAME] + self.op.output_fields
4343 # Never ask for node or type as it's only known to the LU
4344 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4345 while extra in fields:
4346 fields.remove(extra)
4348 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4349 name_idx = field_idx[constants.SF_NAME]
4351 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4352 data = self.rpc.call_storage_list(self.nodes,
4353 self.op.storage_type, st_args,
4354 self.op.name, fields)
4358 for node in utils.NiceSort(self.nodes):
4359 nresult = data[node]
4363 msg = nresult.fail_msg
4365 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4368 rows = dict([(row[name_idx], row) for row in nresult.payload])
4370 for name in utils.NiceSort(rows.keys()):
4375 for field in self.op.output_fields:
4376 if field == constants.SF_NODE:
4378 elif field == constants.SF_TYPE:
4379 val = self.op.storage_type
4380 elif field in field_idx:
4381 val = row[field_idx[field]]
4383 raise errors.ParameterError(field)
4392 class _InstanceQuery(_QueryBase):
4393 FIELDS = query.INSTANCE_FIELDS
4395 def ExpandNames(self, lu):
4396 lu.needed_locks = {}
4397 lu.share_locks[locking.LEVEL_INSTANCE] = 1
4398 lu.share_locks[locking.LEVEL_NODE] = 1
4401 self.wanted = _GetWantedInstances(lu, self.names)
4403 self.wanted = locking.ALL_SET
4405 self.do_locking = (self.use_locking and
4406 query.IQ_LIVE in self.requested_data)
4408 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4409 lu.needed_locks[locking.LEVEL_NODE] = []
4410 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4412 def DeclareLocks(self, lu, level):
4413 if level == locking.LEVEL_NODE and self.do_locking:
4414 lu._LockInstancesNodes() # pylint: disable-msg=W0212
4416 def _GetQueryData(self, lu):
4417 """Computes the list of instances and their attributes.
4420 cluster = lu.cfg.GetClusterInfo()
4421 all_info = lu.cfg.GetAllInstancesInfo()
4423 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4425 instance_list = [all_info[name] for name in instance_names]
4426 nodes = frozenset(itertools.chain(*(inst.all_nodes
4427 for inst in instance_list)))
4428 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4431 wrongnode_inst = set()
4433 # Gather data as requested
4434 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4436 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4438 result = node_data[name]
4440 # offline nodes will be in both lists
4441 assert result.fail_msg
4442 offline_nodes.append(name)
4444 bad_nodes.append(name)
4445 elif result.payload:
4446 for inst in result.payload:
4447 if inst in all_info:
4448 if all_info[inst].primary_node == name:
4449 live_data.update(result.payload)
4451 wrongnode_inst.add(inst)
4453 # orphan instance; we don't list it here as we don't
4454 # handle this case yet in the output of instance listing
4455 logging.warning("Orphan instance '%s' found on node %s",
4457 # else no instance is alive
4461 if query.IQ_DISKUSAGE in self.requested_data:
4462 disk_usage = dict((inst.name,
4463 _ComputeDiskSize(inst.disk_template,
4464 [{constants.IDISK_SIZE: disk.size}
4465 for disk in inst.disks]))
4466 for inst in instance_list)
4470 if query.IQ_CONSOLE in self.requested_data:
4472 for inst in instance_list:
4473 if inst.name in live_data:
4474 # Instance is running
4475 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4477 consinfo[inst.name] = None
4478 assert set(consinfo.keys()) == set(instance_names)
4482 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4483 disk_usage, offline_nodes, bad_nodes,
4484 live_data, wrongnode_inst, consinfo)
4487 class LUQuery(NoHooksLU):
4488 """Query for resources/items of a certain kind.
4491 # pylint: disable-msg=W0142
4494 def CheckArguments(self):
4495 qcls = _GetQueryImplementation(self.op.what)
4497 self.impl = qcls(self.op.filter, self.op.fields, False)
4499 def ExpandNames(self):
4500 self.impl.ExpandNames(self)
4502 def DeclareLocks(self, level):
4503 self.impl.DeclareLocks(self, level)
4505 def Exec(self, feedback_fn):
4506 return self.impl.NewStyleQuery(self)
4509 class LUQueryFields(NoHooksLU):
4510 """Query for resources/items of a certain kind.
4513 # pylint: disable-msg=W0142
4516 def CheckArguments(self):
4517 self.qcls = _GetQueryImplementation(self.op.what)
4519 def ExpandNames(self):
4520 self.needed_locks = {}
4522 def Exec(self, feedback_fn):
4523 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4526 class LUNodeModifyStorage(NoHooksLU):
4527 """Logical unit for modifying a storage volume on a node.
4532 def CheckArguments(self):
4533 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4535 storage_type = self.op.storage_type
4538 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4540 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4541 " modified" % storage_type,
4544 diff = set(self.op.changes.keys()) - modifiable
4546 raise errors.OpPrereqError("The following fields can not be modified for"
4547 " storage units of type '%s': %r" %
4548 (storage_type, list(diff)),
4551 def ExpandNames(self):
4552 self.needed_locks = {
4553 locking.LEVEL_NODE: self.op.node_name,
4556 def Exec(self, feedback_fn):
4557 """Computes the list of nodes and their attributes.
4560 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4561 result = self.rpc.call_storage_modify(self.op.node_name,
4562 self.op.storage_type, st_args,
4563 self.op.name, self.op.changes)
4564 result.Raise("Failed to modify storage unit '%s' on %s" %
4565 (self.op.name, self.op.node_name))
4568 class LUNodeAdd(LogicalUnit):
4569 """Logical unit for adding node to the cluster.
4573 HTYPE = constants.HTYPE_NODE
4574 _NFLAGS = ["master_capable", "vm_capable"]
4576 def CheckArguments(self):
4577 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4578 # validate/normalize the node name
4579 self.hostname = netutils.GetHostname(name=self.op.node_name,
4580 family=self.primary_ip_family)
4581 self.op.node_name = self.hostname.name
4583 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4584 raise errors.OpPrereqError("Cannot readd the master node",
4587 if self.op.readd and self.op.group:
4588 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4589 " being readded", errors.ECODE_INVAL)
4591 def BuildHooksEnv(self):
4594 This will run on all nodes before, and on all nodes + the new node after.
4598 "OP_TARGET": self.op.node_name,
4599 "NODE_NAME": self.op.node_name,
4600 "NODE_PIP": self.op.primary_ip,
4601 "NODE_SIP": self.op.secondary_ip,
4602 "MASTER_CAPABLE": str(self.op.master_capable),
4603 "VM_CAPABLE": str(self.op.vm_capable),
4606 def BuildHooksNodes(self):
4607 """Build hooks nodes.
4610 # Exclude added node
4611 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4612 post_nodes = pre_nodes + [self.op.node_name, ]
4614 return (pre_nodes, post_nodes)
4616 def CheckPrereq(self):
4617 """Check prerequisites.
4620 - the new node is not already in the config
4622 - its parameters (single/dual homed) matches the cluster
4624 Any errors are signaled by raising errors.OpPrereqError.
4628 hostname = self.hostname
4629 node = hostname.name
4630 primary_ip = self.op.primary_ip = hostname.ip
4631 if self.op.secondary_ip is None:
4632 if self.primary_ip_family == netutils.IP6Address.family:
4633 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4634 " IPv4 address must be given as secondary",
4636 self.op.secondary_ip = primary_ip
4638 secondary_ip = self.op.secondary_ip
4639 if not netutils.IP4Address.IsValid(secondary_ip):
4640 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4641 " address" % secondary_ip, errors.ECODE_INVAL)
4643 node_list = cfg.GetNodeList()
4644 if not self.op.readd and node in node_list:
4645 raise errors.OpPrereqError("Node %s is already in the configuration" %
4646 node, errors.ECODE_EXISTS)
4647 elif self.op.readd and node not in node_list:
4648 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4651 self.changed_primary_ip = False
4653 for existing_node_name in node_list:
4654 existing_node = cfg.GetNodeInfo(existing_node_name)
4656 if self.op.readd and node == existing_node_name:
4657 if existing_node.secondary_ip != secondary_ip:
4658 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4659 " address configuration as before",
4661 if existing_node.primary_ip != primary_ip:
4662 self.changed_primary_ip = True
4666 if (existing_node.primary_ip == primary_ip or
4667 existing_node.secondary_ip == primary_ip or
4668 existing_node.primary_ip == secondary_ip or
4669 existing_node.secondary_ip == secondary_ip):
4670 raise errors.OpPrereqError("New node ip address(es) conflict with"
4671 " existing node %s" % existing_node.name,
4672 errors.ECODE_NOTUNIQUE)
4674 # After this 'if' block, None is no longer a valid value for the
4675 # _capable op attributes
4677 old_node = self.cfg.GetNodeInfo(node)
4678 assert old_node is not None, "Can't retrieve locked node %s" % node
4679 for attr in self._NFLAGS:
4680 if getattr(self.op, attr) is None:
4681 setattr(self.op, attr, getattr(old_node, attr))
4683 for attr in self._NFLAGS:
4684 if getattr(self.op, attr) is None:
4685 setattr(self.op, attr, True)
4687 if self.op.readd and not self.op.vm_capable:
4688 pri, sec = cfg.GetNodeInstances(node)
4690 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4691 " flag set to false, but it already holds"
4692 " instances" % node,
4695 # check that the type of the node (single versus dual homed) is the
4696 # same as for the master
4697 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4698 master_singlehomed = myself.secondary_ip == myself.primary_ip
4699 newbie_singlehomed = secondary_ip == primary_ip
4700 if master_singlehomed != newbie_singlehomed:
4701 if master_singlehomed:
4702 raise errors.OpPrereqError("The master has no secondary ip but the"
4703 " new node has one",
4706 raise errors.OpPrereqError("The master has a secondary ip but the"
4707 " new node doesn't have one",
4710 # checks reachability
4711 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4712 raise errors.OpPrereqError("Node not reachable by ping",
4713 errors.ECODE_ENVIRON)
4715 if not newbie_singlehomed:
4716 # check reachability from my secondary ip to newbie's secondary ip
4717 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4718 source=myself.secondary_ip):
4719 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4720 " based ping to node daemon port",
4721 errors.ECODE_ENVIRON)
4728 if self.op.master_capable:
4729 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4731 self.master_candidate = False
4734 self.new_node = old_node
4736 node_group = cfg.LookupNodeGroup(self.op.group)
4737 self.new_node = objects.Node(name=node,
4738 primary_ip=primary_ip,
4739 secondary_ip=secondary_ip,
4740 master_candidate=self.master_candidate,
4741 offline=False, drained=False,
4744 if self.op.ndparams:
4745 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4747 def Exec(self, feedback_fn):
4748 """Adds the new node to the cluster.
4751 new_node = self.new_node
4752 node = new_node.name
4754 # We are adding a new node, so we assume it is powered
4755 new_node.powered = True
4757 # for re-adds, reset the offline/drained/master-candidate flags;
4758 # we need to reset here, otherwise offline would prevent RPC calls
4759 # later in the procedure; this also means that if the re-add
4760 # fails, we are left with a non-offlined, broken node
4762 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4763 self.LogInfo("Readding a node, the offline/drained flags were reset")
4764 # if we demote the node, we do cleanup later in the procedure
4765 new_node.master_candidate = self.master_candidate
4766 if self.changed_primary_ip:
4767 new_node.primary_ip = self.op.primary_ip
4769 # copy the master/vm_capable flags
4770 for attr in self._NFLAGS:
4771 setattr(new_node, attr, getattr(self.op, attr))
4773 # notify the user about any possible mc promotion
4774 if new_node.master_candidate:
4775 self.LogInfo("Node will be a master candidate")
4777 if self.op.ndparams:
4778 new_node.ndparams = self.op.ndparams
4780 new_node.ndparams = {}
4782 # check connectivity
4783 result = self.rpc.call_version([node])[node]
4784 result.Raise("Can't get version information from node %s" % node)
4785 if constants.PROTOCOL_VERSION == result.payload:
4786 logging.info("Communication to node %s fine, sw version %s match",
4787 node, result.payload)
4789 raise errors.OpExecError("Version mismatch master version %s,"
4790 " node version %s" %
4791 (constants.PROTOCOL_VERSION, result.payload))
4793 # Add node to our /etc/hosts, and add key to known_hosts
4794 if self.cfg.GetClusterInfo().modify_etc_hosts:
4795 master_node = self.cfg.GetMasterNode()
4796 result = self.rpc.call_etc_hosts_modify(master_node,
4797 constants.ETC_HOSTS_ADD,
4800 result.Raise("Can't update hosts file with new host data")
4802 if new_node.secondary_ip != new_node.primary_ip:
4803 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4806 node_verify_list = [self.cfg.GetMasterNode()]
4807 node_verify_param = {
4808 constants.NV_NODELIST: [node],
4809 # TODO: do a node-net-test as well?
4812 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4813 self.cfg.GetClusterName())
4814 for verifier in node_verify_list:
4815 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4816 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4818 for failed in nl_payload:
4819 feedback_fn("ssh/hostname verification failed"
4820 " (checking from %s): %s" %
4821 (verifier, nl_payload[failed]))
4822 raise errors.OpExecError("ssh/hostname verification failed")
4825 _RedistributeAncillaryFiles(self)
4826 self.context.ReaddNode(new_node)
4827 # make sure we redistribute the config
4828 self.cfg.Update(new_node, feedback_fn)
4829 # and make sure the new node will not have old files around
4830 if not new_node.master_candidate:
4831 result = self.rpc.call_node_demote_from_mc(new_node.name)
4832 msg = result.fail_msg
4834 self.LogWarning("Node failed to demote itself from master"
4835 " candidate status: %s" % msg)
4837 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4838 additional_vm=self.op.vm_capable)
4839 self.context.AddNode(new_node, self.proc.GetECId())
4842 class LUNodeSetParams(LogicalUnit):
4843 """Modifies the parameters of a node.
4845 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4846 to the node role (as _ROLE_*)
4847 @cvar _R2F: a dictionary from node role to tuples of flags
4848 @cvar _FLAGS: a list of attribute names corresponding to the flags
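For example, C{_F2R[(True, False, False)]} is C{_ROLE_CANDIDATE} and
C{_R2F[_ROLE_OFFLINE]} is C{(False, False, True)}, as defined by the
mappings below.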
4851 HPATH = "node-modify"
4852 HTYPE = constants.HTYPE_NODE
4854 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4856 (True, False, False): _ROLE_CANDIDATE,
4857 (False, True, False): _ROLE_DRAINED,
4858 (False, False, True): _ROLE_OFFLINE,
4859 (False, False, False): _ROLE_REGULAR,
4861 _R2F = dict((v, k) for k, v in _F2R.items())
4862 _FLAGS = ["master_candidate", "drained", "offline"]
4864 def CheckArguments(self):
4865 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4866 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4867 self.op.master_capable, self.op.vm_capable,
4868 self.op.secondary_ip, self.op.ndparams]
4869 if all_mods.count(None) == len(all_mods):
4870 raise errors.OpPrereqError("Please pass at least one modification",
4872 if all_mods.count(True) > 1:
4873 raise errors.OpPrereqError("Can't set the node into more than one"
4874 " state at the same time",
4877 # Boolean value that tells us whether we might be demoting from MC
4878 self.might_demote = (self.op.master_candidate == False or
4879 self.op.offline == True or
4880 self.op.drained == True or
4881 self.op.master_capable == False)
4883 if self.op.secondary_ip:
4884 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4885 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4886 " address" % self.op.secondary_ip,
4889 self.lock_all = self.op.auto_promote and self.might_demote
4890 self.lock_instances = self.op.secondary_ip is not None
4892 def ExpandNames(self):
4894 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4896 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4898 if self.lock_instances:
4899 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4901 def DeclareLocks(self, level):
4902 # If we have locked all instances, before waiting to lock nodes, release
4903 # all the ones living on nodes unrelated to the current operation.
4904 if level == locking.LEVEL_NODE and self.lock_instances:
4905 self.affected_instances = []
4906 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4909 # Build list of instances to release
4910 for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
4911 instance = self.context.cfg.GetInstanceInfo(instance_name)
4912 if (instance.disk_template in constants.DTS_INT_MIRROR and
4913 self.op.node_name in instance.all_nodes):
4914 instances_keep.append(instance_name)
4915 self.affected_instances.append(instance)
4917 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
4919 assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
4920 set(instances_keep))
4922 def BuildHooksEnv(self):
4925 This runs on the master node.
4929 "OP_TARGET": self.op.node_name,
4930 "MASTER_CANDIDATE": str(self.op.master_candidate),
4931 "OFFLINE": str(self.op.offline),
4932 "DRAINED": str(self.op.drained),
4933 "MASTER_CAPABLE": str(self.op.master_capable),
4934 "VM_CAPABLE": str(self.op.vm_capable),
4937 def BuildHooksNodes(self):
4938 """Build hooks nodes.
4941 nl = [self.cfg.GetMasterNode(), self.op.node_name]
4944 def CheckPrereq(self):
4945 """Check prerequisites.
4947 This only checks the instance list against the existing names.
4950 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4952 if (self.op.master_candidate is not None or
4953 self.op.drained is not None or
4954 self.op.offline is not None):
4955 # we can't change the master's node flags
4956 if self.op.node_name == self.cfg.GetMasterNode():
4957 raise errors.OpPrereqError("The master role can be changed"
4958 " only via master-failover",
4961 if self.op.master_candidate and not node.master_capable:
4962 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4963 " it a master candidate" % node.name,
4966 if self.op.vm_capable == False:
4967 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4969 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4970 " the vm_capable flag" % node.name,
4973 if node.master_candidate and self.might_demote and not self.lock_all:
4974 assert not self.op.auto_promote, "auto_promote set but lock_all not"
4975 # check if after removing the current node, we're missing master
4977 (mc_remaining, mc_should, _) = \
4978 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4979 if mc_remaining < mc_should:
4980 raise errors.OpPrereqError("Not enough master candidates, please"
4981 " pass auto promote option to allow"
4982 " promotion", errors.ECODE_STATE)
4984 self.old_flags = old_flags = (node.master_candidate,
4985 node.drained, node.offline)
4986 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4987 self.old_role = old_role = self._F2R[old_flags]
4989 # Check for ineffective changes
4990 for attr in self._FLAGS:
4991 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4992 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4993 setattr(self.op, attr, None)
4995 # Past this point, any flag change to False means a transition
4996 # away from the respective state, as only real changes are kept
4998 # TODO: We might query the real power state if it supports OOB
4999 if _SupportsOob(self.cfg, node):
5000 if self.op.offline is False and not (node.powered or
5001 self.op.powered == True):
5002 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5003 " offline status can be reset") %
5005 elif self.op.powered is not None:
5006 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5007 " as it does not support out-of-band"
5008 " handling") % self.op.node_name)
5010 # If we're being deofflined/drained, we'll MC ourself if needed
5011 if (self.op.drained == False or self.op.offline == False or
5012 (self.op.master_capable and not node.master_capable)):
5013 if _DecideSelfPromotion(self):
5014 self.op.master_candidate = True
5015 self.LogInfo("Auto-promoting node to master candidate")
5017 # If we're no longer master capable, we'll demote ourselves from MC
5018 if self.op.master_capable == False and node.master_candidate:
5019 self.LogInfo("Demoting from master candidate")
5020 self.op.master_candidate = False
5023 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5024 if self.op.master_candidate:
5025 new_role = self._ROLE_CANDIDATE
5026 elif self.op.drained:
5027 new_role = self._ROLE_DRAINED
5028 elif self.op.offline:
5029 new_role = self._ROLE_OFFLINE
5030 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5031 # False is still in new flags, which means we're un-setting (the current) flags
5033 new_role = self._ROLE_REGULAR
5034 else: # no new flags, nothing, keep old role
5037 self.new_role = new_role
5039 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5040 # Trying to transition out of offline status
5041 result = self.rpc.call_version([node.name])[node.name]
5043 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5044 " to report its version: %s" %
5045 (node.name, result.fail_msg),
5048 self.LogWarning("Transitioning node from offline to online state"
5049 " without using re-add. Please make sure the node"
5052 if self.op.secondary_ip:
5053 # Ok even without locking, because this can't be changed by any LU
5054 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5055 master_singlehomed = master.secondary_ip == master.primary_ip
5056 if master_singlehomed and self.op.secondary_ip:
5057 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5058 " homed cluster", errors.ECODE_INVAL)
5061 if self.affected_instances:
5062 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5063 " node has instances (%s) configured"
5064 " to use it" % self.affected_instances)
5066 # On online nodes, check that no instances are running, and that
5067 # the node has the new ip and we can reach it.
5068 for instance in self.affected_instances:
5069 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5071 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5072 if master.name != node.name:
5073 # check reachability from master secondary ip to new secondary ip
5074 if not netutils.TcpPing(self.op.secondary_ip,
5075 constants.DEFAULT_NODED_PORT,
5076 source=master.secondary_ip):
5077 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5078 " based ping to node daemon port",
5079 errors.ECODE_ENVIRON)
5081 if self.op.ndparams:
5082 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5083 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5084 self.new_ndparams = new_ndparams
5086 def Exec(self, feedback_fn):
5091 old_role = self.old_role
5092 new_role = self.new_role
5096 if self.op.ndparams:
5097 node.ndparams = self.new_ndparams
5099 if self.op.powered is not None:
5100 node.powered = self.op.powered
5102 for attr in ["master_capable", "vm_capable"]:
5103 val = getattr(self.op, attr)
5105 setattr(node, attr, val)
5106 result.append((attr, str(val)))
5108 if new_role != old_role:
5109 # Tell the node to demote itself, if no longer MC and not offline
5110 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5111 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5113 self.LogWarning("Node failed to demote itself: %s", msg)
5115 new_flags = self._R2F[new_role]
5116 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5118 result.append((desc, str(nf)))
5119 (node.master_candidate, node.drained, node.offline) = new_flags
5121 # we locked all nodes, we adjust the CP before updating this node
5123 _AdjustCandidatePool(self, [node.name])
5125 if self.op.secondary_ip:
5126 node.secondary_ip = self.op.secondary_ip
5127 result.append(("secondary_ip", self.op.secondary_ip))
5129 # this will trigger configuration file update, if needed
5130 self.cfg.Update(node, feedback_fn)
5132 # this will trigger job queue propagation or cleanup if the mc flag changed
5134 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5135 self.context.ReaddNode(node)
5140 class LUNodePowercycle(NoHooksLU):
5141 """Powercycles a node.
5146 def CheckArguments(self):
5147 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5148 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5149 raise errors.OpPrereqError("The node is the master and the force"
5150 " parameter was not set",
5153 def ExpandNames(self):
5154 """Locking for PowercycleNode.
5156 This is a last-resort option and shouldn't block on other
5157 jobs. Therefore, we grab no locks.
5160 self.needed_locks = {}
5162 def Exec(self, feedback_fn):
5166 result = self.rpc.call_node_powercycle(self.op.node_name,
5167 self.cfg.GetHypervisorType())
5168 result.Raise("Failed to schedule the reboot")
5169 return result.payload
5172 class LUClusterQuery(NoHooksLU):
5173 """Query cluster configuration.
5178 def ExpandNames(self):
5179 self.needed_locks = {}
5181 def Exec(self, feedback_fn):
5182 """Return cluster config.
5185 cluster = self.cfg.GetClusterInfo()
5188 # Filter just for enabled hypervisors
5189 for os_name, hv_dict in cluster.os_hvp.items():
5190 os_hvp[os_name] = {}
5191 for hv_name, hv_params in hv_dict.items():
5192 if hv_name in cluster.enabled_hypervisors:
5193 os_hvp[os_name][hv_name] = hv_params
5195 # Convert ip_family to ip_version
5196 primary_ip_version = constants.IP4_VERSION
5197 if cluster.primary_ip_family == netutils.IP6Address.family:
5198 primary_ip_version = constants.IP6_VERSION
5201 "software_version": constants.RELEASE_VERSION,
5202 "protocol_version": constants.PROTOCOL_VERSION,
5203 "config_version": constants.CONFIG_VERSION,
5204 "os_api_version": max(constants.OS_API_VERSIONS),
5205 "export_version": constants.EXPORT_VERSION,
5206 "architecture": (platform.architecture()[0], platform.machine()),
5207 "name": cluster.cluster_name,
5208 "master": cluster.master_node,
5209 "default_hypervisor": cluster.enabled_hypervisors[0],
5210 "enabled_hypervisors": cluster.enabled_hypervisors,
5211 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5212 for hypervisor_name in cluster.enabled_hypervisors]),
5214 "beparams": cluster.beparams,
5215 "osparams": cluster.osparams,
5216 "nicparams": cluster.nicparams,
5217 "ndparams": cluster.ndparams,
5218 "candidate_pool_size": cluster.candidate_pool_size,
5219 "master_netdev": cluster.master_netdev,
5220 "volume_group_name": cluster.volume_group_name,
5221 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5222 "file_storage_dir": cluster.file_storage_dir,
5223 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5224 "maintain_node_health": cluster.maintain_node_health,
5225 "ctime": cluster.ctime,
5226 "mtime": cluster.mtime,
5227 "uuid": cluster.uuid,
5228 "tags": list(cluster.GetTags()),
5229 "uid_pool": cluster.uid_pool,
5230 "default_iallocator": cluster.default_iallocator,
5231 "reserved_lvs": cluster.reserved_lvs,
5232 "primary_ip_version": primary_ip_version,
5233 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5234 "hidden_os": cluster.hidden_os,
5235 "blacklisted_os": cluster.blacklisted_os,
5241 class LUClusterConfigQuery(NoHooksLU):
5242 """Return configuration values.
5246 _FIELDS_DYNAMIC = utils.FieldSet()
5247 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5248 "watcher_pause", "volume_group_name")
5250 def CheckArguments(self):
5251 _CheckOutputFields(static=self._FIELDS_STATIC,
5252 dynamic=self._FIELDS_DYNAMIC,
5253 selected=self.op.output_fields)
5255 def ExpandNames(self):
5256 self.needed_locks = {}
5258 def Exec(self, feedback_fn):
5259 """Dump a representation of the cluster config to the standard output.
5263 for field in self.op.output_fields:
5264 if field == "cluster_name":
5265 entry = self.cfg.GetClusterName()
5266 elif field == "master_node":
5267 entry = self.cfg.GetMasterNode()
5268 elif field == "drain_flag":
5269 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5270 elif field == "watcher_pause":
5271 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5272 elif field == "volume_group_name":
5273 entry = self.cfg.GetVGName()
5275 raise errors.ParameterError(field)
5276 values.append(entry)
5280 class LUInstanceActivateDisks(NoHooksLU):
5281 """Bring up an instance's disks.
5286 def ExpandNames(self):
5287 self._ExpandAndLockInstance()
5288 self.needed_locks[locking.LEVEL_NODE] = []
5289 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5291 def DeclareLocks(self, level):
5292 if level == locking.LEVEL_NODE:
5293 self._LockInstancesNodes()
5295 def CheckPrereq(self):
5296 """Check prerequisites.
5298 This checks that the instance is in the cluster.
5301 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5302 assert self.instance is not None, \
5303 "Cannot retrieve locked instance %s" % self.op.instance_name
5304 _CheckNodeOnline(self, self.instance.primary_node)
5306 def Exec(self, feedback_fn):
5307 """Activate the disks.
5310 disks_ok, disks_info = \
5311 _AssembleInstanceDisks(self, self.instance,
5312 ignore_size=self.op.ignore_size)
5314 raise errors.OpExecError("Cannot activate block devices")
5319 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5321 """Prepare the block devices for an instance.
5323 This sets up the block devices on all nodes.
5325 @type lu: L{LogicalUnit}
5326 @param lu: the logical unit on whose behalf we execute
5327 @type instance: L{objects.Instance}
5328 @param instance: the instance for whose disks we assemble
5329 @type disks: list of L{objects.Disk} or None
5330 @param disks: which disks to assemble (or all, if None)
5331 @type ignore_secondaries: boolean
5332 @param ignore_secondaries: if true, errors on secondary nodes
5333 won't result in an error return from the function
5334 @type ignore_size: boolean
5335 @param ignore_size: if true, the current known size of the disk
5336 will not be used during the disk activation, useful for cases
5337 when the size is wrong
5338 @return: a pair (disks_ok, device_info), where disks_ok is False if the
5339 operation failed and device_info is a list of tuples
5340 (host, instance_visible_name, node_visible_name) mapping node devices to instance devices
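Example (sketch, mirroring how L{LUInstanceActivateDisks} uses this
helper)::

  disks_ok, disks_info = \
    _AssembleInstanceDisks(self, self.instance,
                           ignore_size=self.op.ignore_size)
  if not disks_ok:
    raise errors.OpExecError("Cannot activate block devices")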
5345 iname = instance.name
5346 disks = _ExpandCheckDisks(instance, disks)
5348 # With the two passes mechanism we try to reduce the window of
5349 # opportunity for the race condition of switching DRBD to primary
5350 # before handshaking occurred, but we do not eliminate it
5352 # The proper fix would be to wait (with some limits) until the
5353 # connection has been made and drbd transitions from WFConnection
5354 # into any other network-connected state (Connected, SyncTarget,
5357 # 1st pass, assemble on all nodes in secondary mode
5358 for idx, inst_disk in enumerate(disks):
5359 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5361 node_disk = node_disk.Copy()
5362 node_disk.UnsetSize()
5363 lu.cfg.SetDiskID(node_disk, node)
5364 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5365 msg = result.fail_msg
5367 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5368 " (is_primary=False, pass=1): %s",
5369 inst_disk.iv_name, node, msg)
5370 if not ignore_secondaries:
5373 # FIXME: race condition on drbd migration to primary
5375 # 2nd pass, do only the primary node
5376 for idx, inst_disk in enumerate(disks):
5379 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5380 if node != instance.primary_node:
5383 node_disk = node_disk.Copy()
5384 node_disk.UnsetSize()
5385 lu.cfg.SetDiskID(node_disk, node)
5386 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5387 msg = result.fail_msg
5389 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5390 " (is_primary=True, pass=2): %s",
5391 inst_disk.iv_name, node, msg)
5394 dev_path = result.payload
5396 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5398 # leave the disks configured for the primary node
5399 # this is a workaround that would be fixed better by
5400 # improving the logical/physical id handling
5402 lu.cfg.SetDiskID(disk, instance.primary_node)
5404 return disks_ok, device_info
5407 def _StartInstanceDisks(lu, instance, force):
5408 """Start the disks of an instance.
5411 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5412 ignore_secondaries=force)
5414 _ShutdownInstanceDisks(lu, instance)
5415 if force is not None and not force:
5416 lu.proc.LogWarning("", hint="If the message above refers to a"
5418 " you can retry the operation using '--force'.")
5419 raise errors.OpExecError("Disk consistency error")
5422 class LUInstanceDeactivateDisks(NoHooksLU):
5423 """Shutdown an instance's disks.
5428 def ExpandNames(self):
5429 self._ExpandAndLockInstance()
5430 self.needed_locks[locking.LEVEL_NODE] = []
5431 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5433 def DeclareLocks(self, level):
5434 if level == locking.LEVEL_NODE:
5435 self._LockInstancesNodes()
5437 def CheckPrereq(self):
5438 """Check prerequisites.
5440 This checks that the instance is in the cluster.
5443 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5444 assert self.instance is not None, \
5445 "Cannot retrieve locked instance %s" % self.op.instance_name
5447 def Exec(self, feedback_fn):
5448 """Deactivate the disks
5451 instance = self.instance
5453 _ShutdownInstanceDisks(self, instance)
5455 _SafeShutdownInstanceDisks(self, instance)
5458 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5459 """Shutdown block devices of an instance.
5461 This function checks if an instance is running, before calling
5462 _ShutdownInstanceDisks.
5465 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5466 _ShutdownInstanceDisks(lu, instance, disks=disks)
5469 def _ExpandCheckDisks(instance, disks):
5470 """Return the instance disks selected by the disks list
5472 @type disks: list of L{objects.Disk} or None
5473 @param disks: selected disks
5474 @rtype: list of L{objects.Disk}
5475 @return: selected instance disks to act on
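For example, passing C{disks=None} selects all of the instance's disks,
while passing a subset of C{instance.disks} restricts the operation to just
those disks; anything outside C{instance.disks} raises a ProgrammerError.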
5479 return instance.disks
5481 if not set(disks).issubset(instance.disks):
5482 raise errors.ProgrammerError("Can only act on disks belonging to the"
5487 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5488 """Shutdown block devices of an instance.
5490 This does the shutdown on all nodes of the instance.
5492 If ignore_primary is false, errors on the primary node cause the shutdown to be reported as failed.
5497 disks = _ExpandCheckDisks(instance, disks)
5500 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5501 lu.cfg.SetDiskID(top_disk, node)
5502 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5503 msg = result.fail_msg
5505 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5506 disk.iv_name, node, msg)
5507 if ((node == instance.primary_node and not ignore_primary) or
5508 (node != instance.primary_node and not result.offline)):
5513 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5514 """Checks if a node has enough free memory.
5516 This function checks if a given node has the needed amount of free
5517 memory. In case the node has less memory or we cannot get the
5518 information from the node, this function raises an OpPrereqError
5521 @type lu: C{LogicalUnit}
5522 @param lu: a logical unit from which we get configuration data
5524 @param node: the node to check
5525 @type reason: C{str}
5526 @param reason: string to use in the error message
5527 @type requested: C{int}
5528 @param requested: the amount of memory in MiB to check for
5529 @type hypervisor_name: C{str}
5530 @param hypervisor_name: the hypervisor to ask for memory stats
5531 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5532 we cannot check the node
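For example, L{LUInstanceStartup} performs this check on the primary node
before starting an instance::

  _CheckNodeFreeMemory(self, instance.primary_node,
                       "starting instance %s" % instance.name,
                       bep[constants.BE_MEMORY], instance.hypervisor)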
5535 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5536 nodeinfo[node].Raise("Can't get data from node %s" % node,
5537 prereq=True, ecode=errors.ECODE_ENVIRON)
5538 free_mem = nodeinfo[node].payload.get("memory_free", None)
5539 if not isinstance(free_mem, int):
5540 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5541 " was '%s'" % (node, free_mem),
5542 errors.ECODE_ENVIRON)
5543 if requested > free_mem:
5544 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5545 " needed %s MiB, available %s MiB" %
5546 (node, reason, requested, free_mem),
5550 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5551 """Checks if nodes have enough free disk space in the all VGs.
5553 This function checks if all given nodes have the needed amount of
5554 free disk. In case any node has less disk or we cannot get the
5555 information from the node, this function raises an OpPrereqError
5558 @type lu: C{LogicalUnit}
5559 @param lu: a logical unit from which we get configuration data
5560 @type nodenames: C{list}
5561 @param nodenames: the list of node names to check
5562 @type req_sizes: C{dict}
5563 @param req_sizes: the hash of vg and corresponding amount of disk in MiB to check for
5565 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5566 or we cannot check the node
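For example (sketch; the volume group name is hypothetical),
C{req_sizes = {"xenvg": 10240}} requires 10 GiB of free space in volume
group "xenvg" on every node in C{nodenames}.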
5569 for vg, req_size in req_sizes.items():
5570 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5573 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5574 """Checks if nodes have enough free disk space in the specified VG.
5576 This function checks if all given nodes have the needed amount of
5577 free disk. In case any node has less disk or we cannot get the
5578 information from the node, this function raises an OpPrereqError
5581 @type lu: C{LogicalUnit}
5582 @param lu: a logical unit from which we get configuration data
5583 @type nodenames: C{list}
5584 @param nodenames: the list of node names to check
5586 @param vg: the volume group to check
5587 @type requested: C{int}
5588 @param requested: the amount of disk in MiB to check for
5589 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5590 or we cannot check the node
5593 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5594 for node in nodenames:
5595 info = nodeinfo[node]
5596 info.Raise("Cannot get current information from node %s" % node,
5597 prereq=True, ecode=errors.ECODE_ENVIRON)
5598 vg_free = info.payload.get("vg_free", None)
5599 if not isinstance(vg_free, int):
5600 raise errors.OpPrereqError("Can't compute free disk space on node"
5601 " %s for vg %s, result was '%s'" %
5602 (node, vg, vg_free), errors.ECODE_ENVIRON)
5603 if requested > vg_free:
5604 raise errors.OpPrereqError("Not enough disk space on target node %s"
5605 " vg %s: required %d MiB, available %d MiB" %
5606 (node, vg, requested, vg_free),
5610 class LUInstanceStartup(LogicalUnit):
5611 """Starts an instance.
5614 HPATH = "instance-start"
5615 HTYPE = constants.HTYPE_INSTANCE
5618 def CheckArguments(self):
5620 if self.op.beparams:
5621 # fill the beparams dict
5622 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5624 def ExpandNames(self):
5625 self._ExpandAndLockInstance()
5627 def BuildHooksEnv(self):
5630 This runs on master, primary and secondary nodes of the instance.
5634 "FORCE": self.op.force,
5637 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5641 def BuildHooksNodes(self):
5642 """Build hooks nodes.
5645 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5648 def CheckPrereq(self):
5649 """Check prerequisites.
5651 This checks that the instance is in the cluster.
5654 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5655 assert self.instance is not None, \
5656 "Cannot retrieve locked instance %s" % self.op.instance_name
5659 if self.op.hvparams:
5660 # check hypervisor parameter syntax (locally)
5661 cluster = self.cfg.GetClusterInfo()
5662 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5663 filled_hvp = cluster.FillHV(instance)
5664 filled_hvp.update(self.op.hvparams)
5665 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5666 hv_type.CheckParameterSyntax(filled_hvp)
5667 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
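# In other words, the effective hypervisor parameters are layered: cluster
# defaults first (cluster.FillHV), then the per-opcode overrides from
# self.op.hvparams on top, and only the merged dict is validated, e.g.
# (values hypothetical):
#   filled_hvp = cluster.FillHV(instance)        # {"boot_order": "cd", ...}
#   filled_hvp.update({"boot_order": "network"}) # opcode override wins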
5669 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5671 if self.primary_offline and self.op.ignore_offline_nodes:
5672 self.proc.LogWarning("Ignoring offline primary node")
5674 if self.op.hvparams or self.op.beparams:
5675 self.proc.LogWarning("Overridden parameters are ignored")
5677 _CheckNodeOnline(self, instance.primary_node)
5679 bep = self.cfg.GetClusterInfo().FillBE(instance)
5681 # check bridges existence
5682 _CheckInstanceBridgesExist(self, instance)
5684 remote_info = self.rpc.call_instance_info(instance.primary_node,
5686 instance.hypervisor)
5687 remote_info.Raise("Error checking node %s" % instance.primary_node,
5688 prereq=True, ecode=errors.ECODE_ENVIRON)
5689 if not remote_info.payload: # not running already
5690 _CheckNodeFreeMemory(self, instance.primary_node,
5691 "starting instance %s" % instance.name,
5692 bep[constants.BE_MEMORY], instance.hypervisor)
5694 def Exec(self, feedback_fn):
5695 """Start the instance.
5698 instance = self.instance
5699 force = self.op.force
5701 if not self.op.no_remember:
5702 self.cfg.MarkInstanceUp(instance.name)
5704 if self.primary_offline:
5705 assert self.op.ignore_offline_nodes
5706 self.proc.LogInfo("Primary node offline, marked instance as started")
5708 node_current = instance.primary_node
5710 _StartInstanceDisks(self, instance, force)
5712 result = self.rpc.call_instance_start(node_current, instance,
5713 self.op.hvparams, self.op.beparams,
5714 self.op.startup_paused)
5715 msg = result.fail_msg
5717 _ShutdownInstanceDisks(self, instance)
5718 raise errors.OpExecError("Could not start instance: %s" % msg)
5721 class LUInstanceReboot(LogicalUnit):
5722 """Reboot an instance.
5725 HPATH = "instance-reboot"
5726 HTYPE = constants.HTYPE_INSTANCE
5729 def ExpandNames(self):
5730 self._ExpandAndLockInstance()
5732 def BuildHooksEnv(self):
5735 This runs on master, primary and secondary nodes of the instance.
5739 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5740 "REBOOT_TYPE": self.op.reboot_type,
5741 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5744 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5748 def BuildHooksNodes(self):
5749 """Build hooks nodes.
5752 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5755 def CheckPrereq(self):
5756 """Check prerequisites.
5758 This checks that the instance is in the cluster.
5761 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5762 assert self.instance is not None, \
5763 "Cannot retrieve locked instance %s" % self.op.instance_name
5765 _CheckNodeOnline(self, instance.primary_node)
5767 # check bridges existence
5768 _CheckInstanceBridgesExist(self, instance)
5770 def Exec(self, feedback_fn):
5771 """Reboot the instance.
5774 instance = self.instance
5775 ignore_secondaries = self.op.ignore_secondaries
5776 reboot_type = self.op.reboot_type
5778 remote_info = self.rpc.call_instance_info(instance.primary_node,
5780 instance.hypervisor)
5781 remote_info.Raise("Error checking node %s" % instance.primary_node)
5782 instance_running = bool(remote_info.payload)
5784 node_current = instance.primary_node
5786 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5787 constants.INSTANCE_REBOOT_HARD]:
5788 for disk in instance.disks:
5789 self.cfg.SetDiskID(disk, node_current)
5790 result = self.rpc.call_instance_reboot(node_current, instance,
5792 self.op.shutdown_timeout)
5793 result.Raise("Could not reboot instance")
5795 if instance_running:
5796 result = self.rpc.call_instance_shutdown(node_current, instance,
5797 self.op.shutdown_timeout)
5798 result.Raise("Could not shutdown instance for full reboot")
5799 _ShutdownInstanceDisks(self, instance)
5801 self.LogInfo("Instance %s was already stopped, starting now",
5803 _StartInstanceDisks(self, instance, ignore_secondaries)
5804 result = self.rpc.call_instance_start(node_current, instance,
5806 msg = result.fail_msg
5808 _ShutdownInstanceDisks(self, instance)
5809 raise errors.OpExecError("Could not start instance for"
5810 " full reboot: %s" % msg)
5812 self.cfg.MarkInstanceUp(instance.name)
5815 class LUInstanceShutdown(LogicalUnit):
5816 """Shutdown an instance.
5819 HPATH = "instance-stop"
5820 HTYPE = constants.HTYPE_INSTANCE
5823 def ExpandNames(self):
5824 self._ExpandAndLockInstance()
5826 def BuildHooksEnv(self):
5829 This runs on master, primary and secondary nodes of the instance.
5832 env = _BuildInstanceHookEnvByObject(self, self.instance)
5833 env["TIMEOUT"] = self.op.timeout
5836 def BuildHooksNodes(self):
5837 """Build hooks nodes.
5840 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5843 def CheckPrereq(self):
5844 """Check prerequisites.
5846 This checks that the instance is in the cluster.
5849 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5850 assert self.instance is not None, \
5851 "Cannot retrieve locked instance %s" % self.op.instance_name
5853 self.primary_offline = \
5854 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5856 if self.primary_offline and self.op.ignore_offline_nodes:
5857 self.proc.LogWarning("Ignoring offline primary node")
5859 _CheckNodeOnline(self, self.instance.primary_node)
5861 def Exec(self, feedback_fn):
5862 """Shutdown the instance.
5865 instance = self.instance
5866 node_current = instance.primary_node
5867 timeout = self.op.timeout
5869 if not self.op.no_remember:
5870 self.cfg.MarkInstanceDown(instance.name)
5872 if self.primary_offline:
5873 assert self.op.ignore_offline_nodes
5874 self.proc.LogInfo("Primary node offline, marked instance as stopped")
5876 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5877 msg = result.fail_msg
5879 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5881 _ShutdownInstanceDisks(self, instance)
5884 class LUInstanceReinstall(LogicalUnit):
5885 """Reinstall an instance.
5888 HPATH = "instance-reinstall"
5889 HTYPE = constants.HTYPE_INSTANCE
5892 def ExpandNames(self):
5893 self._ExpandAndLockInstance()
5895 def BuildHooksEnv(self):
5898 This runs on master, primary and secondary nodes of the instance.
5901 return _BuildInstanceHookEnvByObject(self, self.instance)
5903 def BuildHooksNodes(self):
5904 """Build hooks nodes.
5907 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5910 def CheckPrereq(self):
5911 """Check prerequisites.
5913 This checks that the instance is in the cluster and is not running.
5916 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5917 assert instance is not None, \
5918 "Cannot retrieve locked instance %s" % self.op.instance_name
5919 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5920 " offline, cannot reinstall")
5921 for node in instance.secondary_nodes:
5922 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5923 " cannot reinstall")
5925 if instance.disk_template == constants.DT_DISKLESS:
5926 raise errors.OpPrereqError("Instance '%s' has no disks" %
5927 self.op.instance_name,
5929 _CheckInstanceDown(self, instance, "cannot reinstall")
5931 if self.op.os_type is not None:
5933 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5934 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5935 instance_os = self.op.os_type
5937 instance_os = instance.os
5939 nodelist = list(instance.all_nodes)
5941 if self.op.osparams:
5942 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5943 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5944 self.os_inst = i_osdict # the new dict (without defaults)
5948 self.instance = instance
5950 def Exec(self, feedback_fn):
5951 """Reinstall the instance.
5954 inst = self.instance
5956 if self.op.os_type is not None:
5957 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5958 inst.os = self.op.os_type
5959 # Write to configuration
5960 self.cfg.Update(inst, feedback_fn)
5962 _StartInstanceDisks(self, inst, None)
5964 feedback_fn("Running the instance OS create scripts...")
5965 # FIXME: pass debug option from opcode to backend
5966 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5967 self.op.debug_level,
5968 osparams=self.os_inst)
5969 result.Raise("Could not install OS for instance %s on node %s" %
5970 (inst.name, inst.primary_node))
5972 _ShutdownInstanceDisks(self, inst)
5975 class LUInstanceRecreateDisks(LogicalUnit):
5976 """Recreate an instance's missing disks.
5979 HPATH = "instance-recreate-disks"
5980 HTYPE = constants.HTYPE_INSTANCE
5983 def CheckArguments(self):
5984 # normalise the disk list
5985 self.op.disks = sorted(frozenset(self.op.disks))
5987 def ExpandNames(self):
5988 self._ExpandAndLockInstance()
5989 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5991 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
5992 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
5994 self.needed_locks[locking.LEVEL_NODE] = []
5996 def DeclareLocks(self, level):
5997 if level == locking.LEVEL_NODE:
5998 # if we replace the nodes, we only need to lock the old primary,
5999 # otherwise we need to lock all nodes for disk re-creation
6000 primary_only = bool(self.op.nodes)
6001 self._LockInstancesNodes(primary_only=primary_only)
6003 def BuildHooksEnv(self):
6006 This runs on master, primary and secondary nodes of the instance.
6009 return _BuildInstanceHookEnvByObject(self, self.instance)
6011 def BuildHooksNodes(self):
6012 """Build hooks nodes.
6015 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6018 def CheckPrereq(self):
6019 """Check prerequisites.
6021 This checks that the instance is in the cluster and is not running.
6024 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6025 assert instance is not None, \
6026 "Cannot retrieve locked instance %s" % self.op.instance_name
6028 if len(self.op.nodes) != len(instance.all_nodes):
6029 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6030 " %d replacement nodes were specified" %
6031 (instance.name, len(instance.all_nodes),
6032 len(self.op.nodes)),
6034 assert instance.disk_template != constants.DT_DRBD8 or \
6035 len(self.op.nodes) == 2
6036 assert instance.disk_template != constants.DT_PLAIN or \
6037 len(self.op.nodes) == 1
6038 primary_node = self.op.nodes[0]
6040 primary_node = instance.primary_node
6041 _CheckNodeOnline(self, primary_node)
6043 if instance.disk_template == constants.DT_DISKLESS:
6044 raise errors.OpPrereqError("Instance '%s' has no disks" %
6045 self.op.instance_name, errors.ECODE_INVAL)
6046 # if we replace nodes *and* the old primary is offline, we don't check
6048 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6049 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6050 if not (self.op.nodes and old_pnode.offline):
6051 _CheckInstanceDown(self, instance, "cannot recreate disks")
6053 if not self.op.disks:
6054 self.op.disks = range(len(instance.disks))
6056 for idx in self.op.disks:
6057 if idx >= len(instance.disks):
6058 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6060 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6061 raise errors.OpPrereqError("Can't recreate disks partially and"
6062 " change the nodes at the same time",
6064 self.instance = instance
6066 def Exec(self, feedback_fn):
6067 """Recreate the disks.
6070 instance = self.instance
6073 mods = [] # keeps track of needed logical_id changes
6075 for idx, disk in enumerate(instance.disks):
6076 if idx not in self.op.disks: # disk idx has not been passed in
6079 # update secondaries for disks, if needed
6081 if disk.dev_type == constants.LD_DRBD8:
6082 # need to update the nodes and minors
6083 assert len(self.op.nodes) == 2
6084 assert len(disk.logical_id) == 6 # otherwise disk internals
6086 (_, _, old_port, _, _, old_secret) = disk.logical_id
6087 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6088 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6089 new_minors[0], new_minors[1], old_secret)
6090 assert len(disk.logical_id) == len(new_id)
6091 mods.append((idx, new_id))
6093 # now that we have passed all asserts above, we can apply the mods
6094 # in a single run (to avoid partial changes)
6095 for idx, new_id in mods:
6096 instance.disks[idx].logical_id = new_id
6098 # change primary node, if needed
6100 instance.primary_node = self.op.nodes[0]
6101 self.LogWarning("Changing the instance's nodes, you will have to"
6102 " remove any disks left on the older nodes manually")
6105 self.cfg.Update(instance, feedback_fn)
6107 _CreateDisks(self, instance, to_skip=to_skip)
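# For reference, a DRBD8 disk's logical_id is the 6-tuple handled above
# (example values hypothetical):
#
#   ("nodeA", "nodeB", 11000, 0, 1, "shared-secret")
#    primary, secondary, port, p_minor, s_minor, secret
#
# so moving the disks to new nodes only replaces the node names and minors
# while keeping the port and the shared secret.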
6110 class LUInstanceRename(LogicalUnit):
6111 """Rename an instance.
6114 HPATH = "instance-rename"
6115 HTYPE = constants.HTYPE_INSTANCE
6117 def CheckArguments(self):
6121 if self.op.ip_check and not self.op.name_check:
6122 # TODO: make the ip check more flexible and not depend on the name check
6123 raise errors.OpPrereqError("IP address check requires a name check",
6126 def BuildHooksEnv(self):
6129 This runs on master, primary and secondary nodes of the instance.
6132 env = _BuildInstanceHookEnvByObject(self, self.instance)
6133 env["INSTANCE_NEW_NAME"] = self.op.new_name
6136 def BuildHooksNodes(self):
6137 """Build hooks nodes.
6140 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6143 def CheckPrereq(self):
6144 """Check prerequisites.
6146 This checks that the instance is in the cluster and is not running.
6149 self.op.instance_name = _ExpandInstanceName(self.cfg,
6150 self.op.instance_name)
6151 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6152 assert instance is not None
6153 _CheckNodeOnline(self, instance.primary_node)
6154 _CheckInstanceDown(self, instance, "cannot rename")
6155 self.instance = instance
6157 new_name = self.op.new_name
6158 if self.op.name_check:
6159 hostname = netutils.GetHostname(name=new_name)
6160 if hostname != new_name:
6161 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6163 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6164 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6165 " same as given hostname '%s'") %
6166 (hostname.name, self.op.new_name),
6168 new_name = self.op.new_name = hostname.name
6169 if (self.op.ip_check and
6170 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6171 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6172 (hostname.ip, new_name),
6173 errors.ECODE_NOTUNIQUE)
6175 instance_list = self.cfg.GetInstanceList()
6176 if new_name in instance_list and new_name != instance.name:
6177 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6178 new_name, errors.ECODE_EXISTS)
6180 def Exec(self, feedback_fn):
6181 """Rename the instance.
6184 inst = self.instance
6185 old_name = inst.name
6187 rename_file_storage = False
6188 if (inst.disk_template in constants.DTS_FILEBASED and
6189 self.op.new_name != inst.name):
6190 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6191 rename_file_storage = True
6193 self.cfg.RenameInstance(inst.name, self.op.new_name)
6194 # Change the instance lock. This is definitely safe while we hold the BGL.
6195 # Otherwise the new lock would have to be added in acquired mode.
6197 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6198 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6200 # re-read the instance from the configuration after rename
6201 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6203 if rename_file_storage:
6204 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6205 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6206 old_file_storage_dir,
6207 new_file_storage_dir)
6208 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6209 " (but the instance has been renamed in Ganeti)" %
6210 (inst.primary_node, old_file_storage_dir,
6211 new_file_storage_dir))
6213 _StartInstanceDisks(self, inst, None)
6215 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6216 old_name, self.op.debug_level)
6217 msg = result.fail_msg
6219 msg = ("Could not run OS rename script for instance %s on node %s"
6220 " (but the instance has been renamed in Ganeti): %s" %
6221 (inst.name, inst.primary_node, msg))
6222 self.proc.LogWarning(msg)
6224 _ShutdownInstanceDisks(self, inst)
6229 class LUInstanceRemove(LogicalUnit):
6230 """Remove an instance.
6233 HPATH = "instance-remove"
6234 HTYPE = constants.HTYPE_INSTANCE
6237 def ExpandNames(self):
6238 self._ExpandAndLockInstance()
6239 self.needed_locks[locking.LEVEL_NODE] = []
6240 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6242 def DeclareLocks(self, level):
6243 if level == locking.LEVEL_NODE:
6244 self._LockInstancesNodes()
6246 def BuildHooksEnv(self):
6249 This runs on master, primary and secondary nodes of the instance.
6252 env = _BuildInstanceHookEnvByObject(self, self.instance)
6253 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6256 def BuildHooksNodes(self):
6257 """Build hooks nodes.
6260 nl = [self.cfg.GetMasterNode()]
6261 nl_post = list(self.instance.all_nodes) + nl
6262 return (nl, nl_post)
6264 def CheckPrereq(self):
6265 """Check prerequisites.
6267 This checks that the instance is in the cluster.
6270 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6271 assert self.instance is not None, \
6272 "Cannot retrieve locked instance %s" % self.op.instance_name
6274 def Exec(self, feedback_fn):
6275 """Remove the instance.
6278 instance = self.instance
6279 logging.info("Shutting down instance %s on node %s",
6280 instance.name, instance.primary_node)
6282 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6283 self.op.shutdown_timeout)
6284 msg = result.fail_msg
6286 if self.op.ignore_failures:
6287 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6289 raise errors.OpExecError("Could not shutdown instance %s on"
6291 (instance.name, instance.primary_node, msg))
6293 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6296 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6297 """Utility function to remove an instance.
6300 logging.info("Removing block devices for instance %s", instance.name)
6302 if not _RemoveDisks(lu, instance):
6303 if not ignore_failures:
6304 raise errors.OpExecError("Can't remove instance's disks")
6305 feedback_fn("Warning: can't remove instance's disks")
6307 logging.info("Removing instance %s out of cluster config", instance.name)
6309 lu.cfg.RemoveInstance(instance.name)
6311 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6312 "Instance lock removal conflict"
6314 # Remove lock for the instance
6315 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6318 class LUInstanceQuery(NoHooksLU):
6319 """Logical unit for querying instances.
6322 # pylint: disable-msg=W0142
6325 def CheckArguments(self):
6326 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6327 self.op.output_fields, self.op.use_locking)
6329 def ExpandNames(self):
6330 self.iq.ExpandNames(self)
6332 def DeclareLocks(self, level):
6333 self.iq.DeclareLocks(self, level)
6335 def Exec(self, feedback_fn):
6336 return self.iq.OldStyleQuery(self)
6339 class LUInstanceFailover(LogicalUnit):
6340 """Failover an instance.
6343 HPATH = "instance-failover"
6344 HTYPE = constants.HTYPE_INSTANCE
6347 def CheckArguments(self):
6348 """Check the arguments.
6351 self.iallocator = getattr(self.op, "iallocator", None)
6352 self.target_node = getattr(self.op, "target_node", None)
6354 def ExpandNames(self):
6355 self._ExpandAndLockInstance()
6357 if self.op.target_node is not None:
6358 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6360 self.needed_locks[locking.LEVEL_NODE] = []
6361 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6363 ignore_consistency = self.op.ignore_consistency
6364 shutdown_timeout = self.op.shutdown_timeout
6365 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6368 ignore_consistency=ignore_consistency,
6369 shutdown_timeout=shutdown_timeout)
6370 self.tasklets = [self._migrater]
6372 def DeclareLocks(self, level):
6373 if level == locking.LEVEL_NODE:
6374 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6375 if instance.disk_template in constants.DTS_EXT_MIRROR:
6376 if self.op.target_node is None:
6377 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6379 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6380 self.op.target_node]
6381 del self.recalculate_locks[locking.LEVEL_NODE]
6383 self._LockInstancesNodes()
6385 def BuildHooksEnv(self):
6388 This runs on master, primary and secondary nodes of the instance.
6391 instance = self._migrater.instance
6392 source_node = instance.primary_node
6393 target_node = self.op.target_node
6395 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6396 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6397 "OLD_PRIMARY": source_node,
6398 "NEW_PRIMARY": target_node,
6401 if instance.disk_template in constants.DTS_INT_MIRROR:
6402 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6403 env["NEW_SECONDARY"] = source_node
6405 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6407 env.update(_BuildInstanceHookEnvByObject(self, instance))
6411 def BuildHooksNodes(self):
6412 """Build hooks nodes.
6415 instance = self._migrater.instance
6416 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6417 return (nl, nl + [instance.primary_node])
6420 class LUInstanceMigrate(LogicalUnit):
6421 """Migrate an instance.
6423 This is migration without shutting down, compared to the failover,
6424 which is done with shutdown.
6427 HPATH = "instance-migrate"
6428 HTYPE = constants.HTYPE_INSTANCE
6431 def ExpandNames(self):
6432 self._ExpandAndLockInstance()
6434 if self.op.target_node is not None:
6435 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6437 self.needed_locks[locking.LEVEL_NODE] = []
6438 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6440 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6441 cleanup=self.op.cleanup,
6443 fallback=self.op.allow_failover)
6444 self.tasklets = [self._migrater]
6446 def DeclareLocks(self, level):
6447 if level == locking.LEVEL_NODE:
6448 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6449 if instance.disk_template in constants.DTS_EXT_MIRROR:
6450 if self.op.target_node is None:
6451 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6453 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6454 self.op.target_node]
6455 del self.recalculate_locks[locking.LEVEL_NODE]
6457 self._LockInstancesNodes()
6459 def BuildHooksEnv(self):
6462 This runs on master, primary and secondary nodes of the instance.
6465 instance = self._migrater.instance
6466 source_node = instance.primary_node
6467 target_node = self.op.target_node
6468 env = _BuildInstanceHookEnvByObject(self, instance)
6470 "MIGRATE_LIVE": self._migrater.live,
6471 "MIGRATE_CLEANUP": self.op.cleanup,
6472 "OLD_PRIMARY": source_node,
6473 "NEW_PRIMARY": target_node,
6476 if instance.disk_template in constants.DTS_INT_MIRROR:
6477 env["OLD_SECONDARY"] = target_node
6478 env["NEW_SECONDARY"] = source_node
6480 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6484 def BuildHooksNodes(self):
6485 """Build hooks nodes.
6488 instance = self._migrater.instance
6489 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6490 return (nl, nl + [instance.primary_node])
6493 class LUInstanceMove(LogicalUnit):
6494 """Move an instance by data-copying.
6497 HPATH = "instance-move"
6498 HTYPE = constants.HTYPE_INSTANCE
6501 def ExpandNames(self):
6502 self._ExpandAndLockInstance()
6503 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6504 self.op.target_node = target_node
6505 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6506 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6508 def DeclareLocks(self, level):
6509 if level == locking.LEVEL_NODE:
6510 self._LockInstancesNodes(primary_only=True)
6512 def BuildHooksEnv(self):
6515 This runs on master, primary and secondary nodes of the instance.
6519 "TARGET_NODE": self.op.target_node,
6520 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6522 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6525 def BuildHooksNodes(self):
6526 """Build hooks nodes.
6530 self.cfg.GetMasterNode(),
6531 self.instance.primary_node,
6532 self.op.target_node,
6536 def CheckPrereq(self):
6537 """Check prerequisites.
6539 This checks that the instance is in the cluster.
6542 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6543 assert self.instance is not None, \
6544 "Cannot retrieve locked instance %s" % self.op.instance_name
6546 node = self.cfg.GetNodeInfo(self.op.target_node)
6547 assert node is not None, \
6548 "Cannot retrieve locked node %s" % self.op.target_node
6550 self.target_node = target_node = node.name
6552 if target_node == instance.primary_node:
6553 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6554 (instance.name, target_node),
6557 bep = self.cfg.GetClusterInfo().FillBE(instance)
6559 for idx, dsk in enumerate(instance.disks):
6560 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6561 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6562 " cannot copy" % idx, errors.ECODE_STATE)
6564 _CheckNodeOnline(self, target_node)
6565 _CheckNodeNotDrained(self, target_node)
6566 _CheckNodeVmCapable(self, target_node)
6568 if instance.admin_up:
6569 # check memory requirements on the secondary node
6570 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6571 instance.name, bep[constants.BE_MEMORY],
6572 instance.hypervisor)
6574 self.LogInfo("Not checking memory on the secondary node as"
6575 " instance will not be started")
6577 # check bridge existence
6578 _CheckInstanceBridgesExist(self, instance, node=target_node)
6580 def Exec(self, feedback_fn):
6581 """Move an instance.
6583 The move is done by shutting it down on its present node, copying
6584 the data over (slow) and starting it on the new node.
6587 instance = self.instance
6589 source_node = instance.primary_node
6590 target_node = self.target_node
6592 self.LogInfo("Shutting down instance %s on source node %s",
6593 instance.name, source_node)
6595 result = self.rpc.call_instance_shutdown(source_node, instance,
6596 self.op.shutdown_timeout)
6597 msg = result.fail_msg
6599 if self.op.ignore_consistency:
6600 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6601 " Proceeding anyway. Please make sure node"
6602 " %s is down. Error details: %s",
6603 instance.name, source_node, source_node, msg)
6605 raise errors.OpExecError("Could not shutdown instance %s on"
6607 (instance.name, source_node, msg))
6609 # create the target disks
6611 _CreateDisks(self, instance, target_node=target_node)
6612 except errors.OpExecError:
6613 self.LogWarning("Device creation failed, reverting...")
6615 _RemoveDisks(self, instance, target_node=target_node)
6617 self.cfg.ReleaseDRBDMinors(instance.name)
6620 cluster_name = self.cfg.GetClusterInfo().cluster_name
6623 # activate, get path, copy the data over
6624 for idx, disk in enumerate(instance.disks):
6625 self.LogInfo("Copying data for disk %d", idx)
6626 result = self.rpc.call_blockdev_assemble(target_node, disk,
6627 instance.name, True, idx)
6629 self.LogWarning("Can't assemble newly created disk %d: %s",
6630 idx, result.fail_msg)
6631 errs.append(result.fail_msg)
6633 dev_path = result.payload
6634 result = self.rpc.call_blockdev_export(source_node, disk,
6635 target_node, dev_path,
6638 self.LogWarning("Can't copy data over for disk %d: %s",
6639 idx, result.fail_msg)
6640 errs.append(result.fail_msg)
6644 self.LogWarning("Some disks failed to copy, aborting")
6646 _RemoveDisks(self, instance, target_node=target_node)
6648 self.cfg.ReleaseDRBDMinors(instance.name)
6649 raise errors.OpExecError("Errors during disk copy: %s" %
6652 instance.primary_node = target_node
6653 self.cfg.Update(instance, feedback_fn)
6655 self.LogInfo("Removing the disks on the original node")
6656 _RemoveDisks(self, instance, target_node=source_node)
6658 # Only start the instance if it's marked as up
6659 if instance.admin_up:
6660 self.LogInfo("Starting instance %s on node %s",
6661 instance.name, target_node)
6663 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6664 ignore_secondaries=True)
6666 _ShutdownInstanceDisks(self, instance)
6667 raise errors.OpExecError("Can't activate the instance's disks")
6669 result = self.rpc.call_instance_start(target_node, instance,
6671 msg = result.fail_msg
6673 _ShutdownInstanceDisks(self, instance)
6674 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6675 (instance.name, target_node, msg))
6678 class LUNodeMigrate(LogicalUnit):
6679 """Migrate all instances from a node.
6682 HPATH = "node-migrate"
6683 HTYPE = constants.HTYPE_NODE
6686 def CheckArguments(self):
6689 def ExpandNames(self):
6690 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6692 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
6693 self.needed_locks = {
6694 locking.LEVEL_NODE: [self.op.node_name],
6697 def BuildHooksEnv(self):
6700 This runs on the master, the primary and all the secondaries.
6704 "NODE_NAME": self.op.node_name,
6707 def BuildHooksNodes(self):
6708 """Build hooks nodes.
6711 nl = [self.cfg.GetMasterNode()]
6714 def CheckPrereq(self):
6717 def Exec(self, feedback_fn):
6718 # Prepare jobs for migration instances
6720 [opcodes.OpInstanceMigrate(instance_name=inst.name,
6723 iallocator=self.op.iallocator,
6724 target_node=self.op.target_node)]
6725 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6728 # TODO: Run iallocator in this opcode and pass correct placement options to
6729 # OpInstanceMigrate. Since other jobs can modify the cluster between
6730 # running the iallocator and the actual migration, a good consistency model
6731 # will have to be found.
6733 assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
6734 frozenset([self.op.node_name]))
6736 return ResultWithJobs(jobs)
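# The returned ResultWithJobs wraps a list of single-opcode jobs, roughly
# (instance names hypothetical):
#
#   jobs = [[opcodes.OpInstanceMigrate(instance_name="inst1", ...)],
#           [opcodes.OpInstanceMigrate(instance_name="inst2", ...)]]
#
# i.e. one independent migration job per primary instance on the node.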
6739 class TLMigrateInstance(Tasklet):
6740 """Tasklet class for instance migration.
6743 @ivar live: whether the migration will be done live or non-live;
6744 this variable is initialized only after CheckPrereq has run
6745 @type cleanup: boolean
6746 @ivar cleanup: Whether we are cleaning up after a failed migration
6747 @type iallocator: string
6748 @ivar iallocator: The iallocator used to determine target_node
6749 @type target_node: string
6750 @ivar target_node: If given, the target_node to reallocate the instance to
6751 @type failover: boolean
6752 @ivar failover: Whether operation results in failover or migration
6753 @type fallback: boolean
6754 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
6756 @type ignore_consistency: boolean
6757 @ivar ignore_consistency: Whether we should ignore consistency between source and target node
6759 @type shutdown_timeout: int
6760 @ivar shutdown_timeout: In case of failover, the timeout to use for the shutdown
6763 def __init__(self, lu, instance_name, cleanup=False,
6764 failover=False, fallback=False,
6765 ignore_consistency=False,
6766 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6767 """Initializes this class.
6770 Tasklet.__init__(self, lu)
6773 self.instance_name = instance_name
6774 self.cleanup = cleanup
6775 self.live = False # will be overridden later
6776 self.failover = failover
6777 self.fallback = fallback
6778 self.ignore_consistency = ignore_consistency
6779 self.shutdown_timeout = shutdown_timeout
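# A minimal construction sketch (values hypothetical), similar to how the
# failover/migrate LUs wire the tasklet in their ExpandNames:
#
#   self._migrater = TLMigrateInstance(self, "inst1.example.com",
#                                      failover=True,
#                                      ignore_consistency=False,
#                                      shutdown_timeout=120)
#   self.tasklets = [self._migrater]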
6781 def CheckPrereq(self):
6782 """Check prerequisites.
6784 This checks that the instance is in the cluster.
6787 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6788 instance = self.cfg.GetInstanceInfo(instance_name)
6789 assert instance is not None
6790 self.instance = instance
6792 if (not self.cleanup and not instance.admin_up and not self.failover and
6794 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6796 self.failover = True
6798 if instance.disk_template not in constants.DTS_MIRRORED:
6803 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6804 " %s" % (instance.disk_template, text),
6807 if instance.disk_template in constants.DTS_EXT_MIRROR:
6808 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6810 if self.lu.op.iallocator:
6811 self._RunAllocator()
6813 # We set self.target_node as it is required by
6815 self.target_node = self.lu.op.target_node
6817 # self.target_node is already populated, either directly or by the iallocator run
6819 target_node = self.target_node
6820 if self.target_node == instance.primary_node:
6821 raise errors.OpPrereqError("Cannot migrate instance %s"
6822 " to its primary (%s)" %
6823 (instance.name, instance.primary_node))
6825 if len(self.lu.tasklets) == 1:
6826 # It is safe to release locks only when we're the only tasklet
6828 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6829 keep=[instance.primary_node, self.target_node])
6832 secondary_nodes = instance.secondary_nodes
6833 if not secondary_nodes:
6834 raise errors.ConfigurationError("No secondary node but using"
6835 " %s disk template" %
6836 instance.disk_template)
6837 target_node = secondary_nodes[0]
6838 if self.lu.op.iallocator or (self.lu.op.target_node and
6839 self.lu.op.target_node != target_node):
6841 text = "failed over"
6844 raise errors.OpPrereqError("Instances with disk template %s cannot"
6845 " be %s to arbitrary nodes"
6846 " (neither an iallocator nor a target"
6847 " node can be passed)" %
6848 (instance.disk_template, text),
6851 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6853 # check memory requirements on the secondary node
6854 if not self.failover or instance.admin_up:
6855 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6856 instance.name, i_be[constants.BE_MEMORY],
6857 instance.hypervisor)
6859 self.lu.LogInfo("Not checking memory on the secondary node as"
6860 " instance will not be started")
6862 # check bridge existence
6863 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6865 if not self.cleanup:
6866 _CheckNodeNotDrained(self.lu, target_node)
6867 if not self.failover:
6868 result = self.rpc.call_instance_migratable(instance.primary_node,
6870 if result.fail_msg and self.fallback:
6871 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6873 self.failover = True
6875 result.Raise("Can't migrate, please use failover",
6876 prereq=True, ecode=errors.ECODE_STATE)
6878 assert not (self.failover and self.cleanup)
6880 if not self.failover:
6881 if self.lu.op.live is not None and self.lu.op.mode is not None:
6882 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6883 " parameters are accepted",
6885 if self.lu.op.live is not None:
6887 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6889 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6890 # reset the 'live' parameter to None so that repeated
6891 # invocations of CheckPrereq do not raise an exception
6892 self.lu.op.live = None
6893 elif self.lu.op.mode is None:
6894 # read the default value from the hypervisor
6895 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6897 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6899 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6901 # Failover is never live
6904 def _RunAllocator(self):
6905 """Run the allocator based on input opcode.
6908 ial = IAllocator(self.cfg, self.rpc,
6909 mode=constants.IALLOCATOR_MODE_RELOC,
6910 name=self.instance_name,
6911 # TODO See why hail breaks with a single node below
6912 relocate_from=[self.instance.primary_node,
6913 self.instance.primary_node],
6916 ial.Run(self.lu.op.iallocator)
6919 raise errors.OpPrereqError("Can't compute nodes using"
6920 " iallocator '%s': %s" %
6921 (self.lu.op.iallocator, ial.info),
6923 if len(ial.result) != ial.required_nodes:
6924 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6925 " of nodes (%s), required %s" %
6926 (self.lu.op.iallocator, len(ial.result),
6927 ial.required_nodes), errors.ECODE_FAULT)
6928 self.target_node = ial.result[0]
6929 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6930 self.instance_name, self.lu.op.iallocator,
6931 utils.CommaJoin(ial.result))
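# On success ial.result is a list of candidate node names for the relocation,
# e.g. (hypothetical):
#
#   ial.result == ["node3.example.com"]
#
# and its first entry is taken as self.target_node.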
6933 def _WaitUntilSync(self):
6934 """Poll with custom rpc for disk sync.
6936 This uses our own step-based rpc call.
6939 self.feedback_fn("* wait until resync is done")
6943 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6945 self.instance.disks)
6947 for node, nres in result.items():
6948 nres.Raise("Cannot resync disks on node %s" % node)
6949 node_done, node_percent = nres.payload
6950 all_done = all_done and node_done
6951 if node_percent is not None:
6952 min_percent = min(min_percent, node_percent)
6954 if min_percent < 100:
6955 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6958 def _EnsureSecondary(self, node):
6959 """Demote a node to secondary.
6962 self.feedback_fn("* switching node %s to secondary mode" % node)
6964 for dev in self.instance.disks:
6965 self.cfg.SetDiskID(dev, node)
6967 result = self.rpc.call_blockdev_close(node, self.instance.name,
6968 self.instance.disks)
6969 result.Raise("Cannot change disk to secondary on node %s" % node)
6971 def _GoStandalone(self):
6972 """Disconnect from the network.
6975 self.feedback_fn("* changing into standalone mode")
6976 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6977 self.instance.disks)
6978 for node, nres in result.items():
6979 nres.Raise("Cannot disconnect disks node %s" % node)
6981 def _GoReconnect(self, multimaster):
6982 """Reconnect to the network.
6988 msg = "single-master"
6989 self.feedback_fn("* changing disks into %s mode" % msg)
6990 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6991 self.instance.disks,
6992 self.instance.name, multimaster)
6993 for node, nres in result.items():
6994 nres.Raise("Cannot change disks config on node %s" % node)
6996 def _ExecCleanup(self):
6997 """Try to cleanup after a failed migration.
6999 The cleanup is done by:
7000 - check that the instance is running only on one node
7001 (and update the config if needed)
7002 - change disks on its secondary node to secondary
7003 - wait until disks are fully synchronized
7004 - disconnect from the network
7005 - change disks into single-master mode
7006 - wait again until disks are fully synchronized
7009 instance = self.instance
7010 target_node = self.target_node
7011 source_node = self.source_node
7013 # check running on only one node
7014 self.feedback_fn("* checking where the instance actually runs"
7015 " (if this hangs, the hypervisor might be in"
7017 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7018 for node, result in ins_l.items():
7019 result.Raise("Can't contact node %s" % node)
7021 runningon_source = instance.name in ins_l[source_node].payload
7022 runningon_target = instance.name in ins_l[target_node].payload
7024 if runningon_source and runningon_target:
7025 raise errors.OpExecError("Instance seems to be running on two nodes,"
7026 " or the hypervisor is confused; you will have"
7027 " to ensure manually that it runs only on one"
7028 " and restart this operation")
7030 if not (runningon_source or runningon_target):
7031 raise errors.OpExecError("Instance does not seem to be running at all;"
7032 " in this case it's safer to repair by"
7033 " running 'gnt-instance stop' to ensure disk"
7034 " shutdown, and then restarting it")
7036 if runningon_target:
7037 # the migration has actually succeeded, we need to update the config
7038 self.feedback_fn("* instance running on secondary node (%s),"
7039 " updating config" % target_node)
7040 instance.primary_node = target_node
7041 self.cfg.Update(instance, self.feedback_fn)
7042 demoted_node = source_node
7044 self.feedback_fn("* instance confirmed to be running on its"
7045 " primary node (%s)" % source_node)
7046 demoted_node = target_node
7048 if instance.disk_template in constants.DTS_INT_MIRROR:
7049 self._EnsureSecondary(demoted_node)
7051 self._WaitUntilSync()
7052 except errors.OpExecError:
7053 # we ignore errors here, since if the device is standalone, it
7054 # won't be able to sync
7056 self._GoStandalone()
7057 self._GoReconnect(False)
7058 self._WaitUntilSync()
7060 self.feedback_fn("* done")
7062 def _RevertDiskStatus(self):
7063 """Try to revert the disk status after a failed migration.
7066 target_node = self.target_node
7067 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7071 self._EnsureSecondary(target_node)
7072 self._GoStandalone()
7073 self._GoReconnect(False)
7074 self._WaitUntilSync()
7075 except errors.OpExecError, err:
7076 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7077 " please try to recover the instance manually;"
7078 " error '%s'" % str(err))
7080 def _AbortMigration(self):
7081 """Call the hypervisor code to abort a started migration.
7084 instance = self.instance
7085 target_node = self.target_node
7086 migration_info = self.migration_info
7088 abort_result = self.rpc.call_finalize_migration(target_node,
7092 abort_msg = abort_result.fail_msg
7094 logging.error("Aborting migration failed on target node %s: %s",
7095 target_node, abort_msg)
7096 # Don't raise an exception here, as we still have to try to revert the
7097 # disk status, even if this step failed.
7099 def _ExecMigration(self):
7100 """Migrate an instance.
7102 The migration is done by:
7103 - change the disks into dual-master mode
7104 - wait until disks are fully synchronized again
7105 - migrate the instance
7106 - change disks on the new secondary node (the old primary) to secondary
7107 - wait until disks are fully synchronized
7108 - change disks into single-master mode
7111 instance = self.instance
7112 target_node = self.target_node
7113 source_node = self.source_node
7115 self.feedback_fn("* checking disk consistency between source and target")
7116 for dev in instance.disks:
7117 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7118 raise errors.OpExecError("Disk %s is degraded or not fully"
7119 " synchronized on target node,"
7120 " aborting migration" % dev.iv_name)
7122 # First get the migration information from the remote node
7123 result = self.rpc.call_migration_info(source_node, instance)
7124 msg = result.fail_msg
7126 log_err = ("Failed fetching source migration information from %s: %s" %
7128 logging.error(log_err)
7129 raise errors.OpExecError(log_err)
7131 self.migration_info = migration_info = result.payload
7133 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7134 # Then switch the disks to master/master mode
7135 self._EnsureSecondary(target_node)
7136 self._GoStandalone()
7137 self._GoReconnect(True)
7138 self._WaitUntilSync()
7140 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7141 result = self.rpc.call_accept_instance(target_node,
7144 self.nodes_ip[target_node])
7146 msg = result.fail_msg
7148 logging.error("Instance pre-migration failed, trying to revert"
7149 " disk status: %s", msg)
7150 self.feedback_fn("Pre-migration failed, aborting")
7151 self._AbortMigration()
7152 self._RevertDiskStatus()
7153 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7154 (instance.name, msg))
7156 self.feedback_fn("* migrating instance to %s" % target_node)
7157 result = self.rpc.call_instance_migrate(source_node, instance,
7158 self.nodes_ip[target_node],
7160 msg = result.fail_msg
7162 logging.error("Instance migration failed, trying to revert"
7163 " disk status: %s", msg)
7164 self.feedback_fn("Migration failed, aborting")
7165 self._AbortMigration()
7166 self._RevertDiskStatus()
7167 raise errors.OpExecError("Could not migrate instance %s: %s" %
7168 (instance.name, msg))
7170 instance.primary_node = target_node
7171 # distribute new instance config to the other nodes
7172 self.cfg.Update(instance, self.feedback_fn)
7174 result = self.rpc.call_finalize_migration(target_node,
7178 msg = result.fail_msg
7180 logging.error("Instance migration succeeded, but finalization failed:"
7182 raise errors.OpExecError("Could not finalize instance migration: %s" %
7185 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7186 self._EnsureSecondary(source_node)
7187 self._WaitUntilSync()
7188 self._GoStandalone()
7189 self._GoReconnect(False)
7190 self._WaitUntilSync()
7192 self.feedback_fn("* done")
7194 def _ExecFailover(self):
7195 """Failover an instance.
7197 The failover is done by shutting it down on its present node and
7198 starting it on the secondary.
7201 instance = self.instance
7202 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7204 source_node = instance.primary_node
7205 target_node = self.target_node
7207 if instance.admin_up:
7208 self.feedback_fn("* checking disk consistency between source and target")
7209 for dev in instance.disks:
7210 # for drbd, these are drbd over lvm
7211 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7212 if primary_node.offline:
7213 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7215 (primary_node.name, dev.iv_name, target_node))
7216 elif not self.ignore_consistency:
7217 raise errors.OpExecError("Disk %s is degraded on target node,"
7218 " aborting failover" % dev.iv_name)
7220 self.feedback_fn("* not checking disk consistency as instance is not"
7223 self.feedback_fn("* shutting down instance on source node")
7224 logging.info("Shutting down instance %s on node %s",
7225 instance.name, source_node)
7227 result = self.rpc.call_instance_shutdown(source_node, instance,
7228 self.shutdown_timeout)
7229 msg = result.fail_msg
7231 if self.ignore_consistency or primary_node.offline:
7232 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7233 " proceeding anyway; please make sure node"
7234 " %s is down; error details: %s",
7235 instance.name, source_node, source_node, msg)
7237 raise errors.OpExecError("Could not shutdown instance %s on"
7239 (instance.name, source_node, msg))
7241 self.feedback_fn("* deactivating the instance's disks on source node")
7242 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7243 raise errors.OpExecError("Can't shut down the instance's disks")
7245 instance.primary_node = target_node
7246 # distribute new instance config to the other nodes
7247 self.cfg.Update(instance, self.feedback_fn)
7249 # Only start the instance if it's marked as up
7250 if instance.admin_up:
7251 self.feedback_fn("* activating the instance's disks on target node %s" %
7253 logging.info("Starting instance %s on node %s",
7254 instance.name, target_node)
7256 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7257 ignore_secondaries=True)
7259 _ShutdownInstanceDisks(self.lu, instance)
7260 raise errors.OpExecError("Can't activate the instance's disks")
7262 self.feedback_fn("* starting the instance on the target node %s" %
7264 result = self.rpc.call_instance_start(target_node, instance, None, None,
7266 msg = result.fail_msg
7268 _ShutdownInstanceDisks(self.lu, instance)
7269 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7270 (instance.name, target_node, msg))
7272 def Exec(self, feedback_fn):
7273 """Perform the migration.
7276 self.feedback_fn = feedback_fn
7277 self.source_node = self.instance.primary_node
7279 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7280 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7281 self.target_node = self.instance.secondary_nodes[0]
7282 # Otherwise self.target_node has been populated either
7283 # directly, or through an iallocator.
7285 self.all_nodes = [self.source_node, self.target_node]
7287 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
7288 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
7292 feedback_fn("Failover instance %s" % self.instance.name)
7293 self._ExecFailover()
7295 feedback_fn("Migrating instance %s" % self.instance.name)
7298 return self._ExecCleanup()
7300 return self._ExecMigration()
7303 def _CreateBlockDev(lu, node, instance, device, force_create,
7305 """Create a tree of block devices on a given node.
7307 If this device type has to be created on secondaries, create it and all its children.
7310 If not, just recurse to children keeping the same 'force' value.
7312 @param lu: the lu on whose behalf we execute
7313 @param node: the node on which to create the device
7314 @type instance: L{objects.Instance}
7315 @param instance: the instance which owns the device
7316 @type device: L{objects.Disk}
7317 @param device: the device to create
7318 @type force_create: boolean
7319 @param force_create: whether to force creation of this device; this
7320 will be changed to True whenever we find a device which has
7321 the CreateOnSecondary() attribute
7322 @param info: the extra 'metadata' we should attach to the device
7323 (this will be represented as a LVM tag)
7324 @type force_open: boolean
7325 @param force_open: this parameter will be passed to the
7326 L{backend.BlockdevCreate} function where it specifies
7327 whether we run on primary or not, and it affects both
7328 the child assembly and the device's own Open() execution
7331 if device.CreateOnSecondary():
7335 for child in device.children:
7336 _CreateBlockDev(lu, node, instance, child, force_create,
7339 if not force_create:
7342 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7345 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7346 """Create a single block device on a given node.
7348 This will not recurse over children of the device, so they must be created in advance.
7351 @param lu: the lu on whose behalf we execute
7352 @param node: the node on which to create the device
7353 @type instance: L{objects.Instance}
7354 @param instance: the instance which owns the device
7355 @type device: L{objects.Disk}
7356 @param device: the device to create
7357 @param info: the extra 'metadata' we should attach to the device
7358 (this will be represented as a LVM tag)
7359 @type force_open: boolean
7360 @param force_open: this parameter will be passed to the
7361 L{backend.BlockdevCreate} function where it specifies
7362 whether we run on primary or not, and it affects both
7363 the child assembly and the device's own Open() execution
7366 lu.cfg.SetDiskID(device, node)
7367 result = lu.rpc.call_blockdev_create(node, device, device.size,
7368 instance.name, force_open, info)
7369 result.Raise("Can't create block device %s on"
7370 " node %s for instance %s" % (device, node, instance.name))
7371 if device.physical_id is None:
7372 device.physical_id = result.payload
7375 def _GenerateUniqueNames(lu, exts):
7376 """Generate a suitable LV name.
7378 This will generate a logical volume name for the given instance.
7383 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7384 results.append("%s%s" % (new_id, val))
7388 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7389 iv_name, p_minor, s_minor):
7390 """Generate a drbd8 device complete with its children.
7393 assert len(vgnames) == len(names) == 2
7394 port = lu.cfg.AllocatePort()
7395 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7396 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7397 logical_id=(vgnames[0], names[0]))
7398 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7399 logical_id=(vgnames[1], names[1]))
7400 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7401 logical_id=(primary, secondary, port,
7404 children=[dev_data, dev_meta],
7409 def _GenerateDiskTemplate(lu, template_name,
7410 instance_name, primary_node,
7411 secondary_nodes, disk_info,
7412 file_storage_dir, file_driver,
7413 base_index, feedback_fn):
7414 """Generate the entire disk layout for a given template type.
7417 #TODO: compute space requirements
7419 vgname = lu.cfg.GetVGName()
7420 disk_count = len(disk_info)
7422 if template_name == constants.DT_DISKLESS:
7424 elif template_name == constants.DT_PLAIN:
7425 if len(secondary_nodes) != 0:
7426 raise errors.ProgrammerError("Wrong template configuration")
7428 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7429 for i in range(disk_count)])
7430 for idx, disk in enumerate(disk_info):
7431 disk_index = idx + base_index
7432 vg = disk.get(constants.IDISK_VG, vgname)
7433 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7434 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7435 size=disk[constants.IDISK_SIZE],
7436 logical_id=(vg, names[idx]),
7437 iv_name="disk/%d" % disk_index,
7438 mode=disk[constants.IDISK_MODE])
7439 disks.append(disk_dev)
7440 elif template_name == constants.DT_DRBD8:
7441 if len(secondary_nodes) != 1:
7442 raise errors.ProgrammerError("Wrong template configuration")
7443 remote_node = secondary_nodes[0]
7444 minors = lu.cfg.AllocateDRBDMinor(
7445 [primary_node, remote_node] * len(disk_info), instance_name)
7448 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7449 for i in range(disk_count)]):
7450 names.append(lv_prefix + "_data")
7451 names.append(lv_prefix + "_meta")
7452 for idx, disk in enumerate(disk_info):
7453 disk_index = idx + base_index
7454 data_vg = disk.get(constants.IDISK_VG, vgname)
7455 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7456 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7457 disk[constants.IDISK_SIZE],
7459 names[idx * 2:idx * 2 + 2],
7460 "disk/%d" % disk_index,
7461 minors[idx * 2], minors[idx * 2 + 1])
7462 disk_dev.mode = disk[constants.IDISK_MODE]
7463 disks.append(disk_dev)
7464 elif template_name == constants.DT_FILE:
7465 if len(secondary_nodes) != 0:
7466 raise errors.ProgrammerError("Wrong template configuration")
7468 opcodes.RequireFileStorage()
7470 for idx, disk in enumerate(disk_info):
7471 disk_index = idx + base_index
7472 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7473 size=disk[constants.IDISK_SIZE],
7474 iv_name="disk/%d" % disk_index,
7475 logical_id=(file_driver,
7476 "%s/disk%d" % (file_storage_dir,
7478 mode=disk[constants.IDISK_MODE])
7479 disks.append(disk_dev)
7480 elif template_name == constants.DT_SHARED_FILE:
7481 if len(secondary_nodes) != 0:
7482 raise errors.ProgrammerError("Wrong template configuration")
7484 opcodes.RequireSharedFileStorage()
7486 for idx, disk in enumerate(disk_info):
7487 disk_index = idx + base_index
7488 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7489 size=disk[constants.IDISK_SIZE],
7490 iv_name="disk/%d" % disk_index,
7491 logical_id=(file_driver,
7492 "%s/disk%d" % (file_storage_dir,
7494 mode=disk[constants.IDISK_MODE])
7495 disks.append(disk_dev)
7496 elif template_name == constants.DT_BLOCK:
7497 if len(secondary_nodes) != 0:
7498 raise errors.ProgrammerError("Wrong template configuration")
7500 for idx, disk in enumerate(disk_info):
7501 disk_index = idx + base_index
7502 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7503 size=disk[constants.IDISK_SIZE],
7504 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7505 disk[constants.IDISK_ADOPT]),
7506 iv_name="disk/%d" % disk_index,
7507 mode=disk[constants.IDISK_MODE])
7508 disks.append(disk_dev)
7511 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
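# Sketch of the object tree produced per DRBD8 disk (sizes and VG names
# hypothetical): the top-level LD_DRBD8 device carries the network logical_id
# and has two children, the data LV plus a small 128 MiB metadata LV:
#
#   drbd_dev (LD_DRBD8, size=10240, iv_name="disk/0")
#     +- dev_data (LD_LV, size=10240, logical_id=("xenvg", "<uuid>.disk0_data"))
#     +- dev_meta (LD_LV, size=128,   logical_id=("xenvg", "<uuid>.disk0_meta"))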
7515 def _GetInstanceInfoText(instance):
7516 """Compute that text that should be added to the disk's metadata.
7519 return "originstname+%s" % instance.name
7522 def _CalcEta(time_taken, written, total_size):
7523 """Calculates the ETA based on size written and total size.
7525 @param time_taken: The time taken so far
7526 @param written: amount written so far
7527 @param total_size: The total size of data to be written
7528 @return: The remaining time in seconds
7531 avg_time = time_taken / float(written)
7532 return (total_size - written) * avg_time
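# Illustrative example (numbers are assumptions, not measurements): if 512 MiB
# out of 10240 MiB were written in 60 seconds, the average time per MiB is
# 60.0 / 512 and the remaining time is
#   _CalcEta(60.0, 512, 10240) == (10240 - 512) * (60.0 / 512) == 1140.0
# i.e. roughly 19 more minutes.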
7535 def _WipeDisks(lu, instance):
7536 """Wipes instance disks.
7538 @type lu: L{LogicalUnit}
7539 @param lu: the logical unit on whose behalf we execute
7540 @type instance: L{objects.Instance}
7541 @param instance: the instance whose disks we should wipe
7542 @return: the success of the wipe
7545 node = instance.primary_node
7547 for device in instance.disks:
7548 lu.cfg.SetDiskID(device, node)
7550 logging.info("Pause sync of instance %s disks", instance.name)
7551 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7553 for idx, success in enumerate(result.payload):
if not success:
7555 logging.warn("pause-sync of instance %s for disks %d failed",
instance.name, idx)
7559 for idx, device in enumerate(instance.disks):
7560 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7561 # MAX_WIPE_CHUNK at max
7562 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7563 constants.MIN_WIPE_CHUNK_PERCENT)
7564 # we _must_ make this an int, otherwise rounding errors will occur
7566 wipe_chunk_size = int(wipe_chunk_size)
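# Illustrative example (MAX_WIPE_CHUNK == 1024 MiB and MIN_WIPE_CHUNK_PERCENT
# == 10 are assumptions for this sketch): for a 102400 MiB disk the chunk size
# works out to
#   int(min(1024, 102400 / 100.0 * 10)) == int(min(1024, 10240.0)) == 1024
# so the MAX_WIPE_CHUNK cap applies; for a small 1000 MiB disk it would be
#   int(min(1024, 1000 / 100.0 * 10)) == 100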
7568 lu.LogInfo("* Wiping disk %d", idx)
7569 logging.info("Wiping disk %d for instance %s, node %s using"
7570 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
offset = 0
size = device.size
last_output = 0
7575 start_time = time.time()
7577 while offset < size:
7578 wipe_size = min(wipe_chunk_size, size - offset)
7579 logging.debug("Wiping disk %d, offset %s, chunk %s",
7580 idx, offset, wipe_size)
7581 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7582 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7583 (idx, offset, wipe_size))
now = time.time()
offset += wipe_size
7586 if now - last_output >= 60:
7587 eta = _CalcEta(now - start_time, offset, size)
7588 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7589 (offset / float(size) * 100, utils.FormatSeconds(eta)))
last_output = now
7592 logging.info("Resume sync of instance %s disks", instance.name)
7594 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7596 for idx, success in enumerate(result.payload):
if not success:
7598 lu.LogWarning("Resume sync of disk %d failed, please have a"
7599 " look at the status and troubleshoot the issue", idx)
7600 logging.warn("resume-sync of instance %s for disks %d failed",
instance.name, idx)
7604 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7605 """Create all disks for an instance.
7607 This abstracts away some work from AddInstance.
7609 @type lu: L{LogicalUnit}
7610 @param lu: the logical unit on whose behalf we execute
7611 @type instance: L{objects.Instance}
7612 @param instance: the instance whose disks we should create
7614 @param to_skip: list of indices to skip
7615 @type target_node: string
7616 @param target_node: if passed, overrides the target node for creation
7618 @return: the success of the creation
7621 info = _GetInstanceInfoText(instance)
7622 if target_node is None:
7623 pnode = instance.primary_node
7624 all_nodes = instance.all_nodes
else:
pnode = target_node
all_nodes = [pnode]
7629 if instance.disk_template in constants.DTS_FILEBASED:
7630 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7631 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7633 result.Raise("Failed to create directory '%s' on"
7634 " node %s" % (file_storage_dir, pnode))
7636 # Note: this needs to be kept in sync with adding of disks in
7637 # LUInstanceSetParams
7638 for idx, device in enumerate(instance.disks):
7639 if to_skip and idx in to_skip:
continue
7641 logging.info("Creating volume %s for instance %s",
7642 device.iv_name, instance.name)
7644 for node in all_nodes:
7645 f_create = node == pnode
7646 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
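# Illustrative sketch (node names made up): for a DRBD8 instance with primary
# "nodeA" and secondary "nodeB", the loop above ends up calling, per disk,
#   _CreateBlockDev(lu, "nodeA", instance, device, True, info, True)
#   _CreateBlockDev(lu, "nodeB", instance, device, False, info, False)
# i.e. the creation/open flags are requested only on the primary node.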
7649 def _RemoveDisks(lu, instance, target_node=None):
7650 """Remove all disks for an instance.
7652 This abstracts away some work from `AddInstance()` and
7653 `RemoveInstance()`. Note that in case some of the devices couldn't
7654 be removed, the removal will continue with the other ones (compare
7655 with `_CreateDisks()`).
7657 @type lu: L{LogicalUnit}
7658 @param lu: the logical unit on whose behalf we execute
7659 @type instance: L{objects.Instance}
7660 @param instance: the instance whose disks we should remove
7661 @type target_node: string
7662 @param target_node: used to override the node on which to remove the disks
7664 @return: the success of the removal
7667 logging.info("Removing block devices for instance %s", instance.name)
7670 for device in instance.disks:
if target_node:
7672 edata = [(target_node, device)]
else:
7674 edata = device.ComputeNodeTree(instance.primary_node)
7675 for node, disk in edata:
7676 lu.cfg.SetDiskID(disk, node)
7677 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
if msg:
7679 lu.LogWarning("Could not remove block device %s on node %s,"
7680 " continuing anyway: %s", device.iv_name, node, msg)
7683 if instance.disk_template == constants.DT_FILE:
7684 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
if target_node:
tgt = target_node
else:
7688 tgt = instance.primary_node
7689 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
if result.fail_msg:
7691 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7692 file_storage_dir, instance.primary_node, result.fail_msg)
7698 def _ComputeDiskSizePerVG(disk_template, disks):
7699 """Compute disk size requirements in the volume group
7702 def _compute(disks, payload):
7703 """Universal algorithm.
7708 vgs[disk[constants.IDISK_VG]] = \
7709 vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
7713 # Required free disk space as a function of disk template and disk sizes
req_size_dict = {
7715 constants.DT_DISKLESS: {},
7716 constants.DT_PLAIN: _compute(disks, 0),
7717 # 128 MB are added for drbd metadata for each disk
7718 constants.DT_DRBD8: _compute(disks, 128),
7719 constants.DT_FILE: {},
7720 constants.DT_SHARED_FILE: {},
constants.DT_BLOCK: {},
}
7723 if disk_template not in req_size_dict:
7724 raise errors.ProgrammerError("Disk template '%s' size requirement"
7725 " is unknown" % disk_template)
7727 return req_size_dict[disk_template]
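# Illustrative example (volume group names and sizes are assumptions): for two
# DRBD8 disks, 10240 MiB in "xenvg" and 2048 MiB in "othervg",
#   _ComputeDiskSizePerVG(constants.DT_DRBD8, disks)
# returns {"xenvg": 10240 + 128, "othervg": 2048 + 128}, i.e. each disk plus
# the 128 MiB of DRBD metadata, grouped by volume group.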
7730 def _ComputeDiskSize(disk_template, disks):
7731 """Compute disk size requirements in the volume group
7734 # Required free disk space as a function of disk template and disk sizes
req_size_dict = {
7736 constants.DT_DISKLESS: None,
7737 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7738 # 128 MB are added for drbd metadata for each disk
7739 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7740 constants.DT_FILE: None,
7741 constants.DT_SHARED_FILE: 0,
7742 constants.DT_BLOCK: 0,
}
7745 if disk_template not in req_size_dict:
7746 raise errors.ProgrammerError("Disk template '%s' size requirement"
7747 " is unknown" % disk_template)
7749 return req_size_dict[disk_template]
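# Illustrative example (sizes are assumptions): for disks of 10240 MiB and
# 2048 MiB,
#   _ComputeDiskSize(constants.DT_PLAIN, disks) == 10240 + 2048 == 12288
#   _ComputeDiskSize(constants.DT_DRBD8, disks) == (10240 + 128) + (2048 + 128)
#                                               == 12544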
7752 def _FilterVmNodes(lu, nodenames):
7753 """Filters out non-vm_capable nodes from a list.
7755 @type lu: L{LogicalUnit}
7756 @param lu: the logical unit for which we check
7757 @type nodenames: list
7758 @param nodenames: the list of nodes on which we should check
7760 @return: the list of vm-capable nodes
7763 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7764 return [name for name in nodenames if name not in vm_nodes]
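# Illustrative example (node names made up): if nodenames is
# ["node1.example.com", "node2.example.com"] and only the latter is marked
# non-vm_capable in the configuration,
#   _FilterVmNodes(lu, nodenames) == ["node1.example.com"]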
7767 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7768 """Hypervisor parameter validation.
7770 This function abstract the hypervisor parameter validation to be
7771 used in both instance create and instance modify.
7773 @type lu: L{LogicalUnit}
7774 @param lu: the logical unit for which we check
7775 @type nodenames: list
7776 @param nodenames: the list of nodes on which we should check
7777 @type hvname: string
7778 @param hvname: the name of the hypervisor we should use
7779 @type hvparams: dict
7780 @param hvparams: the parameters which we need to check
7781 @raise errors.OpPrereqError: if the parameters are not valid
7784 nodenames = _FilterVmNodes(lu, nodenames)
7785 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
hvname,
hvparams)
7788 for node in nodenames:
info = hvinfo[node]
if info.offline:
continue
7792 info.Raise("Hypervisor parameter validation failed on node %s" % node)
7795 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7796 """OS parameters validation.
7798 @type lu: L{LogicalUnit}
7799 @param lu: the logical unit for which we check
7800 @type required: boolean
7801 @param required: whether the validation should fail if the OS is not found
7803 @type nodenames: list
7804 @param nodenames: the list of nodes on which we should check
7805 @type osname: string
7806 @param osname: the name of the OS we should use
7807 @type osparams: dict
7808 @param osparams: the parameters which we need to check
7809 @raise errors.OpPrereqError: if the parameters are not valid
7812 nodenames = _FilterVmNodes(lu, nodenames)
7813 result = lu.rpc.call_os_validate(required, nodenames, osname,
7814 [constants.OS_VALIDATE_PARAMETERS],
osparams)
7816 for node, nres in result.items():
7817 # we don't check for offline cases since this should be run only
7818 # against the master node and/or an instance's nodes
7819 nres.Raise("OS Parameters validation failed on node %s" % node)
7820 if not nres.payload:
7821 lu.LogInfo("OS %s not found on node %s, validation skipped",
osname, node)
7825 class LUInstanceCreate(LogicalUnit):
7826 """Create an instance.
7829 HPATH = "instance-add"
7830 HTYPE = constants.HTYPE_INSTANCE
7833 def CheckArguments(self):
7837 # do not require name_check to ease forward/backward compatibility
7839 if self.op.no_install and self.op.start:
7840 self.LogInfo("No-installation mode selected, disabling startup")
7841 self.op.start = False
7842 # validate/normalize the instance name
7843 self.op.instance_name = \
7844 netutils.Hostname.GetNormalizedName(self.op.instance_name)
7846 if self.op.ip_check and not self.op.name_check:
7847 # TODO: make the ip check more flexible and not depend on the name check
7848 raise errors.OpPrereqError("Cannot do IP address check without a name"
7849 " check", errors.ECODE_INVAL)
7851 # check nics' parameter names
7852 for nic in self.op.nics:
7853 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7855 # check disks. parameter names and consistent adopt/no-adopt strategy
7856 has_adopt = has_no_adopt = False
7857 for disk in self.op.disks:
7858 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7859 if constants.IDISK_ADOPT in disk:
has_adopt = True
else:
has_no_adopt = True
7863 if has_adopt and has_no_adopt:
7864 raise errors.OpPrereqError("Either all disks are adopted or none is",
errors.ECODE_INVAL)
if has_adopt:
7867 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7868 raise errors.OpPrereqError("Disk adoption is not supported for the"
7869 " '%s' disk template" %
7870 self.op.disk_template,
7872 if self.op.iallocator is not None:
7873 raise errors.OpPrereqError("Disk adoption not allowed with an"
7874 " iallocator script", errors.ECODE_INVAL)
7875 if self.op.mode == constants.INSTANCE_IMPORT:
7876 raise errors.OpPrereqError("Disk adoption not allowed for"
7877 " instance import", errors.ECODE_INVAL)
7879 if self.op.disk_template in constants.DTS_MUST_ADOPT:
7880 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7881 " but no 'adopt' parameter given" %
7882 self.op.disk_template,
7885 self.adopt_disks = has_adopt
7887 # instance name verification
7888 if self.op.name_check:
7889 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7890 self.op.instance_name = self.hostname1.name
7891 # used in CheckPrereq for ip ping check
7892 self.check_ip = self.hostname1.ip
else:
7894 self.check_ip = None
7896 # file storage checks
7897 if (self.op.file_driver and
7898 not self.op.file_driver in constants.FILE_DRIVER):
7899 raise errors.OpPrereqError("Invalid file driver name '%s'" %
7900 self.op.file_driver, errors.ECODE_INVAL)
7902 if self.op.disk_template == constants.DT_FILE:
7903 opcodes.RequireFileStorage()
7904 elif self.op.disk_template == constants.DT_SHARED_FILE:
7905 opcodes.RequireSharedFileStorage()
7907 ### Node/iallocator related checks
7908 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7910 if self.op.pnode is not None:
7911 if self.op.disk_template in constants.DTS_INT_MIRROR:
7912 if self.op.snode is None:
7913 raise errors.OpPrereqError("The networked disk templates need"
7914 " a mirror node", errors.ECODE_INVAL)
elif self.op.snode:
7916 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
" template")
7918 self.op.snode = None
7920 self._cds = _GetClusterDomainSecret()
7922 if self.op.mode == constants.INSTANCE_IMPORT:
7923 # On import force_variant must be True, because if we forced it at
7924 # initial install, our only chance when importing it back is that it
# works again
7926 self.op.force_variant = True
7928 if self.op.no_install:
7929 self.LogInfo("No-installation mode has no effect during import")
7931 elif self.op.mode == constants.INSTANCE_CREATE:
7932 if self.op.os_type is None:
7933 raise errors.OpPrereqError("No guest OS specified",
7935 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7936 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7937 " installation" % self.op.os_type,
7939 if self.op.disk_template is None:
7940 raise errors.OpPrereqError("No disk template specified",
7943 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7944 # Check handshake to ensure both clusters have the same domain secret
7945 src_handshake = self.op.source_handshake
7946 if not src_handshake:
7947 raise errors.OpPrereqError("Missing source handshake",
7950 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
src_handshake)
if errmsg:
7953 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7956 # Load and check source CA
7957 self.source_x509_ca_pem = self.op.source_x509_ca
7958 if not self.source_x509_ca_pem:
7959 raise errors.OpPrereqError("Missing source X509 CA",
try:
7963 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
self._cds)
7965 except OpenSSL.crypto.Error, err:
7966 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7967 (err, ), errors.ECODE_INVAL)
7969 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7970 if errcode is not None:
7971 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7974 self.source_x509_ca = cert
7976 src_instance_name = self.op.source_instance_name
7977 if not src_instance_name:
7978 raise errors.OpPrereqError("Missing source instance name",
7981 self.source_instance_name = \
7982 netutils.GetHostname(name=src_instance_name).name
else:
7985 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7986 self.op.mode, errors.ECODE_INVAL)
7988 def ExpandNames(self):
7989 """ExpandNames for CreateInstance.
7991 Figure out the right locks for instance creation.
7994 self.needed_locks = {}
7996 instance_name = self.op.instance_name
7997 # this is just a preventive check, but someone might still add this
7998 # instance in the meantime, and creation will fail at lock-add time
7999 if instance_name in self.cfg.GetInstanceList():
8000 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8001 instance_name, errors.ECODE_EXISTS)
8003 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8005 if self.op.iallocator:
8006 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
else:
8008 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8009 nodelist = [self.op.pnode]
8010 if self.op.snode is not None:
8011 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8012 nodelist.append(self.op.snode)
8013 self.needed_locks[locking.LEVEL_NODE] = nodelist
8015 # in case of import lock the source node too
8016 if self.op.mode == constants.INSTANCE_IMPORT:
8017 src_node = self.op.src_node
8018 src_path = self.op.src_path
8020 if src_path is None:
8021 self.op.src_path = src_path = self.op.instance_name
8023 if src_node is None:
8024 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8025 self.op.src_node = None
8026 if os.path.isabs(src_path):
8027 raise errors.OpPrereqError("Importing an instance from an absolute"
8028 " path requires a source node option",
errors.ECODE_INVAL)
else:
8031 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8032 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8033 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8034 if not os.path.isabs(src_path):
8035 self.op.src_path = src_path = \
8036 utils.PathJoin(constants.EXPORT_DIR, src_path)
8038 def _RunAllocator(self):
8039 """Run the allocator based on input opcode.
8042 nics = [n.ToDict() for n in self.nics]
8043 ial = IAllocator(self.cfg, self.rpc,
8044 mode=constants.IALLOCATOR_MODE_ALLOC,
8045 name=self.op.instance_name,
8046 disk_template=self.op.disk_template,
8049 vcpus=self.be_full[constants.BE_VCPUS],
8050 memory=self.be_full[constants.BE_MEMORY],
disks=self.disks,
nics=nics,
8053 hypervisor=self.op.hypervisor,
)
8056 ial.Run(self.op.iallocator)
if not ial.success:
8059 raise errors.OpPrereqError("Can't compute nodes using"
8060 " iallocator '%s': %s" %
8061 (self.op.iallocator, ial.info),
8063 if len(ial.result) != ial.required_nodes:
8064 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8065 " of nodes (%s), required %s" %
8066 (self.op.iallocator, len(ial.result),
8067 ial.required_nodes), errors.ECODE_FAULT)
8068 self.op.pnode = ial.result[0]
8069 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8070 self.op.instance_name, self.op.iallocator,
8071 utils.CommaJoin(ial.result))
8072 if ial.required_nodes == 2:
8073 self.op.snode = ial.result[1]
8075 def BuildHooksEnv(self):
8078 This runs on master, primary and secondary nodes of the instance.
8082 "ADD_MODE": self.op.mode,
8084 if self.op.mode == constants.INSTANCE_IMPORT:
8085 env["SRC_NODE"] = self.op.src_node
8086 env["SRC_PATH"] = self.op.src_path
8087 env["SRC_IMAGES"] = self.src_images
8089 env.update(_BuildInstanceHookEnv(
8090 name=self.op.instance_name,
8091 primary_node=self.op.pnode,
8092 secondary_nodes=self.secondaries,
8093 status=self.op.start,
8094 os_type=self.op.os_type,
8095 memory=self.be_full[constants.BE_MEMORY],
8096 vcpus=self.be_full[constants.BE_VCPUS],
8097 nics=_NICListToTuple(self, self.nics),
8098 disk_template=self.op.disk_template,
8099 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8100 for d in self.disks],
bep=self.be_full,
hvp=self.hv_full,
8103 hypervisor_name=self.op.hypervisor,
))
return env
8109 def BuildHooksNodes(self):
8110 """Build hooks nodes.
8113 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
return nl, nl
8116 def _ReadExportInfo(self):
8117 """Reads the export information from disk.
8119 It will override the opcode source node and path with the actual
8120 information, if these two were not specified before.
8122 @return: the export information
8125 assert self.op.mode == constants.INSTANCE_IMPORT
8127 src_node = self.op.src_node
8128 src_path = self.op.src_path
8130 if src_node is None:
8131 locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
8132 exp_list = self.rpc.call_export_list(locked_nodes)
found = False
8134 for node in exp_list:
8135 if exp_list[node].fail_msg:
continue
8137 if src_path in exp_list[node].payload:
found = True
8139 self.op.src_node = src_node = node
8140 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
src_path)
break
if not found:
8144 raise errors.OpPrereqError("No export found for relative path %s" %
8145 src_path, errors.ECODE_INVAL)
8147 _CheckNodeOnline(self, src_node)
8148 result = self.rpc.call_export_info(src_node, src_path)
8149 result.Raise("No export or invalid export found in dir %s" % src_path)
8151 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8152 if not export_info.has_section(constants.INISECT_EXP):
8153 raise errors.ProgrammerError("Corrupted export config",
8154 errors.ECODE_ENVIRON)
8156 ei_version = export_info.get(constants.INISECT_EXP, "version")
8157 if (int(ei_version) != constants.EXPORT_VERSION):
8158 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8159 (ei_version, constants.EXPORT_VERSION),
8160 errors.ECODE_ENVIRON)
return export_info
8163 def _ReadExportParams(self, einfo):
8164 """Use export parameters as defaults.
8166 In case the opcode doesn't specify (as in override) some instance
8167 parameters, then try to use them from the export information, if
that declares them.
8171 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8173 if self.op.disk_template is None:
8174 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8175 self.op.disk_template = einfo.get(constants.INISECT_INS,
"disk_template")
else:
8178 raise errors.OpPrereqError("No disk template specified and the export"
8179 " is missing the disk_template information",
8182 if not self.op.disks:
8183 if einfo.has_option(constants.INISECT_INS, "disk_count"):
disks = []
8185 # TODO: import the disk iv_name too
8186 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8187 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8188 disks.append({constants.IDISK_SIZE: disk_sz})
8189 self.op.disks = disks
else:
8191 raise errors.OpPrereqError("No disk info specified and the export"
8192 " is missing the disk information",
8195 if (not self.op.nics and
8196 einfo.has_option(constants.INISECT_INS, "nic_count")):
nics = []
8198 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
ndict = {}
8200 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8201 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
ndict[name] = v
nics.append(ndict)
self.op.nics = nics
8206 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8207 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8209 if (self.op.hypervisor is None and
8210 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8211 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8213 if einfo.has_section(constants.INISECT_HYP):
8214 # use the export parameters but do not override the ones
8215 # specified by the user
8216 for name, value in einfo.items(constants.INISECT_HYP):
8217 if name not in self.op.hvparams:
8218 self.op.hvparams[name] = value
8220 if einfo.has_section(constants.INISECT_BEP):
8221 # use the parameters, without overriding
8222 for name, value in einfo.items(constants.INISECT_BEP):
8223 if name not in self.op.beparams:
8224 self.op.beparams[name] = value
8226 # try to read the parameters old style, from the main section
8227 for name in constants.BES_PARAMETERS:
8228 if (name not in self.op.beparams and
8229 einfo.has_option(constants.INISECT_INS, name)):
8230 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8232 if einfo.has_section(constants.INISECT_OSP):
8233 # use the parameters, without overriding
8234 for name, value in einfo.items(constants.INISECT_OSP):
8235 if name not in self.op.osparams:
8236 self.op.osparams[name] = value
8238 def _RevertToDefaults(self, cluster):
8239 """Revert the instance parameters to the default values.
8243 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8244 for name in self.op.hvparams.keys():
8245 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8246 del self.op.hvparams[name]
8248 be_defs = cluster.SimpleFillBE({})
8249 for name in self.op.beparams.keys():
8250 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8251 del self.op.beparams[name]
8253 nic_defs = cluster.SimpleFillNIC({})
8254 for nic in self.op.nics:
8255 for name in constants.NICS_PARAMETERS:
8256 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
del nic[name]
8259 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8260 for name in self.op.osparams.keys():
8261 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8262 del self.op.osparams[name]
8264 def _CalculateFileStorageDir(self):
8265 """Calculate final instance file storage dir.
8268 # file storage dir calculation/check
8269 self.instance_file_storage_dir = None
8270 if self.op.disk_template in constants.DTS_FILEBASED:
8271 # build the full file storage dir path
joinargs = []
8274 if self.op.disk_template == constants.DT_SHARED_FILE:
8275 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8277 get_fsd_fn = self.cfg.GetFileStorageDir
8279 cfg_storagedir = get_fsd_fn()
8280 if not cfg_storagedir:
8281 raise errors.OpPrereqError("Cluster file storage dir not defined")
8282 joinargs.append(cfg_storagedir)
8284 if self.op.file_storage_dir is not None:
8285 joinargs.append(self.op.file_storage_dir)
8287 joinargs.append(self.op.instance_name)
8289 # pylint: disable-msg=W0142
8290 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
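# Illustrative example (paths and names are assumptions): with a cluster file
# storage directory of "/srv/ganeti/file-storage", no file_storage_dir in the
# opcode and an instance named "inst1.example.com", the result is
#   utils.PathJoin("/srv/ganeti/file-storage", "inst1.example.com")
# i.e. "/srv/ganeti/file-storage/inst1.example.com".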
8292 def CheckPrereq(self):
8293 """Check prerequisites.
8296 self._CalculateFileStorageDir()
8298 if self.op.mode == constants.INSTANCE_IMPORT:
8299 export_info = self._ReadExportInfo()
8300 self._ReadExportParams(export_info)
8302 if (not self.cfg.GetVGName() and
8303 self.op.disk_template not in constants.DTS_NOT_LVM):
8304 raise errors.OpPrereqError("Cluster does not support lvm-based"
8305 " instances", errors.ECODE_STATE)
8307 if self.op.hypervisor is None:
8308 self.op.hypervisor = self.cfg.GetHypervisorType()
8310 cluster = self.cfg.GetClusterInfo()
8311 enabled_hvs = cluster.enabled_hypervisors
8312 if self.op.hypervisor not in enabled_hvs:
8313 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8314 " cluster (%s)" % (self.op.hypervisor,
8315 ",".join(enabled_hvs)),
8318 # Check tag validity
8319 for tag in self.op.tags:
8320 objects.TaggableObject.ValidateTag(tag)
8322 # check hypervisor parameter syntax (locally)
8323 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8324 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8326 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8327 hv_type.CheckParameterSyntax(filled_hvp)
8328 self.hv_full = filled_hvp
8329 # check that we don't specify global parameters on an instance
8330 _CheckGlobalHvParams(self.op.hvparams)
8332 # fill and remember the beparams dict
8333 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8334 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8336 # build os parameters
8337 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8339 # now that hvp/bep are in final format, let's reset to defaults,
# if requested
8341 if self.op.identify_defaults:
8342 self._RevertToDefaults(cluster)
self.nics = []
8346 for idx, nic in enumerate(self.op.nics):
8347 nic_mode_req = nic.get(constants.INIC_MODE, None)
8348 nic_mode = nic_mode_req
8349 if nic_mode is None:
8350 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8352 # in routed mode, for the first nic, the default ip is 'auto'
8353 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8354 default_ip_mode = constants.VALUE_AUTO
8356 default_ip_mode = constants.VALUE_NONE
8358 # ip validity checks
8359 ip = nic.get(constants.INIC_IP, default_ip_mode)
8360 if ip is None or ip.lower() == constants.VALUE_NONE:
nic_ip = None
8362 elif ip.lower() == constants.VALUE_AUTO:
8363 if not self.op.name_check:
8364 raise errors.OpPrereqError("IP address set to auto but name checks"
8365 " have been skipped",
8367 nic_ip = self.hostname1.ip
else:
8369 if not netutils.IPAddress.IsValid(ip):
8370 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
nic_ip = ip
8374 # TODO: check the ip address for uniqueness
8375 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8376 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8379 # MAC address verification
8380 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8381 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8382 mac = utils.NormalizeAndValidateMac(mac)
try:
8385 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8386 except errors.ReservationError:
8387 raise errors.OpPrereqError("MAC address %s already in use"
8388 " in cluster" % mac,
8389 errors.ECODE_NOTUNIQUE)
8391 # Build nic parameters
8392 link = nic.get(constants.INIC_LINK, None)
nicparams = {}
if nic_mode_req:
8395 nicparams[constants.NIC_MODE] = nic_mode_req
if link:
8397 nicparams[constants.NIC_LINK] = link
8399 check_params = cluster.SimpleFillNIC(nicparams)
8400 objects.NIC.CheckParameterSyntax(check_params)
8401 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8403 # disk checks/pre-build
8404 default_vg = self.cfg.GetVGName()
self.disks = []
8406 for disk in self.op.disks:
8407 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8408 if mode not in constants.DISK_ACCESS_SET:
8409 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8410 mode, errors.ECODE_INVAL)
8411 size = disk.get(constants.IDISK_SIZE, None)
if size is None:
8413 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
try:
size = int(size)
8416 except (TypeError, ValueError):
8417 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8420 data_vg = disk.get(constants.IDISK_VG, default_vg)
new_disk = {
8422 constants.IDISK_SIZE: size,
8423 constants.IDISK_MODE: mode,
8424 constants.IDISK_VG: data_vg,
8425 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
}
8427 if constants.IDISK_ADOPT in disk:
8428 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8429 self.disks.append(new_disk)
8431 if self.op.mode == constants.INSTANCE_IMPORT:
8433 # Check that the new instance doesn't have less disks than the export
8434 instance_disks = len(self.disks)
8435 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8436 if instance_disks < export_disks:
8437 raise errors.OpPrereqError("Not enough disks to import."
8438 " (instance: %d, export: %d)" %
8439 (instance_disks, export_disks),
disk_images = []
8443 for idx in range(export_disks):
8444 option = "disk%d_dump" % idx
8445 if export_info.has_option(constants.INISECT_INS, option):
8446 # FIXME: are the old os-es, disk sizes, etc. useful?
8447 export_name = export_info.get(constants.INISECT_INS, option)
8448 image = utils.PathJoin(self.op.src_path, export_name)
8449 disk_images.append(image)
else:
8451 disk_images.append(False)
8453 self.src_images = disk_images
8455 old_name = export_info.get(constants.INISECT_INS, "name")
try:
8457 exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8458 except (TypeError, ValueError), err:
8459 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8460 " an integer: %s" % str(err),
8462 if self.op.instance_name == old_name:
8463 for idx, nic in enumerate(self.nics):
8464 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8465 nic_mac_ini = "nic%d_mac" % idx
8466 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8468 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8470 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8471 if self.op.ip_check:
8472 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8473 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8474 (self.check_ip, self.op.instance_name),
8475 errors.ECODE_NOTUNIQUE)
8477 #### mac address generation
8478 # By generating here the mac address both the allocator and the hooks get
8479 # the real final mac address rather than the 'auto' or 'generate' value.
8480 # There is a race condition between the generation and the instance object
8481 # creation, which means that we know the mac is valid now, but we're not
8482 # sure it will be when we actually add the instance. If things go bad
8483 # adding the instance will abort because of a duplicate mac, and the
8484 # creation job will fail.
8485 for nic in self.nics:
8486 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8487 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8491 if self.op.iallocator is not None:
8492 self._RunAllocator()
8494 #### node related checks
8496 # check primary node
8497 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8498 assert self.pnode is not None, \
8499 "Cannot retrieve locked node %s" % self.op.pnode
if pnode.offline:
8501 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8502 pnode.name, errors.ECODE_STATE)
if pnode.drained:
8504 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8505 pnode.name, errors.ECODE_STATE)
8506 if not pnode.vm_capable:
8507 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8508 " '%s'" % pnode.name, errors.ECODE_STATE)
8510 self.secondaries = []
8512 # mirror node verification
8513 if self.op.disk_template in constants.DTS_INT_MIRROR:
8514 if self.op.snode == pnode.name:
8515 raise errors.OpPrereqError("The secondary node cannot be the"
8516 " primary node", errors.ECODE_INVAL)
8517 _CheckNodeOnline(self, self.op.snode)
8518 _CheckNodeNotDrained(self, self.op.snode)
8519 _CheckNodeVmCapable(self, self.op.snode)
8520 self.secondaries.append(self.op.snode)
8522 nodenames = [pnode.name] + self.secondaries
8524 if not self.adopt_disks:
8525 # Check lv size requirements, if not adopting
8526 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8527 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8529 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8530 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8531 disk[constants.IDISK_ADOPT])
8532 for disk in self.disks])
8533 if len(all_lvs) != len(self.disks):
8534 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8536 for lv_name in all_lvs:
try:
8538 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8539 # to ReserveLV uses the same syntax
8540 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8541 except errors.ReservationError:
8542 raise errors.OpPrereqError("LV named %s used by another instance" %
8543 lv_name, errors.ECODE_NOTUNIQUE)
8545 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8546 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8548 node_lvs = self.rpc.call_lv_list([pnode.name],
8549 vg_names.payload.keys())[pnode.name]
8550 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8551 node_lvs = node_lvs.payload
8553 delta = all_lvs.difference(node_lvs.keys())
if delta:
8555 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8556 utils.CommaJoin(delta),
8558 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
if online_lvs:
8560 raise errors.OpPrereqError("Online logical volumes found, cannot"
8561 " adopt: %s" % utils.CommaJoin(online_lvs),
8563 # update the size of disk based on what is found
8564 for dsk in self.disks:
8565 dsk[constants.IDISK_SIZE] = \
8566 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8567 dsk[constants.IDISK_ADOPT])][0]))
8569 elif self.op.disk_template == constants.DT_BLOCK:
8570 # Normalize and de-duplicate device paths
8571 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8572 for disk in self.disks])
8573 if len(all_disks) != len(self.disks):
8574 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8576 baddisks = [d for d in all_disks
8577 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
if baddisks:
8579 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8580 " cannot be adopted" %
8581 (", ".join(baddisks),
8582 constants.ADOPTABLE_BLOCKDEV_ROOT),
8585 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8586 list(all_disks))[pnode.name]
8587 node_disks.Raise("Cannot get block device information from node %s" %
8589 node_disks = node_disks.payload
8590 delta = all_disks.difference(node_disks.keys())
if delta:
8592 raise errors.OpPrereqError("Missing block device(s): %s" %
8593 utils.CommaJoin(delta),
8595 for dsk in self.disks:
8596 dsk[constants.IDISK_SIZE] = \
8597 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8599 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8601 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8602 # check OS parameters (remotely)
8603 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8605 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8607 # memory check on primary node
if self.op.start:
8609 _CheckNodeFreeMemory(self, self.pnode.name,
8610 "creating instance %s" % self.op.instance_name,
8611 self.be_full[constants.BE_MEMORY],
self.op.hypervisor)
8614 self.dry_run_result = list(nodenames)
8616 def Exec(self, feedback_fn):
8617 """Create and add the instance to the cluster.
8620 instance = self.op.instance_name
8621 pnode_name = self.pnode.name
8623 ht_kind = self.op.hypervisor
8624 if ht_kind in constants.HTS_REQ_PORT:
8625 network_port = self.cfg.AllocatePort()
else:
network_port = None
8629 disks = _GenerateDiskTemplate(self,
8630 self.op.disk_template,
8631 instance, pnode_name,
self.secondaries,
self.disks,
8634 self.instance_file_storage_dir,
8635 self.op.file_driver,
0,
feedback_fn)
8639 iobj = objects.Instance(name=instance, os=self.op.os_type,
8640 primary_node=pnode_name,
8641 nics=self.nics, disks=disks,
8642 disk_template=self.op.disk_template,
8644 network_port=network_port,
8645 beparams=self.op.beparams,
8646 hvparams=self.op.hvparams,
8647 hypervisor=self.op.hypervisor,
8648 osparams=self.op.osparams,
)
if self.op.tags:
8652 for tag in self.op.tags:
iobj.AddTag(tag)
8655 if self.adopt_disks:
8656 if self.op.disk_template == constants.DT_PLAIN:
8657 # rename LVs to the newly-generated names; we need to construct
8658 # 'fake' LV disks with the old data, plus the new unique_id
8659 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
rename_to = []
8661 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8662 rename_to.append(t_dsk.logical_id)
8663 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8664 self.cfg.SetDiskID(t_dsk, pnode_name)
8665 result = self.rpc.call_blockdev_rename(pnode_name,
8666 zip(tmp_disks, rename_to))
8667 result.Raise("Failed to rename adopted LVs")
else:
8669 feedback_fn("* creating instance disks...")
try:
8671 _CreateDisks(self, iobj)
8672 except errors.OpExecError:
8673 self.LogWarning("Device creation failed, reverting...")
try:
8675 _RemoveDisks(self, iobj)
finally:
8677 self.cfg.ReleaseDRBDMinors(instance)
raise
8680 feedback_fn("adding instance %s to cluster config" % instance)
8682 self.cfg.AddInstance(iobj, self.proc.GetECId())
8684 # Declare that we don't want to remove the instance lock anymore, as we've
8685 # added the instance to the config
8686 del self.remove_locks[locking.LEVEL_INSTANCE]
8688 if self.op.mode == constants.INSTANCE_IMPORT:
8689 # Release unused nodes
8690 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
else:
# Release all nodes
8693 _ReleaseLocks(self, locking.LEVEL_NODE)
disk_abort = False
8696 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8697 feedback_fn("* wiping instance disks...")
try:
8699 _WipeDisks(self, iobj)
8700 except errors.OpExecError, err:
8701 logging.exception("Wiping disks failed")
8702 self.LogWarning("Wiping instance disks failed (%s)", err)
disk_abort = True
if disk_abort:
8706 # Something is already wrong with the disks, don't do anything else
pass
8708 elif self.op.wait_for_sync:
8709 disk_abort = not _WaitForSync(self, iobj)
8710 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8711 # make sure the disks are not degraded (still sync-ing is ok)
8713 feedback_fn("* checking mirrors status")
8714 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
else:
disk_abort = False
if disk_abort:
8719 _RemoveDisks(self, iobj)
8720 self.cfg.RemoveInstance(iobj.name)
8721 # Make sure the instance lock gets removed
8722 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8723 raise errors.OpExecError("There are some degraded disks for"
" this instance")
8726 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8727 if self.op.mode == constants.INSTANCE_CREATE:
8728 if not self.op.no_install:
8729 feedback_fn("* running the instance OS create scripts...")
8730 # FIXME: pass debug option from opcode to backend
8731 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8732 self.op.debug_level)
8733 result.Raise("Could not add os for instance %s"
8734 " on node %s" % (instance, pnode_name))
8736 elif self.op.mode == constants.INSTANCE_IMPORT:
8737 feedback_fn("* running the instance OS import scripts...")
transfers = []
8741 for idx, image in enumerate(self.src_images):
if not image:
continue
8745 # FIXME: pass debug option from opcode to backend
8746 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8747 constants.IEIO_FILE, (image, ),
8748 constants.IEIO_SCRIPT,
8749 (iobj.disks[idx], idx),
None)
8751 transfers.append(dt)
import_result = \
8754 masterd.instance.TransferInstanceData(self, feedback_fn,
8755 self.op.src_node, pnode_name,
8756 self.pnode.secondary_ip,
iobj, transfers)
8758 if not compat.all(import_result):
8759 self.LogWarning("Some disks for instance %s on node %s were not"
8760 " imported successfully" % (instance, pnode_name))
8762 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8763 feedback_fn("* preparing remote import...")
8764 # The source cluster will stop the instance before attempting to make a
8765 # connection. In some cases stopping an instance can take a long time,
8766 # hence the shutdown timeout is added to the connection timeout.
8767 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8768 self.op.source_shutdown_timeout)
8769 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8771 assert iobj.primary_node == self.pnode.name
disk_results = \
8773 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8774 self.source_x509_ca,
8775 self._cds, timeouts)
8776 if not compat.all(disk_results):
8777 # TODO: Should the instance still be started, even if some disks
8778 # failed to import (valid for local imports, too)?
8779 self.LogWarning("Some disks for instance %s on node %s were not"
8780 " imported successfully" % (instance, pnode_name))
8782 # Run rename script on newly imported instance
8783 assert iobj.name == instance
8784 feedback_fn("Running rename script for %s" % instance)
8785 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8786 self.source_instance_name,
8787 self.op.debug_level)
if result.fail_msg:
8789 self.LogWarning("Failed to run rename script for %s on node"
8790 " %s: %s" % (instance, pnode_name, result.fail_msg))
else:
8793 # also checked in the prereq part
8794 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
% self.op.mode)
if self.op.start:
8798 iobj.admin_up = True
8799 self.cfg.Update(iobj, feedback_fn)
8800 logging.info("Starting instance %s on node %s", instance, pnode_name)
8801 feedback_fn("* starting instance...")
8802 result = self.rpc.call_instance_start(pnode_name, iobj,
8804 result.Raise("Could not start instance")
8806 return list(iobj.all_nodes)
8809 class LUInstanceConsole(NoHooksLU):
8810 """Connect to an instance's console.
8812 This is somewhat special in that it returns the command line that
8813 you need to run on the master node in order to connect to the
console.
8819 def ExpandNames(self):
8820 self._ExpandAndLockInstance()
8822 def CheckPrereq(self):
8823 """Check prerequisites.
8825 This checks that the instance is in the cluster.
8828 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8829 assert self.instance is not None, \
8830 "Cannot retrieve locked instance %s" % self.op.instance_name
8831 _CheckNodeOnline(self, self.instance.primary_node)
8833 def Exec(self, feedback_fn):
8834 """Connect to the console of an instance
8837 instance = self.instance
8838 node = instance.primary_node
8840 node_insts = self.rpc.call_instance_list([node],
8841 [instance.hypervisor])[node]
8842 node_insts.Raise("Can't get node information from %s" % node)
8844 if instance.name not in node_insts.payload:
8845 if instance.admin_up:
8846 state = constants.INSTST_ERRORDOWN
else:
8848 state = constants.INSTST_ADMINDOWN
8849 raise errors.OpExecError("Instance %s is not running (state %s)" %
8850 (instance.name, state))
8852 logging.debug("Connecting to console of %s on %s", instance.name, node)
8854 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8857 def _GetInstanceConsole(cluster, instance):
8858 """Returns console information for an instance.
8860 @type cluster: L{objects.Cluster}
8861 @type instance: L{objects.Instance}
8865 hyper = hypervisor.GetHypervisor(instance.hypervisor)
8866 # beparams and hvparams are passed separately, to avoid editing the
8867 # instance and then saving the defaults in the instance itself.
8868 hvparams = cluster.FillHV(instance)
8869 beparams = cluster.FillBE(instance)
8870 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8872 assert console.instance == instance.name
8873 assert console.Validate()
8875 return console.ToDict()
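# Illustrative usage sketch (caller-side, not part of this module):
#   cluster = self.cfg.GetClusterInfo()
#   instance = self.cfg.GetInstanceInfo(instance_name)
#   console = _GetInstanceConsole(cluster, instance)
# 'console' is the dictionary form of an objects.InstanceConsole, suitable for
# returning to clients that know how to open the console.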
8878 class LUInstanceReplaceDisks(LogicalUnit):
8879 """Replace the disks of an instance.
8882 HPATH = "mirrors-replace"
8883 HTYPE = constants.HTYPE_INSTANCE
8886 def CheckArguments(self):
8887 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
self.op.iallocator)
8890 def ExpandNames(self):
8891 self._ExpandAndLockInstance()
8893 assert locking.LEVEL_NODE not in self.needed_locks
8894 assert locking.LEVEL_NODEGROUP not in self.needed_locks
8896 assert self.op.iallocator is None or self.op.remote_node is None, \
8897 "Conflicting options"
8899 if self.op.remote_node is not None:
8900 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8902 # Warning: do not remove the locking of the new secondary here
8903 # unless DRBD8.AddChildren is changed to work in parallel;
8904 # currently it doesn't since parallel invocations of
8905 # FindUnusedMinor will conflict
8906 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
8907 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
else:
8909 self.needed_locks[locking.LEVEL_NODE] = []
8910 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8912 if self.op.iallocator is not None:
8913 # iallocator will select a new node in the same group
8914 self.needed_locks[locking.LEVEL_NODEGROUP] = []
8916 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8917 self.op.iallocator, self.op.remote_node,
8918 self.op.disks, False, self.op.early_release)
8920 self.tasklets = [self.replacer]
8922 def DeclareLocks(self, level):
8923 if level == locking.LEVEL_NODEGROUP:
8924 assert self.op.remote_node is None
8925 assert self.op.iallocator is not None
8926 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
8928 self.share_locks[locking.LEVEL_NODEGROUP] = 1
8929 self.needed_locks[locking.LEVEL_NODEGROUP] = \
8930 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8932 elif level == locking.LEVEL_NODE:
8933 if self.op.iallocator is not None:
8934 assert self.op.remote_node is None
8935 assert not self.needed_locks[locking.LEVEL_NODE]
8937 # Lock member nodes of all locked groups
8938 self.needed_locks[locking.LEVEL_NODE] = [node_name
8939 for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
8940 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
else:
8942 self._LockInstancesNodes()
8944 def BuildHooksEnv(self):
8947 This runs on the master, the primary and all the secondaries.
8950 instance = self.replacer.instance
8952 "MODE": self.op.mode,
8953 "NEW_SECONDARY": self.op.remote_node,
8954 "OLD_SECONDARY": instance.secondary_nodes[0],
8956 env.update(_BuildInstanceHookEnvByObject(self, instance))
8959 def BuildHooksNodes(self):
8960 """Build hooks nodes.
8963 instance = self.replacer.instance
nl = [
8965 self.cfg.GetMasterNode(),
8966 instance.primary_node,
]
8968 if self.op.remote_node is not None:
8969 nl.append(self.op.remote_node)
return nl, nl
8972 def CheckPrereq(self):
8973 """Check prerequisites.
8976 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
8977 self.op.iallocator is None)
8979 owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
8981 groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8982 if owned_groups != groups:
8983 raise errors.OpExecError("Node groups used by instance '%s' changed"
8984 " since lock was acquired, current list is %r,"
8985 " used to be '%s'" %
8986 (self.op.instance_name,
8987 utils.CommaJoin(groups),
8988 utils.CommaJoin(owned_groups)))
8990 return LogicalUnit.CheckPrereq(self)
8993 class TLReplaceDisks(Tasklet):
8994 """Replaces disks for an instance.
8996 Note: Locking is not within the scope of this class.
8999 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9000 disks, delay_iallocator, early_release):
9001 """Initializes this class.
9004 Tasklet.__init__(self, lu)
9007 self.instance_name = instance_name
self.mode = mode
9009 self.iallocator_name = iallocator_name
9010 self.remote_node = remote_node
self.disks = disks
9012 self.delay_iallocator = delay_iallocator
9013 self.early_release = early_release
9016 self.instance = None
9017 self.new_node = None
9018 self.target_node = None
9019 self.other_node = None
9020 self.remote_node_info = None
9021 self.node_secondary_ip = None
@staticmethod
9024 def CheckArguments(mode, remote_node, iallocator):
9025 """Helper function for users of this class.
9028 # check for valid parameter combination
9029 if mode == constants.REPLACE_DISK_CHG:
9030 if remote_node is None and iallocator is None:
9031 raise errors.OpPrereqError("When changing the secondary either an"
9032 " iallocator script must be used or the"
9033 " new node given", errors.ECODE_INVAL)
9035 if remote_node is not None and iallocator is not None:
9036 raise errors.OpPrereqError("Give either the iallocator or the new"
9037 " secondary, not both", errors.ECODE_INVAL)
9039 elif remote_node is not None or iallocator is not None:
9040 # Not replacing the secondary
9041 raise errors.OpPrereqError("The iallocator and new node options can"
9042 " only be used when changing the"
9043 " secondary node", errors.ECODE_INVAL)
@staticmethod
9046 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9047 """Compute a new secondary node using an IAllocator.
9050 ial = IAllocator(lu.cfg, lu.rpc,
9051 mode=constants.IALLOCATOR_MODE_RELOC,
9053 relocate_from=relocate_from)
9055 ial.Run(iallocator_name)
if not ial.success:
9058 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9059 " %s" % (iallocator_name, ial.info),
9062 if len(ial.result) != ial.required_nodes:
9063 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9064 " of nodes (%s), required %s" %
9066 len(ial.result), ial.required_nodes),
9069 remote_node_name = ial.result[0]
9071 lu.LogInfo("Selected new secondary for instance '%s': %s",
9072 instance_name, remote_node_name)
9074 return remote_node_name
9076 def _FindFaultyDisks(self, node_name):
9077 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9080 def _CheckDisksActivated(self, instance):
9081 """Checks if the instance disks are activated.
9083 @param instance: The instance to check disks
9084 @return: True if they are activated, False otherwise
9087 nodes = instance.all_nodes
9089 for idx, dev in enumerate(instance.disks):
for node in nodes:
9091 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9092 self.cfg.SetDiskID(dev, node)
9094 result = self.rpc.call_blockdev_find(node, dev)
if result.offline:
continue
9098 elif result.fail_msg or not result.payload:
return False
return True
9103 def CheckPrereq(self):
9104 """Check prerequisites.
9106 This checks that the instance is in the cluster.
9109 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9110 assert instance is not None, \
9111 "Cannot retrieve locked instance %s" % self.instance_name
9113 if instance.disk_template != constants.DT_DRBD8:
9114 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9115 " instances", errors.ECODE_INVAL)
9117 if len(instance.secondary_nodes) != 1:
9118 raise errors.OpPrereqError("The instance has a strange layout,"
9119 " expected one secondary but found %d" %
9120 len(instance.secondary_nodes),
9123 if not self.delay_iallocator:
9124 self._CheckPrereq2()
9126 def _CheckPrereq2(self):
9127 """Check prerequisites, second part.
9129 This function should always be part of CheckPrereq. It was separated and is
9130 now called from Exec because during node evacuation iallocator was only
9131 called with an unmodified cluster model, not taking planned changes into
9135 instance = self.instance
9136 secondary_node = instance.secondary_nodes[0]
9138 if self.iallocator_name is None:
9139 remote_node = self.remote_node
else:
9141 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9142 instance.name, instance.secondary_nodes)
9144 if remote_node is None:
9145 self.remote_node_info = None
else:
9147 assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
9148 "Remote node '%s' is not locked" % remote_node
9150 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9151 assert self.remote_node_info is not None, \
9152 "Cannot retrieve locked node %s" % remote_node
9154 if remote_node == self.instance.primary_node:
9155 raise errors.OpPrereqError("The specified node is the primary node of"
9156 " the instance", errors.ECODE_INVAL)
9158 if remote_node == secondary_node:
9159 raise errors.OpPrereqError("The specified node is already the"
9160 " secondary node of the instance",
9163 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9164 constants.REPLACE_DISK_CHG):
9165 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9168 if self.mode == constants.REPLACE_DISK_AUTO:
9169 if not self._CheckDisksActivated(instance):
9170 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9171 " first" % self.instance_name,
9173 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9174 faulty_secondary = self._FindFaultyDisks(secondary_node)
9176 if faulty_primary and faulty_secondary:
9177 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9178 " one node and can not be repaired"
9179 " automatically" % self.instance_name,
if faulty_primary:
9183 self.disks = faulty_primary
9184 self.target_node = instance.primary_node
9185 self.other_node = secondary_node
9186 check_nodes = [self.target_node, self.other_node]
9187 elif faulty_secondary:
9188 self.disks = faulty_secondary
9189 self.target_node = secondary_node
9190 self.other_node = instance.primary_node
9191 check_nodes = [self.target_node, self.other_node]
9197 # Non-automatic modes
9198 if self.mode == constants.REPLACE_DISK_PRI:
9199 self.target_node = instance.primary_node
9200 self.other_node = secondary_node
9201 check_nodes = [self.target_node, self.other_node]
9203 elif self.mode == constants.REPLACE_DISK_SEC:
9204 self.target_node = secondary_node
9205 self.other_node = instance.primary_node
9206 check_nodes = [self.target_node, self.other_node]
9208 elif self.mode == constants.REPLACE_DISK_CHG:
9209 self.new_node = remote_node
9210 self.other_node = instance.primary_node
9211 self.target_node = secondary_node
9212 check_nodes = [self.new_node, self.other_node]
9214 _CheckNodeNotDrained(self.lu, remote_node)
9215 _CheckNodeVmCapable(self.lu, remote_node)
9217 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9218 assert old_node_info is not None
9219 if old_node_info.offline and not self.early_release:
9220 # doesn't make sense to delay the release
9221 self.early_release = True
9222 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9223 " early-release mode", secondary_node)
else:
9226 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
self.mode)
9229 # If not specified all disks should be replaced
if not self.disks:
9231 self.disks = range(len(self.instance.disks))
9233 for node in check_nodes:
9234 _CheckNodeOnline(self.lu, node)
9236 touched_nodes = frozenset(node_name for node_name in [self.new_node,
self.other_node,
self.target_node]
9239 if node_name is not None)
9241 # Release unneeded node locks
9242 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9244 # Release any owned node group
9245 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9246 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9248 # Check whether disks are valid
9249 for disk_idx in self.disks:
9250 instance.FindDisk(disk_idx)
9252 # Get secondary node IP addresses
9253 self.node_secondary_ip = \
9254 dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
9255 for node_name in touched_nodes)
9257 def Exec(self, feedback_fn):
9258 """Execute disk replacement.
9260 This dispatches the disk replacement to the appropriate handler.
9263 if self.delay_iallocator:
9264 self._CheckPrereq2()
9267 # Verify owned locks before starting operation
9268 owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9269 assert set(owned_locks) == set(self.node_secondary_ip), \
9270 ("Incorrect node locks, owning %s, expected %s" %
9271 (owned_locks, self.node_secondary_ip.keys()))
9273 owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
9274 assert list(owned_locks) == [self.instance_name], \
9275 "Instance '%s' not locked" % self.instance_name
9277 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9278 "Should not own any node group lock at this point"
if not self.disks:
9281 feedback_fn("No disks need replacement")
return
9284 feedback_fn("Replacing disk(s) %s for %s" %
9285 (utils.CommaJoin(self.disks), self.instance.name))
9287 activate_disks = (not self.instance.admin_up)
9289 # Activate the instance disks if we're replacing them on a down instance
if activate_disks:
9291 _StartInstanceDisks(self.lu, self.instance, True)
try:
9294 # Should we replace the secondary node?
9295 if self.new_node is not None:
9296 fn = self._ExecDrbd8Secondary
else:
9298 fn = self._ExecDrbd8DiskOnly
9300 result = fn(feedback_fn)
finally:
9302 # Deactivate the instance disks if we're replacing them on a
# down instance
if activate_disks:
9305 _SafeShutdownInstanceDisks(self.lu, self.instance)
9308 # Verify owned locks
9309 owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9310 nodes = frozenset(self.node_secondary_ip)
9311 assert ((self.early_release and not owned_locks) or
9312 (not self.early_release and not (set(owned_locks) - nodes))), \
9313 ("Not owning the correct locks, early_release=%s, owned=%r,"
9314 " nodes=%r" % (self.early_release, owned_locks, nodes))
9318 def _CheckVolumeGroup(self, nodes):
9319 self.lu.LogInfo("Checking volume groups")
9321 vgname = self.cfg.GetVGName()
9323 # Make sure volume group exists on all involved nodes
9324 results = self.rpc.call_vg_list(nodes)
9326 raise errors.OpExecError("Can't list volume groups on the nodes")
9330 res.Raise("Error checking node %s" % node)
9331 if vgname not in res.payload:
9332 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9335 def _CheckDisksExistence(self, nodes):
9336 # Check disk existence
9337 for idx, dev in enumerate(self.instance.disks):
9338 if idx not in self.disks:
9342 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9343 self.cfg.SetDiskID(dev, node)
9345 result = self.rpc.call_blockdev_find(node, dev)
9347 msg = result.fail_msg
9348 if msg or not result.payload:
9350 msg = "disk not found"
9351 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9354 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9355 for idx, dev in enumerate(self.instance.disks):
9356 if idx not in self.disks:
9359 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9362 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9364 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9365 " replace disks for instance %s" %
9366 (node_name, self.instance.name))
9368 def _CreateNewStorage(self, node_name):
9369 """Create new storage on the primary or secondary node.
9371 This is only used for same-node replaces, not for changing the
9372 secondary node, hence we don't want to modify the existing disk.
9377 for idx, dev in enumerate(self.instance.disks):
9378 if idx not in self.disks:
9381 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9383 self.cfg.SetDiskID(dev, node_name)
9385 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9386 names = _GenerateUniqueNames(self.lu, lv_names)
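# Illustrative example of the names built above (the unique prefixes that
# _GenerateUniqueNames is assumed to add are not shown):
#   >>> [".disk%d_%s" % (0, suffix) for suffix in ["data", "meta"]]
#   ['.disk0_data', '.disk0_meta']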
9388 vg_data = dev.children[0].logical_id[0]
9389 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9390 logical_id=(vg_data, names[0]))
9391 vg_meta = dev.children[1].logical_id[0]
9392 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9393 logical_id=(vg_meta, names[1]))
9395 new_lvs = [lv_data, lv_meta]
9396 old_lvs = [child.Copy() for child in dev.children]
9397 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9399 # we pass force_create=True to force the LVM creation
9400 for new_lv in new_lvs:
9401 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9402 _GetInstanceInfoText(self.instance), False)
9406 def _CheckDevices(self, node_name, iv_names):
9407 for name, (dev, _, _) in iv_names.iteritems():
9408 self.cfg.SetDiskID(dev, node_name)
9410 result = self.rpc.call_blockdev_find(node_name, dev)
9412 msg = result.fail_msg
9413 if msg or not result.payload:
9415 msg = "disk not found"
9416 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9419 if result.payload.is_degraded:
9420 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9422 def _RemoveOldStorage(self, node_name, iv_names):
9423 for name, (_, old_lvs, _) in iv_names.iteritems():
9424 self.lu.LogInfo("Remove logical volumes for %s" % name)
9427 self.cfg.SetDiskID(lv, node_name)
9429 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9431 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9432 hint="remove unused LVs manually")
9434 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
9435 """Replace a disk on the primary or secondary for DRBD 8.
9437 The algorithm for replace is quite complicated:
9439 1. for each disk to be replaced:
9441 1. create new LVs on the target node with unique names
9442 1. detach old LVs from the drbd device
9443 1. rename old LVs to name_replaced.<time_t>
9444 1. rename new LVs to old LVs
9445 1. attach the new LVs (with the old names now) to the drbd device
9447 1. wait for sync across all devices
9449 1. for each modified disk:
9451 1. remove old LVs (which have the name name_replaced.<time_t>)
9453 Failures are not very well handled.
9458 # Step: check device activation
9459 self.lu.LogStep(1, steps_total, "Check device existence")
9460 self._CheckDisksExistence([self.other_node, self.target_node])
9461 self._CheckVolumeGroup([self.target_node, self.other_node])
9463 # Step: check other node consistency
9464 self.lu.LogStep(2, steps_total, "Check peer consistency")
9465 self._CheckDisksConsistency(self.other_node,
9466 self.other_node == self.instance.primary_node,
9469 # Step: create new storage
9470 self.lu.LogStep(3, steps_total, "Allocate new storage")
9471 iv_names = self._CreateNewStorage(self.target_node)
9473 # Step: for each lv, detach+rename*2+attach
9474 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9475 for dev, old_lvs, new_lvs in iv_names.itervalues():
9476 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9478 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9480 result.Raise("Can't detach drbd from local storage on node"
9481 " %s for device %s" % (self.target_node, dev.iv_name))
9483 #cfg.Update(instance)
9485 # ok, we created the new LVs, so now we know we have the needed
9486 # storage; as such, we proceed on the target node to rename
9487 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9488 # using the assumption that logical_id == physical_id (which in
9489 # turn is the unique_id on that node)
9491 # FIXME(iustin): use a better name for the replaced LVs
9492 temp_suffix = int(time.time())
9493 ren_fn = lambda d, suff: (d.physical_id[0],
9494 d.physical_id[1] + "_replaced-%s" % suff)
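# Illustrative sketch of the rename performed below (VG and LV names are
# made up): with temp_suffix == 1234567890, an old LV whose physical_id is
#   ("xenvg", "inst1-disk0_data")
# is renamed by ren_fn to
#   ("xenvg", "inst1-disk0_data_replaced-1234567890")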
9496 # Build the rename list based on what LVs exist on the node
9497 rename_old_to_new = []
9498 for to_ren in old_lvs:
9499 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9500 if not result.fail_msg and result.payload:
9502 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9504 self.lu.LogInfo("Renaming the old LVs on the target node")
9505 result = self.rpc.call_blockdev_rename(self.target_node,
9507 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9509 # Now we rename the new LVs to the old LVs
9510 self.lu.LogInfo("Renaming the new LVs on the target node")
9511 rename_new_to_old = [(new, old.physical_id)
9512 for old, new in zip(old_lvs, new_lvs)]
9513 result = self.rpc.call_blockdev_rename(self.target_node,
9515 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9517 # Intermediate steps of in memory modifications
9518 for old, new in zip(old_lvs, new_lvs):
9519 new.logical_id = old.logical_id
9520 self.cfg.SetDiskID(new, self.target_node)
9522 # We need to modify old_lvs so that removal later removes the
9523 # right LVs, not the newly added ones; note that old_lvs is a
9525 for disk in old_lvs:
9526 disk.logical_id = ren_fn(disk, temp_suffix)
9527 self.cfg.SetDiskID(disk, self.target_node)
9529 # Now that the new lvs have the old name, we can add them to the device
9530 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9531 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9533 msg = result.fail_msg
9535 for new_lv in new_lvs:
9536 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9539 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9540 hint=("cleanup manually the unused logical"
9542 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9545 if self.early_release:
9546 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9548 self._RemoveOldStorage(self.target_node, iv_names)
9549 # WARNING: we release both node locks here, do not do other RPCs
9550 # than WaitForSync to the primary node
9551 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9552 names=[self.target_node, self.other_node])
9555 # This can fail as the old devices are degraded and _WaitForSync
9556 # does a combined result over all disks, so we don't check its return value
9557 self.lu.LogStep(cstep, steps_total, "Sync devices")
9559 _WaitForSync(self.lu, self.instance)
9561 # Check all devices manually
9562 self._CheckDevices(self.instance.primary_node, iv_names)
9564 # Step: remove old storage
9565 if not self.early_release:
9566 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9568 self._RemoveOldStorage(self.target_node, iv_names)
9570 def _ExecDrbd8Secondary(self, feedback_fn):
9571 """Replace the secondary node for DRBD 8.
9573 The algorithm for replace is quite complicated:
9574 - for all disks of the instance:
9575 - create new LVs on the new node with same names
9576 - shutdown the drbd device on the old secondary
9577 - disconnect the drbd network on the primary
9578 - create the drbd device on the new secondary
9579 - network attach the drbd on the primary, using an artifice:
9580 the drbd code for Attach() will connect to the network if it
9581 finds a device which is connected to the good local disks but not network enabled
9583 - wait for sync across all devices
9584 - remove all disks from the old secondary
9586 Failures are not very well handled.
9591 # Step: check device activation
9592 self.lu.LogStep(1, steps_total, "Check device existence")
9593 self._CheckDisksExistence([self.instance.primary_node])
9594 self._CheckVolumeGroup([self.instance.primary_node])
9596 # Step: check other node consistency
9597 self.lu.LogStep(2, steps_total, "Check peer consistency")
9598 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9600 # Step: create new storage
9601 self.lu.LogStep(3, steps_total, "Allocate new storage")
9602 for idx, dev in enumerate(self.instance.disks):
9603 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9604 (self.new_node, idx))
9605 # we pass force_create=True to force LVM creation
9606 for new_lv in dev.children:
9607 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9608 _GetInstanceInfoText(self.instance), False)
9610 # Step 4: drbd minors and drbd setup changes
9611 # after this, we must manually remove the drbd minors on both the
9612 # error and the success paths
9613 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9614 minors = self.cfg.AllocateDRBDMinor([self.new_node
9615 for dev in self.instance.disks],
9617 logging.debug("Allocated minors %r", minors)
9620 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9621 self.lu.LogInfo("Activating a new drbd on %s for disk/%d" %
9622 (self.new_node, idx))
9623 # create new devices on new_node; note that we create two IDs:
9624 # one without port, so the drbd will be activated without
9625 # networking information on the new node at this stage, and one
9626 # with network, for the later activation in step 4
9627 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9628 if self.instance.primary_node == o_node1:
9631 assert self.instance.primary_node == o_node2, "Three-node instance?"
9634 new_alone_id = (self.instance.primary_node, self.new_node, None,
9635 p_minor, new_minor, o_secret)
9636 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9637 p_minor, new_minor, o_secret)
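# Illustrative sketch (node names, port and minors are made up): for
# primary "node1", new secondary "node3", o_port 11000 and p_minor 1:
#   new_alone_id == ("node1", "node3", None, 1, new_minor, o_secret)
#   new_net_id   == ("node1", "node3", 11000, 1, new_minor, o_secret)
# i.e. the two IDs differ only in whether the DRBD port is present.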
9639 iv_names[idx] = (dev, dev.children, new_net_id)
9640 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9642 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9643 logical_id=new_alone_id,
9644 children=dev.children,
9647 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9648 _GetInstanceInfoText(self.instance), False)
9649 except errors.GenericError:
9650 self.cfg.ReleaseDRBDMinors(self.instance.name)
9653 # We have new devices, shutdown the drbd on the old secondary
9654 for idx, dev in enumerate(self.instance.disks):
9655 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9656 self.cfg.SetDiskID(dev, self.target_node)
9657 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9659 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9660 " node: %s" % (idx, msg),
9661 hint=("Please cleanup this device manually as"
9662 " soon as possible"))
9664 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9665 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9666 self.node_secondary_ip,
9667 self.instance.disks)\
9668 [self.instance.primary_node]
9670 msg = result.fail_msg
9672 # detaches didn't succeed (unlikely)
9673 self.cfg.ReleaseDRBDMinors(self.instance.name)
9674 raise errors.OpExecError("Can't detach the disks from the network on"
9675 " old node: %s" % (msg,))
9677 # if we managed to detach at least one, we update all the disks of
9678 # the instance to point to the new secondary
9679 self.lu.LogInfo("Updating instance configuration")
9680 for dev, _, new_logical_id in iv_names.itervalues():
9681 dev.logical_id = new_logical_id
9682 self.cfg.SetDiskID(dev, self.instance.primary_node)
9684 self.cfg.Update(self.instance, feedback_fn)
9686 # and now perform the drbd attach
9687 self.lu.LogInfo("Attaching primary drbds to new secondary"
9688 " (standalone => connected)")
9689 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9691 self.node_secondary_ip,
9692 self.instance.disks,
9695 for to_node, to_result in result.items():
9696 msg = to_result.fail_msg
9698 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9700 hint=("please do a gnt-instance info to see the"
9701 " status of disks"))
9703 if self.early_release:
9704 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9706 self._RemoveOldStorage(self.target_node, iv_names)
9707 # WARNING: we release all node locks here, do not do other RPCs
9708 # than WaitForSync to the primary node
9709 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9710 names=[self.instance.primary_node,
9715 # This can fail as the old devices are degraded and _WaitForSync
9716 # does a combined result over all disks, so we don't check its return value
9717 self.lu.LogStep(cstep, steps_total, "Sync devices")
9719 _WaitForSync(self.lu, self.instance)
9721 # Check all devices manually
9722 self._CheckDevices(self.instance.primary_node, iv_names)
9724 # Step: remove old storage
9725 if not self.early_release:
9726 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9727 self._RemoveOldStorage(self.target_node, iv_names)
9730 class LURepairNodeStorage(NoHooksLU):
9731 """Repairs the volume group on a node.
9736 def CheckArguments(self):
9737 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9739 storage_type = self.op.storage_type
9741 if (constants.SO_FIX_CONSISTENCY not in
9742 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9743 raise errors.OpPrereqError("Storage units of type '%s' can not be"
9744 " repaired" % storage_type,
9747 def ExpandNames(self):
9748 self.needed_locks = {
9749 locking.LEVEL_NODE: [self.op.node_name],
9752 def _CheckFaultyDisks(self, instance, node_name):
9753 """Ensure faulty disks abort the opcode or at least warn."""
9755 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9757 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9758 " node '%s'" % (instance.name, node_name),
9760 except errors.OpPrereqError, err:
9761 if self.op.ignore_consistency:
9762 self.proc.LogWarning(str(err.args[0]))
9766 def CheckPrereq(self):
9767 """Check prerequisites.
9770 # Check whether any instance on this node has faulty disks
9771 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9772 if not inst.admin_up:
9774 check_nodes = set(inst.all_nodes)
9775 check_nodes.discard(self.op.node_name)
9776 for inst_node_name in check_nodes:
9777 self._CheckFaultyDisks(inst, inst_node_name)
9779 def Exec(self, feedback_fn):
9780 feedback_fn("Repairing storage unit '%s' on %s ..." %
9781 (self.op.name, self.op.node_name))
9783 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9784 result = self.rpc.call_storage_execute(self.op.node_name,
9785 self.op.storage_type, st_args,
9787 constants.SO_FIX_CONSISTENCY)
9788 result.Raise("Failed to repair storage unit '%s' on %s" %
9789 (self.op.name, self.op.node_name))
9792 class LUNodeEvacuate(NoHooksLU):
9793 """Evacuates instances off a list of nodes.
9798 def CheckArguments(self):
9799 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9801 def ExpandNames(self):
9802 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9804 if self.op.remote_node is not None:
9805 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9806 assert self.op.remote_node
9808 if self.op.remote_node == self.op.node_name:
9809 raise errors.OpPrereqError("Can not use evacuated node as a new"
9810 " secondary node", errors.ECODE_INVAL)
9812 if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
9813 raise errors.OpPrereqError("Without the use of an iallocator only"
9814 " secondary instances can be evacuated",
9818 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9819 self.needed_locks = {
9820 locking.LEVEL_INSTANCE: [],
9821 locking.LEVEL_NODEGROUP: [],
9822 locking.LEVEL_NODE: [],
9825 if self.op.remote_node is None:
9826 # Iallocator will choose any node(s) in the same group
9827 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
9829 group_nodes = frozenset([self.op.remote_node])
9831 # Determine nodes to be locked
9832 self.lock_nodes = set([self.op.node_name]) | group_nodes
9834 def _DetermineInstances(self):
9835 """Builds list of instances to operate on.
9838 assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
9840 if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
9841 # Primary instances only
9842 inst_fn = _GetNodePrimaryInstances
9843 assert self.op.remote_node is None, \
9844 "Evacuating primary instances requires iallocator"
9845 elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
9846 # Secondary instances only
9847 inst_fn = _GetNodeSecondaryInstances
9850 assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
9851 inst_fn = _GetNodeInstances
9853 return inst_fn(self.cfg, self.op.node_name)
9855 def DeclareLocks(self, level):
9856 if level == locking.LEVEL_INSTANCE:
9857 # Lock instances optimistically, needs verification once node and group
9858 # locks have been acquired
9859 self.needed_locks[locking.LEVEL_INSTANCE] = \
9860 set(i.name for i in self._DetermineInstances())
9862 elif level == locking.LEVEL_NODEGROUP:
9863 # Lock node groups optimistically, needs verification once nodes have been acquired
9865 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9866 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
9868 elif level == locking.LEVEL_NODE:
9869 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
9871 def CheckPrereq(self):
9873 owned_instances = self.glm.list_owned(locking.LEVEL_INSTANCE)
9874 owned_nodes = self.glm.list_owned(locking.LEVEL_NODE)
9875 owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
9877 assert owned_nodes == self.lock_nodes
9879 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
9880 if owned_groups != wanted_groups:
9881 raise errors.OpExecError("Node groups changed since locks were acquired,"
9882 " current groups are '%s', used to be '%s'" %
9883 (utils.CommaJoin(wanted_groups),
9884 utils.CommaJoin(owned_groups)))
9886 # Determine affected instances
9887 self.instances = self._DetermineInstances()
9888 self.instance_names = [i.name for i in self.instances]
9890 if set(self.instance_names) != owned_instances:
9891 raise errors.OpExecError("Instances on node '%s' changed since locks"
9892 " were acquired, current instances are '%s',"
9893 " used to be '%s'" %
9895 utils.CommaJoin(self.instance_names),
9896 utils.CommaJoin(owned_instances)))
9898 if self.instance_names:
9899 self.LogInfo("Evacuating instances from node '%s': %s",
9901 utils.CommaJoin(utils.NiceSort(self.instance_names)))
9903 self.LogInfo("No instances to evacuate from node '%s'",
9906 if self.op.remote_node is not None:
9907 for i in self.instances:
9908 if i.primary_node == self.op.remote_node:
9909 raise errors.OpPrereqError("Node %s is the primary node of"
9910 " instance %s, cannot use it as"
9912 (self.op.remote_node, i.name),
9915 def Exec(self, feedback_fn):
9916 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
9918 if not self.instance_names:
9919 # No instances to evacuate
9922 elif self.op.iallocator is not None:
9923 # TODO: Implement relocation to other group
9924 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
9925 evac_mode=self.op.mode,
9926 instances=list(self.instance_names))
9928 ial.Run(self.op.iallocator)
9931 raise errors.OpPrereqError("Can't compute node evacuation using"
9932 " iallocator '%s': %s" %
9933 (self.op.iallocator, ial.info),
9936 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
9938 elif self.op.remote_node is not None:
9939 assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
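# Illustrative sketch of the jobs built below (instance names are made
# up): for two secondary instances the result would be
#   [[OpInstanceReplaceDisks(instance_name="inst1", ...)],
#    [OpInstanceReplaceDisks(instance_name="inst2", ...)]]
# i.e. one single-opcode job per evacuated instance.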
9941 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
9942 remote_node=self.op.remote_node,
9944 mode=constants.REPLACE_DISK_CHG,
9945 early_release=self.op.early_release)]
9946 for instance_name in self.instance_names
9950 raise errors.ProgrammerError("No iallocator or remote node")
9952 return ResultWithJobs(jobs)
9955 def _SetOpEarlyRelease(early_release, op):
9956 """Sets C{early_release} flag on opcodes if available.
9960 op.early_release = early_release
9961 except AttributeError:
9962 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
9967 def _NodeEvacDest(use_nodes, group, nodes):
9968 """Returns group or nodes depending on caller's choice.
9972 return utils.CommaJoin(nodes)
9977 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
9978 """Unpacks the result of change-group and node-evacuate iallocator requests.
9980 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
9981 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
9983 @type lu: L{LogicalUnit}
9984 @param lu: Logical unit instance
9985 @type alloc_result: tuple/list
9986 @param alloc_result: Result from iallocator
9987 @type early_release: bool
9988 @param early_release: Whether to release locks early if possible
9989 @type use_nodes: bool
9990 @param use_nodes: Whether to display node names instead of groups
9993 (moved, failed, jobs) = alloc_result
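# Illustrative sketch of the assumed result layout (all names made up):
#   moved  == [("inst1", "group1", ["node2", "node3"]), ...]
#   failed == [("inst2", "not enough memory"), ...]
#   jobs   == [[<serialized opcode dict>, ...], ...]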
9996 lu.LogWarning("Unable to evacuate instances %s",
9997 utils.CommaJoin("%s (%s)" % (name, reason)
9998 for (name, reason) in failed))
10001 lu.LogInfo("Instances to be moved: %s",
10002 utils.CommaJoin("%s (to %s)" %
10003 (name, _NodeEvacDest(use_nodes, group, nodes))
10004 for (name, group, nodes) in moved))
10006 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10007 map(opcodes.OpCode.LoadOpCode, ops))
10011 class LUInstanceGrowDisk(LogicalUnit):
10012 """Grow a disk of an instance.
10015 HPATH = "disk-grow"
10016 HTYPE = constants.HTYPE_INSTANCE
10019 def ExpandNames(self):
10020 self._ExpandAndLockInstance()
10021 self.needed_locks[locking.LEVEL_NODE] = []
10022 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10024 def DeclareLocks(self, level):
10025 if level == locking.LEVEL_NODE:
10026 self._LockInstancesNodes()
10028 def BuildHooksEnv(self):
10029 """Build hooks env.
10031 This runs on the master, the primary and all the secondaries.
10035 "DISK": self.op.disk,
10036 "AMOUNT": self.op.amount,
10038 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10041 def BuildHooksNodes(self):
10042 """Build hooks nodes.
10045 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10048 def CheckPrereq(self):
10049 """Check prerequisites.
10051 This checks that the instance is in the cluster.
10054 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10055 assert instance is not None, \
10056 "Cannot retrieve locked instance %s" % self.op.instance_name
10057 nodenames = list(instance.all_nodes)
10058 for node in nodenames:
10059 _CheckNodeOnline(self, node)
10061 self.instance = instance
10063 if instance.disk_template not in constants.DTS_GROWABLE:
10064 raise errors.OpPrereqError("Instance's disk layout does not support"
10065 " growing", errors.ECODE_INVAL)
10067 self.disk = instance.FindDisk(self.op.disk)
10069 if instance.disk_template not in (constants.DT_FILE,
10070 constants.DT_SHARED_FILE):
10071 # TODO: check the free disk space for file, when that feature will be implemented
10073 _CheckNodesFreeDiskPerVG(self, nodenames,
10074 self.disk.ComputeGrowth(self.op.amount))
10076 def Exec(self, feedback_fn):
10077 """Execute disk grow.
10080 instance = self.instance
10083 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10085 raise errors.OpExecError("Cannot activate block device to grow")
10087 # First run all grow ops in dry-run mode
10088 for node in instance.all_nodes:
10089 self.cfg.SetDiskID(disk, node)
10090 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10091 result.Raise("Grow request failed on node %s" % node)
10093 # We know that (as far as we can test) operations across different
10094 # nodes will succeed; time to run it for real
10095 for node in instance.all_nodes:
10096 self.cfg.SetDiskID(disk, node)
10097 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10098 result.Raise("Grow request failed on node %s" % node)
10100 # TODO: Rewrite code to work properly
10101 # DRBD goes into sync mode for a short amount of time after executing the
10102 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10103 # calling "resize" in sync mode fails. Sleeping for a short amount of
10104 # time is a work-around.
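# A minimal sketch of the work-around described above (the exact delay and
# condition are assumptions, not taken from this code):
#   if self.op.wait_for_sync:
#     time.sleep(5)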
10107 disk.RecordGrow(self.op.amount)
10108 self.cfg.Update(instance, feedback_fn)
10109 if self.op.wait_for_sync:
10110 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10112 self.proc.LogWarning("Disk sync-ing has not returned a good"
10113 " status; please check the instance")
10114 if not instance.admin_up:
10115 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10116 elif not instance.admin_up:
10117 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10118 " not supposed to be running because no wait for"
10119 " sync mode was requested")
10122 class LUInstanceQueryData(NoHooksLU):
10123 """Query runtime instance data.
10128 def ExpandNames(self):
10129 self.needed_locks = {}
10131 # Use locking if requested or when non-static information is wanted
10132 if not (self.op.static or self.op.use_locking):
10133 self.LogWarning("Non-static data requested, locks need to be acquired")
10134 self.op.use_locking = True
10136 if self.op.instances or not self.op.use_locking:
10137 # Expand instance names right here
10138 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10140 # Will use acquired locks
10141 self.wanted_names = None
10143 if self.op.use_locking:
10144 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10146 if self.wanted_names is None:
10147 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10149 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10151 self.needed_locks[locking.LEVEL_NODE] = []
10152 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10153 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10155 def DeclareLocks(self, level):
10156 if self.op.use_locking and level == locking.LEVEL_NODE:
10157 self._LockInstancesNodes()
10159 def CheckPrereq(self):
10160 """Check prerequisites.
10162 This only checks the optional instance list against the existing names.
10165 if self.wanted_names is None:
10166 assert self.op.use_locking, "Locking was not used"
10167 self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
10169 self.wanted_instances = [self.cfg.GetInstanceInfo(name)
10170 for name in self.wanted_names]
10172 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10173 """Returns the status of a block device
10176 if self.op.static or not node:
10179 self.cfg.SetDiskID(dev, node)
10181 result = self.rpc.call_blockdev_find(node, dev)
10185 result.Raise("Can't compute disk status for %s" % instance_name)
10187 status = result.payload
10191 return (status.dev_path, status.major, status.minor,
10192 status.sync_percent, status.estimated_time,
10193 status.is_degraded, status.ldisk_status)
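# Illustrative sketch of the tuple returned above (all values made up):
#   ("/dev/drbd0", 147, 0, 90.5, 120, False, <ldisk status>)
# i.e. (dev_path, major, minor, sync_percent, estimated_time,
#       is_degraded, ldisk_status)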
10195 def _ComputeDiskStatus(self, instance, snode, dev):
10196 """Compute block device status.
10199 if dev.dev_type in constants.LDS_DRBD:
10200 # we change the snode then (otherwise we use the one passed in)
10201 if dev.logical_id[0] == instance.primary_node:
10202 snode = dev.logical_id[1]
10204 snode = dev.logical_id[0]
10206 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10207 instance.name, dev)
10208 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10211 dev_children = map(compat.partial(self._ComputeDiskStatus,
10218 "iv_name": dev.iv_name,
10219 "dev_type": dev.dev_type,
10220 "logical_id": dev.logical_id,
10221 "physical_id": dev.physical_id,
10222 "pstatus": dev_pstatus,
10223 "sstatus": dev_sstatus,
10224 "children": dev_children,
10229 def Exec(self, feedback_fn):
10230 """Gather and return data"""
10233 cluster = self.cfg.GetClusterInfo()
10235 for instance in self.wanted_instances:
10236 pnode = self.cfg.GetNodeInfo(instance.primary_node)
10238 if self.op.static or pnode.offline:
10239 remote_state = None
10241 self.LogWarning("Primary node %s is marked offline, returning static"
10242 " information only for instance %s" %
10243 (pnode.name, instance.name))
10245 remote_info = self.rpc.call_instance_info(instance.primary_node,
10247 instance.hypervisor)
10248 remote_info.Raise("Error checking node %s" % instance.primary_node)
10249 remote_info = remote_info.payload
10250 if remote_info and "state" in remote_info:
10251 remote_state = "up"
10253 remote_state = "down"
10255 if instance.admin_up:
10256 config_state = "up"
10258 config_state = "down"
10260 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10263 result[instance.name] = {
10264 "name": instance.name,
10265 "config_state": config_state,
10266 "run_state": remote_state,
10267 "pnode": instance.primary_node,
10268 "snodes": instance.secondary_nodes,
10270 # this happens to be the same format used for hooks
10271 "nics": _NICListToTuple(self, instance.nics),
10272 "disk_template": instance.disk_template,
10274 "hypervisor": instance.hypervisor,
10275 "network_port": instance.network_port,
10276 "hv_instance": instance.hvparams,
10277 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10278 "be_instance": instance.beparams,
10279 "be_actual": cluster.FillBE(instance),
10280 "os_instance": instance.osparams,
10281 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10282 "serial_no": instance.serial_no,
10283 "mtime": instance.mtime,
10284 "ctime": instance.ctime,
10285 "uuid": instance.uuid,
10291 class LUInstanceSetParams(LogicalUnit):
10292 """Modifies an instance's parameters.
10295 HPATH = "instance-modify"
10296 HTYPE = constants.HTYPE_INSTANCE
10299 def CheckArguments(self):
10300 if not (self.op.nics or self.op.disks or self.op.disk_template or
10301 self.op.hvparams or self.op.beparams or self.op.os_name):
10302 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10304 if self.op.hvparams:
10305 _CheckGlobalHvParams(self.op.hvparams)
10309 for disk_op, disk_dict in self.op.disks:
10310 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10311 if disk_op == constants.DDM_REMOVE:
10312 disk_addremove += 1
10314 elif disk_op == constants.DDM_ADD:
10315 disk_addremove += 1
10317 if not isinstance(disk_op, int):
10318 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10319 if not isinstance(disk_dict, dict):
10320 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10321 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10323 if disk_op == constants.DDM_ADD:
10324 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10325 if mode not in constants.DISK_ACCESS_SET:
10326 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10327 errors.ECODE_INVAL)
10328 size = disk_dict.get(constants.IDISK_SIZE, None)
10330 raise errors.OpPrereqError("Required disk parameter size missing",
10331 errors.ECODE_INVAL)
10334 except (TypeError, ValueError), err:
10335 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10336 str(err), errors.ECODE_INVAL)
10337 disk_dict[constants.IDISK_SIZE] = size
10339 # modification of disk
10340 if constants.IDISK_SIZE in disk_dict:
10341 raise errors.OpPrereqError("Disk size change not possible, use"
10342 " grow-disk", errors.ECODE_INVAL)
10344 if disk_addremove > 1:
10345 raise errors.OpPrereqError("Only one disk add or remove operation"
10346 " supported at a time", errors.ECODE_INVAL)
10348 if self.op.disks and self.op.disk_template is not None:
10349 raise errors.OpPrereqError("Disk template conversion and other disk"
10350 " changes not supported at the same time",
10351 errors.ECODE_INVAL)
10353 if (self.op.disk_template and
10354 self.op.disk_template in constants.DTS_INT_MIRROR and
10355 self.op.remote_node is None):
10356 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10357 " one requires specifying a secondary node",
10358 errors.ECODE_INVAL)
10362 for nic_op, nic_dict in self.op.nics:
10363 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10364 if nic_op == constants.DDM_REMOVE:
10367 elif nic_op == constants.DDM_ADD:
10370 if not isinstance(nic_op, int):
10371 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10372 if not isinstance(nic_dict, dict):
10373 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10374 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10376 # nic_dict should be a dict
10377 nic_ip = nic_dict.get(constants.INIC_IP, None)
10378 if nic_ip is not None:
10379 if nic_ip.lower() == constants.VALUE_NONE:
10380 nic_dict[constants.INIC_IP] = None
10382 if not netutils.IPAddress.IsValid(nic_ip):
10383 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10384 errors.ECODE_INVAL)
10386 nic_bridge = nic_dict.get("bridge", None)
10387 nic_link = nic_dict.get(constants.INIC_LINK, None)
10388 if nic_bridge and nic_link:
10389 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10390 " at the same time", errors.ECODE_INVAL)
10391 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10392 nic_dict["bridge"] = None
10393 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10394 nic_dict[constants.INIC_LINK] = None
10396 if nic_op == constants.DDM_ADD:
10397 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10398 if nic_mac is None:
10399 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10401 if constants.INIC_MAC in nic_dict:
10402 nic_mac = nic_dict[constants.INIC_MAC]
10403 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10404 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10406 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10407 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10408 " modifying an existing nic",
10409 errors.ECODE_INVAL)
10411 if nic_addremove > 1:
10412 raise errors.OpPrereqError("Only one NIC add or remove operation"
10413 " supported at a time", errors.ECODE_INVAL)
10415 def ExpandNames(self):
10416 self._ExpandAndLockInstance()
10417 self.needed_locks[locking.LEVEL_NODE] = []
10418 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10420 def DeclareLocks(self, level):
10421 if level == locking.LEVEL_NODE:
10422 self._LockInstancesNodes()
10423 if self.op.disk_template and self.op.remote_node:
10424 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10425 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10427 def BuildHooksEnv(self):
10428 """Build hooks env.
10430 This runs on the master, primary and secondaries.
10434 if constants.BE_MEMORY in self.be_new:
10435 args["memory"] = self.be_new[constants.BE_MEMORY]
10436 if constants.BE_VCPUS in self.be_new:
10437 args["vcpus"] = self.be_new[constants.BE_VCPUS]
10438 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10439 # information at all.
10442 nic_override = dict(self.op.nics)
10443 for idx, nic in enumerate(self.instance.nics):
10444 if idx in nic_override:
10445 this_nic_override = nic_override[idx]
10447 this_nic_override = {}
10448 if constants.INIC_IP in this_nic_override:
10449 ip = this_nic_override[constants.INIC_IP]
10452 if constants.INIC_MAC in this_nic_override:
10453 mac = this_nic_override[constants.INIC_MAC]
10456 if idx in self.nic_pnew:
10457 nicparams = self.nic_pnew[idx]
10459 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10460 mode = nicparams[constants.NIC_MODE]
10461 link = nicparams[constants.NIC_LINK]
10462 args["nics"].append((ip, mac, mode, link))
10463 if constants.DDM_ADD in nic_override:
10464 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10465 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10466 nicparams = self.nic_pnew[constants.DDM_ADD]
10467 mode = nicparams[constants.NIC_MODE]
10468 link = nicparams[constants.NIC_LINK]
10469 args["nics"].append((ip, mac, mode, link))
10470 elif constants.DDM_REMOVE in nic_override:
10471 del args["nics"][-1]
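# Illustrative sketch of the hook data built above (values are made up):
#   args["nics"] == [("198.51.100.10", "aa:00:00:4f:2e:7d", "bridged",
#                     "xen-br0")]
# i.e. one (ip, mac, mode, link) tuple per NIC after applying overrides.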
10473 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10474 if self.op.disk_template:
10475 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10479 def BuildHooksNodes(self):
10480 """Build hooks nodes.
10483 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10486 def CheckPrereq(self):
10487 """Check prerequisites.
10489 This only checks the instance list against the existing names.
10492 # checking the new params on the primary/secondary nodes
10494 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10495 cluster = self.cluster = self.cfg.GetClusterInfo()
10496 assert self.instance is not None, \
10497 "Cannot retrieve locked instance %s" % self.op.instance_name
10498 pnode = instance.primary_node
10499 nodelist = list(instance.all_nodes)
10502 if self.op.os_name and not self.op.force:
10503 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10504 self.op.force_variant)
10505 instance_os = self.op.os_name
10507 instance_os = instance.os
10509 if self.op.disk_template:
10510 if instance.disk_template == self.op.disk_template:
10511 raise errors.OpPrereqError("Instance already has disk template %s" %
10512 instance.disk_template, errors.ECODE_INVAL)
10514 if (instance.disk_template,
10515 self.op.disk_template) not in self._DISK_CONVERSIONS:
10516 raise errors.OpPrereqError("Unsupported disk template conversion from"
10517 " %s to %s" % (instance.disk_template,
10518 self.op.disk_template),
10519 errors.ECODE_INVAL)
10520 _CheckInstanceDown(self, instance, "cannot change disk template")
10521 if self.op.disk_template in constants.DTS_INT_MIRROR:
10522 if self.op.remote_node == pnode:
10523 raise errors.OpPrereqError("Given new secondary node %s is the same"
10524 " as the primary node of the instance" %
10525 self.op.remote_node, errors.ECODE_STATE)
10526 _CheckNodeOnline(self, self.op.remote_node)
10527 _CheckNodeNotDrained(self, self.op.remote_node)
10528 # FIXME: here we assume that the old instance type is DT_PLAIN
10529 assert instance.disk_template == constants.DT_PLAIN
10530 disks = [{constants.IDISK_SIZE: d.size,
10531 constants.IDISK_VG: d.logical_id[0]}
10532 for d in instance.disks]
10533 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10534 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10536 # hvparams processing
10537 if self.op.hvparams:
10538 hv_type = instance.hypervisor
10539 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10540 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10541 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10544 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10545 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10546 self.hv_new = hv_new # the new actual values
10547 self.hv_inst = i_hvdict # the new dict (without defaults)
10549 self.hv_new = self.hv_inst = {}
10551 # beparams processing
10552 if self.op.beparams:
10553 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10555 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10556 be_new = cluster.SimpleFillBE(i_bedict)
10557 self.be_new = be_new # the new actual values
10558 self.be_inst = i_bedict # the new dict (without defaults)
10560 self.be_new = self.be_inst = {}
10561 be_old = cluster.FillBE(instance)
10563 # osparams processing
10564 if self.op.osparams:
10565 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10566 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10567 self.os_inst = i_osdict # the new dict (without defaults)
10573 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10574 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10575 mem_check_list = [pnode]
10576 if be_new[constants.BE_AUTO_BALANCE]:
10577 # either we changed auto_balance to yes or it was from before
10578 mem_check_list.extend(instance.secondary_nodes)
10579 instance_info = self.rpc.call_instance_info(pnode, instance.name,
10580 instance.hypervisor)
10581 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10582 instance.hypervisor)
10583 pninfo = nodeinfo[pnode]
10584 msg = pninfo.fail_msg
10586 # Assume the primary node is unreachable and go ahead
10587 self.warn.append("Can't get info from primary node %s: %s" %
10589 elif not isinstance(pninfo.payload.get("memory_free", None), int):
10590 self.warn.append("Node data from primary node %s doesn't contain"
10591 " free memory information" % pnode)
10592 elif instance_info.fail_msg:
10593 self.warn.append("Can't get instance runtime information: %s" %
10594 instance_info.fail_msg)
10596 if instance_info.payload:
10597 current_mem = int(instance_info.payload["memory"])
10599 # Assume instance not running
10600 # (there is a slight race condition here, but it's not very probable,
10601 # and we have no other way to check)
10603 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10604 pninfo.payload["memory_free"])
10606 raise errors.OpPrereqError("This change will prevent the instance"
10607 " from starting, due to %d MB of memory"
10608 " missing on its primary node" % miss_mem,
10609 errors.ECODE_NORES)
10611 if be_new[constants.BE_AUTO_BALANCE]:
10612 for node, nres in nodeinfo.items():
10613 if node not in instance.secondary_nodes:
10615 nres.Raise("Can't get info from secondary node %s" % node,
10616 prereq=True, ecode=errors.ECODE_STATE)
10617 if not isinstance(nres.payload.get("memory_free", None), int):
10618 raise errors.OpPrereqError("Secondary node %s didn't return free"
10619 " memory information" % node,
10620 errors.ECODE_STATE)
10621 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10622 raise errors.OpPrereqError("This change will prevent the instance"
10623 " from failing over to its secondary node"
10624 " %s, due to insufficient memory" % node,
10625 errors.ECODE_STATE)
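# Worked example for the memory check above (numbers are made up): if the
# instance currently uses 1024 MB, the new BE_MEMORY is 4096 MB and the
# primary node reports 2048 MB free, then
#   miss_mem = 4096 - 1024 - 2048 = 1024
# and the positive result causes the resize to be refused (ECODE_NORES).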
10629 self.nic_pinst = {}
10630 for nic_op, nic_dict in self.op.nics:
10631 if nic_op == constants.DDM_REMOVE:
10632 if not instance.nics:
10633 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10634 errors.ECODE_INVAL)
10636 if nic_op != constants.DDM_ADD:
10638 if not instance.nics:
10639 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10640 " no NICs" % nic_op,
10641 errors.ECODE_INVAL)
10642 if nic_op < 0 or nic_op >= len(instance.nics):
10643 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10645 (nic_op, len(instance.nics) - 1),
10646 errors.ECODE_INVAL)
10647 old_nic_params = instance.nics[nic_op].nicparams
10648 old_nic_ip = instance.nics[nic_op].ip
10650 old_nic_params = {}
10653 update_params_dict = dict([(key, nic_dict[key])
10654 for key in constants.NICS_PARAMETERS
10655 if key in nic_dict])
10657 if "bridge" in nic_dict:
10658 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10660 new_nic_params = _GetUpdatedParams(old_nic_params,
10661 update_params_dict)
10662 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10663 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10664 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10665 self.nic_pinst[nic_op] = new_nic_params
10666 self.nic_pnew[nic_op] = new_filled_nic_params
10667 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10669 if new_nic_mode == constants.NIC_MODE_BRIDGED:
10670 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10671 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10673 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10675 self.warn.append(msg)
10677 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10678 if new_nic_mode == constants.NIC_MODE_ROUTED:
10679 if constants.INIC_IP in nic_dict:
10680 nic_ip = nic_dict[constants.INIC_IP]
10682 nic_ip = old_nic_ip
10684 raise errors.OpPrereqError("Cannot set the nic ip to None"
10685 " on a routed nic", errors.ECODE_INVAL)
10686 if constants.INIC_MAC in nic_dict:
10687 nic_mac = nic_dict[constants.INIC_MAC]
10688 if nic_mac is None:
10689 raise errors.OpPrereqError("Cannot set the nic mac to None",
10690 errors.ECODE_INVAL)
10691 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10692 # otherwise generate the mac
10693 nic_dict[constants.INIC_MAC] = \
10694 self.cfg.GenerateMAC(self.proc.GetECId())
10696 # or validate/reserve the current one
10698 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10699 except errors.ReservationError:
10700 raise errors.OpPrereqError("MAC address %s already in use"
10701 " in cluster" % nic_mac,
10702 errors.ECODE_NOTUNIQUE)
10705 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10706 raise errors.OpPrereqError("Disk operations not supported for"
10707 " diskless instances",
10708 errors.ECODE_INVAL)
10709 for disk_op, _ in self.op.disks:
10710 if disk_op == constants.DDM_REMOVE:
10711 if len(instance.disks) == 1:
10712 raise errors.OpPrereqError("Cannot remove the last disk of"
10713 " an instance", errors.ECODE_INVAL)
10714 _CheckInstanceDown(self, instance, "cannot remove disks")
10716 if (disk_op == constants.DDM_ADD and
10717 len(instance.disks) >= constants.MAX_DISKS):
10718 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10719 " add more" % constants.MAX_DISKS,
10720 errors.ECODE_STATE)
10721 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10723 if disk_op < 0 or disk_op >= len(instance.disks):
10724 raise errors.OpPrereqError("Invalid disk index %s, valid values"
10726 (disk_op, len(instance.disks)),
10727 errors.ECODE_INVAL)
10731 def _ConvertPlainToDrbd(self, feedback_fn):
10732 """Converts an instance from plain to drbd.
10735 feedback_fn("Converting template to drbd")
10736 instance = self.instance
10737 pnode = instance.primary_node
10738 snode = self.op.remote_node
10740 # create a fake disk info for _GenerateDiskTemplate
10741 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10742 constants.IDISK_VG: d.logical_id[0]}
10743 for d in instance.disks]
10744 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10745 instance.name, pnode, [snode],
10746 disk_info, None, None, 0, feedback_fn)
10747 info = _GetInstanceInfoText(instance)
10748 feedback_fn("Creating additional volumes...")
10749 # first, create the missing data and meta devices
10750 for disk in new_disks:
10751 # unfortunately this is... not too nice
10752 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10754 for child in disk.children:
10755 _CreateSingleBlockDev(self, snode, instance, child, info, True)
10756 # at this stage, all new LVs have been created; we can rename the old ones
10758 feedback_fn("Renaming original volumes...")
10759 rename_list = [(o, n.children[0].logical_id)
10760 for (o, n) in zip(instance.disks, new_disks)]
10761 result = self.rpc.call_blockdev_rename(pnode, rename_list)
10762 result.Raise("Failed to rename original LVs")
10764 feedback_fn("Initializing DRBD devices...")
10765 # all child devices are in place, we can now create the DRBD devices
10766 for disk in new_disks:
10767 for node in [pnode, snode]:
10768 f_create = node == pnode
10769 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10771 # at this point, the instance has been modified
10772 instance.disk_template = constants.DT_DRBD8
10773 instance.disks = new_disks
10774 self.cfg.Update(instance, feedback_fn)
10776 # disks are created, waiting for sync
10777 disk_abort = not _WaitForSync(self, instance,
10778 oneshot=not self.op.wait_for_sync)
10780 raise errors.OpExecError("There are some degraded disks for"
10781 " this instance, please cleanup manually")
10783 def _ConvertDrbdToPlain(self, feedback_fn):
10784 """Converts an instance from drbd to plain.
10787 instance = self.instance
10788 assert len(instance.secondary_nodes) == 1
10789 pnode = instance.primary_node
10790 snode = instance.secondary_nodes[0]
10791 feedback_fn("Converting template to plain")
10793 old_disks = instance.disks
10794 new_disks = [d.children[0] for d in old_disks]
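# For DRBD8-based disks the first child is the data LV and the second the
# metadata LV (cf. _CreateNewStorage above), so keeping children[0] keeps
# the data volume and drops the DRBD layer.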
10796 # copy over size and mode
10797 for parent, child in zip(old_disks, new_disks):
10798 child.size = parent.size
10799 child.mode = parent.mode
10801 # update instance structure
10802 instance.disks = new_disks
10803 instance.disk_template = constants.DT_PLAIN
10804 self.cfg.Update(instance, feedback_fn)
10806 feedback_fn("Removing volumes on the secondary node...")
10807 for disk in old_disks:
10808 self.cfg.SetDiskID(disk, snode)
10809 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10811 self.LogWarning("Could not remove block device %s on node %s,"
10812 " continuing anyway: %s", disk.iv_name, snode, msg)
10814 feedback_fn("Removing unneeded volumes on the primary node...")
10815 for idx, disk in enumerate(old_disks):
10816 meta = disk.children[1]
10817 self.cfg.SetDiskID(meta, pnode)
10818 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10820 self.LogWarning("Could not remove metadata for disk %d on node %s,"
10821 " continuing anyway: %s", idx, pnode, msg)
10823 def Exec(self, feedback_fn):
10824 """Modifies an instance.
10826 All parameters take effect only at the next restart of the instance.
10829 # Process here the warnings from CheckPrereq, as we don't have a
10830 # feedback_fn there.
10831 for warn in self.warn:
10832 feedback_fn("WARNING: %s" % warn)
10835 instance = self.instance
10837 for disk_op, disk_dict in self.op.disks:
10838 if disk_op == constants.DDM_REMOVE:
10839 # remove the last disk
10840 device = instance.disks.pop()
10841 device_idx = len(instance.disks)
10842 for node, disk in device.ComputeNodeTree(instance.primary_node):
10843 self.cfg.SetDiskID(disk, node)
10844 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10846 self.LogWarning("Could not remove disk/%d on node %s: %s,"
10847 " continuing anyway", device_idx, node, msg)
10848 result.append(("disk/%d" % device_idx, "remove"))
10849 elif disk_op == constants.DDM_ADD:
10851 if instance.disk_template in (constants.DT_FILE,
10852 constants.DT_SHARED_FILE):
10853 file_driver, file_path = instance.disks[0].logical_id
10854 file_path = os.path.dirname(file_path)
10856 file_driver = file_path = None
10857 disk_idx_base = len(instance.disks)
10858 new_disk = _GenerateDiskTemplate(self,
10859 instance.disk_template,
10860 instance.name, instance.primary_node,
10861 instance.secondary_nodes,
10865 disk_idx_base, feedback_fn)[0]
10866 instance.disks.append(new_disk)
10867 info = _GetInstanceInfoText(instance)
10869 logging.info("Creating volume %s for instance %s",
10870 new_disk.iv_name, instance.name)
10871 # Note: this needs to be kept in sync with _CreateDisks
10873 for node in instance.all_nodes:
10874 f_create = node == instance.primary_node
10876 _CreateBlockDev(self, node, instance, new_disk,
10877 f_create, info, f_create)
10878 except errors.OpExecError, err:
10879 self.LogWarning("Failed to create volume %s (%s) on"
10881 new_disk.iv_name, new_disk, node, err)
10882 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10883 (new_disk.size, new_disk.mode)))
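# Illustrative example of the feedback entry appended above (values are
# made up): ("disk/1", "add:size=10240,mode=rw")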
10885 # change a given disk
10886 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10887 result.append(("disk.mode/%d" % disk_op,
10888 disk_dict[constants.IDISK_MODE]))
10890 if self.op.disk_template:
10891 r_shut = _ShutdownInstanceDisks(self, instance)
10893 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10894 " proceed with disk template conversion")
10895 mode = (instance.disk_template, self.op.disk_template)
10897 self._DISK_CONVERSIONS[mode](self, feedback_fn)
10899 self.cfg.ReleaseDRBDMinors(instance.name)
10901 result.append(("disk_template", self.op.disk_template))
10904 for nic_op, nic_dict in self.op.nics:
10905 if nic_op == constants.DDM_REMOVE:
10906 # remove the last nic
10907 del instance.nics[-1]
10908 result.append(("nic.%d" % len(instance.nics), "remove"))
10909 elif nic_op == constants.DDM_ADD:
10910 # mac and bridge should be set by now
10911 mac = nic_dict[constants.INIC_MAC]
10912 ip = nic_dict.get(constants.INIC_IP, None)
10913 nicparams = self.nic_pinst[constants.DDM_ADD]
10914 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10915 instance.nics.append(new_nic)
10916 result.append(("nic.%d" % (len(instance.nics) - 1),
10917 "add:mac=%s,ip=%s,mode=%s,link=%s" %
10918 (new_nic.mac, new_nic.ip,
10919 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10920 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10923 for key in (constants.INIC_MAC, constants.INIC_IP):
10924 if key in nic_dict:
10925 setattr(instance.nics[nic_op], key, nic_dict[key])
10926 if nic_op in self.nic_pinst:
10927 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10928 for key, val in nic_dict.iteritems():
10929 result.append(("nic.%s/%d" % (key, nic_op), val))
10932 if self.op.hvparams:
10933 instance.hvparams = self.hv_inst
10934 for key, val in self.op.hvparams.iteritems():
10935 result.append(("hv/%s" % key, val))
10938 if self.op.beparams:
10939 instance.beparams = self.be_inst
10940 for key, val in self.op.beparams.iteritems():
10941 result.append(("be/%s" % key, val))
10944 if self.op.os_name:
10945 instance.os = self.op.os_name
10948 if self.op.osparams:
10949 instance.osparams = self.os_inst
10950 for key, val in self.op.osparams.iteritems():
10951 result.append(("os/%s" % key, val))
10953 self.cfg.Update(instance, feedback_fn)
10957 _DISK_CONVERSIONS = {
10958 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10959 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10963 class LUBackupQuery(NoHooksLU):
10964 """Query the exports list
10969 def ExpandNames(self):
10970 self.needed_locks = {}
10971 self.share_locks[locking.LEVEL_NODE] = 1
10972 if not self.op.nodes:
10973 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10975 self.needed_locks[locking.LEVEL_NODE] = \
10976 _GetWantedNodes(self, self.op.nodes)
10978 def Exec(self, feedback_fn):
10979 """Compute the list of all the exported system images.
10982 @return: a dictionary with the structure node->(export-list)
10983 where export-list is a list of the instances exported on that node
10987 self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
10988 rpcresult = self.rpc.call_export_list(self.nodes)
10990 for node in rpcresult:
10991 if rpcresult[node].fail_msg:
10992 result[node] = False
10994 result[node] = rpcresult[node].payload
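# Illustrative sketch of the mapping built above (node and instance names
# are made up): {"node1": ["inst1", "inst2"], "node2": False}
# where False marks a node whose export list could not be retrieved.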
10999 class LUBackupPrepare(NoHooksLU):
11000 """Prepares an instance for an export and returns useful information.
11005 def ExpandNames(self):
11006 self._ExpandAndLockInstance()
11008 def CheckPrereq(self):
11009 """Check prerequisites.
11012 instance_name = self.op.instance_name
11014 self.instance = self.cfg.GetInstanceInfo(instance_name)
11015 assert self.instance is not None, \
11016 "Cannot retrieve locked instance %s" % self.op.instance_name
11017 _CheckNodeOnline(self, self.instance.primary_node)
11019 self._cds = _GetClusterDomainSecret()
11021 def Exec(self, feedback_fn):
11022 """Prepares an instance for an export.
11025 instance = self.instance
11027 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11028 salt = utils.GenerateSecret(8)
11030 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11031 result = self.rpc.call_x509_cert_create(instance.primary_node,
11032 constants.RIE_CERT_VALIDITY)
11033 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11035 (name, cert_pem) = result.payload
11037 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11041 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11042 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11044 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11050 class LUBackupExport(LogicalUnit):
11051 """Export an instance to an image in the cluster.
11054 HPATH = "instance-export"
11055 HTYPE = constants.HTYPE_INSTANCE
11058 def CheckArguments(self):
11059 """Check the arguments.
11062 self.x509_key_name = self.op.x509_key_name
11063 self.dest_x509_ca_pem = self.op.destination_x509_ca
11065 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11066 if not self.x509_key_name:
11067 raise errors.OpPrereqError("Missing X509 key name for encryption",
11068 errors.ECODE_INVAL)
11070 if not self.dest_x509_ca_pem:
11071 raise errors.OpPrereqError("Missing destination X509 CA",
11072 errors.ECODE_INVAL)
11074 def ExpandNames(self):
11075 self._ExpandAndLockInstance()
11077 # Lock all nodes for local exports
11078 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11079 # FIXME: lock only instance primary and destination node
11081 # Sad but true, for now we have to lock all nodes, as we don't know where
11082 # the previous export might be, and in this LU we search for it and
11083 # remove it from its current node. In the future we could fix this by:
11084 # - making a tasklet to search (share-lock all), then create the
11085 # new one, then one to remove, after
11086 # - removing the removal operation altogether
11087 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11089 def DeclareLocks(self, level):
11090 """Last minute lock declaration."""
11091 # All nodes are locked anyway, so nothing to do here.
11093 def BuildHooksEnv(self):
11094 """Build hooks env.
11096 This will run on the master, primary node and target node.
11100 "EXPORT_MODE": self.op.mode,
11101 "EXPORT_NODE": self.op.target_node,
11102 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11103 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11104 # TODO: Generic function for boolean env variables
11105 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11108 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11112 def BuildHooksNodes(self):
11113 """Build hooks nodes.
11116 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11118 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11119 nl.append(self.op.target_node)
11123 def CheckPrereq(self):
11124 """Check prerequisites.
11126 This checks that the instance and node names are valid.
11129 instance_name = self.op.instance_name
11131 self.instance = self.cfg.GetInstanceInfo(instance_name)
11132 assert self.instance is not None, \
11133 "Cannot retrieve locked instance %s" % self.op.instance_name
11134 _CheckNodeOnline(self, self.instance.primary_node)
11136 if (self.op.remove_instance and self.instance.admin_up and
11137 not self.op.shutdown):
11138 raise errors.OpPrereqError("Can not remove instance without shutting it"
11141 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11142 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11143 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11144 assert self.dst_node is not None
11146 _CheckNodeOnline(self, self.dst_node.name)
11147 _CheckNodeNotDrained(self, self.dst_node.name)
11150 self.dest_disk_info = None
11151 self.dest_x509_ca = None
11153 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11154 self.dst_node = None
11156 if len(self.op.target_node) != len(self.instance.disks):
11157 raise errors.OpPrereqError(("Received destination information for %s"
11158 " disks, but instance %s has %s disks") %
11159 (len(self.op.target_node), instance_name,
11160 len(self.instance.disks)),
11161 errors.ECODE_INVAL)
11163 cds = _GetClusterDomainSecret()
11165 # Check X509 key name
11167 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11168 except (TypeError, ValueError), err:
11169 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11171 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11172 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11173 errors.ECODE_INVAL)
11175 # Load and verify CA
11177 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11178 except OpenSSL.crypto.Error, err:
11179 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11180 (err, ), errors.ECODE_INVAL)
11182 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11183 if errcode is not None:
11184 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11185 (msg, ), errors.ECODE_INVAL)
11187 self.dest_x509_ca = cert
11189 # Verify target information
11191 for idx, disk_data in enumerate(self.op.target_node):
11193 (host, port, magic) = \
11194 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11195 except errors.GenericError, err:
11196 raise errors.OpPrereqError("Target info for disk %s: %s" %
11197 (idx, err), errors.ECODE_INVAL)
11199 disk_info.append((host, port, magic))
11201 assert len(disk_info) == len(self.op.target_node)
11202 self.dest_disk_info = disk_info
11205 raise errors.ProgrammerError("Unhandled export mode %r" %
11208 # instance disk type verification
11209 # TODO: Implement export support for file-based disks
11210 for disk in self.instance.disks:
11211 if disk.dev_type == constants.LD_FILE:
11212 raise errors.OpPrereqError("Export not supported for instances with"
11213 " file-based disks", errors.ECODE_INVAL)
11215 def _CleanupExports(self, feedback_fn):
11216 """Removes exports of current instance from all other nodes.
11218 If an instance in a cluster with nodes A..D was exported to node C, its
11219 exports will be removed from the nodes A, B and D.
11222 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11224 nodelist = self.cfg.GetNodeList()
11225 nodelist.remove(self.dst_node.name)
11227 # on one-node clusters nodelist will be empty after the removal
11228 # if we proceed the backup would be removed because OpBackupQuery
11229 # substitutes an empty list with the full cluster node list.
11230 iname = self.instance.name
11232 feedback_fn("Removing old exports for instance %s" % iname)
11233 exportlist = self.rpc.call_export_list(nodelist)
11234 for node in exportlist:
11235 if exportlist[node].fail_msg:
11237 if iname in exportlist[node].payload:
11238 msg = self.rpc.call_export_remove(node, iname).fail_msg
11240 self.LogWarning("Could not remove older export for instance %s"
11241 " on node %s: %s", iname, node, msg)
11243 def Exec(self, feedback_fn):
11244 """Export an instance to an image in the cluster.
11247 assert self.op.mode in constants.EXPORT_MODES
11249 instance = self.instance
11250 src_node = instance.primary_node
11252 if self.op.shutdown:
11253 # shutdown the instance, but not the disks
11254 feedback_fn("Shutting down instance %s" % instance.name)
11255 result = self.rpc.call_instance_shutdown(src_node, instance,
11256 self.op.shutdown_timeout)
11257 # TODO: Maybe ignore failures if ignore_remove_failures is set
11258 result.Raise("Could not shutdown instance %s on"
11259 " node %s" % (instance.name, src_node))
11261 # set the disks ID correctly since call_instance_start needs the
11262 # correct drbd minor to create the symlinks
11263 for disk in instance.disks:
11264 self.cfg.SetDiskID(disk, src_node)
11266 activate_disks = (not instance.admin_up)
11269 # Activate the instance disks if we're exporting a stopped instance
11270 feedback_fn("Activating disks for %s" % instance.name)
11271 _StartInstanceDisks(self, instance, None)
11274 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11277 helper.CreateSnapshots()
11279 if (self.op.shutdown and instance.admin_up and
11280 not self.op.remove_instance):
11281 assert not activate_disks
11282 feedback_fn("Starting instance %s" % instance.name)
11283 result = self.rpc.call_instance_start(src_node, instance,
11285 msg = result.fail_msg
11287 feedback_fn("Failed to start instance: %s" % msg)
11288 _ShutdownInstanceDisks(self, instance)
11289 raise errors.OpExecError("Could not start instance: %s" % msg)
11291 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11292 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11293 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11294 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11295 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11297 (key_name, _, _) = self.x509_key_name
11300 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11303 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11304 key_name, dest_ca_pem,
11309 # Check for backwards compatibility
11310 assert len(dresults) == len(instance.disks)
11311 assert compat.all(isinstance(i, bool) for i in dresults), \
11312 "Not all results are boolean: %r" % dresults
11316 feedback_fn("Deactivating disks for %s" % instance.name)
11317 _ShutdownInstanceDisks(self, instance)
11319 if not (compat.all(dresults) and fin_resu):
11322 failures.append("export finalization")
11323 if not compat.all(dresults):
11324 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11326 failures.append("disk export: disk(s) %s" % fdsk)
11328 raise errors.OpExecError("Export failed, errors in %s" %
11329 utils.CommaJoin(failures))
11331 # At this point, the export was successful, we can cleanup/finish
11333 # Remove instance if requested
11334 if self.op.remove_instance:
11335 feedback_fn("Removing instance %s" % instance.name)
11336 _RemoveInstance(self, feedback_fn, instance,
11337 self.op.ignore_remove_failures)
11339 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11340 self._CleanupExports(feedback_fn)
11342 return fin_resu, dresults
11345 class LUBackupRemove(NoHooksLU):
11346 """Remove exports related to the named instance.
11351 def ExpandNames(self):
11352 self.needed_locks = {}
11353 # We need all nodes to be locked in order for RemoveExport to work, but we
11354 # don't need to lock the instance itself, as nothing will happen to it (and
11355 # we can remove exports also for a removed instance)
11356 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11358 def Exec(self, feedback_fn):
11359 """Remove any export.
11362 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11363 # If the instance was not found we'll try with the name that was passed in.
11364 # This will only work if it was an FQDN, though.
11366 if not instance_name:
11368 instance_name = self.op.instance_name
11370 locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
11371 exportlist = self.rpc.call_export_list(locked_nodes)
11373 for node in exportlist:
11374 msg = exportlist[node].fail_msg
11376 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11378 if instance_name in exportlist[node].payload:
11380 result = self.rpc.call_export_remove(node, instance_name)
11381 msg = result.fail_msg
11383 logging.error("Could not remove export for instance %s"
11384 " on node %s: %s", instance_name, node, msg)
11386 if fqdn_warn and not found:
11387 feedback_fn("Export not found. If trying to remove an export belonging"
11388 " to a deleted instance please use its Fully Qualified"
11392 class LUGroupAdd(LogicalUnit):
11393 """Logical unit for creating node groups.
11396 HPATH = "group-add"
11397 HTYPE = constants.HTYPE_GROUP
11400 def ExpandNames(self):
11401 # We need the new group's UUID here so that we can create and acquire the
11402 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11403 # that it should not check whether the UUID exists in the configuration.
11404 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11405 self.needed_locks = {}
11406 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11408 def CheckPrereq(self):
11409 """Check prerequisites.
11411 This checks that the given group name is not an existing node group
11416 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11417 except errors.OpPrereqError:
11420 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11421 " node group (UUID: %s)" %
11422 (self.op.group_name, existing_uuid),
11423 errors.ECODE_EXISTS)
11425 if self.op.ndparams:
11426 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11428 def BuildHooksEnv(self):
11429 """Build hooks env.
11433 "GROUP_NAME": self.op.group_name,
11436 def BuildHooksNodes(self):
11437 """Build hooks nodes.
11440 mn = self.cfg.GetMasterNode()
11441 return ([mn], [mn])
11443 def Exec(self, feedback_fn):
11444 """Add the node group to the cluster.
11447 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11448 uuid=self.group_uuid,
11449 alloc_policy=self.op.alloc_policy,
11450 ndparams=self.op.ndparams)
11452 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11453 del self.remove_locks[locking.LEVEL_NODEGROUP]
11456 class LUGroupAssignNodes(NoHooksLU):
11457 """Logical unit for assigning nodes to groups.
11462 def ExpandNames(self):
11463 # These raise errors.OpPrereqError on their own:
11464 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11465 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11467 # We want to lock all the affected nodes and groups. We have readily
11468 # available the list of nodes, and the *destination* group. To gather the
11469 # list of "source" groups, we need to fetch node information later on.
11470 self.needed_locks = {
11471 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11472 locking.LEVEL_NODE: self.op.nodes,
11475 def DeclareLocks(self, level):
11476 if level == locking.LEVEL_NODEGROUP:
11477 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11479 # Try to get all affected nodes' groups without having the group or node
11480 # lock yet. Needs verification later in the code flow.
11481 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11483 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11485 def CheckPrereq(self):
11486 """Check prerequisites.
11489 assert self.needed_locks[locking.LEVEL_NODEGROUP]
11490 assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
11491 frozenset(self.op.nodes))
11493 expected_locks = (set([self.group_uuid]) |
11494 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11495 actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
11496 if actual_locks != expected_locks:
11497 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11498 " current groups are '%s', used to be '%s'" %
11499 (utils.CommaJoin(expected_locks),
11500 utils.CommaJoin(actual_locks)))
11502 self.node_data = self.cfg.GetAllNodesInfo()
11503 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11504 instance_data = self.cfg.GetAllInstancesInfo()
11506 if self.group is None:
11507 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11508 (self.op.group_name, self.group_uuid))
11510 (new_splits, previous_splits) = \
11511 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11512 for node in self.op.nodes],
11513 self.node_data, instance_data)
11516 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11518 if not self.op.force:
11519 raise errors.OpExecError("The following instances get split by this"
11520 " change and --force was not given: %s" %
11523 self.LogWarning("This operation will split the following instances: %s",
11526 if previous_splits:
11527 self.LogWarning("In addition, these already-split instances continue"
11528 " to be split across groups: %s",
11529 utils.CommaJoin(utils.NiceSort(previous_splits)))
11531 def Exec(self, feedback_fn):
11532 """Assign nodes to a new group.
11535 for node in self.op.nodes:
11536 self.node_data[node].group = self.group_uuid
11538 # FIXME: Depends on side-effects of modifying the result of
11539 # C{cfg.GetAllNodesInfo}
11541 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11544 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11545 """Check for split instances after a node assignment.
11547 This method considers a series of node assignments as an atomic operation,
11548 and returns information about split instances after applying the set of
11551 In particular, it returns information about newly split instances, and
11552 instances that were already split, and remain so after the change.
11554 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11557 @type changes: list of (node_name, new_group_uuid) pairs.
11558 @param changes: list of node assignments to consider.
11559 @param node_data: a dict with data for all nodes
11560 @param instance_data: a dict with all instances to consider
11561 @rtype: a two-tuple
11562 @return: a list of instances that were previously okay but end up split as a
11563 consequence of this change, and a list of instances that were previously
11564 split and that this change does not fix.
11567 changed_nodes = dict((node, group) for node, group in changes
11568 if node_data[node].group != group)
11570 all_split_instances = set()
11571 previously_split_instances = set()
11573 def InstanceNodes(instance):
11574 return [instance.primary_node] + list(instance.secondary_nodes)
11576 for inst in instance_data.values():
11577 if inst.disk_template not in constants.DTS_INT_MIRROR:
11580 instance_nodes = InstanceNodes(inst)
11582 if len(set(node_data[node].group for node in instance_nodes)) > 1:
11583 previously_split_instances.add(inst.name)
11585 if len(set(changed_nodes.get(node, node_data[node].group)
11586 for node in instance_nodes)) > 1:
11587 all_split_instances.add(inst.name)
11589 return (list(all_split_instances - previously_split_instances),
11590 list(previously_split_instances & all_split_instances))
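# Illustrative sketch only (not part of the original module): how the split
# check above behaves for a hypothetical DRBD instance when just one of its
# nodes is moved to another group.  The namedtuple stand-ins provide only the
# attributes the check reads; all names and UUIDs below are made up, and the
# staticmethod is assumed to be reachable through the class as shown.
def _ExampleSplitCheck():
  import collections
  FakeNode = collections.namedtuple("FakeNode", ["group"])
  FakeInstance = collections.namedtuple(
    "FakeInstance", ["name", "disk_template", "primary_node",
                     "secondary_nodes"])
  node_data = {"node1": FakeNode(group="uuid-old"),
               "node2": FakeNode(group="uuid-old")}
  instance_data = {"inst1": FakeInstance(name="inst1",
                                         disk_template=constants.DT_DRBD8,
                                         primary_node="node1",
                                         secondary_nodes=["node2"])}
  # Moving only node1 to a new group leaves inst1 spanning two groups, so the
  # expected result is (["inst1"], []): newly split, nothing previously split.
  return LUGroupAssignNodes.CheckAssignmentForSplitInstances(
    [("node1", "uuid-new")], node_data, instance_data)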
11593 class _GroupQuery(_QueryBase):
11594 FIELDS = query.GROUP_FIELDS
11596 def ExpandNames(self, lu):
11597 lu.needed_locks = {}
11599 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11600 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11603 self.wanted = [name_to_uuid[name]
11604 for name in utils.NiceSort(name_to_uuid.keys())]
11606 # Accept names to be either names or UUIDs.
11609 all_uuid = frozenset(self._all_groups.keys())
11611 for name in self.names:
11612 if name in all_uuid:
11613 self.wanted.append(name)
11614 elif name in name_to_uuid:
11615 self.wanted.append(name_to_uuid[name])
11617 missing.append(name)
11620 raise errors.OpPrereqError("Some groups do not exist: %s" %
11621 utils.CommaJoin(missing),
11622 errors.ECODE_NOENT)
11624 def DeclareLocks(self, lu, level):
11627 def _GetQueryData(self, lu):
11628 """Computes the list of node groups and their attributes.
11631 do_nodes = query.GQ_NODE in self.requested_data
11632 do_instances = query.GQ_INST in self.requested_data
11634 group_to_nodes = None
11635 group_to_instances = None
11637 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11638 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11639 # latter GetAllInstancesInfo() is not enough, for we have to go through
11640 # instance->node. Hence, we will need to process nodes even if we only need
11641 # instance information.
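# For illustration (hypothetical names), the two mappings built below end up
# shaped like:
#   group_to_nodes = {"group-uuid-1": ["node1", "node2"]}
#   node_to_group  = {"node1": "group-uuid-1", "node2": "group-uuid-1"}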
11642 if do_nodes or do_instances:
11643 all_nodes = lu.cfg.GetAllNodesInfo()
11644 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11647 for node in all_nodes.values():
11648 if node.group in group_to_nodes:
11649 group_to_nodes[node.group].append(node.name)
11650 node_to_group[node.name] = node.group
11653 all_instances = lu.cfg.GetAllInstancesInfo()
11654 group_to_instances = dict((uuid, []) for uuid in self.wanted)
11656 for instance in all_instances.values():
11657 node = instance.primary_node
11658 if node in node_to_group:
11659 group_to_instances[node_to_group[node]].append(instance.name)
11662 # Do not pass on node information if it was not requested.
11663 group_to_nodes = None
11665 return query.GroupQueryData([self._all_groups[uuid]
11666 for uuid in self.wanted],
11667 group_to_nodes, group_to_instances)
11670 class LUGroupQuery(NoHooksLU):
11671 """Logical unit for querying node groups.
11676 def CheckArguments(self):
11677 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
11678 self.op.output_fields, False)
11680 def ExpandNames(self):
11681 self.gq.ExpandNames(self)
11683 def Exec(self, feedback_fn):
11684 return self.gq.OldStyleQuery(self)
11687 class LUGroupSetParams(LogicalUnit):
11688 """Modifies the parameters of a node group.
11691 HPATH = "group-modify"
11692 HTYPE = constants.HTYPE_GROUP
11695 def CheckArguments(self):
11698 self.op.alloc_policy,
11701 if all_changes.count(None) == len(all_changes):
11702 raise errors.OpPrereqError("Please pass at least one modification",
11703 errors.ECODE_INVAL)
11705 def ExpandNames(self):
11706 # This raises errors.OpPrereqError on its own:
11707 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11709 self.needed_locks = {
11710 locking.LEVEL_NODEGROUP: [self.group_uuid],
11713 def CheckPrereq(self):
11714 """Check prerequisites.
11717 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11719 if self.group is None:
11720 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11721 (self.op.group_name, self.group_uuid))
11723 if self.op.ndparams:
11724 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11725 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11726 self.new_ndparams = new_ndparams
11728 def BuildHooksEnv(self):
11729 """Build hooks env.
11733 "GROUP_NAME": self.op.group_name,
11734 "NEW_ALLOC_POLICY": self.op.alloc_policy,
11737 def BuildHooksNodes(self):
11738 """Build hooks nodes.
11741 mn = self.cfg.GetMasterNode()
11742 return ([mn], [mn])
11744 def Exec(self, feedback_fn):
11745 """Modifies the node group.
11750 if self.op.ndparams:
11751 self.group.ndparams = self.new_ndparams
11752 result.append(("ndparams", str(self.group.ndparams)))
11754 if self.op.alloc_policy:
11755 self.group.alloc_policy = self.op.alloc_policy
11757 self.cfg.Update(self.group, feedback_fn)
11762 class LUGroupRemove(LogicalUnit):
11763 HPATH = "group-remove"
11764 HTYPE = constants.HTYPE_GROUP
11767 def ExpandNames(self):
11768 # This will raise errors.OpPrereqError on its own:
11769 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11770 self.needed_locks = {
11771 locking.LEVEL_NODEGROUP: [self.group_uuid],
11774 def CheckPrereq(self):
11775 """Check prerequisites.
11777 This checks that the given group name exists as a node group, that it is
11778 empty (i.e., contains no nodes), and that it is not the last group of the
11782 # Verify that the group is empty.
11783 group_nodes = [node.name
11784 for node in self.cfg.GetAllNodesInfo().values()
11785 if node.group == self.group_uuid]
11788 raise errors.OpPrereqError("Group '%s' not empty, has the following"
11790 (self.op.group_name,
11791 utils.CommaJoin(utils.NiceSort(group_nodes))),
11792 errors.ECODE_STATE)
11794 # Verify the cluster would not be left group-less.
11795 if len(self.cfg.GetNodeGroupList()) == 1:
11796 raise errors.OpPrereqError("Group '%s' is the only group,"
11797 " cannot be removed" %
11798 self.op.group_name,
11799 errors.ECODE_STATE)
11801 def BuildHooksEnv(self):
11802 """Build hooks env.
11806 "GROUP_NAME": self.op.group_name,
11809 def BuildHooksNodes(self):
11810 """Build hooks nodes.
11813 mn = self.cfg.GetMasterNode()
11814 return ([mn], [mn])
11816 def Exec(self, feedback_fn):
11817 """Remove the node group.
11821 self.cfg.RemoveNodeGroup(self.group_uuid)
11822 except errors.ConfigurationError:
11823 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11824 (self.op.group_name, self.group_uuid))
11826 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11829 class LUGroupRename(LogicalUnit):
11830 HPATH = "group-rename"
11831 HTYPE = constants.HTYPE_GROUP
11834 def ExpandNames(self):
11835 # This raises errors.OpPrereqError on its own:
11836 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11838 self.needed_locks = {
11839 locking.LEVEL_NODEGROUP: [self.group_uuid],
11842 def CheckPrereq(self):
11843 """Check prerequisites.
11845 Ensures requested new name is not yet used.
11849 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11850 except errors.OpPrereqError:
11853 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11854 " node group (UUID: %s)" %
11855 (self.op.new_name, new_name_uuid),
11856 errors.ECODE_EXISTS)
11858 def BuildHooksEnv(self):
11859 """Build hooks env.
11863 "OLD_NAME": self.op.group_name,
11864 "NEW_NAME": self.op.new_name,
11867 def BuildHooksNodes(self):
11868 """Build hooks nodes.
11871 mn = self.cfg.GetMasterNode()
11873 all_nodes = self.cfg.GetAllNodesInfo()
11874 all_nodes.pop(mn, None)
11877 run_nodes.extend(node.name for node in all_nodes.values()
11878 if node.group == self.group_uuid)
11880 return (run_nodes, run_nodes)
11882 def Exec(self, feedback_fn):
11883 """Rename the node group.
11886 group = self.cfg.GetNodeGroup(self.group_uuid)
11889 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11890 (self.op.group_name, self.group_uuid))
11892 group.name = self.op.new_name
11893 self.cfg.Update(group, feedback_fn)
11895 return self.op.new_name
11898 class LUGroupEvacuate(LogicalUnit):
11899 HPATH = "group-evacuate"
11900 HTYPE = constants.HTYPE_GROUP
11903 def ExpandNames(self):
11904 # This raises errors.OpPrereqError on its own:
11905 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11907 if self.op.target_groups:
11908 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11909 self.op.target_groups)
11911 self.req_target_uuids = []
11913 if self.group_uuid in self.req_target_uuids:
11914 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
11915 " as a target group (targets are %s)" %
11917 utils.CommaJoin(self.req_target_uuids)),
11918 errors.ECODE_INVAL)
11920 if not self.op.iallocator:
11921 # Use default iallocator
11922 self.op.iallocator = self.cfg.GetDefaultIAllocator()
11924 if not self.op.iallocator:
11925 raise errors.OpPrereqError("No iallocator was specified, neither in the"
11926 " opcode nor as a cluster-wide default",
11927 errors.ECODE_INVAL)
11929 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
11930 self.needed_locks = {
11931 locking.LEVEL_INSTANCE: [],
11932 locking.LEVEL_NODEGROUP: [],
11933 locking.LEVEL_NODE: [],
11936 def DeclareLocks(self, level):
11937 if level == locking.LEVEL_INSTANCE:
11938 assert not self.needed_locks[locking.LEVEL_INSTANCE]
11940 # Lock instances optimistically, needs verification once node and group
11941 # locks have been acquired
11942 self.needed_locks[locking.LEVEL_INSTANCE] = \
11943 self.cfg.GetNodeGroupInstances(self.group_uuid)
11945 elif level == locking.LEVEL_NODEGROUP:
11946 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11948 if self.req_target_uuids:
11949 lock_groups = set([self.group_uuid] + self.req_target_uuids)
11951 # Lock all groups used by instances optimistically; this requires going
11952 # via the node before it's locked, requiring verification later on
11953 lock_groups.update(group_uuid
11954 for instance_name in
11955 self.glm.list_owned(locking.LEVEL_INSTANCE)
11957 self.cfg.GetInstanceNodeGroups(instance_name))
11959 # No target groups, need to lock all of them
11960 lock_groups = locking.ALL_SET
11962 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11964 elif level == locking.LEVEL_NODE:
11965 # This will only lock the nodes in the group to be evacuated which
11966 # contain actual instances
11967 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11968 self._LockInstancesNodes()
11970 # Lock all nodes in group to be evacuated
11971 assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
11972 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
11973 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11975 def CheckPrereq(self):
11976 owned_instances = frozenset(self.glm.list_owned(locking.LEVEL_INSTANCE))
11977 owned_groups = frozenset(self.glm.list_owned(locking.LEVEL_NODEGROUP))
11978 owned_nodes = frozenset(self.glm.list_owned(locking.LEVEL_NODE))
11980 assert owned_groups.issuperset(self.req_target_uuids)
11981 assert self.group_uuid in owned_groups
11983 # Check if locked instances are still correct
11984 wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
11985 if owned_instances != wanted_instances:
11986 raise errors.OpPrereqError("Instances in node group to be evacuated (%s)"
11987 " changed since locks were acquired, wanted"
11988 " %s, have %s; retry the operation" %
11990 utils.CommaJoin(wanted_instances),
11991 utils.CommaJoin(owned_instances)),
11992 errors.ECODE_STATE)
11994 # Get instance information
11995 self.instances = dict((name, self.cfg.GetInstanceInfo(name))
11996 for name in owned_instances)
11998 # Check if node groups for locked instances are still correct
11999 for instance_name in owned_instances:
12000 inst = self.instances[instance_name]
12001 assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
12002 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12003 assert owned_nodes.issuperset(inst.all_nodes), \
12004 "Instance %s's nodes changed while we kept the lock" % instance_name
12006 inst_groups = self.cfg.GetInstanceNodeGroups(instance_name)
12007 if not owned_groups.issuperset(inst_groups):
12008 raise errors.OpPrereqError("Instance %s's node groups changed since"
12009 " locks were acquired, current groups"
12010 " are '%s', owning groups '%s'; retry the"
12013 utils.CommaJoin(inst_groups),
12014 utils.CommaJoin(owned_groups)),
12015 errors.ECODE_STATE)
12017 if self.req_target_uuids:
12018 # User requested specific target groups
12019 self.target_uuids = self.req_target_uuids
12021 # All groups except the one to be evacuated are potential targets
12022 self.target_uuids = [group_uuid for group_uuid in owned_groups
12023 if group_uuid != self.group_uuid]
12025 if not self.target_uuids:
12026 raise errors.OpExecError("There are no possible target groups")
12028 def BuildHooksEnv(self):
12029 """Build hooks env.
12033 "GROUP_NAME": self.op.group_name,
12034 "TARGET_GROUPS": " ".join(self.target_uuids),
12037 def BuildHooksNodes(self):
12038 """Build hooks nodes.
12041 mn = self.cfg.GetMasterNode()
12043 assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
12045 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12047 return (run_nodes, run_nodes)
12049 def Exec(self, feedback_fn):
12050 instances = list(self.glm.list_owned(locking.LEVEL_INSTANCE))
12052 assert self.group_uuid not in self.target_uuids
12054 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12055 instances=instances, target_groups=self.target_uuids)
12057 ial.Run(self.op.iallocator)
12059 if not ial.success:
12060 raise errors.OpPrereqError("Can't compute group evacuation using"
12061 " iallocator '%s': %s" %
12062 (self.op.iallocator, ial.info),
12063 errors.ECODE_NORES)
12065 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12067 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12068 len(jobs), self.op.group_name)
12070 return ResultWithJobs(jobs)
12073 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
12074 """Generic tags LU.
12076 This is an abstract class which is the parent of all the other tags LUs.
12079 def ExpandNames(self):
12080 self.group_uuid = None
12081 self.needed_locks = {}
12082 if self.op.kind == constants.TAG_NODE:
12083 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12084 self.needed_locks[locking.LEVEL_NODE] = self.op.name
12085 elif self.op.kind == constants.TAG_INSTANCE:
12086 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12087 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12088 elif self.op.kind == constants.TAG_NODEGROUP:
12089 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12091 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12092 # not possible to acquire the BGL based on opcode parameters)
12094 def CheckPrereq(self):
12095 """Check prerequisites.
12098 if self.op.kind == constants.TAG_CLUSTER:
12099 self.target = self.cfg.GetClusterInfo()
12100 elif self.op.kind == constants.TAG_NODE:
12101 self.target = self.cfg.GetNodeInfo(self.op.name)
12102 elif self.op.kind == constants.TAG_INSTANCE:
12103 self.target = self.cfg.GetInstanceInfo(self.op.name)
12104 elif self.op.kind == constants.TAG_NODEGROUP:
12105 self.target = self.cfg.GetNodeGroup(self.group_uuid)
12107 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12108 str(self.op.kind), errors.ECODE_INVAL)
12111 class LUTagsGet(TagsLU):
12112 """Returns the tags of a given object.
12117 def ExpandNames(self):
12118 TagsLU.ExpandNames(self)
12120 # Share locks as this is only a read operation
12121 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
12123 def Exec(self, feedback_fn):
12124 """Returns the tag list.
12127 return list(self.target.GetTags())
12130 class LUTagsSearch(NoHooksLU):
12131 """Searches the tags for a given pattern.
12136 def ExpandNames(self):
12137 self.needed_locks = {}
12139 def CheckPrereq(self):
12140 """Check prerequisites.
12142 This checks the pattern passed for validity by compiling it.
12146 self.re = re.compile(self.op.pattern)
12147 except re.error, err:
12148 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12149 (self.op.pattern, err), errors.ECODE_INVAL)
12151 def Exec(self, feedback_fn):
12152 """Returns the tag list.
12156 tgts = [("/cluster", cfg.GetClusterInfo())]
12157 ilist = cfg.GetAllInstancesInfo().values()
12158 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12159 nlist = cfg.GetAllNodesInfo().values()
12160 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12161 tgts.extend(("/nodegroup/%s" % n.name, n)
12162 for n in cfg.GetAllNodeGroupsInfo().values())
12164 for path, target in tgts:
12165 for tag in target.GetTags():
12166 if self.re.search(tag):
12167 results.append((path, tag))
12171 class LUTagsSet(TagsLU):
12172 """Sets a tag on a given object.
12177 def CheckPrereq(self):
12178 """Check prerequisites.
12180 This checks the type and length of the tag name and value.
12183 TagsLU.CheckPrereq(self)
12184 for tag in self.op.tags:
12185 objects.TaggableObject.ValidateTag(tag)
12187 def Exec(self, feedback_fn):
12192 for tag in self.op.tags:
12193 self.target.AddTag(tag)
12194 except errors.TagError, err:
12195 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12196 self.cfg.Update(self.target, feedback_fn)
12199 class LUTagsDel(TagsLU):
12200 """Delete a list of tags from a given object.
12205 def CheckPrereq(self):
12206 """Check prerequisites.
12208 This checks that we have the given tag.
12211 TagsLU.CheckPrereq(self)
12212 for tag in self.op.tags:
12213 objects.TaggableObject.ValidateTag(tag)
12214 del_tags = frozenset(self.op.tags)
12215 cur_tags = self.target.GetTags()
12217 diff_tags = del_tags - cur_tags
12219 diff_names = ("'%s'" % i for i in sorted(diff_tags))
12220 raise errors.OpPrereqError("Tag(s) %s not found" %
12221 (utils.CommaJoin(diff_names), ),
12222 errors.ECODE_NOENT)
12224 def Exec(self, feedback_fn):
12225 """Remove the tag from the object.
12228 for tag in self.op.tags:
12229 self.target.RemoveTag(tag)
12230 self.cfg.Update(self.target, feedback_fn)
12233 class LUTestDelay(NoHooksLU):
12234 """Sleep for a specified amount of time.
12236 This LU sleeps on the master and/or nodes for a specified amount of
12242 def ExpandNames(self):
12243 """Expand names and set required locks.
12245 This expands the node list, if any.
12248 self.needed_locks = {}
12249 if self.op.on_nodes:
12250 # _GetWantedNodes can be used here, but is not always appropriate to use
12251 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12252 # more information.
12253 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12254 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12256 def _TestDelay(self):
12257 """Do the actual sleep.
12260 if self.op.on_master:
12261 if not utils.TestDelay(self.op.duration):
12262 raise errors.OpExecError("Error during master delay test")
12263 if self.op.on_nodes:
12264 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12265 for node, node_result in result.items():
12266 node_result.Raise("Failure during rpc call to node %s" % node)
12268 def Exec(self, feedback_fn):
12269 """Execute the test delay opcode, with the wanted repetitions.
12272 if self.op.repeat == 0:
12275 top_value = self.op.repeat - 1
12276 for i in range(self.op.repeat):
12277 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12281 class LUTestJqueue(NoHooksLU):
12282 """Utility LU to test some aspects of the job queue.
12287 # Must be lower than default timeout for WaitForJobChange to see whether it
12288 # notices changed jobs
12289 _CLIENT_CONNECT_TIMEOUT = 20.0
12290 _CLIENT_CONFIRM_TIMEOUT = 60.0
12293 def _NotifyUsingSocket(cls, cb, errcls):
12294 """Opens a Unix socket and waits for another program to connect.
12297 @param cb: Callback to send socket name to client
12298 @type errcls: class
12299 @param errcls: Exception class to use for errors
12302 # Using a temporary directory as there's no easy way to create temporary
12303 # sockets without writing a custom loop around tempfile.mktemp and
12305 tmpdir = tempfile.mkdtemp()
12307 tmpsock = utils.PathJoin(tmpdir, "sock")
12309 logging.debug("Creating temporary socket at %s", tmpsock)
12310 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12315 # Send details to client
12318 # Wait for client to connect before continuing
12319 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12321 (conn, _) = sock.accept()
12322 except socket.error, err:
12323 raise errcls("Client didn't connect in time (%s)" % err)
12327 # Remove as soon as client is connected
12328 shutil.rmtree(tmpdir)
12330 # Wait for client to close
12333 # pylint: disable-msg=E1101
12334 # Instance of '_socketobject' has no ... member
12335 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12337 except socket.error, err:
12338 raise errcls("Client failed to confirm notification (%s)" % err)
12342 def _SendNotification(self, test, arg, sockname):
12343 """Sends a notification to the client.
12346 @param test: Test name
12347 @param arg: Test argument (depends on test)
12348 @type sockname: string
12349 @param sockname: Socket path
12352 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12354 def _Notify(self, prereq, test, arg):
12355 """Notifies the client of a test.
12358 @param prereq: Whether this is a prereq-phase test
12360 @param test: Test name
12361 @param arg: Test argument (depends on test)
12365 errcls = errors.OpPrereqError
12367 errcls = errors.OpExecError
12369 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12373 def CheckArguments(self):
12374 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12375 self.expandnames_calls = 0
12377 def ExpandNames(self):
12378 checkargs_calls = getattr(self, "checkargs_calls", 0)
12379 if checkargs_calls < 1:
12380 raise errors.ProgrammerError("CheckArguments was not called")
12382 self.expandnames_calls += 1
12384 if self.op.notify_waitlock:
12385 self._Notify(True, constants.JQT_EXPANDNAMES, None)
12387 self.LogInfo("Expanding names")
12389 # Get lock on master node (just to get a lock, not for a particular reason)
12390 self.needed_locks = {
12391 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12394 def Exec(self, feedback_fn):
12395 if self.expandnames_calls < 1:
12396 raise errors.ProgrammerError("ExpandNames was not called")
12398 if self.op.notify_exec:
12399 self._Notify(False, constants.JQT_EXEC, None)
12401 self.LogInfo("Executing")
12403 if self.op.log_messages:
12404 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12405 for idx, msg in enumerate(self.op.log_messages):
12406 self.LogInfo("Sending log message %s", idx + 1)
12407 feedback_fn(constants.JQT_MSGPREFIX + msg)
12408 # Report how many test messages have been sent
12409 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12412 raise errors.OpExecError("Opcode failure was requested")
12417 class IAllocator(object):
12418 """IAllocator framework.
12420 An IAllocator instance has four sets of attributes:
12421 - cfg that is needed to query the cluster
12422 - input data (all members of the _KEYS class attribute are required)
12423 - four buffer attributes (in|out_data|text), that represent the
12424 input (to the external script) in text and data structure format,
12425 and the output from it, again in two formats
12426 - the result variables from the script (success, info, nodes) for
12430 # pylint: disable-msg=R0902
12431 # lots of instance attributes
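# Rough life-cycle, for orientation (all names are attributes and methods
# defined below):
#   1. __init__ stores the mode-specific keyword arguments and calls
#      _BuildInputData, which fills self.in_data and self.in_text.
#   2. Run() ships self.in_text to the chosen allocator script via RPC and
#      stores the reply in self.out_text.
#   3. _ValidateResult() parses the reply into self.out_data and exposes
#      self.success, self.info and self.result.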
12433 def __init__(self, cfg, rpc, mode, **kwargs):
12436 # init buffer variables
12437 self.in_text = self.out_text = self.in_data = self.out_data = None
12438 # init all input fields so that pylint is happy
12440 self.memory = self.disks = self.disk_template = None
12441 self.os = self.tags = self.nics = self.vcpus = None
12442 self.hypervisor = None
12443 self.relocate_from = None
12445 self.evac_nodes = None
12446 self.instances = None
12447 self.evac_mode = None
12448 self.target_groups = []
12450 self.required_nodes = None
12451 # init result fields
12452 self.success = self.info = self.result = None
12455 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12457 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12458 " IAllocator" % self.mode)
12460 keyset = [n for (n, _) in keydata]
12463 if key not in keyset:
12464 raise errors.ProgrammerError("Invalid input parameter '%s' to"
12465 " IAllocator" % key)
12466 setattr(self, key, kwargs[key])
12469 if key not in kwargs:
12470 raise errors.ProgrammerError("Missing input parameter '%s' to"
12471 " IAllocator" % key)
12472 self._BuildInputData(compat.partial(fn, self), keydata)
12474 def _ComputeClusterData(self):
12475 """Compute the generic allocator input data.
12477 This is the data that is independent of the actual operation.
12481 cluster_info = cfg.GetClusterInfo()
12484 "version": constants.IALLOCATOR_VERSION,
12485 "cluster_name": cfg.GetClusterName(),
12486 "cluster_tags": list(cluster_info.GetTags()),
12487 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12488 # we don't have job IDs
12490 ninfo = cfg.GetAllNodesInfo()
12491 iinfo = cfg.GetAllInstancesInfo().values()
12492 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12495 node_list = [n.name for n in ninfo.values() if n.vm_capable]
12497 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12498 hypervisor_name = self.hypervisor
12499 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12500 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12502 hypervisor_name = cluster_info.enabled_hypervisors[0]
12504 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12507 self.rpc.call_all_instances_info(node_list,
12508 cluster_info.enabled_hypervisors)
12510 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12512 config_ndata = self._ComputeBasicNodeData(ninfo)
12513 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12514 i_list, config_ndata)
12515 assert len(data["nodes"]) == len(ninfo), \
12516 "Incomplete node data computed"
12518 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12520 self.in_data = data
12523 def _ComputeNodeGroupData(cfg):
12524 """Compute node groups data.
12527 ng = dict((guuid, {
12528 "name": gdata.name,
12529 "alloc_policy": gdata.alloc_policy,
12531 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12536 def _ComputeBasicNodeData(node_cfg):
12537 """Compute global node data.
12540 @returns: a dict mapping each node name to its static attribute dict
12543 # fill in static (config-based) values
12544 node_results = dict((ninfo.name, {
12545 "tags": list(ninfo.GetTags()),
12546 "primary_ip": ninfo.primary_ip,
12547 "secondary_ip": ninfo.secondary_ip,
12548 "offline": ninfo.offline,
12549 "drained": ninfo.drained,
12550 "master_candidate": ninfo.master_candidate,
12551 "group": ninfo.group,
12552 "master_capable": ninfo.master_capable,
12553 "vm_capable": ninfo.vm_capable,
12555 for ninfo in node_cfg.values())
12557 return node_results
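# For illustration, each entry produced above looks roughly like this
# (hypothetical values, keys as assembled in the dict comprehension):
#   "node1.example.com": {"tags": [], "primary_ip": "192.0.2.1",
#                         "secondary_ip": "192.0.2.101", "offline": False,
#                         "drained": False, "master_candidate": True,
#                         "group": "group-uuid-1", "master_capable": True,
#                         "vm_capable": True}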
12560 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
12562 """Compute global node data.
12564 @param node_results: the basic node structures as filled from the config
12567 # make a copy of the current dict
12568 node_results = dict(node_results)
12569 for nname, nresult in node_data.items():
12570 assert nname in node_results, "Missing basic data for node %s" % nname
12571 ninfo = node_cfg[nname]
12573 if not (ninfo.offline or ninfo.drained):
12574 nresult.Raise("Can't get data for node %s" % nname)
12575 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
12577 remote_info = nresult.payload
12579 for attr in ["memory_total", "memory_free", "memory_dom0",
12580 "vg_size", "vg_free", "cpu_total"]:
12581 if attr not in remote_info:
12582 raise errors.OpExecError("Node '%s' didn't return attribute"
12583 " '%s'" % (nname, attr))
12584 if not isinstance(remote_info[attr], int):
12585 raise errors.OpExecError("Node '%s' returned invalid value"
12587 (nname, attr, remote_info[attr]))
12588 # compute memory used by primary instances
12589 i_p_mem = i_p_up_mem = 0
12590 for iinfo, beinfo in i_list:
12591 if iinfo.primary_node == nname:
12592 i_p_mem += beinfo[constants.BE_MEMORY]
12593 if iinfo.name not in node_iinfo[nname].payload:
12596 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
12597 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
12598 remote_info["memory_free"] -= max(0, i_mem_diff)
12601 i_p_up_mem += beinfo[constants.BE_MEMORY]
12603 # compute memory used by instances
12605 "total_memory": remote_info["memory_total"],
12606 "reserved_memory": remote_info["memory_dom0"],
12607 "free_memory": remote_info["memory_free"],
12608 "total_disk": remote_info["vg_size"],
12609 "free_disk": remote_info["vg_free"],
12610 "total_cpus": remote_info["cpu_total"],
12611 "i_pri_memory": i_p_mem,
12612 "i_pri_up_memory": i_p_up_mem,
12614 pnr_dyn.update(node_results[nname])
12615 node_results[nname] = pnr_dyn
12617 return node_results
12620 def _ComputeInstanceData(cluster_info, i_list):
12621 """Compute global instance data.
12625 for iinfo, beinfo in i_list:
12627 for nic in iinfo.nics:
12628 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
12632 "mode": filled_params[constants.NIC_MODE],
12633 "link": filled_params[constants.NIC_LINK],
12635 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
12636 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
12637 nic_data.append(nic_dict)
12639 "tags": list(iinfo.GetTags()),
12640 "admin_up": iinfo.admin_up,
12641 "vcpus": beinfo[constants.BE_VCPUS],
12642 "memory": beinfo[constants.BE_MEMORY],
12644 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
12646 "disks": [{constants.IDISK_SIZE: dsk.size,
12647 constants.IDISK_MODE: dsk.mode}
12648 for dsk in iinfo.disks],
12649 "disk_template": iinfo.disk_template,
12650 "hypervisor": iinfo.hypervisor,
12652 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
12654 instance_data[iinfo.name] = pir
12656 return instance_data
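# For illustration, each value in the returned dict looks roughly like this
# (hypothetical values, keys as assembled above):
#   {"tags": [], "admin_up": True, "vcpus": 1, "memory": 512,
#    "nodes": ["node1", "node2"],
#    "disks": [{"size": 10240, "mode": "rw"}],
#    "disk_template": "drbd", "hypervisor": "xen-pvm", ...}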
12658 def _AddNewInstance(self):
12659 """Add new instance data to allocator structure.
12661 This in combination with _ComputeClusterData will create the
12662 correct structure needed as input for the allocator.
12664 The checks for the completeness of the opcode must have already been
12668 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
12670 if self.disk_template in constants.DTS_INT_MIRROR:
12671 self.required_nodes = 2
12673 self.required_nodes = 1
12677 "disk_template": self.disk_template,
12680 "vcpus": self.vcpus,
12681 "memory": self.memory,
12682 "disks": self.disks,
12683 "disk_space_total": disk_space,
12685 "required_nodes": self.required_nodes,
12686 "hypervisor": self.hypervisor,
12691 def _AddRelocateInstance(self):
12692 """Add relocate instance data to allocator structure.
12694 This in combination with _ComputeClusterData will create the
12695 correct structure needed as input for the allocator.
12697 The checks for the completeness of the opcode must have already been
12701 instance = self.cfg.GetInstanceInfo(self.name)
12702 if instance is None:
12703 raise errors.ProgrammerError("Unknown instance '%s' passed to"
12704 " IAllocator" % self.name)
12706 if instance.disk_template not in constants.DTS_MIRRORED:
12707 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
12708 errors.ECODE_INVAL)
12710 if instance.disk_template in constants.DTS_INT_MIRROR and \
12711 len(instance.secondary_nodes) != 1:
12712 raise errors.OpPrereqError("Instance has not exactly one secondary node",
12713 errors.ECODE_STATE)
12715 self.required_nodes = 1
12716 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
12717 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
12721 "disk_space_total": disk_space,
12722 "required_nodes": self.required_nodes,
12723 "relocate_from": self.relocate_from,
12727 def _AddEvacuateNodes(self):
12728 """Add evacuate nodes data to allocator structure.
12732 "evac_nodes": self.evac_nodes
12736 def _AddNodeEvacuate(self):
12737 """Get data for node-evacuate requests.
12741 "instances": self.instances,
12742 "evac_mode": self.evac_mode,
12745 def _AddChangeGroup(self):
12746 """Get data for node-evacuate requests.
12750 "instances": self.instances,
12751 "target_groups": self.target_groups,
12754 def _BuildInputData(self, fn, keydata):
12755 """Build input data structures.
12758 self._ComputeClusterData()
12761 request["type"] = self.mode
12762 for keyname, keytype in keydata:
12763 if keyname not in request:
12764 raise errors.ProgrammerError("Request parameter %s is missing" %
12766 val = request[keyname]
12767 if not keytype(val):
12768 raise errors.ProgrammerError("Request parameter %s doesn't pass"
12769 " validation, value %s, expected"
12770 " type %s" % (keyname, val, keytype))
12771 self.in_data["request"] = request
12773 self.in_text = serializer.Dump(self.in_data)
12775 _STRING_LIST = ht.TListOf(ht.TString)
12776 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
12777 # pylint: disable-msg=E1101
12778 # Class '...' has no 'OP_ID' member
12779 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
12780 opcodes.OpInstanceMigrate.OP_ID,
12781 opcodes.OpInstanceReplaceDisks.OP_ID])
12785 ht.TListOf(ht.TAnd(ht.TIsLength(3),
12786 ht.TItems([ht.TNonEmptyString,
12787 ht.TNonEmptyString,
12788 ht.TListOf(ht.TNonEmptyString),
12791 ht.TListOf(ht.TAnd(ht.TIsLength(2),
12792 ht.TItems([ht.TNonEmptyString,
12795 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
12796 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
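# For illustration, a value accepted by _NEVAC_RESULT could look like the
# following (hypothetical data): a list of moved instances, a list of failed
# instances, and the jobs to submit, e.g.
#   ([("inst1.example.com", "group-uuid-2", ["node3", "node4"])],
#    [],
#    [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]])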
12799 constants.IALLOCATOR_MODE_ALLOC:
12802 ("name", ht.TString),
12803 ("memory", ht.TInt),
12804 ("disks", ht.TListOf(ht.TDict)),
12805 ("disk_template", ht.TString),
12806 ("os", ht.TString),
12807 ("tags", _STRING_LIST),
12808 ("nics", ht.TListOf(ht.TDict)),
12809 ("vcpus", ht.TInt),
12810 ("hypervisor", ht.TString),
12812 constants.IALLOCATOR_MODE_RELOC:
12813 (_AddRelocateInstance,
12814 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
12816 constants.IALLOCATOR_MODE_MEVAC:
12817 (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
12818 ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
12819 constants.IALLOCATOR_MODE_NODE_EVAC:
12820 (_AddNodeEvacuate, [
12821 ("instances", _STRING_LIST),
12822 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
12824 constants.IALLOCATOR_MODE_CHG_GROUP:
12825 (_AddChangeGroup, [
12826 ("instances", _STRING_LIST),
12827 ("target_groups", _STRING_LIST),
12831 def Run(self, name, validate=True, call_fn=None):
12832 """Run an instance allocator and return the results.
12835 if call_fn is None:
12836 call_fn = self.rpc.call_iallocator_runner
12838 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
12839 result.Raise("Failure while running the iallocator script")
12841 self.out_text = result.payload
12843 self._ValidateResult()
12845 def _ValidateResult(self):
12846 """Process the allocator results.
12848 This will process and, if successful, save the result in
12849 self.out_data and the other result attributes.
12853 rdict = serializer.Load(self.out_text)
12854 except Exception, err:
12855 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
12857 if not isinstance(rdict, dict):
12858 raise errors.OpExecError("Can't parse iallocator results: not a dict")
12860 # TODO: remove backwards compatibility in later versions
12861 if "nodes" in rdict and "result" not in rdict:
12862 rdict["result"] = rdict["nodes"]
12865 for key in "success", "info", "result":
12866 if key not in rdict:
12867 raise errors.OpExecError("Can't parse iallocator results:"
12868 " missing key '%s'" % key)
12869 setattr(self, key, rdict[key])
12871 if not self._result_check(self.result):
12872 raise errors.OpExecError("Iallocator returned invalid result,"
12873 " expected %s, got %s" %
12874 (self._result_check, self.result),
12875 errors.ECODE_INVAL)
12877 if self.mode in (constants.IALLOCATOR_MODE_RELOC,
12878 constants.IALLOCATOR_MODE_MEVAC):
12879 node2group = dict((name, ndata["group"])
12880 for (name, ndata) in self.in_data["nodes"].items())
12882 fn = compat.partial(self._NodesToGroups, node2group,
12883 self.in_data["nodegroups"])
12885 if self.mode == constants.IALLOCATOR_MODE_RELOC:
12886 assert self.relocate_from is not None
12887 assert self.required_nodes == 1
12889 request_groups = fn(self.relocate_from)
12890 result_groups = fn(rdict["result"])
12892 if result_groups != request_groups:
12893 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
12894 " differ from original groups (%s)" %
12895 (utils.CommaJoin(result_groups),
12896 utils.CommaJoin(request_groups)))
12897 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
12898 request_groups = fn(self.evac_nodes)
12899 for (instance_name, secnode) in self.result:
12900 result_groups = fn([secnode])
12901 if result_groups != request_groups:
12902 raise errors.OpExecError("Iallocator returned new secondary node"
12903 " '%s' (group '%s') for instance '%s'"
12904 " which is not in original group '%s'" %
12905 (secnode, utils.CommaJoin(result_groups),
12907 utils.CommaJoin(request_groups)))
12909 raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)
12911 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
12912 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
12914 self.out_data = rdict
12917 def _NodesToGroups(node2group, groups, nodes):
12918 """Returns a list of unique group names for a list of nodes.
12920 @type node2group: dict
12921 @param node2group: Map from node name to group UUID
12923 @param groups: Group information
12925 @param nodes: Node names
12932 group_uuid = node2group[node]
12934 # Ignore unknown node
12938 group = groups[group_uuid]
12940 # Can't find group, let's use UUID
12941 group_name = group_uuid
12943 group_name = group["name"]
12945 result.add(group_name)
12947 return sorted(result)
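# Illustrative sketch only (hypothetical helper, not used anywhere in this
# module): the calling convention for IAllocator as used by the LUs above --
# build an instance with the mode-specific keyword arguments, run the named
# allocator script and inspect success/info/result.  "hail" is just an
# example allocator name; "lu" stands for any LogicalUnit providing cfg/rpc.
def _ExampleRunGroupChangeAllocator(lu, instance_names, target_group_uuids):
  ial = IAllocator(lu.cfg, lu.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                   instances=instance_names,
                   target_groups=target_group_uuids)
  ial.Run("hail")
  if not ial.success:
    raise errors.OpExecError("Allocator failed: %s" % ial.info)
  # Callers usually feed ial.result into _LoadNodeEvacResult, as done in
  # LUGroupEvacuate.Exec above.
  return ial.result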
12950 class LUTestAllocator(NoHooksLU):
12951 """Run allocator tests.
12953 This LU runs the allocator tests
12956 def CheckPrereq(self):
12957 """Check prerequisites.
12959 This checks the opcode parameters depending on the direction and mode test.
12962 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
12963 for attr in ["memory", "disks", "disk_template",
12964 "os", "tags", "nics", "vcpus"]:
12965 if not hasattr(self.op, attr):
12966 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
12967 attr, errors.ECODE_INVAL)
12968 iname = self.cfg.ExpandInstanceName(self.op.name)
12969 if iname is not None:
12970 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
12971 iname, errors.ECODE_EXISTS)
12972 if not isinstance(self.op.nics, list):
12973 raise errors.OpPrereqError("Invalid parameter 'nics'",
12974 errors.ECODE_INVAL)
12975 if not isinstance(self.op.disks, list):
12976 raise errors.OpPrereqError("Invalid parameter 'disks'",
12977 errors.ECODE_INVAL)
12978 for row in self.op.disks:
12979 if (not isinstance(row, dict) or
12980 constants.IDISK_SIZE not in row or
12981 not isinstance(row[constants.IDISK_SIZE], int) or
12982 constants.IDISK_MODE not in row or
12983 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
12984 raise errors.OpPrereqError("Invalid contents of the 'disks'"
12985 " parameter", errors.ECODE_INVAL)
12986 if self.op.hypervisor is None:
12987 self.op.hypervisor = self.cfg.GetHypervisorType()
12988 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12989 fname = _ExpandInstanceName(self.cfg, self.op.name)
12990 self.op.name = fname
12991 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
12992 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12993 if not hasattr(self.op, "evac_nodes"):
12994 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
12995 " opcode input", errors.ECODE_INVAL)
12996 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
12997 constants.IALLOCATOR_MODE_NODE_EVAC):
12998 if not self.op.instances:
12999 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13000 self.op.instances = _GetWantedInstances(self, self.op.instances)
13002 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13003 self.op.mode, errors.ECODE_INVAL)
13005 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13006 if self.op.allocator is None:
13007 raise errors.OpPrereqError("Missing allocator name",
13008 errors.ECODE_INVAL)
13009 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13010 raise errors.OpPrereqError("Wrong allocator test '%s'" %
13011 self.op.direction, errors.ECODE_INVAL)
13013 def Exec(self, feedback_fn):
13014 """Run the allocator test.
13017 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13018 ial = IAllocator(self.cfg, self.rpc,
13021 memory=self.op.memory,
13022 disks=self.op.disks,
13023 disk_template=self.op.disk_template,
13027 vcpus=self.op.vcpus,
13028 hypervisor=self.op.hypervisor,
13030 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13031 ial = IAllocator(self.cfg, self.rpc,
13034 relocate_from=list(self.relocate_from),
13036 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
13037 ial = IAllocator(self.cfg, self.rpc,
13039 evac_nodes=self.op.evac_nodes)
13040 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13041 ial = IAllocator(self.cfg, self.rpc,
13043 instances=self.op.instances,
13044 target_groups=self.op.target_groups)
13045 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13046 ial = IAllocator(self.cfg, self.rpc,
13048 instances=self.op.instances,
13049 evac_mode=self.op.evac_mode)
13051 raise errors.ProgrammerError("Unhandled mode %s in"
13052 " LUTestAllocator.Exec" % self.op.mode)
13054 if self.op.direction == constants.IALLOCATOR_DIR_IN:
13055 result = ial.in_text
13057 ial.Run(self.op.allocator, validate=False)
13058 result = ial.out_text
13062 #: Query type implementations
13064 constants.QR_INSTANCE: _InstanceQuery,
13065 constants.QR_NODE: _NodeQuery,
13066 constants.QR_GROUP: _GroupQuery,
13067 constants.QR_OS: _OsQuery,
13070 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13073 def _GetQueryImplementation(name):
13074 """Returns the implemtnation for a query type.
13076 @param name: Query type, must be one of L{constants.QR_VIA_OP}
13080 return _QUERY_IMPL[name]
13082 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13083 errors.ECODE_INVAL)