4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay to many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
63 import ganeti.masterd.instance # pylint: disable-msg=W0611
67 """Data container for LU results with jobs.
69 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71 contained in the C{jobs} attribute and include the job IDs in the opcode
75 def __init__(self, jobs, **kwargs):
76 """Initializes this class.
78 Additional return values can be specified as keyword arguments.
80 @type jobs: list of lists of L{opcode.OpCode}
81 @param jobs: A list of lists of opcode objects
88 class LogicalUnit(object):
89 """Logical Unit base class.
91 Subclasses must follow these rules:
92 - implement ExpandNames
93 - implement CheckPrereq (except when tasklets are used)
94 - implement Exec (except when tasklets are used)
95 - implement BuildHooksEnv
96 - implement BuildHooksNodes
97 - redefine HPATH and HTYPE
98 - optionally redefine their run requirements:
99 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
101 Note that all commands require root permissions.
103 @ivar dry_run_result: the value (if any) that will be returned to the caller
104 in dry-run mode (signalled by opcode dry_run parameter)
111 def __init__(self, processor, op, context, rpc):
112 """Constructor for LogicalUnit.
114 This needs to be overridden in derived classes in order to check op
118 self.proc = processor
120 self.cfg = context.cfg
121 self.glm = context.glm
122 self.context = context
124 # Dicts used to declare locking needs to mcpu
125 self.needed_locks = None
126 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
128 self.remove_locks = {}
129 # Used to force good behavior when calling helper functions
130 self.recalculate_locks = {}
132 self.Log = processor.Log # pylint: disable-msg=C0103
133 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
134 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
135 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
136 # support for dry-run
137 self.dry_run_result = None
138 # support for generic debug attribute
139 if (not hasattr(self.op, "debug_level") or
140 not isinstance(self.op.debug_level, int)):
141 self.op.debug_level = 0
146 # Validate opcode parameters and set defaults
147 self.op.Validate(True)
149 self.CheckArguments()
151 def CheckArguments(self):
152 """Check syntactic validity for the opcode arguments.
154 This method is for doing a simple syntactic check and ensure
155 validity of opcode parameters, without any cluster-related
156 checks. While the same can be accomplished in ExpandNames and/or
157 CheckPrereq, doing these separate is better because:
159 - ExpandNames is left as as purely a lock-related function
160 - CheckPrereq is run after we have acquired locks (and possible
163 The function is allowed to change the self.op attribute so that
164 later methods can no longer worry about missing parameters.
169 def ExpandNames(self):
170 """Expand names for this LU.
172 This method is called before starting to execute the opcode, and it should
173 update all the parameters of the opcode to their canonical form (e.g. a
174 short node name must be fully expanded after this method has successfully
175 completed). This way locking, hooks, logging, etc. can work correctly.
177 LUs which implement this method must also populate the self.needed_locks
178 member, as a dict with lock levels as keys, and a list of needed lock names
181 - use an empty dict if you don't need any lock
182 - if you don't need any lock at a particular level omit that level
183 - don't put anything for the BGL level
184 - if you want all locks at a level use locking.ALL_SET as a value
186 If you need to share locks (rather than acquire them exclusively) at one
187 level you can modify self.share_locks, setting a true value (usually 1) for
188 that level. By default locks are not shared.
190 This function can also define a list of tasklets, which then will be
191 executed in order instead of the usual LU-level CheckPrereq and Exec
192 functions, if those are not defined by the LU.
196 # Acquire all nodes and one instance
197 self.needed_locks = {
198 locking.LEVEL_NODE: locking.ALL_SET,
199 locking.LEVEL_INSTANCE: ['instance1.example.com'],
201 # Acquire just two nodes
202 self.needed_locks = {
203 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
206 self.needed_locks = {} # No, you can't leave it to the default value None
209 # The implementation of this method is mandatory only if the new LU is
210 # concurrent, so that old LUs don't need to be changed all at the same
213 self.needed_locks = {} # Exclusive LUs don't need locks.
215 raise NotImplementedError
217 def DeclareLocks(self, level):
218 """Declare LU locking needs for a level
220 While most LUs can just declare their locking needs at ExpandNames time,
221 sometimes there's the need to calculate some locks after having acquired
222 the ones before. This function is called just before acquiring locks at a
223 particular level, but after acquiring the ones at lower levels, and permits
224 such calculations. It can be used to modify self.needed_locks, and by
225 default it does nothing.
227 This function is only called if you have something already set in
228 self.needed_locks for the level.
230 @param level: Locking level which is going to be locked
231 @type level: member of ganeti.locking.LEVELS
235 def CheckPrereq(self):
236 """Check prerequisites for this LU.
238 This method should check that the prerequisites for the execution
239 of this LU are fulfilled. It can do internode communication, but
240 it should be idempotent - no cluster or system changes are
243 The method should raise errors.OpPrereqError in case something is
244 not fulfilled. Its return value is ignored.
246 This method should also update all the parameters of the opcode to
247 their canonical form if it hasn't been done by ExpandNames before.
250 if self.tasklets is not None:
251 for (idx, tl) in enumerate(self.tasklets):
252 logging.debug("Checking prerequisites for tasklet %s/%s",
253 idx + 1, len(self.tasklets))
258 def Exec(self, feedback_fn):
261 This method should implement the actual work. It should raise
262 errors.OpExecError for failures that are somewhat dealt with in
266 if self.tasklets is not None:
267 for (idx, tl) in enumerate(self.tasklets):
268 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
271 raise NotImplementedError
273 def BuildHooksEnv(self):
274 """Build hooks environment for this LU.
277 @return: Dictionary containing the environment that will be used for
278 running the hooks for this LU. The keys of the dict must not be prefixed
279 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
280 will extend the environment with additional variables. If no environment
281 should be defined, an empty dictionary should be returned (not C{None}).
282 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
286 raise NotImplementedError
288 def BuildHooksNodes(self):
289 """Build list of nodes to run LU's hooks.
291 @rtype: tuple; (list, list)
292 @return: Tuple containing a list of node names on which the hook
293 should run before the execution and a list of node names on which the
294 hook should run after the execution. No nodes should be returned as an
295 empty list (and not None).
296 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
300 raise NotImplementedError
302 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
303 """Notify the LU about the results of its hooks.
305 This method is called every time a hooks phase is executed, and notifies
306 the Logical Unit about the hooks' result. The LU can then use it to alter
307 its result based on the hooks. By default the method does nothing and the
308 previous result is passed back unchanged but any LU can define it if it
309 wants to use the local cluster hook-scripts somehow.
311 @param phase: one of L{constants.HOOKS_PHASE_POST} or
312 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
313 @param hook_results: the results of the multi-node hooks rpc call
314 @param feedback_fn: function used send feedback back to the caller
315 @param lu_result: the previous Exec result this LU had, or None
317 @return: the new Exec result, based on the previous result
321 # API must be kept, thus we ignore the unused argument and could
322 # be a function warnings
323 # pylint: disable-msg=W0613,R0201
326 def _ExpandAndLockInstance(self):
327 """Helper function to expand and lock an instance.
329 Many LUs that work on an instance take its name in self.op.instance_name
330 and need to expand it and then declare the expanded name for locking. This
331 function does it, and then updates self.op.instance_name to the expanded
332 name. It also initializes needed_locks as a dict, if this hasn't been done
336 if self.needed_locks is None:
337 self.needed_locks = {}
339 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
340 "_ExpandAndLockInstance called with instance-level locks set"
341 self.op.instance_name = _ExpandInstanceName(self.cfg,
342 self.op.instance_name)
343 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
345 def _LockInstancesNodes(self, primary_only=False):
346 """Helper function to declare instances' nodes for locking.
348 This function should be called after locking one or more instances to lock
349 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
350 with all primary or secondary nodes for instances already locked and
351 present in self.needed_locks[locking.LEVEL_INSTANCE].
353 It should be called from DeclareLocks, and for safety only works if
354 self.recalculate_locks[locking.LEVEL_NODE] is set.
356 In the future it may grow parameters to just lock some instance's nodes, or
357 to just lock primaries or secondary nodes, if needed.
359 If should be called in DeclareLocks in a way similar to::
361 if level == locking.LEVEL_NODE:
362 self._LockInstancesNodes()
364 @type primary_only: boolean
365 @param primary_only: only lock primary nodes of locked instances
368 assert locking.LEVEL_NODE in self.recalculate_locks, \
369 "_LockInstancesNodes helper function called with no nodes to recalculate"
371 # TODO: check if we're really been called with the instance locks held
373 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
374 # future we might want to have different behaviors depending on the value
375 # of self.recalculate_locks[locking.LEVEL_NODE]
377 locked_i = self.glm.list_owned(locking.LEVEL_INSTANCE)
378 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
379 wanted_nodes.append(instance.primary_node)
381 wanted_nodes.extend(instance.secondary_nodes)
383 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
384 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
385 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
386 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
388 del self.recalculate_locks[locking.LEVEL_NODE]
391 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
392 """Simple LU which runs no hooks.
394 This LU is intended as a parent for other LogicalUnits which will
395 run no hooks, in order to reduce duplicate code.
401 def BuildHooksEnv(self):
402 """Empty BuildHooksEnv for NoHooksLu.
404 This just raises an error.
407 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
409 def BuildHooksNodes(self):
410 """Empty BuildHooksNodes for NoHooksLU.
413 raise AssertionError("BuildHooksNodes called for NoHooksLU")
417 """Tasklet base class.
419 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
420 they can mix legacy code with tasklets. Locking needs to be done in the LU,
421 tasklets know nothing about locks.
423 Subclasses must follow these rules:
424 - Implement CheckPrereq
428 def __init__(self, lu):
435 def CheckPrereq(self):
436 """Check prerequisites for this tasklets.
438 This method should check whether the prerequisites for the execution of
439 this tasklet are fulfilled. It can do internode communication, but it
440 should be idempotent - no cluster or system changes are allowed.
442 The method should raise errors.OpPrereqError in case something is not
443 fulfilled. Its return value is ignored.
445 This method should also update all parameters to their canonical form if it
446 hasn't been done before.
451 def Exec(self, feedback_fn):
452 """Execute the tasklet.
454 This method should implement the actual work. It should raise
455 errors.OpExecError for failures that are somewhat dealt with in code, or
459 raise NotImplementedError
463 """Base for query utility classes.
466 #: Attribute holding field definitions
469 def __init__(self, filter_, fields, use_locking):
470 """Initializes this class.
473 self.use_locking = use_locking
475 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
477 self.requested_data = self.query.RequestedData()
478 self.names = self.query.RequestedNames()
480 # Sort only if no names were requested
481 self.sort_by_name = not self.names
483 self.do_locking = None
486 def _GetNames(self, lu, all_names, lock_level):
487 """Helper function to determine names asked for in the query.
491 names = lu.glm.list_owned(lock_level)
495 if self.wanted == locking.ALL_SET:
496 assert not self.names
497 # caller didn't specify names, so ordering is not important
498 return utils.NiceSort(names)
500 # caller specified names and we must keep the same order
502 assert not self.do_locking or lu.glm.is_owned(lock_level)
504 missing = set(self.wanted).difference(names)
506 raise errors.OpExecError("Some items were removed before retrieving"
507 " their data: %s" % missing)
509 # Return expanded names
512 def ExpandNames(self, lu):
513 """Expand names for this query.
515 See L{LogicalUnit.ExpandNames}.
518 raise NotImplementedError()
520 def DeclareLocks(self, lu, level):
521 """Declare locks for this query.
523 See L{LogicalUnit.DeclareLocks}.
526 raise NotImplementedError()
528 def _GetQueryData(self, lu):
529 """Collects all data for this query.
531 @return: Query data object
534 raise NotImplementedError()
536 def NewStyleQuery(self, lu):
537 """Collect data and execute query.
540 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
541 sort_by_name=self.sort_by_name)
543 def OldStyleQuery(self, lu):
544 """Collect data and execute query.
547 return self.query.OldStyleQuery(self._GetQueryData(lu),
548 sort_by_name=self.sort_by_name)
552 """Returns a dict declaring all lock levels shared.
555 return dict.fromkeys(locking.LEVELS, 1)
558 def _SupportsOob(cfg, node):
559 """Tells if node supports OOB.
561 @type cfg: L{config.ConfigWriter}
562 @param cfg: The cluster configuration
563 @type node: L{objects.Node}
564 @param node: The node
565 @return: The OOB script if supported or an empty string otherwise
568 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
571 def _GetWantedNodes(lu, nodes):
572 """Returns list of checked and expanded node names.
574 @type lu: L{LogicalUnit}
575 @param lu: the logical unit on whose behalf we execute
577 @param nodes: list of node names or None for all nodes
579 @return: the list of nodes, sorted
580 @raise errors.ProgrammerError: if the nodes parameter is wrong type
584 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
586 return utils.NiceSort(lu.cfg.GetNodeList())
589 def _GetWantedInstances(lu, instances):
590 """Returns list of checked and expanded instance names.
592 @type lu: L{LogicalUnit}
593 @param lu: the logical unit on whose behalf we execute
594 @type instances: list
595 @param instances: list of instance names or None for all instances
597 @return: the list of instances, sorted
598 @raise errors.OpPrereqError: if the instances parameter is wrong type
599 @raise errors.OpPrereqError: if any of the passed instances is not found
603 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
605 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
609 def _GetUpdatedParams(old_params, update_dict,
610 use_default=True, use_none=False):
611 """Return the new version of a parameter dictionary.
613 @type old_params: dict
614 @param old_params: old parameters
615 @type update_dict: dict
616 @param update_dict: dict containing new parameter values, or
617 constants.VALUE_DEFAULT to reset the parameter to its default
619 @param use_default: boolean
620 @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
621 values as 'to be deleted' values
622 @param use_none: boolean
623 @type use_none: whether to recognise C{None} values as 'to be
626 @return: the new parameter dictionary
629 params_copy = copy.deepcopy(old_params)
630 for key, val in update_dict.iteritems():
631 if ((use_default and val == constants.VALUE_DEFAULT) or
632 (use_none and val is None)):
638 params_copy[key] = val
642 def _ReleaseLocks(lu, level, names=None, keep=None):
643 """Releases locks owned by an LU.
645 @type lu: L{LogicalUnit}
646 @param level: Lock level
647 @type names: list or None
648 @param names: Names of locks to release
649 @type keep: list or None
650 @param keep: Names of locks to retain
653 assert not (keep is not None and names is not None), \
654 "Only one of the 'names' and the 'keep' parameters can be given"
656 if names is not None:
657 should_release = names.__contains__
659 should_release = lambda name: name not in keep
661 should_release = None
667 # Determine which locks to release
668 for name in lu.glm.list_owned(level):
669 if should_release(name):
674 assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
676 # Release just some locks
677 lu.glm.release(level, names=release)
679 assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
682 lu.glm.release(level)
684 assert not lu.glm.is_owned(level), "No locks should be owned"
687 def _MapInstanceDisksToNodes(instances):
688 """Creates a map from (node, volume) to instance name.
690 @type instances: list of L{objects.Instance}
691 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
694 return dict(((node, vol), inst.name)
695 for inst in instances
696 for (node, vols) in inst.MapLVsByNode().items()
700 def _RunPostHook(lu, node_name):
701 """Runs the post-hook for an opcode on a single node.
704 hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
706 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
708 # pylint: disable-msg=W0702
709 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
712 def _CheckOutputFields(static, dynamic, selected):
713 """Checks whether all selected fields are valid.
715 @type static: L{utils.FieldSet}
716 @param static: static fields set
717 @type dynamic: L{utils.FieldSet}
718 @param dynamic: dynamic fields set
725 delta = f.NonMatching(selected)
727 raise errors.OpPrereqError("Unknown output fields selected: %s"
728 % ",".join(delta), errors.ECODE_INVAL)
731 def _CheckGlobalHvParams(params):
732 """Validates that given hypervisor params are not global ones.
734 This will ensure that instances don't get customised versions of
738 used_globals = constants.HVC_GLOBALS.intersection(params)
740 msg = ("The following hypervisor parameters are global and cannot"
741 " be customized at instance level, please modify them at"
742 " cluster level: %s" % utils.CommaJoin(used_globals))
743 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
746 def _CheckNodeOnline(lu, node, msg=None):
747 """Ensure that a given node is online.
749 @param lu: the LU on behalf of which we make the check
750 @param node: the node to check
751 @param msg: if passed, should be a message to replace the default one
752 @raise errors.OpPrereqError: if the node is offline
756 msg = "Can't use offline node"
757 if lu.cfg.GetNodeInfo(node).offline:
758 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
761 def _CheckNodeNotDrained(lu, node):
762 """Ensure that a given node is not drained.
764 @param lu: the LU on behalf of which we make the check
765 @param node: the node to check
766 @raise errors.OpPrereqError: if the node is drained
769 if lu.cfg.GetNodeInfo(node).drained:
770 raise errors.OpPrereqError("Can't use drained node %s" % node,
774 def _CheckNodeVmCapable(lu, node):
775 """Ensure that a given node is vm capable.
777 @param lu: the LU on behalf of which we make the check
778 @param node: the node to check
779 @raise errors.OpPrereqError: if the node is not vm capable
782 if not lu.cfg.GetNodeInfo(node).vm_capable:
783 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
787 def _CheckNodeHasOS(lu, node, os_name, force_variant):
788 """Ensure that a node supports a given OS.
790 @param lu: the LU on behalf of which we make the check
791 @param node: the node to check
792 @param os_name: the OS to query about
793 @param force_variant: whether to ignore variant errors
794 @raise errors.OpPrereqError: if the node is not supporting the OS
797 result = lu.rpc.call_os_get(node, os_name)
798 result.Raise("OS '%s' not in supported OS list for node %s" %
800 prereq=True, ecode=errors.ECODE_INVAL)
801 if not force_variant:
802 _CheckOSVariant(result.payload, os_name)
805 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
806 """Ensure that a node has the given secondary ip.
808 @type lu: L{LogicalUnit}
809 @param lu: the LU on behalf of which we make the check
811 @param node: the node to check
812 @type secondary_ip: string
813 @param secondary_ip: the ip to check
814 @type prereq: boolean
815 @param prereq: whether to throw a prerequisite or an execute error
816 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
817 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
820 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
821 result.Raise("Failure checking secondary ip on node %s" % node,
822 prereq=prereq, ecode=errors.ECODE_ENVIRON)
823 if not result.payload:
824 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
825 " please fix and re-run this command" % secondary_ip)
827 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
829 raise errors.OpExecError(msg)
832 def _GetClusterDomainSecret():
833 """Reads the cluster domain secret.
836 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
840 def _CheckInstanceDown(lu, instance, reason):
841 """Ensure that an instance is not running."""
842 if instance.admin_up:
843 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
844 (instance.name, reason), errors.ECODE_STATE)
846 pnode = instance.primary_node
847 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
848 ins_l.Raise("Can't contact node %s for instance information" % pnode,
849 prereq=True, ecode=errors.ECODE_ENVIRON)
851 if instance.name in ins_l.payload:
852 raise errors.OpPrereqError("Instance %s is running, %s" %
853 (instance.name, reason), errors.ECODE_STATE)
856 def _ExpandItemName(fn, name, kind):
857 """Expand an item name.
859 @param fn: the function to use for expansion
860 @param name: requested item name
861 @param kind: text description ('Node' or 'Instance')
862 @return: the resolved (full) name
863 @raise errors.OpPrereqError: if the item is not found
867 if full_name is None:
868 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
873 def _ExpandNodeName(cfg, name):
874 """Wrapper over L{_ExpandItemName} for nodes."""
875 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
878 def _ExpandInstanceName(cfg, name):
879 """Wrapper over L{_ExpandItemName} for instance."""
880 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
883 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
884 memory, vcpus, nics, disk_template, disks,
885 bep, hvp, hypervisor_name, tags):
886 """Builds instance related env variables for hooks
888 This builds the hook environment from individual variables.
891 @param name: the name of the instance
892 @type primary_node: string
893 @param primary_node: the name of the instance's primary node
894 @type secondary_nodes: list
895 @param secondary_nodes: list of secondary nodes as strings
896 @type os_type: string
897 @param os_type: the name of the instance's OS
898 @type status: boolean
899 @param status: the should_run status of the instance
901 @param memory: the memory size of the instance
903 @param vcpus: the count of VCPUs the instance has
905 @param nics: list of tuples (ip, mac, mode, link) representing
906 the NICs the instance has
907 @type disk_template: string
908 @param disk_template: the disk template of the instance
910 @param disks: the list of (size, mode) pairs
912 @param bep: the backend parameters for the instance
914 @param hvp: the hypervisor parameters for the instance
915 @type hypervisor_name: string
916 @param hypervisor_name: the hypervisor for the instance
918 @param tags: list of instance tags as strings
920 @return: the hook environment for this instance
929 "INSTANCE_NAME": name,
930 "INSTANCE_PRIMARY": primary_node,
931 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
932 "INSTANCE_OS_TYPE": os_type,
933 "INSTANCE_STATUS": str_status,
934 "INSTANCE_MEMORY": memory,
935 "INSTANCE_VCPUS": vcpus,
936 "INSTANCE_DISK_TEMPLATE": disk_template,
937 "INSTANCE_HYPERVISOR": hypervisor_name,
941 nic_count = len(nics)
942 for idx, (ip, mac, mode, link) in enumerate(nics):
945 env["INSTANCE_NIC%d_IP" % idx] = ip
946 env["INSTANCE_NIC%d_MAC" % idx] = mac
947 env["INSTANCE_NIC%d_MODE" % idx] = mode
948 env["INSTANCE_NIC%d_LINK" % idx] = link
949 if mode == constants.NIC_MODE_BRIDGED:
950 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
954 env["INSTANCE_NIC_COUNT"] = nic_count
957 disk_count = len(disks)
958 for idx, (size, mode) in enumerate(disks):
959 env["INSTANCE_DISK%d_SIZE" % idx] = size
960 env["INSTANCE_DISK%d_MODE" % idx] = mode
964 env["INSTANCE_DISK_COUNT"] = disk_count
969 env["INSTANCE_TAGS"] = " ".join(tags)
971 for source, kind in [(bep, "BE"), (hvp, "HV")]:
972 for key, value in source.items():
973 env["INSTANCE_%s_%s" % (kind, key)] = value
978 def _NICListToTuple(lu, nics):
979 """Build a list of nic information tuples.
981 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
982 value in LUInstanceQueryData.
984 @type lu: L{LogicalUnit}
985 @param lu: the logical unit on whose behalf we execute
986 @type nics: list of L{objects.NIC}
987 @param nics: list of nics to convert to hooks tuples
991 cluster = lu.cfg.GetClusterInfo()
995 filled_params = cluster.SimpleFillNIC(nic.nicparams)
996 mode = filled_params[constants.NIC_MODE]
997 link = filled_params[constants.NIC_LINK]
998 hooks_nics.append((ip, mac, mode, link))
1002 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1003 """Builds instance related env variables for hooks from an object.
1005 @type lu: L{LogicalUnit}
1006 @param lu: the logical unit on whose behalf we execute
1007 @type instance: L{objects.Instance}
1008 @param instance: the instance for which we should build the
1010 @type override: dict
1011 @param override: dictionary with key/values that will override
1014 @return: the hook environment dictionary
1017 cluster = lu.cfg.GetClusterInfo()
1018 bep = cluster.FillBE(instance)
1019 hvp = cluster.FillHV(instance)
1021 "name": instance.name,
1022 "primary_node": instance.primary_node,
1023 "secondary_nodes": instance.secondary_nodes,
1024 "os_type": instance.os,
1025 "status": instance.admin_up,
1026 "memory": bep[constants.BE_MEMORY],
1027 "vcpus": bep[constants.BE_VCPUS],
1028 "nics": _NICListToTuple(lu, instance.nics),
1029 "disk_template": instance.disk_template,
1030 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1033 "hypervisor_name": instance.hypervisor,
1034 "tags": instance.tags,
1037 args.update(override)
1038 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1041 def _AdjustCandidatePool(lu, exceptions):
1042 """Adjust the candidate pool after node operations.
1045 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1047 lu.LogInfo("Promoted nodes to master candidate role: %s",
1048 utils.CommaJoin(node.name for node in mod_list))
1049 for name in mod_list:
1050 lu.context.ReaddNode(name)
1051 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1053 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1057 def _DecideSelfPromotion(lu, exceptions=None):
1058 """Decide whether I should promote myself as a master candidate.
1061 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1062 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1063 # the new node will increase mc_max with one, so:
1064 mc_should = min(mc_should + 1, cp_size)
1065 return mc_now < mc_should
1068 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1069 """Check that the brigdes needed by a list of nics exist.
1072 cluster = lu.cfg.GetClusterInfo()
1073 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1074 brlist = [params[constants.NIC_LINK] for params in paramslist
1075 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1077 result = lu.rpc.call_bridges_exist(target_node, brlist)
1078 result.Raise("Error checking bridges on destination node '%s'" %
1079 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1082 def _CheckInstanceBridgesExist(lu, instance, node=None):
1083 """Check that the brigdes needed by an instance exist.
1087 node = instance.primary_node
1088 _CheckNicsBridgesExist(lu, instance.nics, node)
1091 def _CheckOSVariant(os_obj, name):
1092 """Check whether an OS name conforms to the os variants specification.
1094 @type os_obj: L{objects.OS}
1095 @param os_obj: OS object to check
1097 @param name: OS name passed by the user, to check for validity
1100 variant = objects.OS.GetVariant(name)
1101 if not os_obj.supported_variants:
1103 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1104 " passed)" % (os_obj.name, variant),
1108 raise errors.OpPrereqError("OS name must include a variant",
1111 if variant not in os_obj.supported_variants:
1112 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1115 def _GetNodeInstancesInner(cfg, fn):
1116 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1119 def _GetNodeInstances(cfg, node_name):
1120 """Returns a list of all primary and secondary instances on a node.
1124 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1127 def _GetNodePrimaryInstances(cfg, node_name):
1128 """Returns primary instances on a node.
1131 return _GetNodeInstancesInner(cfg,
1132 lambda inst: node_name == inst.primary_node)
1135 def _GetNodeSecondaryInstances(cfg, node_name):
1136 """Returns secondary instances on a node.
1139 return _GetNodeInstancesInner(cfg,
1140 lambda inst: node_name in inst.secondary_nodes)
1143 def _GetStorageTypeArgs(cfg, storage_type):
1144 """Returns the arguments for a storage type.
1147 # Special case for file storage
1148 if storage_type == constants.ST_FILE:
1149 # storage.FileStorage wants a list of storage directories
1150 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1155 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1158 for dev in instance.disks:
1159 cfg.SetDiskID(dev, node_name)
1161 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1162 result.Raise("Failed to get disk status from node %s" % node_name,
1163 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1165 for idx, bdev_status in enumerate(result.payload):
1166 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1172 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1173 """Check the sanity of iallocator and node arguments and use the
1174 cluster-wide iallocator if appropriate.
1176 Check that at most one of (iallocator, node) is specified. If none is
1177 specified, then the LU's opcode's iallocator slot is filled with the
1178 cluster-wide default iallocator.
1180 @type iallocator_slot: string
1181 @param iallocator_slot: the name of the opcode iallocator slot
1182 @type node_slot: string
1183 @param node_slot: the name of the opcode target node slot
1186 node = getattr(lu.op, node_slot, None)
1187 iallocator = getattr(lu.op, iallocator_slot, None)
1189 if node is not None and iallocator is not None:
1190 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1192 elif node is None and iallocator is None:
1193 default_iallocator = lu.cfg.GetDefaultIAllocator()
1194 if default_iallocator:
1195 setattr(lu.op, iallocator_slot, default_iallocator)
1197 raise errors.OpPrereqError("No iallocator or node given and no"
1198 " cluster-wide default iallocator found;"
1199 " please specify either an iallocator or a"
1200 " node, or set a cluster-wide default"
1204 def _GetDefaultIAllocator(cfg, iallocator):
1205 """Decides on which iallocator to use.
1207 @type cfg: L{config.ConfigWriter}
1208 @param cfg: Cluster configuration object
1209 @type iallocator: string or None
1210 @param iallocator: Iallocator specified in opcode
1212 @return: Iallocator name
1216 # Use default iallocator
1217 iallocator = cfg.GetDefaultIAllocator()
1220 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1221 " opcode nor as a cluster-wide default",
1227 class LUClusterPostInit(LogicalUnit):
1228 """Logical unit for running hooks after cluster initialization.
1231 HPATH = "cluster-init"
1232 HTYPE = constants.HTYPE_CLUSTER
1234 def BuildHooksEnv(self):
1239 "OP_TARGET": self.cfg.GetClusterName(),
1242 def BuildHooksNodes(self):
1243 """Build hooks nodes.
1246 return ([], [self.cfg.GetMasterNode()])
1248 def Exec(self, feedback_fn):
1255 class LUClusterDestroy(LogicalUnit):
1256 """Logical unit for destroying the cluster.
1259 HPATH = "cluster-destroy"
1260 HTYPE = constants.HTYPE_CLUSTER
1262 def BuildHooksEnv(self):
1267 "OP_TARGET": self.cfg.GetClusterName(),
1270 def BuildHooksNodes(self):
1271 """Build hooks nodes.
1276 def CheckPrereq(self):
1277 """Check prerequisites.
1279 This checks whether the cluster is empty.
1281 Any errors are signaled by raising errors.OpPrereqError.
1284 master = self.cfg.GetMasterNode()
1286 nodelist = self.cfg.GetNodeList()
1287 if len(nodelist) != 1 or nodelist[0] != master:
1288 raise errors.OpPrereqError("There are still %d node(s) in"
1289 " this cluster." % (len(nodelist) - 1),
1291 instancelist = self.cfg.GetInstanceList()
1293 raise errors.OpPrereqError("There are still %d instance(s) in"
1294 " this cluster." % len(instancelist),
1297 def Exec(self, feedback_fn):
1298 """Destroys the cluster.
1301 master = self.cfg.GetMasterNode()
1303 # Run post hooks on master node before it's removed
1304 _RunPostHook(self, master)
1306 result = self.rpc.call_node_stop_master(master, False)
1307 result.Raise("Could not disable the master role")
1312 def _VerifyCertificate(filename):
1313 """Verifies a certificate for L{LUClusterVerifyConfig}.
1315 @type filename: string
1316 @param filename: Path to PEM file
1320 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1321 utils.ReadFile(filename))
1322 except Exception, err: # pylint: disable-msg=W0703
1323 return (LUClusterVerifyConfig.ETYPE_ERROR,
1324 "Failed to load X509 certificate %s: %s" % (filename, err))
1327 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1328 constants.SSL_CERT_EXPIRATION_ERROR)
1331 fnamemsg = "While verifying %s: %s" % (filename, msg)
1336 return (None, fnamemsg)
1337 elif errcode == utils.CERT_WARNING:
1338 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1339 elif errcode == utils.CERT_ERROR:
1340 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1342 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1345 def _GetAllHypervisorParameters(cluster, instances):
1346 """Compute the set of all hypervisor parameters.
1348 @type cluster: L{objects.Cluster}
1349 @param cluster: the cluster object
1350 @param instances: list of L{objects.Instance}
1351 @param instances: additional instances from which to obtain parameters
1352 @rtype: list of (origin, hypervisor, parameters)
1353 @return: a list with all parameters found, indicating the hypervisor they
1354 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1359 for hv_name in cluster.enabled_hypervisors:
1360 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1362 for os_name, os_hvp in cluster.os_hvp.items():
1363 for hv_name, hv_params in os_hvp.items():
1365 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1366 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1368 # TODO: collapse identical parameter values in a single one
1369 for instance in instances:
1370 if instance.hvparams:
1371 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1372 cluster.FillHV(instance)))
1377 class _VerifyErrors(object):
1378 """Mix-in for cluster/group verify LUs.
1380 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1381 self.op and self._feedback_fn to be available.)
1384 TCLUSTER = "cluster"
1386 TINSTANCE = "instance"
1388 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1389 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1390 ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1391 ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1392 ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1393 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1394 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1395 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1396 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1397 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1398 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1399 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1400 ENODEDRBD = (TNODE, "ENODEDRBD")
1401 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1402 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1403 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1404 ENODEHV = (TNODE, "ENODEHV")
1405 ENODELVM = (TNODE, "ENODELVM")
1406 ENODEN1 = (TNODE, "ENODEN1")
1407 ENODENET = (TNODE, "ENODENET")
1408 ENODEOS = (TNODE, "ENODEOS")
1409 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1410 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1411 ENODERPC = (TNODE, "ENODERPC")
1412 ENODESSH = (TNODE, "ENODESSH")
1413 ENODEVERSION = (TNODE, "ENODEVERSION")
1414 ENODESETUP = (TNODE, "ENODESETUP")
1415 ENODETIME = (TNODE, "ENODETIME")
1416 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1418 ETYPE_FIELD = "code"
1419 ETYPE_ERROR = "ERROR"
1420 ETYPE_WARNING = "WARNING"
1422 def _Error(self, ecode, item, msg, *args, **kwargs):
1423 """Format an error message.
1425 Based on the opcode's error_codes parameter, either format a
1426 parseable error code, or a simpler error string.
1428 This must be called only from Exec and functions called from Exec.
1431 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1433 # first complete the msg
1436 # then format the whole message
1437 if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
1438 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1444 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1445 # and finally report it via the feedback_fn
1446 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable-msg=E1101
1448 def _ErrorIf(self, cond, *args, **kwargs):
1449 """Log an error message if the passed condition is True.
1453 or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
1455 self._Error(*args, **kwargs)
1456 # do not mark the operation as failed for WARN cases only
1457 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1458 self.bad = self.bad or cond
1461 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1462 """Verifies the cluster config.
1467 def _VerifyHVP(self, hvp_data):
1468 """Verifies locally the syntax of the hypervisor parameters.
1471 for item, hv_name, hv_params in hvp_data:
1472 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1475 hv_class = hypervisor.GetHypervisor(hv_name)
1476 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1477 hv_class.CheckParameterSyntax(hv_params)
1478 except errors.GenericError, err:
1479 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1481 def ExpandNames(self):
1482 # Information can be safely retrieved as the BGL is acquired in exclusive
1484 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1485 self.all_node_info = self.cfg.GetAllNodesInfo()
1486 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1487 self.needed_locks = {}
1489 def Exec(self, feedback_fn):
1490 """Verify integrity of cluster, performing various test on nodes.
1494 self._feedback_fn = feedback_fn
1496 feedback_fn("* Verifying cluster config")
1498 for msg in self.cfg.VerifyConfig():
1499 self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1501 feedback_fn("* Verifying cluster certificate files")
1503 for cert_filename in constants.ALL_CERT_FILES:
1504 (errcode, msg) = _VerifyCertificate(cert_filename)
1505 self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1507 feedback_fn("* Verifying hypervisor parameters")
1509 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1510 self.all_inst_info.values()))
1512 feedback_fn("* Verifying all nodes belong to an existing group")
1514 # We do this verification here because, should this bogus circumstance
1515 # occur, it would never be caught by VerifyGroup, which only acts on
1516 # nodes/instances reachable from existing node groups.
1518 dangling_nodes = set(node.name for node in self.all_node_info.values()
1519 if node.group not in self.all_group_info)
1521 dangling_instances = {}
1522 no_node_instances = []
1524 for inst in self.all_inst_info.values():
1525 if inst.primary_node in dangling_nodes:
1526 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1527 elif inst.primary_node not in self.all_node_info:
1528 no_node_instances.append(inst.name)
1533 utils.CommaJoin(dangling_instances.get(node.name,
1535 for node in dangling_nodes]
1537 self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1538 "the following nodes (and their instances) belong to a non"
1539 " existing group: %s", utils.CommaJoin(pretty_dangling))
1541 self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1542 "the following instances have a non-existing primary-node:"
1543 " %s", utils.CommaJoin(no_node_instances))
1545 return (not self.bad, [g.name for g in self.all_group_info.values()])
1548 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1549 """Verifies the status of a node group.
1552 HPATH = "cluster-verify"
1553 HTYPE = constants.HTYPE_CLUSTER
1556 _HOOKS_INDENT_RE = re.compile("^", re.M)
1558 class NodeImage(object):
1559 """A class representing the logical and physical status of a node.
1562 @ivar name: the node name to which this object refers
1563 @ivar volumes: a structure as returned from
1564 L{ganeti.backend.GetVolumeList} (runtime)
1565 @ivar instances: a list of running instances (runtime)
1566 @ivar pinst: list of configured primary instances (config)
1567 @ivar sinst: list of configured secondary instances (config)
1568 @ivar sbp: dictionary of {primary-node: list of instances} for all
1569 instances for which this node is secondary (config)
1570 @ivar mfree: free memory, as reported by hypervisor (runtime)
1571 @ivar dfree: free disk, as reported by the node (runtime)
1572 @ivar offline: the offline status (config)
1573 @type rpc_fail: boolean
1574 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1575 not whether the individual keys were correct) (runtime)
1576 @type lvm_fail: boolean
1577 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1578 @type hyp_fail: boolean
1579 @ivar hyp_fail: whether the RPC call didn't return the instance list
1580 @type ghost: boolean
1581 @ivar ghost: whether this is a known node or not (config)
1582 @type os_fail: boolean
1583 @ivar os_fail: whether the RPC call didn't return valid OS data
1585 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1586 @type vm_capable: boolean
1587 @ivar vm_capable: whether the node can host instances
1590 def __init__(self, offline=False, name=None, vm_capable=True):
1599 self.offline = offline
1600 self.vm_capable = vm_capable
1601 self.rpc_fail = False
1602 self.lvm_fail = False
1603 self.hyp_fail = False
1605 self.os_fail = False
1608 def ExpandNames(self):
1609 # This raises errors.OpPrereqError on its own:
1610 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1612 # Get instances in node group; this is unsafe and needs verification later
1613 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1615 self.needed_locks = {
1616 locking.LEVEL_INSTANCE: inst_names,
1617 locking.LEVEL_NODEGROUP: [self.group_uuid],
1618 locking.LEVEL_NODE: [],
1621 self.share_locks = _ShareAll()
1623 def DeclareLocks(self, level):
1624 if level == locking.LEVEL_NODE:
1625 # Get members of node group; this is unsafe and needs verification later
1626 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1628 all_inst_info = self.cfg.GetAllInstancesInfo()
1630 # In Exec(), we warn about mirrored instances that have primary and
1631 # secondary living in separate node groups. To fully verify that
1632 # volumes for these instances are healthy, we will need to do an
1633 # extra call to their secondaries. We ensure here those nodes will
1635 for inst in self.glm.list_owned(locking.LEVEL_INSTANCE):
1636 # Important: access only the instances whose lock is owned
1637 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1638 nodes.update(all_inst_info[inst].secondary_nodes)
1640 self.needed_locks[locking.LEVEL_NODE] = nodes
1642 def CheckPrereq(self):
1643 group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1644 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1647 group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1649 unlocked_instances = \
1650 group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))
1653 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1654 utils.CommaJoin(unlocked_nodes))
1656 if unlocked_instances:
1657 raise errors.OpPrereqError("Missing lock for instances: %s" %
1658 utils.CommaJoin(unlocked_instances))
1660 self.all_node_info = self.cfg.GetAllNodesInfo()
1661 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1663 self.my_node_names = utils.NiceSort(group_nodes)
1664 self.my_inst_names = utils.NiceSort(group_instances)
1666 self.my_node_info = dict((name, self.all_node_info[name])
1667 for name in self.my_node_names)
1669 self.my_inst_info = dict((name, self.all_inst_info[name])
1670 for name in self.my_inst_names)
1672 # We detect here the nodes that will need the extra RPC calls for verifying
1673 # split LV volumes; they should be locked.
1674 extra_lv_nodes = set()
1676 for inst in self.my_inst_info.values():
1677 if inst.disk_template in constants.DTS_INT_MIRROR:
1678 group = self.my_node_info[inst.primary_node].group
1679 for nname in inst.secondary_nodes:
1680 if self.all_node_info[nname].group != group:
1681 extra_lv_nodes.add(nname)
1683 unlocked_lv_nodes = \
1684 extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1686 if unlocked_lv_nodes:
1687 raise errors.OpPrereqError("these nodes could be locked: %s" %
1688 utils.CommaJoin(unlocked_lv_nodes))
1689 self.extra_lv_nodes = list(extra_lv_nodes)
1691 def _VerifyNode(self, ninfo, nresult):
1692 """Perform some basic validation on data returned from a node.
1694 - check the result data structure is well formed and has all the
1696 - check ganeti version
1698 @type ninfo: L{objects.Node}
1699 @param ninfo: the node to check
1700 @param nresult: the results from the node
1702 @return: whether overall this call was successful (and we can expect
1703 reasonable values in the respose)
1707 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1709 # main result, nresult should be a non-empty dict
1710 test = not nresult or not isinstance(nresult, dict)
1711 _ErrorIf(test, self.ENODERPC, node,
1712 "unable to verify node: no data returned")
1716 # compares ganeti version
1717 local_version = constants.PROTOCOL_VERSION
1718 remote_version = nresult.get("version", None)
1719 test = not (remote_version and
1720 isinstance(remote_version, (list, tuple)) and
1721 len(remote_version) == 2)
1722 _ErrorIf(test, self.ENODERPC, node,
1723 "connection to node returned invalid data")
1727 test = local_version != remote_version[0]
1728 _ErrorIf(test, self.ENODEVERSION, node,
1729 "incompatible protocol versions: master %s,"
1730 " node %s", local_version, remote_version[0])
1734 # node seems compatible, we can actually try to look into its results
1736 # full package version
1737 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1738 self.ENODEVERSION, node,
1739 "software version mismatch: master %s, node %s",
1740 constants.RELEASE_VERSION, remote_version[1],
1741 code=self.ETYPE_WARNING)
1743 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1744 if ninfo.vm_capable and isinstance(hyp_result, dict):
1745 for hv_name, hv_result in hyp_result.iteritems():
1746 test = hv_result is not None
1747 _ErrorIf(test, self.ENODEHV, node,
1748 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1750 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1751 if ninfo.vm_capable and isinstance(hvp_result, list):
1752 for item, hv_name, hv_result in hvp_result:
1753 _ErrorIf(True, self.ENODEHV, node,
1754 "hypervisor %s parameter verify failure (source %s): %s",
1755 hv_name, item, hv_result)
1757 test = nresult.get(constants.NV_NODESETUP,
1758 ["Missing NODESETUP results"])
1759 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1764 def _VerifyNodeTime(self, ninfo, nresult,
1765 nvinfo_starttime, nvinfo_endtime):
1766 """Check the node time.
1768 @type ninfo: L{objects.Node}
1769 @param ninfo: the node to check
1770 @param nresult: the remote results for the node
1771 @param nvinfo_starttime: the start time of the RPC call
1772 @param nvinfo_endtime: the end time of the RPC call
1776 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1778 ntime = nresult.get(constants.NV_TIME, None)
1780 ntime_merged = utils.MergeTime(ntime)
1781 except (ValueError, TypeError):
1782 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1785 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1786 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1787 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1788 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1792 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1793 "Node time diverges by at least %s from master node time",
1796 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1797 """Check the node LVM results.
1799 @type ninfo: L{objects.Node}
1800 @param ninfo: the node to check
1801 @param nresult: the remote results for the node
1802 @param vg_name: the configured VG name
1809 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1811 # checks vg existence and size > 20G
1812 vglist = nresult.get(constants.NV_VGLIST, None)
1814 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1816 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1817 constants.MIN_VG_SIZE)
1818 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1821 pvlist = nresult.get(constants.NV_PVLIST, None)
1822 test = pvlist is None
1823 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1825 # check that ':' is not present in PV names, since it's a
1826 # special character for lvcreate (denotes the range of PEs to
1828 for _, pvname, owner_vg in pvlist:
1829 test = ":" in pvname
1830 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1831 " '%s' of VG '%s'", pvname, owner_vg)
1833 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1834 """Check the node bridges.
1836 @type ninfo: L{objects.Node}
1837 @param ninfo: the node to check
1838 @param nresult: the remote results for the node
1839 @param bridges: the expected list of bridges
1846 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1848 missing = nresult.get(constants.NV_BRIDGES, None)
1849 test = not isinstance(missing, list)
1850 _ErrorIf(test, self.ENODENET, node,
1851 "did not return valid bridge information")
1853 _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1854 utils.CommaJoin(sorted(missing)))
1856 def _VerifyNodeNetwork(self, ninfo, nresult):
1857 """Check the node network connectivity results.
1859 @type ninfo: L{objects.Node}
1860 @param ninfo: the node to check
1861 @param nresult: the remote results for the node
1865 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1867 test = constants.NV_NODELIST not in nresult
1868 _ErrorIf(test, self.ENODESSH, node,
1869 "node hasn't returned node ssh connectivity data")
1871 if nresult[constants.NV_NODELIST]:
1872 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1873 _ErrorIf(True, self.ENODESSH, node,
1874 "ssh communication with node '%s': %s", a_node, a_msg)
1876 test = constants.NV_NODENETTEST not in nresult
1877 _ErrorIf(test, self.ENODENET, node,
1878 "node hasn't returned node tcp connectivity data")
1880 if nresult[constants.NV_NODENETTEST]:
1881 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1883 _ErrorIf(True, self.ENODENET, node,
1884 "tcp communication with node '%s': %s",
1885 anode, nresult[constants.NV_NODENETTEST][anode])
1887 test = constants.NV_MASTERIP not in nresult
1888 _ErrorIf(test, self.ENODENET, node,
1889 "node hasn't returned node master IP reachability data")
1891 if not nresult[constants.NV_MASTERIP]:
1892 if node == self.master_node:
1893 msg = "the master node cannot reach the master IP (not configured?)"
1895 msg = "cannot reach the master IP"
1896 _ErrorIf(True, self.ENODENET, node, msg)
1898 def _VerifyInstance(self, instance, instanceconfig, node_image,
1900 """Verify an instance.
1902 This function checks to see if the required block devices are
1903 available on the instance's node.
1906 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1907 node_current = instanceconfig.primary_node
1909 node_vol_should = {}
1910 instanceconfig.MapLVsByNode(node_vol_should)
1912 for node in node_vol_should:
1913 n_img = node_image[node]
1914 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1915 # ignore missing volumes on offline or broken nodes
1917 for volume in node_vol_should[node]:
1918 test = volume not in n_img.volumes
1919 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1920 "volume %s missing on node %s", volume, node)
1922 if instanceconfig.admin_up:
1923 pri_img = node_image[node_current]
1924 test = instance not in pri_img.instances and not pri_img.offline
1925 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1926 "instance not running on its primary node %s",
1929 diskdata = [(nname, success, status, idx)
1930 for (nname, disks) in diskstatus.items()
1931 for idx, (success, status) in enumerate(disks)]
1933 for nname, success, bdev_status, idx in diskdata:
1934 # the 'ghost node' construction in Exec() ensures that we have a
1936 snode = node_image[nname]
1937 bad_snode = snode.ghost or snode.offline
1938 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1939 self.EINSTANCEFAULTYDISK, instance,
1940 "couldn't retrieve status for disk/%s on %s: %s",
1941 idx, nname, bdev_status)
1942 _ErrorIf((instanceconfig.admin_up and success and
1943 bdev_status.ldisk_status == constants.LDS_FAULTY),
1944 self.EINSTANCEFAULTYDISK, instance,
1945 "disk/%s on %s is faulty", idx, nname)
1947 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1948 """Verify if there are any unknown volumes in the cluster.
1950 The .os, .swap and backup volumes are ignored. All other volumes are
1951 reported as unknown.
1953 @type reserved: L{ganeti.utils.FieldSet}
1954 @param reserved: a FieldSet of reserved volume names
1957 for node, n_img in node_image.items():
1958 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1959 # skip non-healthy nodes
1961 for volume in n_img.volumes:
1962 test = ((node not in node_vol_should or
1963 volume not in node_vol_should[node]) and
1964 not reserved.Matches(volume))
1965 self._ErrorIf(test, self.ENODEORPHANLV, node,
1966 "volume %s is unknown", volume)
1968 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1969 """Verify N+1 Memory Resilience.
1971 Check that if one single node dies we can still start all the
1972 instances it was primary for.
1975 cluster_info = self.cfg.GetClusterInfo()
1976 for node, n_img in node_image.items():
1977 # This code checks that every node which is now listed as
1978 # secondary has enough memory to host all instances it is
1979 # supposed to should a single other node in the cluster fail.
1980 # FIXME: not ready for failover to an arbitrary node
1981 # FIXME: does not support file-backed instances
1982 # WARNING: we currently take into account down instances as well
1983 # as up ones, considering that even if they're down someone
1984 # might want to start them even in the event of a node failure.
1986 # we're skipping offline nodes from the N+1 warning, since
1987 # most likely we don't have good memory infromation from them;
1988 # we already list instances living on such nodes, and that's
1991 for prinode, instances in n_img.sbp.items():
1993 for instance in instances:
1994 bep = cluster_info.FillBE(instance_cfg[instance])
1995 if bep[constants.BE_AUTO_BALANCE]:
1996 needed_mem += bep[constants.BE_MEMORY]
1997 test = n_img.mfree < needed_mem
1998 self._ErrorIf(test, self.ENODEN1, node,
1999 "not enough memory to accomodate instance failovers"
2000 " should node %s fail (%dMiB needed, %dMiB available)",
2001 prinode, needed_mem, n_img.mfree)
2004 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2005 (files_all, files_all_opt, files_mc, files_vm)):
2006 """Verifies file checksums collected from all nodes.
2008 @param errorif: Callback for reporting errors
2009 @param nodeinfo: List of L{objects.Node} objects
2010 @param master_node: Name of master node
2011 @param all_nvinfo: RPC results
2014 node_names = frozenset(node.name for node in nodeinfo if not node.offline)
2016 assert master_node in node_names
2017 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2018 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2019 "Found file listed in more than one file list"
2021 # Define functions determining which nodes to consider for a file
2022 file2nodefn = dict([(filename, fn)
2023 for (files, fn) in [(files_all, None),
2024 (files_all_opt, None),
2025 (files_mc, lambda node: (node.master_candidate or
2026 node.name == master_node)),
2027 (files_vm, lambda node: node.vm_capable)]
2028 for filename in files])
2030 fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
2032 for node in nodeinfo:
2036 nresult = all_nvinfo[node.name]
2038 if nresult.fail_msg or not nresult.payload:
2041 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2043 test = not (node_files and isinstance(node_files, dict))
2044 errorif(test, cls.ENODEFILECHECK, node.name,
2045 "Node did not return file checksum data")
2049 for (filename, checksum) in node_files.items():
2050 # Check if the file should be considered for a node
2051 fn = file2nodefn[filename]
2052 if fn is None or fn(node):
2053 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2055 for (filename, checksums) in fileinfo.items():
2056 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2058 # Nodes having the file
2059 with_file = frozenset(node_name
2060 for nodes in fileinfo[filename].values()
2061 for node_name in nodes)
2063 # Nodes missing file
2064 missing_file = node_names - with_file
2066 if filename in files_all_opt:
2068 errorif(missing_file and missing_file != node_names,
2069 cls.ECLUSTERFILECHECK, None,
2070 "File %s is optional, but it must exist on all or no"
2071 " nodes (not found on %s)",
2072 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2074 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2075 "File %s is missing from node(s) %s", filename,
2076 utils.CommaJoin(utils.NiceSort(missing_file)))
2078 # See if there are multiple versions of the file
2079 test = len(checksums) > 1
2081 variants = ["variant %s on %s" %
2082 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2083 for (idx, (checksum, nodes)) in
2084 enumerate(sorted(checksums.items()))]
2088 errorif(test, cls.ECLUSTERFILECHECK, None,
2089 "File %s found with %s different checksums (%s)",
2090 filename, len(checksums), "; ".join(variants))
2092 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2094 """Verifies and the node DRBD status.
2096 @type ninfo: L{objects.Node}
2097 @param ninfo: the node to check
2098 @param nresult: the remote results for the node
2099 @param instanceinfo: the dict of instances
2100 @param drbd_helper: the configured DRBD usermode helper
2101 @param drbd_map: the DRBD map as returned by
2102 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2106 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2109 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2110 test = (helper_result == None)
2111 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2112 "no drbd usermode helper returned")
2114 status, payload = helper_result
2116 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2117 "drbd usermode helper check unsuccessful: %s", payload)
2118 test = status and (payload != drbd_helper)
2119 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2120 "wrong drbd usermode helper: %s", payload)
2122 # compute the DRBD minors
2124 for minor, instance in drbd_map[node].items():
2125 test = instance not in instanceinfo
2126 _ErrorIf(test, self.ECLUSTERCFG, None,
2127 "ghost instance '%s' in temporary DRBD map", instance)
2128 # ghost instance should not be running, but otherwise we
2129 # don't give double warnings (both ghost instance and
2130 # unallocated minor in use)
2132 node_drbd[minor] = (instance, False)
2134 instance = instanceinfo[instance]
2135 node_drbd[minor] = (instance.name, instance.admin_up)
2137 # and now check them
2138 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2139 test = not isinstance(used_minors, (tuple, list))
2140 _ErrorIf(test, self.ENODEDRBD, node,
2141 "cannot parse drbd status file: %s", str(used_minors))
2143 # we cannot check drbd status
2146 for minor, (iname, must_exist) in node_drbd.items():
2147 test = minor not in used_minors and must_exist
2148 _ErrorIf(test, self.ENODEDRBD, node,
2149 "drbd minor %d of instance %s is not active", minor, iname)
2150 for minor in used_minors:
2151 test = minor not in node_drbd
2152 _ErrorIf(test, self.ENODEDRBD, node,
2153 "unallocated drbd minor %d is in use", minor)
2155 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2156 """Builds the node OS structures.
2158 @type ninfo: L{objects.Node}
2159 @param ninfo: the node to check
2160 @param nresult: the remote results for the node
2161 @param nimg: the node image object
2165 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2167 remote_os = nresult.get(constants.NV_OSLIST, None)
2168 test = (not isinstance(remote_os, list) or
2169 not compat.all(isinstance(v, list) and len(v) == 7
2170 for v in remote_os))
2172 _ErrorIf(test, self.ENODEOS, node,
2173 "node hasn't returned valid OS data")
2182 for (name, os_path, status, diagnose,
2183 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2185 if name not in os_dict:
2188 # parameters is a list of lists instead of list of tuples due to
2189 # JSON lacking a real tuple type, fix it:
2190 parameters = [tuple(v) for v in parameters]
2191 os_dict[name].append((os_path, status, diagnose,
2192 set(variants), set(parameters), set(api_ver)))
2194 nimg.oslist = os_dict
2196 def _VerifyNodeOS(self, ninfo, nimg, base):
2197 """Verifies the node OS list.
2199 @type ninfo: L{objects.Node}
2200 @param ninfo: the node to check
2201 @param nimg: the node image object
2202 @param base: the 'template' node we match against (e.g. from the master)
2206 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2208 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2210 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2211 for os_name, os_data in nimg.oslist.items():
2212 assert os_data, "Empty OS status for OS %s?!" % os_name
2213 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2214 _ErrorIf(not f_status, self.ENODEOS, node,
2215 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2216 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2217 "OS '%s' has multiple entries (first one shadows the rest): %s",
2218 os_name, utils.CommaJoin([v[0] for v in os_data]))
2219 # comparisons with the 'base' image
2220 test = os_name not in base.oslist
2221 _ErrorIf(test, self.ENODEOS, node,
2222 "Extra OS %s not present on reference node (%s)",
2226 assert base.oslist[os_name], "Base node has empty OS status?"
2227 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2229 # base OS is invalid, skipping
2231 for kind, a, b in [("API version", f_api, b_api),
2232 ("variants list", f_var, b_var),
2233 ("parameters", beautify_params(f_param),
2234 beautify_params(b_param))]:
2235 _ErrorIf(a != b, self.ENODEOS, node,
2236 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2237 kind, os_name, base.name,
2238 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2240 # check any missing OSes
2241 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2242 _ErrorIf(missing, self.ENODEOS, node,
2243 "OSes present on reference node %s but missing on this node: %s",
2244 base.name, utils.CommaJoin(missing))
2246 def _VerifyOob(self, ninfo, nresult):
2247 """Verifies out of band functionality of a node.
2249 @type ninfo: L{objects.Node}
2250 @param ninfo: the node to check
2251 @param nresult: the remote results for the node
2255 # We just have to verify the paths on master and/or master candidates
2256 # as the oob helper is invoked on the master
2257 if ((ninfo.master_candidate or ninfo.master_capable) and
2258 constants.NV_OOB_PATHS in nresult):
2259 for path_result in nresult[constants.NV_OOB_PATHS]:
2260 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2262 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2263 """Verifies and updates the node volume data.
2265 This function will update a L{NodeImage}'s internal structures
2266 with data from the remote call.
2268 @type ninfo: L{objects.Node}
2269 @param ninfo: the node to check
2270 @param nresult: the remote results for the node
2271 @param nimg: the node image object
2272 @param vg_name: the configured VG name
2276 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2278 nimg.lvm_fail = True
2279 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2282 elif isinstance(lvdata, basestring):
2283 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2284 utils.SafeEncode(lvdata))
2285 elif not isinstance(lvdata, dict):
2286 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2288 nimg.volumes = lvdata
2289 nimg.lvm_fail = False
2291 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2292 """Verifies and updates the node instance list.
2294 If the listing was successful, then updates this node's instance
2295 list. Otherwise, it marks the RPC call as failed for the instance
2298 @type ninfo: L{objects.Node}
2299 @param ninfo: the node to check
2300 @param nresult: the remote results for the node
2301 @param nimg: the node image object
2304 idata = nresult.get(constants.NV_INSTANCELIST, None)
2305 test = not isinstance(idata, list)
2306 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2307 " (instancelist): %s", utils.SafeEncode(str(idata)))
2309 nimg.hyp_fail = True
2311 nimg.instances = idata
2313 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2314 """Verifies and computes a node information map
2316 @type ninfo: L{objects.Node}
2317 @param ninfo: the node to check
2318 @param nresult: the remote results for the node
2319 @param nimg: the node image object
2320 @param vg_name: the configured VG name
2324 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2326 # try to read free memory (from the hypervisor)
2327 hv_info = nresult.get(constants.NV_HVINFO, None)
2328 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2329 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2332 nimg.mfree = int(hv_info["memory_free"])
2333 except (ValueError, TypeError):
2334 _ErrorIf(True, self.ENODERPC, node,
2335 "node returned invalid nodeinfo, check hypervisor")
2337 # FIXME: devise a free space model for file based instances as well
2338 if vg_name is not None:
2339 test = (constants.NV_VGLIST not in nresult or
2340 vg_name not in nresult[constants.NV_VGLIST])
2341 _ErrorIf(test, self.ENODELVM, node,
2342 "node didn't return data for the volume group '%s'"
2343 " - it is either missing or broken", vg_name)
2346 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2347 except (ValueError, TypeError):
2348 _ErrorIf(True, self.ENODERPC, node,
2349 "node returned invalid LVM info, check LVM status")
2351 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2352 """Gets per-disk status information for all instances.
2354 @type nodelist: list of strings
2355 @param nodelist: Node names
2356 @type node_image: dict of (name, L{objects.Node})
2357 @param node_image: Node objects
2358 @type instanceinfo: dict of (name, L{objects.Instance})
2359 @param instanceinfo: Instance objects
2360 @rtype: {instance: {node: [(succes, payload)]}}
2361 @return: a dictionary of per-instance dictionaries with nodes as
2362 keys and disk information as values; the disk information is a
2363 list of tuples (success, payload)
2366 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2369 node_disks_devonly = {}
2370 diskless_instances = set()
2371 diskless = constants.DT_DISKLESS
2373 for nname in nodelist:
2374 node_instances = list(itertools.chain(node_image[nname].pinst,
2375 node_image[nname].sinst))
2376 diskless_instances.update(inst for inst in node_instances
2377 if instanceinfo[inst].disk_template == diskless)
2378 disks = [(inst, disk)
2379 for inst in node_instances
2380 for disk in instanceinfo[inst].disks]
2383 # No need to collect data
2386 node_disks[nname] = disks
2388 # Creating copies as SetDiskID below will modify the objects and that can
2389 # lead to incorrect data returned from nodes
2390 devonly = [dev.Copy() for (_, dev) in disks]
2393 self.cfg.SetDiskID(dev, nname)
2395 node_disks_devonly[nname] = devonly
2397 assert len(node_disks) == len(node_disks_devonly)
2399 # Collect data from all nodes with disks
2400 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2403 assert len(result) == len(node_disks)
2407 for (nname, nres) in result.items():
2408 disks = node_disks[nname]
2411 # No data from this node
2412 data = len(disks) * [(False, "node offline")]
2415 _ErrorIf(msg, self.ENODERPC, nname,
2416 "while getting disk information: %s", msg)
2418 # No data from this node
2419 data = len(disks) * [(False, msg)]
2422 for idx, i in enumerate(nres.payload):
2423 if isinstance(i, (tuple, list)) and len(i) == 2:
2426 logging.warning("Invalid result from node %s, entry %d: %s",
2428 data.append((False, "Invalid result from the remote node"))
2430 for ((inst, _), status) in zip(disks, data):
2431 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2433 # Add empty entries for diskless instances.
2434 for inst in diskless_instances:
2435 assert inst not in instdisk
2438 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2439 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2440 compat.all(isinstance(s, (tuple, list)) and
2441 len(s) == 2 for s in statuses)
2442 for inst, nnames in instdisk.items()
2443 for nname, statuses in nnames.items())
2444 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2448 def BuildHooksEnv(self):
2451 Cluster-Verify hooks just ran in the post phase and their failure makes
2452 the output be logged in the verify output and the verification to fail.
2456 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2459 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2460 for node in self.my_node_info.values())
2464 def BuildHooksNodes(self):
2465 """Build hooks nodes.
2468 return ([], self.my_node_names)
2470 def Exec(self, feedback_fn):
2471 """Verify integrity of the node group, performing various test on nodes.
2474 # This method has too many local variables. pylint: disable-msg=R0914
2476 if not self.my_node_names:
2478 feedback_fn("* Empty node group, skipping verification")
2482 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2483 verbose = self.op.verbose
2484 self._feedback_fn = feedback_fn
2486 vg_name = self.cfg.GetVGName()
2487 drbd_helper = self.cfg.GetDRBDHelper()
2488 cluster = self.cfg.GetClusterInfo()
2489 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2490 hypervisors = cluster.enabled_hypervisors
2491 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2493 i_non_redundant = [] # Non redundant instances
2494 i_non_a_balanced = [] # Non auto-balanced instances
2495 n_offline = 0 # Count of offline nodes
2496 n_drained = 0 # Count of nodes being drained
2497 node_vol_should = {}
2499 # FIXME: verify OS list
2502 filemap = _ComputeAncillaryFiles(cluster, False)
2504 # do local checksums
2505 master_node = self.master_node = self.cfg.GetMasterNode()
2506 master_ip = self.cfg.GetMasterIP()
2508 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2510 # We will make nodes contact all nodes in their group, and one node from
2511 # every other group.
2512 # TODO: should it be a *random* node, different every time?
2513 online_nodes = [node.name for node in node_data_list if not node.offline]
2514 other_group_nodes = {}
2516 for name in sorted(self.all_node_info):
2517 node = self.all_node_info[name]
2518 if (node.group not in other_group_nodes
2519 and node.group != self.group_uuid
2520 and not node.offline):
2521 other_group_nodes[node.group] = node.name
2523 node_verify_param = {
2524 constants.NV_FILELIST:
2525 utils.UniqueSequence(filename
2526 for files in filemap
2527 for filename in files),
2528 constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
2529 constants.NV_HYPERVISOR: hypervisors,
2530 constants.NV_HVPARAMS:
2531 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2532 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2533 for node in node_data_list
2534 if not node.offline],
2535 constants.NV_INSTANCELIST: hypervisors,
2536 constants.NV_VERSION: None,
2537 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2538 constants.NV_NODESETUP: None,
2539 constants.NV_TIME: None,
2540 constants.NV_MASTERIP: (master_node, master_ip),
2541 constants.NV_OSLIST: None,
2542 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2545 if vg_name is not None:
2546 node_verify_param[constants.NV_VGLIST] = None
2547 node_verify_param[constants.NV_LVLIST] = vg_name
2548 node_verify_param[constants.NV_PVLIST] = [vg_name]
2549 node_verify_param[constants.NV_DRBDLIST] = None
2552 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2555 # FIXME: this needs to be changed per node-group, not cluster-wide
2557 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2558 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2559 bridges.add(default_nicpp[constants.NIC_LINK])
2560 for instance in self.my_inst_info.values():
2561 for nic in instance.nics:
2562 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2563 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2564 bridges.add(full_nic[constants.NIC_LINK])
2567 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2569 # Build our expected cluster state
2570 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2572 vm_capable=node.vm_capable))
2573 for node in node_data_list)
2577 for node in self.all_node_info.values():
2578 path = _SupportsOob(self.cfg, node)
2579 if path and path not in oob_paths:
2580 oob_paths.append(path)
2583 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2585 for instance in self.my_inst_names:
2586 inst_config = self.my_inst_info[instance]
2588 for nname in inst_config.all_nodes:
2589 if nname not in node_image:
2590 gnode = self.NodeImage(name=nname)
2591 gnode.ghost = (nname not in self.all_node_info)
2592 node_image[nname] = gnode
2594 inst_config.MapLVsByNode(node_vol_should)
2596 pnode = inst_config.primary_node
2597 node_image[pnode].pinst.append(instance)
2599 for snode in inst_config.secondary_nodes:
2600 nimg = node_image[snode]
2601 nimg.sinst.append(instance)
2602 if pnode not in nimg.sbp:
2603 nimg.sbp[pnode] = []
2604 nimg.sbp[pnode].append(instance)
2606 # At this point, we have the in-memory data structures complete,
2607 # except for the runtime information, which we'll gather next
2609 # Due to the way our RPC system works, exact response times cannot be
2610 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2611 # time before and after executing the request, we can at least have a time
2613 nvinfo_starttime = time.time()
2614 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2616 self.cfg.GetClusterName())
2617 nvinfo_endtime = time.time()
2619 if self.extra_lv_nodes and vg_name is not None:
2621 self.rpc.call_node_verify(self.extra_lv_nodes,
2622 {constants.NV_LVLIST: vg_name},
2623 self.cfg.GetClusterName())
2625 extra_lv_nvinfo = {}
2627 all_drbd_map = self.cfg.ComputeDRBDMap()
2629 feedback_fn("* Gathering disk information (%s nodes)" %
2630 len(self.my_node_names))
2631 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2634 feedback_fn("* Verifying configuration file consistency")
2636 # If not all nodes are being checked, we need to make sure the master node
2637 # and a non-checked vm_capable node are in the list.
2638 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2640 vf_nvinfo = all_nvinfo.copy()
2641 vf_node_info = list(self.my_node_info.values())
2642 additional_nodes = []
2643 if master_node not in self.my_node_info:
2644 additional_nodes.append(master_node)
2645 vf_node_info.append(self.all_node_info[master_node])
2646 # Add the first vm_capable node we find which is not included
2647 for node in absent_nodes:
2648 nodeinfo = self.all_node_info[node]
2649 if nodeinfo.vm_capable and not nodeinfo.offline:
2650 additional_nodes.append(node)
2651 vf_node_info.append(self.all_node_info[node])
2653 key = constants.NV_FILELIST
2654 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2655 {key: node_verify_param[key]},
2656 self.cfg.GetClusterName()))
2658 vf_nvinfo = all_nvinfo
2659 vf_node_info = self.my_node_info.values()
2661 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2663 feedback_fn("* Verifying node status")
2667 for node_i in node_data_list:
2669 nimg = node_image[node]
2673 feedback_fn("* Skipping offline node %s" % (node,))
2677 if node == master_node:
2679 elif node_i.master_candidate:
2680 ntype = "master candidate"
2681 elif node_i.drained:
2687 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2689 msg = all_nvinfo[node].fail_msg
2690 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2692 nimg.rpc_fail = True
2695 nresult = all_nvinfo[node].payload
2697 nimg.call_ok = self._VerifyNode(node_i, nresult)
2698 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2699 self._VerifyNodeNetwork(node_i, nresult)
2700 self._VerifyOob(node_i, nresult)
2703 self._VerifyNodeLVM(node_i, nresult, vg_name)
2704 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2707 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2708 self._UpdateNodeInstances(node_i, nresult, nimg)
2709 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2710 self._UpdateNodeOS(node_i, nresult, nimg)
2712 if not nimg.os_fail:
2713 if refos_img is None:
2715 self._VerifyNodeOS(node_i, nimg, refos_img)
2716 self._VerifyNodeBridges(node_i, nresult, bridges)
2718 # Check whether all running instancies are primary for the node. (This
2719 # can no longer be done from _VerifyInstance below, since some of the
2720 # wrong instances could be from other node groups.)
2721 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2723 for inst in non_primary_inst:
2724 test = inst in self.all_inst_info
2725 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2726 "instance should not run on node %s", node_i.name)
2727 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2728 "node is running unknown instance %s", inst)
2730 for node, result in extra_lv_nvinfo.items():
2731 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2732 node_image[node], vg_name)
2734 feedback_fn("* Verifying instance status")
2735 for instance in self.my_inst_names:
2737 feedback_fn("* Verifying instance %s" % instance)
2738 inst_config = self.my_inst_info[instance]
2739 self._VerifyInstance(instance, inst_config, node_image,
2741 inst_nodes_offline = []
2743 pnode = inst_config.primary_node
2744 pnode_img = node_image[pnode]
2745 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2746 self.ENODERPC, pnode, "instance %s, connection to"
2747 " primary node failed", instance)
2749 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2750 self.EINSTANCEBADNODE, instance,
2751 "instance is marked as running and lives on offline node %s",
2752 inst_config.primary_node)
2754 # If the instance is non-redundant we cannot survive losing its primary
2755 # node, so we are not N+1 compliant. On the other hand we have no disk
2756 # templates with more than one secondary so that situation is not well
2758 # FIXME: does not support file-backed instances
2759 if not inst_config.secondary_nodes:
2760 i_non_redundant.append(instance)
2762 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2763 instance, "instance has multiple secondary nodes: %s",
2764 utils.CommaJoin(inst_config.secondary_nodes),
2765 code=self.ETYPE_WARNING)
2767 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2768 pnode = inst_config.primary_node
2769 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2770 instance_groups = {}
2772 for node in instance_nodes:
2773 instance_groups.setdefault(self.all_node_info[node].group,
2777 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2778 # Sort so that we always list the primary node first.
2779 for group, nodes in sorted(instance_groups.items(),
2780 key=lambda (_, nodes): pnode in nodes,
2783 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2784 instance, "instance has primary and secondary nodes in"
2785 " different groups: %s", utils.CommaJoin(pretty_list),
2786 code=self.ETYPE_WARNING)
2788 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2789 i_non_a_balanced.append(instance)
2791 for snode in inst_config.secondary_nodes:
2792 s_img = node_image[snode]
2793 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2794 "instance %s, connection to secondary node failed", instance)
2797 inst_nodes_offline.append(snode)
2799 # warn that the instance lives on offline nodes
2800 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2801 "instance has offline secondary node(s) %s",
2802 utils.CommaJoin(inst_nodes_offline))
2803 # ... or ghost/non-vm_capable nodes
2804 for node in inst_config.all_nodes:
2805 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2806 "instance lives on ghost node %s", node)
2807 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2808 instance, "instance lives on non-vm_capable node %s", node)
2810 feedback_fn("* Verifying orphan volumes")
2811 reserved = utils.FieldSet(*cluster.reserved_lvs)
2813 # We will get spurious "unknown volume" warnings if any node of this group
2814 # is secondary for an instance whose primary is in another group. To avoid
2815 # them, we find these instances and add their volumes to node_vol_should.
2816 for inst in self.all_inst_info.values():
2817 for secondary in inst.secondary_nodes:
2818 if (secondary in self.my_node_info
2819 and inst.name not in self.my_inst_info):
2820 inst.MapLVsByNode(node_vol_should)
2823 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2825 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2826 feedback_fn("* Verifying N+1 Memory redundancy")
2827 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2829 feedback_fn("* Other Notes")
2831 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2832 % len(i_non_redundant))
2834 if i_non_a_balanced:
2835 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2836 % len(i_non_a_balanced))
2839 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2842 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2846 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2847 """Analyze the post-hooks' result
2849 This method analyses the hook result, handles it, and sends some
2850 nicely-formatted feedback back to the user.
2852 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2853 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2854 @param hooks_results: the results of the multi-node hooks rpc call
2855 @param feedback_fn: function used send feedback back to the caller
2856 @param lu_result: previous Exec result
2857 @return: the new Exec result, based on the previous result
2861 # We only really run POST phase hooks, only for non-empty groups,
2862 # and are only interested in their results
2863 if not self.my_node_names:
2866 elif phase == constants.HOOKS_PHASE_POST:
2867 # Used to change hooks' output to proper indentation
2868 feedback_fn("* Hooks Results")
2869 assert hooks_results, "invalid result from hooks"
2871 for node_name in hooks_results:
2872 res = hooks_results[node_name]
2874 test = msg and not res.offline
2875 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2876 "Communication failure in hooks execution: %s", msg)
2877 if res.offline or msg:
2878 # No need to investigate payload if node is offline or gave an error.
2879 # override manually lu_result here as _ErrorIf only
2880 # overrides self.bad
2883 for script, hkr, output in res.payload:
2884 test = hkr == constants.HKR_FAIL
2885 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2886 "Script %s failed, output:", script)
2888 output = self._HOOKS_INDENT_RE.sub(" ", output)
2889 feedback_fn("%s" % output)
2895 class LUClusterVerifyDisks(NoHooksLU):
2896 """Verifies the cluster disks status.
2901 def ExpandNames(self):
2902 self.share_locks = _ShareAll()
2903 self.needed_locks = {
2904 locking.LEVEL_NODEGROUP: locking.ALL_SET,
2907 def Exec(self, feedback_fn):
2908 group_names = self.glm.list_owned(locking.LEVEL_NODEGROUP)
2910 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
2911 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
2912 for group in group_names])
2915 class LUGroupVerifyDisks(NoHooksLU):
2916 """Verifies the status of all disks in a node group.
2921 def ExpandNames(self):
2922 # Raises errors.OpPrereqError on its own if group can't be found
2923 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2925 self.share_locks = _ShareAll()
2926 self.needed_locks = {
2927 locking.LEVEL_INSTANCE: [],
2928 locking.LEVEL_NODEGROUP: [],
2929 locking.LEVEL_NODE: [],
2932 def DeclareLocks(self, level):
2933 if level == locking.LEVEL_INSTANCE:
2934 assert not self.needed_locks[locking.LEVEL_INSTANCE]
2936 # Lock instances optimistically, needs verification once node and group
2937 # locks have been acquired
2938 self.needed_locks[locking.LEVEL_INSTANCE] = \
2939 self.cfg.GetNodeGroupInstances(self.group_uuid)
2941 elif level == locking.LEVEL_NODEGROUP:
2942 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
2944 self.needed_locks[locking.LEVEL_NODEGROUP] = \
2945 set([self.group_uuid] +
2946 # Lock all groups used by instances optimistically; this requires
2947 # going via the node before it's locked, requiring verification
2950 for instance_name in
2951 self.glm.list_owned(locking.LEVEL_INSTANCE)
2953 self.cfg.GetInstanceNodeGroups(instance_name)])
2955 elif level == locking.LEVEL_NODE:
2956 # This will only lock the nodes in the group to be verified which contain
2958 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
2959 self._LockInstancesNodes()
2961 # Lock all nodes in group to be verified
2962 assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
2963 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
2964 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
2966 def CheckPrereq(self):
2967 owned_instances = frozenset(self.glm.list_owned(locking.LEVEL_INSTANCE))
2968 owned_groups = frozenset(self.glm.list_owned(locking.LEVEL_NODEGROUP))
2969 owned_nodes = frozenset(self.glm.list_owned(locking.LEVEL_NODE))
2971 assert self.group_uuid in owned_groups
2973 # Check if locked instances are still correct
2974 wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2975 if owned_instances != wanted_instances:
2976 raise errors.OpPrereqError("Instances in node group %s changed since"
2977 " locks were acquired, wanted %s, have %s;"
2978 " retry the operation" %
2979 (self.op.group_name,
2980 utils.CommaJoin(wanted_instances),
2981 utils.CommaJoin(owned_instances)),
2984 # Get instance information
2985 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
2987 # Check if node groups for locked instances are still correct
2988 for (instance_name, inst) in self.instances.items():
2989 assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
2990 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
2991 assert owned_nodes.issuperset(inst.all_nodes), \
2992 "Instance %s's nodes changed while we kept the lock" % instance_name
2994 inst_groups = self.cfg.GetInstanceNodeGroups(instance_name)
2995 if not owned_groups.issuperset(inst_groups):
2996 raise errors.OpPrereqError("Instance %s's node groups changed since"
2997 " locks were acquired, current groups are"
2998 " are '%s', owning groups '%s'; retry the"
3001 utils.CommaJoin(inst_groups),
3002 utils.CommaJoin(owned_groups)),
3005 def Exec(self, feedback_fn):
3006 """Verify integrity of cluster disks.
3008 @rtype: tuple of three items
3009 @return: a tuple of (dict of node-to-node_error, list of instances
3010 which need activate-disks, dict of instance: (node, volume) for
3015 res_instances = set()
3018 nv_dict = _MapInstanceDisksToNodes([inst
3019 for inst in self.instances.values()
3023 nodes = utils.NiceSort(set(self.glm.list_owned(locking.LEVEL_NODE)) &
3024 set(self.cfg.GetVmCapableNodeList()))
3026 node_lvs = self.rpc.call_lv_list(nodes, [])
3028 for (node, node_res) in node_lvs.items():
3029 if node_res.offline:
3032 msg = node_res.fail_msg
3034 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3035 res_nodes[node] = msg
3038 for lv_name, (_, _, lv_online) in node_res.payload.items():
3039 inst = nv_dict.pop((node, lv_name), None)
3040 if not (lv_online or inst is None):
3041 res_instances.add(inst)
3043 # any leftover items in nv_dict are missing LVs, let's arrange the data
3045 for key, inst in nv_dict.iteritems():
3046 res_missing.setdefault(inst, []).append(key)
3048 return (res_nodes, list(res_instances), res_missing)
3051 class LUClusterRepairDiskSizes(NoHooksLU):
3052 """Verifies the cluster disks sizes.
3057 def ExpandNames(self):
3058 if self.op.instances:
3059 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3060 self.needed_locks = {
3061 locking.LEVEL_NODE: [],
3062 locking.LEVEL_INSTANCE: self.wanted_names,
3064 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3066 self.wanted_names = None
3067 self.needed_locks = {
3068 locking.LEVEL_NODE: locking.ALL_SET,
3069 locking.LEVEL_INSTANCE: locking.ALL_SET,
3071 self.share_locks = _ShareAll()
3073 def DeclareLocks(self, level):
3074 if level == locking.LEVEL_NODE and self.wanted_names is not None:
3075 self._LockInstancesNodes(primary_only=True)
3077 def CheckPrereq(self):
3078 """Check prerequisites.
3080 This only checks the optional instance list against the existing names.
3083 if self.wanted_names is None:
3084 self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
3086 self.wanted_instances = \
3087 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3089 def _EnsureChildSizes(self, disk):
3090 """Ensure children of the disk have the needed disk size.
3092 This is valid mainly for DRBD8 and fixes an issue where the
3093 children have smaller disk size.
3095 @param disk: an L{ganeti.objects.Disk} object
3098 if disk.dev_type == constants.LD_DRBD8:
3099 assert disk.children, "Empty children for DRBD8?"
3100 fchild = disk.children[0]
3101 mismatch = fchild.size < disk.size
3103 self.LogInfo("Child disk has size %d, parent %d, fixing",
3104 fchild.size, disk.size)
3105 fchild.size = disk.size
3107 # and we recurse on this child only, not on the metadev
3108 return self._EnsureChildSizes(fchild) or mismatch
3112 def Exec(self, feedback_fn):
3113 """Verify the size of cluster disks.
3116 # TODO: check child disks too
3117 # TODO: check differences in size between primary/secondary nodes
3119 for instance in self.wanted_instances:
3120 pnode = instance.primary_node
3121 if pnode not in per_node_disks:
3122 per_node_disks[pnode] = []
3123 for idx, disk in enumerate(instance.disks):
3124 per_node_disks[pnode].append((instance, idx, disk))
3127 for node, dskl in per_node_disks.items():
3128 newl = [v[2].Copy() for v in dskl]
3130 self.cfg.SetDiskID(dsk, node)
3131 result = self.rpc.call_blockdev_getsize(node, newl)
3133 self.LogWarning("Failure in blockdev_getsize call to node"
3134 " %s, ignoring", node)
3136 if len(result.payload) != len(dskl):
3137 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3138 " result.payload=%s", node, len(dskl), result.payload)
3139 self.LogWarning("Invalid result from node %s, ignoring node results",
3142 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3144 self.LogWarning("Disk %d of instance %s did not return size"
3145 " information, ignoring", idx, instance.name)
3147 if not isinstance(size, (int, long)):
3148 self.LogWarning("Disk %d of instance %s did not return valid"
3149 " size information, ignoring", idx, instance.name)
3152 if size != disk.size:
3153 self.LogInfo("Disk %d of instance %s has mismatched size,"
3154 " correcting: recorded %d, actual %d", idx,
3155 instance.name, disk.size, size)
3157 self.cfg.Update(instance, feedback_fn)
3158 changed.append((instance.name, idx, size))
3159 if self._EnsureChildSizes(disk):
3160 self.cfg.Update(instance, feedback_fn)
3161 changed.append((instance.name, idx, disk.size))
3165 class LUClusterRename(LogicalUnit):
3166 """Rename the cluster.
3169 HPATH = "cluster-rename"
3170 HTYPE = constants.HTYPE_CLUSTER
3172 def BuildHooksEnv(self):
3177 "OP_TARGET": self.cfg.GetClusterName(),
3178 "NEW_NAME": self.op.name,
3181 def BuildHooksNodes(self):
3182 """Build hooks nodes.
3185 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3187 def CheckPrereq(self):
3188 """Verify that the passed name is a valid one.
3191 hostname = netutils.GetHostname(name=self.op.name,
3192 family=self.cfg.GetPrimaryIPFamily())
3194 new_name = hostname.name
3195 self.ip = new_ip = hostname.ip
3196 old_name = self.cfg.GetClusterName()
3197 old_ip = self.cfg.GetMasterIP()
3198 if new_name == old_name and new_ip == old_ip:
3199 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3200 " cluster has changed",
3202 if new_ip != old_ip:
3203 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3204 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3205 " reachable on the network" %
3206 new_ip, errors.ECODE_NOTUNIQUE)
3208 self.op.name = new_name
3210 def Exec(self, feedback_fn):
3211 """Rename the cluster.
3214 clustername = self.op.name
3217 # shutdown the master IP
3218 master = self.cfg.GetMasterNode()
3219 result = self.rpc.call_node_stop_master(master, False)
3220 result.Raise("Could not disable the master role")
3223 cluster = self.cfg.GetClusterInfo()
3224 cluster.cluster_name = clustername
3225 cluster.master_ip = ip
3226 self.cfg.Update(cluster, feedback_fn)
3228 # update the known hosts file
3229 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3230 node_list = self.cfg.GetOnlineNodeList()
3232 node_list.remove(master)
3235 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3237 result = self.rpc.call_node_start_master(master, False, False)
3238 msg = result.fail_msg
3240 self.LogWarning("Could not re-enable the master role on"
3241 " the master, please restart manually: %s", msg)
3246 class LUClusterSetParams(LogicalUnit):
3247 """Change the parameters of the cluster.
3250 HPATH = "cluster-modify"
3251 HTYPE = constants.HTYPE_CLUSTER
3254 def CheckArguments(self):
3258 if self.op.uid_pool:
3259 uidpool.CheckUidPool(self.op.uid_pool)
3261 if self.op.add_uids:
3262 uidpool.CheckUidPool(self.op.add_uids)
3264 if self.op.remove_uids:
3265 uidpool.CheckUidPool(self.op.remove_uids)
3267 def ExpandNames(self):
3268 # FIXME: in the future maybe other cluster params won't require checking on
3269 # all nodes to be modified.
3270 self.needed_locks = {
3271 locking.LEVEL_NODE: locking.ALL_SET,
3273 self.share_locks[locking.LEVEL_NODE] = 1
3275 def BuildHooksEnv(self):
3280 "OP_TARGET": self.cfg.GetClusterName(),
3281 "NEW_VG_NAME": self.op.vg_name,
3284 def BuildHooksNodes(self):
3285 """Build hooks nodes.
3288 mn = self.cfg.GetMasterNode()
3291 def CheckPrereq(self):
3292 """Check prerequisites.
3294 This checks whether the given params don't conflict and
3295 if the given volume group is valid.
3298 if self.op.vg_name is not None and not self.op.vg_name:
3299 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3300 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3301 " instances exist", errors.ECODE_INVAL)
3303 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3304 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3305 raise errors.OpPrereqError("Cannot disable drbd helper while"
3306 " drbd-based instances exist",
3309 node_list = self.glm.list_owned(locking.LEVEL_NODE)
3311 # if vg_name not None, checks given volume group on all nodes
3313 vglist = self.rpc.call_vg_list(node_list)
3314 for node in node_list:
3315 msg = vglist[node].fail_msg
3317 # ignoring down node
3318 self.LogWarning("Error while gathering data on node %s"
3319 " (ignoring node): %s", node, msg)
3321 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3323 constants.MIN_VG_SIZE)
3325 raise errors.OpPrereqError("Error on node '%s': %s" %
3326 (node, vgstatus), errors.ECODE_ENVIRON)
3328 if self.op.drbd_helper:
3329 # checks given drbd helper on all nodes
3330 helpers = self.rpc.call_drbd_helper(node_list)
3331 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3333 self.LogInfo("Not checking drbd helper on offline node %s", node)
3335 msg = helpers[node].fail_msg
3337 raise errors.OpPrereqError("Error checking drbd helper on node"
3338 " '%s': %s" % (node, msg),
3339 errors.ECODE_ENVIRON)
3340 node_helper = helpers[node].payload
3341 if node_helper != self.op.drbd_helper:
3342 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3343 (node, node_helper), errors.ECODE_ENVIRON)
3345 self.cluster = cluster = self.cfg.GetClusterInfo()
3346 # validate params changes
3347 if self.op.beparams:
3348 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3349 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3351 if self.op.ndparams:
3352 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3353 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3355 # TODO: we need a more general way to handle resetting
3356 # cluster-level parameters to default values
3357 if self.new_ndparams["oob_program"] == "":
3358 self.new_ndparams["oob_program"] = \
3359 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3361 if self.op.nicparams:
3362 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3363 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3364 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3367 # check all instances for consistency
3368 for instance in self.cfg.GetAllInstancesInfo().values():
3369 for nic_idx, nic in enumerate(instance.nics):
3370 params_copy = copy.deepcopy(nic.nicparams)
3371 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3373 # check parameter syntax
3375 objects.NIC.CheckParameterSyntax(params_filled)
3376 except errors.ConfigurationError, err:
3377 nic_errors.append("Instance %s, nic/%d: %s" %
3378 (instance.name, nic_idx, err))
3380 # if we're moving instances to routed, check that they have an ip
3381 target_mode = params_filled[constants.NIC_MODE]
3382 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3383 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3384 " address" % (instance.name, nic_idx))
3386 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3387 "\n".join(nic_errors))
3389 # hypervisor list/parameters
3390 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3391 if self.op.hvparams:
3392 for hv_name, hv_dict in self.op.hvparams.items():
3393 if hv_name not in self.new_hvparams:
3394 self.new_hvparams[hv_name] = hv_dict
3396 self.new_hvparams[hv_name].update(hv_dict)
3398 # os hypervisor parameters
3399 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3401 for os_name, hvs in self.op.os_hvp.items():
3402 if os_name not in self.new_os_hvp:
3403 self.new_os_hvp[os_name] = hvs
3405 for hv_name, hv_dict in hvs.items():
3406 if hv_name not in self.new_os_hvp[os_name]:
3407 self.new_os_hvp[os_name][hv_name] = hv_dict
3409 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3412 self.new_osp = objects.FillDict(cluster.osparams, {})
3413 if self.op.osparams:
3414 for os_name, osp in self.op.osparams.items():
3415 if os_name not in self.new_osp:
3416 self.new_osp[os_name] = {}
3418 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3421 if not self.new_osp[os_name]:
3422 # we removed all parameters
3423 del self.new_osp[os_name]
3425 # check the parameter validity (remote check)
3426 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3427 os_name, self.new_osp[os_name])
3429 # changes to the hypervisor list
3430 if self.op.enabled_hypervisors is not None:
3431 self.hv_list = self.op.enabled_hypervisors
3432 for hv in self.hv_list:
3433 # if the hypervisor doesn't already exist in the cluster
3434 # hvparams, we initialize it to empty, and then (in both
3435 # cases) we make sure to fill the defaults, as we might not
3436 # have a complete defaults list if the hypervisor wasn't
3438 if hv not in new_hvp:
3440 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3441 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3443 self.hv_list = cluster.enabled_hypervisors
3445 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3446 # either the enabled list has changed, or the parameters have, validate
3447 for hv_name, hv_params in self.new_hvparams.items():
3448 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3449 (self.op.enabled_hypervisors and
3450 hv_name in self.op.enabled_hypervisors)):
3451 # either this is a new hypervisor, or its parameters have changed
3452 hv_class = hypervisor.GetHypervisor(hv_name)
3453 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3454 hv_class.CheckParameterSyntax(hv_params)
3455 _CheckHVParams(self, node_list, hv_name, hv_params)
3458 # no need to check any newly-enabled hypervisors, since the
3459 # defaults have already been checked in the above code-block
3460 for os_name, os_hvp in self.new_os_hvp.items():
3461 for hv_name, hv_params in os_hvp.items():
3462 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3463 # we need to fill in the new os_hvp on top of the actual hv_p
3464 cluster_defaults = self.new_hvparams.get(hv_name, {})
3465 new_osp = objects.FillDict(cluster_defaults, hv_params)
3466 hv_class = hypervisor.GetHypervisor(hv_name)
3467 hv_class.CheckParameterSyntax(new_osp)
3468 _CheckHVParams(self, node_list, hv_name, new_osp)
3470 if self.op.default_iallocator:
3471 alloc_script = utils.FindFile(self.op.default_iallocator,
3472 constants.IALLOCATOR_SEARCH_PATH,
3474 if alloc_script is None:
3475 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3476 " specified" % self.op.default_iallocator,
3479 def Exec(self, feedback_fn):
3480 """Change the parameters of the cluster.
3483 if self.op.vg_name is not None:
3484 new_volume = self.op.vg_name
3487 if new_volume != self.cfg.GetVGName():
3488 self.cfg.SetVGName(new_volume)
3490 feedback_fn("Cluster LVM configuration already in desired"
3491 " state, not changing")
3492 if self.op.drbd_helper is not None:
3493 new_helper = self.op.drbd_helper
3496 if new_helper != self.cfg.GetDRBDHelper():
3497 self.cfg.SetDRBDHelper(new_helper)
3499 feedback_fn("Cluster DRBD helper already in desired state,"
3501 if self.op.hvparams:
3502 self.cluster.hvparams = self.new_hvparams
3504 self.cluster.os_hvp = self.new_os_hvp
3505 if self.op.enabled_hypervisors is not None:
3506 self.cluster.hvparams = self.new_hvparams
3507 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3508 if self.op.beparams:
3509 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3510 if self.op.nicparams:
3511 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3512 if self.op.osparams:
3513 self.cluster.osparams = self.new_osp
3514 if self.op.ndparams:
3515 self.cluster.ndparams = self.new_ndparams
3517 if self.op.candidate_pool_size is not None:
3518 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3519 # we need to update the pool size here, otherwise the save will fail
3520 _AdjustCandidatePool(self, [])
3522 if self.op.maintain_node_health is not None:
3523 self.cluster.maintain_node_health = self.op.maintain_node_health
3525 if self.op.prealloc_wipe_disks is not None:
3526 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3528 if self.op.add_uids is not None:
3529 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3531 if self.op.remove_uids is not None:
3532 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3534 if self.op.uid_pool is not None:
3535 self.cluster.uid_pool = self.op.uid_pool
3537 if self.op.default_iallocator is not None:
3538 self.cluster.default_iallocator = self.op.default_iallocator
3540 if self.op.reserved_lvs is not None:
3541 self.cluster.reserved_lvs = self.op.reserved_lvs
3543 def helper_os(aname, mods, desc):
3545 lst = getattr(self.cluster, aname)
3546 for key, val in mods:
3547 if key == constants.DDM_ADD:
3549 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3552 elif key == constants.DDM_REMOVE:
3556 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3558 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3560 if self.op.hidden_os:
3561 helper_os("hidden_os", self.op.hidden_os, "hidden")
3563 if self.op.blacklisted_os:
3564 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3566 if self.op.master_netdev:
3567 master = self.cfg.GetMasterNode()
3568 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3569 self.cluster.master_netdev)
3570 result = self.rpc.call_node_stop_master(master, False)
3571 result.Raise("Could not disable the master ip")
3572 feedback_fn("Changing master_netdev from %s to %s" %
3573 (self.cluster.master_netdev, self.op.master_netdev))
3574 self.cluster.master_netdev = self.op.master_netdev
3576 self.cfg.Update(self.cluster, feedback_fn)
3578 if self.op.master_netdev:
3579 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3580 self.op.master_netdev)
3581 result = self.rpc.call_node_start_master(master, False, False)
3583 self.LogWarning("Could not re-enable the master ip on"
3584 " the master, please restart manually: %s",
3588 def _UploadHelper(lu, nodes, fname):
3589 """Helper for uploading a file and showing warnings.
3592 if os.path.exists(fname):
3593 result = lu.rpc.call_upload_file(nodes, fname)
3594 for to_node, to_result in result.items():
3595 msg = to_result.fail_msg
3597 msg = ("Copy of file %s to node %s failed: %s" %
3598 (fname, to_node, msg))
3599 lu.proc.LogWarning(msg)
3602 def _ComputeAncillaryFiles(cluster, redist):
3603 """Compute files external to Ganeti which need to be consistent.
3605 @type redist: boolean
3606 @param redist: Whether to include files which need to be redistributed
3609 # Compute files for all nodes
3611 constants.SSH_KNOWN_HOSTS_FILE,
3612 constants.CONFD_HMAC_KEY,
3613 constants.CLUSTER_DOMAIN_SECRET_FILE,
3617 files_all.update(constants.ALL_CERT_FILES)
3618 files_all.update(ssconf.SimpleStore().GetFileList())
3620 if cluster.modify_etc_hosts:
3621 files_all.add(constants.ETC_HOSTS)
3623 # Files which must either exist on all nodes or on none
3624 files_all_opt = set([
3625 constants.RAPI_USERS_FILE,
3628 # Files which should only be on master candidates
3631 files_mc.add(constants.CLUSTER_CONF_FILE)
3633 # Files which should only be on VM-capable nodes
3634 files_vm = set(filename
3635 for hv_name in cluster.enabled_hypervisors
3636 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3638 # Filenames must be unique
3639 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3640 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3641 "Found file listed in more than one file list"
3643 return (files_all, files_all_opt, files_mc, files_vm)
3646 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3647 """Distribute additional files which are part of the cluster configuration.
3649 ConfigWriter takes care of distributing the config and ssconf files, but
3650 there are more files which should be distributed to all nodes. This function
3651 makes sure those are copied.
3653 @param lu: calling logical unit
3654 @param additional_nodes: list of nodes not in the config to distribute to
3655 @type additional_vm: boolean
3656 @param additional_vm: whether the additional nodes are vm-capable or not
3659 # Gather target nodes
3660 cluster = lu.cfg.GetClusterInfo()
3661 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3663 online_nodes = lu.cfg.GetOnlineNodeList()
3664 vm_nodes = lu.cfg.GetVmCapableNodeList()
3666 if additional_nodes is not None:
3667 online_nodes.extend(additional_nodes)
3669 vm_nodes.extend(additional_nodes)
3671 # Never distribute to master node
3672 for nodelist in [online_nodes, vm_nodes]:
3673 if master_info.name in nodelist:
3674 nodelist.remove(master_info.name)
3677 (files_all, files_all_opt, files_mc, files_vm) = \
3678 _ComputeAncillaryFiles(cluster, True)
3680 # Never re-distribute configuration file from here
3681 assert not (constants.CLUSTER_CONF_FILE in files_all or
3682 constants.CLUSTER_CONF_FILE in files_vm)
3683 assert not files_mc, "Master candidates not handled in this function"
3686 (online_nodes, files_all),
3687 (online_nodes, files_all_opt),
3688 (vm_nodes, files_vm),
3692 for (node_list, files) in filemap:
3694 _UploadHelper(lu, node_list, fname)
3697 class LUClusterRedistConf(NoHooksLU):
3698 """Force the redistribution of cluster configuration.
3700 This is a very simple LU.
3705 def ExpandNames(self):
3706 self.needed_locks = {
3707 locking.LEVEL_NODE: locking.ALL_SET,
3709 self.share_locks[locking.LEVEL_NODE] = 1
3711 def Exec(self, feedback_fn):
3712 """Redistribute the configuration.
3715 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3716 _RedistributeAncillaryFiles(self)
3719 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3720 """Sleep and poll for an instance's disk to sync.
3723 if not instance.disks or disks is not None and not disks:
3726 disks = _ExpandCheckDisks(instance, disks)
3729 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3731 node = instance.primary_node
3734 lu.cfg.SetDiskID(dev, node)
3736 # TODO: Convert to utils.Retry
3739 degr_retries = 10 # in seconds, as we sleep 1 second each time
3743 cumul_degraded = False
3744 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3745 msg = rstats.fail_msg
3747 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3750 raise errors.RemoteError("Can't contact node %s for mirror data,"
3751 " aborting." % node)
3754 rstats = rstats.payload
3756 for i, mstat in enumerate(rstats):
3758 lu.LogWarning("Can't compute data for node %s/%s",
3759 node, disks[i].iv_name)
3762 cumul_degraded = (cumul_degraded or
3763 (mstat.is_degraded and mstat.sync_percent is None))
3764 if mstat.sync_percent is not None:
3766 if mstat.estimated_time is not None:
3767 rem_time = ("%s remaining (estimated)" %
3768 utils.FormatSeconds(mstat.estimated_time))
3769 max_time = mstat.estimated_time
3771 rem_time = "no time estimate"
3772 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3773 (disks[i].iv_name, mstat.sync_percent, rem_time))
3775 # if we're done but degraded, let's do a few small retries, to
3776 # make sure we see a stable and not transient situation; therefore
3777 # we force restart of the loop
3778 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3779 logging.info("Degraded disks found, %d retries left", degr_retries)
3787 time.sleep(min(60, max_time))
3790 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3791 return not cumul_degraded
3794 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3795 """Check that mirrors are not degraded.
3797 The ldisk parameter, if True, will change the test from the
3798 is_degraded attribute (which represents overall non-ok status for
3799 the device(s)) to the ldisk (representing the local storage status).
3802 lu.cfg.SetDiskID(dev, node)
3806 if on_primary or dev.AssembleOnSecondary():
3807 rstats = lu.rpc.call_blockdev_find(node, dev)
3808 msg = rstats.fail_msg
3810 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3812 elif not rstats.payload:
3813 lu.LogWarning("Can't find disk on node %s", node)
3817 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3819 result = result and not rstats.payload.is_degraded
3822 for child in dev.children:
3823 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3828 class LUOobCommand(NoHooksLU):
3829 """Logical unit for OOB handling.
3833 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3835 def ExpandNames(self):
3836 """Gather locks we need.
3839 if self.op.node_names:
3840 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3841 lock_names = self.op.node_names
3843 lock_names = locking.ALL_SET
3845 self.needed_locks = {
3846 locking.LEVEL_NODE: lock_names,
3849 def CheckPrereq(self):
3850 """Check prerequisites.
3853 - the node exists in the configuration
3856 Any errors are signaled by raising errors.OpPrereqError.
3860 self.master_node = self.cfg.GetMasterNode()
3862 assert self.op.power_delay >= 0.0
3864 if self.op.node_names:
3865 if (self.op.command in self._SKIP_MASTER and
3866 self.master_node in self.op.node_names):
3867 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3868 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3870 if master_oob_handler:
3871 additional_text = ("run '%s %s %s' if you want to operate on the"
3872 " master regardless") % (master_oob_handler,
3876 additional_text = "it does not support out-of-band operations"
3878 raise errors.OpPrereqError(("Operating on the master node %s is not"
3879 " allowed for %s; %s") %
3880 (self.master_node, self.op.command,
3881 additional_text), errors.ECODE_INVAL)
3883 self.op.node_names = self.cfg.GetNodeList()
3884 if self.op.command in self._SKIP_MASTER:
3885 self.op.node_names.remove(self.master_node)
3887 if self.op.command in self._SKIP_MASTER:
3888 assert self.master_node not in self.op.node_names
3890 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
3892 raise errors.OpPrereqError("Node %s not found" % node_name,
3895 self.nodes.append(node)
3897 if (not self.op.ignore_status and
3898 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3899 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3900 " not marked offline") % node_name,
3903 def Exec(self, feedback_fn):
3904 """Execute OOB and return result if we expect any.
3907 master_node = self.master_node
3910 for idx, node in enumerate(utils.NiceSort(self.nodes,
3911 key=lambda node: node.name)):
3912 node_entry = [(constants.RS_NORMAL, node.name)]
3913 ret.append(node_entry)
3915 oob_program = _SupportsOob(self.cfg, node)
3918 node_entry.append((constants.RS_UNAVAIL, None))
3921 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3922 self.op.command, oob_program, node.name)
3923 result = self.rpc.call_run_oob(master_node, oob_program,
3924 self.op.command, node.name,
3928 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3929 node.name, result.fail_msg)
3930 node_entry.append((constants.RS_NODATA, None))
3933 self._CheckPayload(result)
3934 except errors.OpExecError, err:
3935 self.LogWarning("Payload returned by node '%s' is not valid: %s",
3937 node_entry.append((constants.RS_NODATA, None))
3939 if self.op.command == constants.OOB_HEALTH:
3940 # For health we should log important events
3941 for item, status in result.payload:
3942 if status in [constants.OOB_STATUS_WARNING,
3943 constants.OOB_STATUS_CRITICAL]:
3944 self.LogWarning("Item '%s' on node '%s' has status '%s'",
3945 item, node.name, status)
3947 if self.op.command == constants.OOB_POWER_ON:
3949 elif self.op.command == constants.OOB_POWER_OFF:
3950 node.powered = False
3951 elif self.op.command == constants.OOB_POWER_STATUS:
3952 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3953 if powered != node.powered:
3954 logging.warning(("Recorded power state (%s) of node '%s' does not"
3955 " match actual power state (%s)"), node.powered,
3958 # For configuration changing commands we should update the node
3959 if self.op.command in (constants.OOB_POWER_ON,
3960 constants.OOB_POWER_OFF):
3961 self.cfg.Update(node, feedback_fn)
3963 node_entry.append((constants.RS_NORMAL, result.payload))
3965 if (self.op.command == constants.OOB_POWER_ON and
3966 idx < len(self.nodes) - 1):
3967 time.sleep(self.op.power_delay)
3971 def _CheckPayload(self, result):
3972 """Checks if the payload is valid.
3974 @param result: RPC result
3975 @raises errors.OpExecError: If payload is not valid
3979 if self.op.command == constants.OOB_HEALTH:
3980 if not isinstance(result.payload, list):
3981 errs.append("command 'health' is expected to return a list but got %s" %
3982 type(result.payload))
3984 for item, status in result.payload:
3985 if status not in constants.OOB_STATUSES:
3986 errs.append("health item '%s' has invalid status '%s'" %
3989 if self.op.command == constants.OOB_POWER_STATUS:
3990 if not isinstance(result.payload, dict):
3991 errs.append("power-status is expected to return a dict but got %s" %
3992 type(result.payload))
3994 if self.op.command in [
3995 constants.OOB_POWER_ON,
3996 constants.OOB_POWER_OFF,
3997 constants.OOB_POWER_CYCLE,
3999 if result.payload is not None:
4000 errs.append("%s is expected to not return payload but got '%s'" %
4001 (self.op.command, result.payload))
4004 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4005 utils.CommaJoin(errs))
4007 class _OsQuery(_QueryBase):
4008 FIELDS = query.OS_FIELDS
4010 def ExpandNames(self, lu):
4011 # Lock all nodes in shared mode
4012 # Temporary removal of locks, should be reverted later
4013 # TODO: reintroduce locks when they are lighter-weight
4014 lu.needed_locks = {}
4015 #self.share_locks[locking.LEVEL_NODE] = 1
4016 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4018 # The following variables interact with _QueryBase._GetNames
4020 self.wanted = self.names
4022 self.wanted = locking.ALL_SET
4024 self.do_locking = self.use_locking
4026 def DeclareLocks(self, lu, level):
4030 def _DiagnoseByOS(rlist):
4031 """Remaps a per-node return list into an a per-os per-node dictionary
4033 @param rlist: a map with node names as keys and OS objects as values
4036 @return: a dictionary with osnames as keys and as value another
4037 map, with nodes as keys and tuples of (path, status, diagnose,
4038 variants, parameters, api_versions) as values, eg::
4040 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4041 (/srv/..., False, "invalid api")],
4042 "node2": [(/srv/..., True, "", [], [])]}
4047 # we build here the list of nodes that didn't fail the RPC (at RPC
4048 # level), so that nodes with a non-responding node daemon don't
4049 # make all OSes invalid
4050 good_nodes = [node_name for node_name in rlist
4051 if not rlist[node_name].fail_msg]
4052 for node_name, nr in rlist.items():
4053 if nr.fail_msg or not nr.payload:
4055 for (name, path, status, diagnose, variants,
4056 params, api_versions) in nr.payload:
4057 if name not in all_os:
4058 # build a list of nodes for this os containing empty lists
4059 # for each node in node_list
4061 for nname in good_nodes:
4062 all_os[name][nname] = []
4063 # convert params from [name, help] to (name, help)
4064 params = [tuple(v) for v in params]
4065 all_os[name][node_name].append((path, status, diagnose,
4066 variants, params, api_versions))
4069 def _GetQueryData(self, lu):
4070 """Computes the list of nodes and their attributes.
4073 # Locking is not used
4074 assert not (compat.any(lu.glm.is_owned(level)
4075 for level in locking.LEVELS
4076 if level != locking.LEVEL_CLUSTER) or
4077 self.do_locking or self.use_locking)
4079 valid_nodes = [node.name
4080 for node in lu.cfg.GetAllNodesInfo().values()
4081 if not node.offline and node.vm_capable]
4082 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4083 cluster = lu.cfg.GetClusterInfo()
4087 for (os_name, os_data) in pol.items():
4088 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4089 hidden=(os_name in cluster.hidden_os),
4090 blacklisted=(os_name in cluster.blacklisted_os))
4094 api_versions = set()
4096 for idx, osl in enumerate(os_data.values()):
4097 info.valid = bool(info.valid and osl and osl[0][1])
4101 (node_variants, node_params, node_api) = osl[0][3:6]
4104 variants.update(node_variants)
4105 parameters.update(node_params)
4106 api_versions.update(node_api)
4108 # Filter out inconsistent values
4109 variants.intersection_update(node_variants)
4110 parameters.intersection_update(node_params)
4111 api_versions.intersection_update(node_api)
4113 info.variants = list(variants)
4114 info.parameters = list(parameters)
4115 info.api_versions = list(api_versions)
4117 data[os_name] = info
4119 # Prepare data in requested order
4120 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4124 class LUOsDiagnose(NoHooksLU):
4125 """Logical unit for OS diagnose/query.
4131 def _BuildFilter(fields, names):
4132 """Builds a filter for querying OSes.
4135 name_filter = qlang.MakeSimpleFilter("name", names)
4137 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4138 # respective field is not requested
4139 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4140 for fname in ["hidden", "blacklisted"]
4141 if fname not in fields]
4142 if "valid" not in fields:
4143 status_filter.append([qlang.OP_TRUE, "valid"])
4146 status_filter.insert(0, qlang.OP_AND)
4148 status_filter = None
4150 if name_filter and status_filter:
4151 return [qlang.OP_AND, name_filter, status_filter]
4155 return status_filter
4157 def CheckArguments(self):
4158 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4159 self.op.output_fields, False)
4161 def ExpandNames(self):
4162 self.oq.ExpandNames(self)
4164 def Exec(self, feedback_fn):
4165 return self.oq.OldStyleQuery(self)
4168 class LUNodeRemove(LogicalUnit):
4169 """Logical unit for removing a node.
4172 HPATH = "node-remove"
4173 HTYPE = constants.HTYPE_NODE
4175 def BuildHooksEnv(self):
4178 This doesn't run on the target node in the pre phase as a failed
4179 node would then be impossible to remove.
4183 "OP_TARGET": self.op.node_name,
4184 "NODE_NAME": self.op.node_name,
4187 def BuildHooksNodes(self):
4188 """Build hooks nodes.
4191 all_nodes = self.cfg.GetNodeList()
4193 all_nodes.remove(self.op.node_name)
4195 logging.warning("Node '%s', which is about to be removed, was not found"
4196 " in the list of all nodes", self.op.node_name)
4197 return (all_nodes, all_nodes)
4199 def CheckPrereq(self):
4200 """Check prerequisites.
4203 - the node exists in the configuration
4204 - it does not have primary or secondary instances
4205 - it's not the master
4207 Any errors are signaled by raising errors.OpPrereqError.
4210 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4211 node = self.cfg.GetNodeInfo(self.op.node_name)
4212 assert node is not None
4214 masternode = self.cfg.GetMasterNode()
4215 if node.name == masternode:
4216 raise errors.OpPrereqError("Node is the master node, failover to another"
4217 " node is required", errors.ECODE_INVAL)
4219 for instance_name, instance in self.cfg.GetAllInstancesInfo():
4220 if node.name in instance.all_nodes:
4221 raise errors.OpPrereqError("Instance %s is still running on the node,"
4222 " please remove first" % instance_name,
4224 self.op.node_name = node.name
4227 def Exec(self, feedback_fn):
4228 """Removes the node from the cluster.
4232 logging.info("Stopping the node daemon and removing configs from node %s",
4235 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4237 # Promote nodes to master candidate as needed
4238 _AdjustCandidatePool(self, exceptions=[node.name])
4239 self.context.RemoveNode(node.name)
4241 # Run post hooks on the node before it's removed
4242 _RunPostHook(self, node.name)
4244 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4245 msg = result.fail_msg
4247 self.LogWarning("Errors encountered on the remote node while leaving"
4248 " the cluster: %s", msg)
4250 # Remove node from our /etc/hosts
4251 if self.cfg.GetClusterInfo().modify_etc_hosts:
4252 master_node = self.cfg.GetMasterNode()
4253 result = self.rpc.call_etc_hosts_modify(master_node,
4254 constants.ETC_HOSTS_REMOVE,
4256 result.Raise("Can't update hosts file with new host data")
4257 _RedistributeAncillaryFiles(self)
4260 class _NodeQuery(_QueryBase):
4261 FIELDS = query.NODE_FIELDS
4263 def ExpandNames(self, lu):
4264 lu.needed_locks = {}
4265 lu.share_locks[locking.LEVEL_NODE] = 1
4268 self.wanted = _GetWantedNodes(lu, self.names)
4270 self.wanted = locking.ALL_SET
4272 self.do_locking = (self.use_locking and
4273 query.NQ_LIVE in self.requested_data)
4276 # if we don't request only static fields, we need to lock the nodes
4277 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4279 def DeclareLocks(self, lu, level):
4282 def _GetQueryData(self, lu):
4283 """Computes the list of nodes and their attributes.
4286 all_info = lu.cfg.GetAllNodesInfo()
4288 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4290 # Gather data as requested
4291 if query.NQ_LIVE in self.requested_data:
4292 # filter out non-vm_capable nodes
4293 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4295 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4296 lu.cfg.GetHypervisorType())
4297 live_data = dict((name, nresult.payload)
4298 for (name, nresult) in node_data.items()
4299 if not nresult.fail_msg and nresult.payload)
4303 if query.NQ_INST in self.requested_data:
4304 node_to_primary = dict([(name, set()) for name in nodenames])
4305 node_to_secondary = dict([(name, set()) for name in nodenames])
4307 inst_data = lu.cfg.GetAllInstancesInfo()
4309 for inst in inst_data.values():
4310 if inst.primary_node in node_to_primary:
4311 node_to_primary[inst.primary_node].add(inst.name)
4312 for secnode in inst.secondary_nodes:
4313 if secnode in node_to_secondary:
4314 node_to_secondary[secnode].add(inst.name)
4316 node_to_primary = None
4317 node_to_secondary = None
4319 if query.NQ_OOB in self.requested_data:
4320 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4321 for name, node in all_info.iteritems())
4325 if query.NQ_GROUP in self.requested_data:
4326 groups = lu.cfg.GetAllNodeGroupsInfo()
4330 return query.NodeQueryData([all_info[name] for name in nodenames],
4331 live_data, lu.cfg.GetMasterNode(),
4332 node_to_primary, node_to_secondary, groups,
4333 oob_support, lu.cfg.GetClusterInfo())
4336 class LUNodeQuery(NoHooksLU):
4337 """Logical unit for querying nodes.
4340 # pylint: disable-msg=W0142
4343 def CheckArguments(self):
4344 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4345 self.op.output_fields, self.op.use_locking)
4347 def ExpandNames(self):
4348 self.nq.ExpandNames(self)
4350 def Exec(self, feedback_fn):
4351 return self.nq.OldStyleQuery(self)
4354 class LUNodeQueryvols(NoHooksLU):
4355 """Logical unit for getting volumes on node(s).
4359 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4360 _FIELDS_STATIC = utils.FieldSet("node")
4362 def CheckArguments(self):
4363 _CheckOutputFields(static=self._FIELDS_STATIC,
4364 dynamic=self._FIELDS_DYNAMIC,
4365 selected=self.op.output_fields)
4367 def ExpandNames(self):
4368 self.needed_locks = {}
4369 self.share_locks[locking.LEVEL_NODE] = 1
4370 if not self.op.nodes:
4371 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4373 self.needed_locks[locking.LEVEL_NODE] = \
4374 _GetWantedNodes(self, self.op.nodes)
4376 def Exec(self, feedback_fn):
4377 """Computes the list of nodes and their attributes.
4380 nodenames = self.glm.list_owned(locking.LEVEL_NODE)
4381 volumes = self.rpc.call_node_volumes(nodenames)
4383 ilist = self.cfg.GetAllInstancesInfo()
4384 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4387 for node in nodenames:
4388 nresult = volumes[node]
4391 msg = nresult.fail_msg
4393 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4396 node_vols = sorted(nresult.payload,
4397 key=operator.itemgetter("dev"))
4399 for vol in node_vols:
4401 for field in self.op.output_fields:
4404 elif field == "phys":
4408 elif field == "name":
4410 elif field == "size":
4411 val = int(float(vol["size"]))
4412 elif field == "instance":
4413 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4415 raise errors.ParameterError(field)
4416 node_output.append(str(val))
4418 output.append(node_output)
4423 class LUNodeQueryStorage(NoHooksLU):
4424 """Logical unit for getting information on storage units on node(s).
4427 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4430 def CheckArguments(self):
4431 _CheckOutputFields(static=self._FIELDS_STATIC,
4432 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4433 selected=self.op.output_fields)
4435 def ExpandNames(self):
4436 self.needed_locks = {}
4437 self.share_locks[locking.LEVEL_NODE] = 1
4440 self.needed_locks[locking.LEVEL_NODE] = \
4441 _GetWantedNodes(self, self.op.nodes)
4443 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4445 def Exec(self, feedback_fn):
4446 """Computes the list of nodes and their attributes.
4449 self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
4451 # Always get name to sort by
4452 if constants.SF_NAME in self.op.output_fields:
4453 fields = self.op.output_fields[:]
4455 fields = [constants.SF_NAME] + self.op.output_fields
4457 # Never ask for node or type as it's only known to the LU
4458 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4459 while extra in fields:
4460 fields.remove(extra)
4462 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4463 name_idx = field_idx[constants.SF_NAME]
4465 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4466 data = self.rpc.call_storage_list(self.nodes,
4467 self.op.storage_type, st_args,
4468 self.op.name, fields)
4472 for node in utils.NiceSort(self.nodes):
4473 nresult = data[node]
4477 msg = nresult.fail_msg
4479 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4482 rows = dict([(row[name_idx], row) for row in nresult.payload])
4484 for name in utils.NiceSort(rows.keys()):
4489 for field in self.op.output_fields:
4490 if field == constants.SF_NODE:
4492 elif field == constants.SF_TYPE:
4493 val = self.op.storage_type
4494 elif field in field_idx:
4495 val = row[field_idx[field]]
4497 raise errors.ParameterError(field)
4506 class _InstanceQuery(_QueryBase):
4507 FIELDS = query.INSTANCE_FIELDS
4509 def ExpandNames(self, lu):
4510 lu.needed_locks = {}
4511 lu.share_locks = _ShareAll()
4514 self.wanted = _GetWantedInstances(lu, self.names)
4516 self.wanted = locking.ALL_SET
4518 self.do_locking = (self.use_locking and
4519 query.IQ_LIVE in self.requested_data)
4521 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4522 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4523 lu.needed_locks[locking.LEVEL_NODE] = []
4524 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4526 self.do_grouplocks = (self.do_locking and
4527 query.IQ_NODES in self.requested_data)
4529 def DeclareLocks(self, lu, level):
4531 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4532 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4534 # Lock all groups used by instances optimistically; this requires going
4535 # via the node before it's locked, requiring verification later on
4536 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4538 for instance_name in
4539 lu.glm.list_owned(locking.LEVEL_INSTANCE)
4541 lu.cfg.GetInstanceNodeGroups(instance_name))
4542 elif level == locking.LEVEL_NODE:
4543 lu._LockInstancesNodes() # pylint: disable-msg=W0212
4546 def _CheckGroupLocks(lu):
4547 owned_instances = frozenset(lu.glm.list_owned(locking.LEVEL_INSTANCE))
4548 owned_groups = frozenset(lu.glm.list_owned(locking.LEVEL_NODEGROUP))
4550 # Check if node groups for locked instances are still correct
4551 for instance_name in owned_instances:
4552 inst_groups = lu.cfg.GetInstanceNodeGroups(instance_name)
4553 if not owned_groups.issuperset(inst_groups):
4554 raise errors.OpPrereqError("Instance %s's node groups changed since"
4555 " locks were acquired, current groups are"
4556 " are '%s', owning groups '%s'; retry the"
4559 utils.CommaJoin(inst_groups),
4560 utils.CommaJoin(owned_groups)),
4563 def _GetQueryData(self, lu):
4564 """Computes the list of instances and their attributes.
4567 if self.do_grouplocks:
4568 self._CheckGroupLocks(lu)
4570 cluster = lu.cfg.GetClusterInfo()
4571 all_info = lu.cfg.GetAllInstancesInfo()
4573 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4575 instance_list = [all_info[name] for name in instance_names]
4576 nodes = frozenset(itertools.chain(*(inst.all_nodes
4577 for inst in instance_list)))
4578 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4581 wrongnode_inst = set()
4583 # Gather data as requested
4584 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4586 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4588 result = node_data[name]
4590 # offline nodes will be in both lists
4591 assert result.fail_msg
4592 offline_nodes.append(name)
4594 bad_nodes.append(name)
4595 elif result.payload:
4596 for inst in result.payload:
4597 if inst in all_info:
4598 if all_info[inst].primary_node == name:
4599 live_data.update(result.payload)
4601 wrongnode_inst.add(inst)
4603 # orphan instance; we don't list it here as we don't
4604 # handle this case yet in the output of instance listing
4605 logging.warning("Orphan instance '%s' found on node %s",
4607 # else no instance is alive
4611 if query.IQ_DISKUSAGE in self.requested_data:
4612 disk_usage = dict((inst.name,
4613 _ComputeDiskSize(inst.disk_template,
4614 [{constants.IDISK_SIZE: disk.size}
4615 for disk in inst.disks]))
4616 for inst in instance_list)
4620 if query.IQ_CONSOLE in self.requested_data:
4622 for inst in instance_list:
4623 if inst.name in live_data:
4624 # Instance is running
4625 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4627 consinfo[inst.name] = None
4628 assert set(consinfo.keys()) == set(instance_names)
4632 if query.IQ_NODES in self.requested_data:
4633 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4635 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4636 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4637 for uuid in set(map(operator.attrgetter("group"),
4643 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4644 disk_usage, offline_nodes, bad_nodes,
4645 live_data, wrongnode_inst, consinfo,
4649 class LUQuery(NoHooksLU):
4650 """Query for resources/items of a certain kind.
4653 # pylint: disable-msg=W0142
4656 def CheckArguments(self):
4657 qcls = _GetQueryImplementation(self.op.what)
4659 self.impl = qcls(self.op.filter, self.op.fields, False)
4661 def ExpandNames(self):
4662 self.impl.ExpandNames(self)
4664 def DeclareLocks(self, level):
4665 self.impl.DeclareLocks(self, level)
4667 def Exec(self, feedback_fn):
4668 return self.impl.NewStyleQuery(self)
4671 class LUQueryFields(NoHooksLU):
4672 """Query for resources/items of a certain kind.
4675 # pylint: disable-msg=W0142
4678 def CheckArguments(self):
4679 self.qcls = _GetQueryImplementation(self.op.what)
4681 def ExpandNames(self):
4682 self.needed_locks = {}
4684 def Exec(self, feedback_fn):
4685 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4688 class LUNodeModifyStorage(NoHooksLU):
4689 """Logical unit for modifying a storage volume on a node.
4694 def CheckArguments(self):
4695 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4697 storage_type = self.op.storage_type
4700 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4702 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4703 " modified" % storage_type,
4706 diff = set(self.op.changes.keys()) - modifiable
4708 raise errors.OpPrereqError("The following fields can not be modified for"
4709 " storage units of type '%s': %r" %
4710 (storage_type, list(diff)),
4713 def ExpandNames(self):
4714 self.needed_locks = {
4715 locking.LEVEL_NODE: self.op.node_name,
4718 def Exec(self, feedback_fn):
4719 """Computes the list of nodes and their attributes.
4722 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4723 result = self.rpc.call_storage_modify(self.op.node_name,
4724 self.op.storage_type, st_args,
4725 self.op.name, self.op.changes)
4726 result.Raise("Failed to modify storage unit '%s' on %s" %
4727 (self.op.name, self.op.node_name))
4730 class LUNodeAdd(LogicalUnit):
4731 """Logical unit for adding node to the cluster.
4735 HTYPE = constants.HTYPE_NODE
4736 _NFLAGS = ["master_capable", "vm_capable"]
4738 def CheckArguments(self):
4739 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4740 # validate/normalize the node name
4741 self.hostname = netutils.GetHostname(name=self.op.node_name,
4742 family=self.primary_ip_family)
4743 self.op.node_name = self.hostname.name
4745 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4746 raise errors.OpPrereqError("Cannot readd the master node",
4749 if self.op.readd and self.op.group:
4750 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4751 " being readded", errors.ECODE_INVAL)
4753 def BuildHooksEnv(self):
4756 This will run on all nodes before, and on all nodes + the new node after.
4760 "OP_TARGET": self.op.node_name,
4761 "NODE_NAME": self.op.node_name,
4762 "NODE_PIP": self.op.primary_ip,
4763 "NODE_SIP": self.op.secondary_ip,
4764 "MASTER_CAPABLE": str(self.op.master_capable),
4765 "VM_CAPABLE": str(self.op.vm_capable),
4768 def BuildHooksNodes(self):
4769 """Build hooks nodes.
4772 # Exclude added node
4773 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4774 post_nodes = pre_nodes + [self.op.node_name, ]
4776 return (pre_nodes, post_nodes)
4778 def CheckPrereq(self):
4779 """Check prerequisites.
4782 - the new node is not already in the config
4784 - its parameters (single/dual homed) matches the cluster
4786 Any errors are signaled by raising errors.OpPrereqError.
4790 hostname = self.hostname
4791 node = hostname.name
4792 primary_ip = self.op.primary_ip = hostname.ip
4793 if self.op.secondary_ip is None:
4794 if self.primary_ip_family == netutils.IP6Address.family:
4795 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4796 " IPv4 address must be given as secondary",
4798 self.op.secondary_ip = primary_ip
4800 secondary_ip = self.op.secondary_ip
4801 if not netutils.IP4Address.IsValid(secondary_ip):
4802 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4803 " address" % secondary_ip, errors.ECODE_INVAL)
4805 node_list = cfg.GetNodeList()
4806 if not self.op.readd and node in node_list:
4807 raise errors.OpPrereqError("Node %s is already in the configuration" %
4808 node, errors.ECODE_EXISTS)
4809 elif self.op.readd and node not in node_list:
4810 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4813 self.changed_primary_ip = False
4815 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4816 if self.op.readd and node == existing_node_name:
4817 if existing_node.secondary_ip != secondary_ip:
4818 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4819 " address configuration as before",
4821 if existing_node.primary_ip != primary_ip:
4822 self.changed_primary_ip = True
4826 if (existing_node.primary_ip == primary_ip or
4827 existing_node.secondary_ip == primary_ip or
4828 existing_node.primary_ip == secondary_ip or
4829 existing_node.secondary_ip == secondary_ip):
4830 raise errors.OpPrereqError("New node ip address(es) conflict with"
4831 " existing node %s" % existing_node.name,
4832 errors.ECODE_NOTUNIQUE)
4834 # After this 'if' block, None is no longer a valid value for the
4835 # _capable op attributes
4837 old_node = self.cfg.GetNodeInfo(node)
4838 assert old_node is not None, "Can't retrieve locked node %s" % node
4839 for attr in self._NFLAGS:
4840 if getattr(self.op, attr) is None:
4841 setattr(self.op, attr, getattr(old_node, attr))
4843 for attr in self._NFLAGS:
4844 if getattr(self.op, attr) is None:
4845 setattr(self.op, attr, True)
4847 if self.op.readd and not self.op.vm_capable:
4848 pri, sec = cfg.GetNodeInstances(node)
4850 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4851 " flag set to false, but it already holds"
4852 " instances" % node,
4855 # check that the type of the node (single versus dual homed) is the
4856 # same as for the master
4857 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4858 master_singlehomed = myself.secondary_ip == myself.primary_ip
4859 newbie_singlehomed = secondary_ip == primary_ip
4860 if master_singlehomed != newbie_singlehomed:
4861 if master_singlehomed:
4862 raise errors.OpPrereqError("The master has no secondary ip but the"
4863 " new node has one",
4866 raise errors.OpPrereqError("The master has a secondary ip but the"
4867 " new node doesn't have one",
4870 # checks reachability
4871 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4872 raise errors.OpPrereqError("Node not reachable by ping",
4873 errors.ECODE_ENVIRON)
4875 if not newbie_singlehomed:
4876 # check reachability from my secondary ip to newbie's secondary ip
4877 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4878 source=myself.secondary_ip):
4879 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4880 " based ping to node daemon port",
4881 errors.ECODE_ENVIRON)
4888 if self.op.master_capable:
4889 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4891 self.master_candidate = False
4894 self.new_node = old_node
4896 node_group = cfg.LookupNodeGroup(self.op.group)
4897 self.new_node = objects.Node(name=node,
4898 primary_ip=primary_ip,
4899 secondary_ip=secondary_ip,
4900 master_candidate=self.master_candidate,
4901 offline=False, drained=False,
4904 if self.op.ndparams:
4905 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4907 def Exec(self, feedback_fn):
4908 """Adds the new node to the cluster.
4911 new_node = self.new_node
4912 node = new_node.name
4914 # We adding a new node so we assume it's powered
4915 new_node.powered = True
4917 # for re-adds, reset the offline/drained/master-candidate flags;
4918 # we need to reset here, otherwise offline would prevent RPC calls
4919 # later in the procedure; this also means that if the re-add
4920 # fails, we are left with a non-offlined, broken node
4922 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4923 self.LogInfo("Readding a node, the offline/drained flags were reset")
4924 # if we demote the node, we do cleanup later in the procedure
4925 new_node.master_candidate = self.master_candidate
4926 if self.changed_primary_ip:
4927 new_node.primary_ip = self.op.primary_ip
4929 # copy the master/vm_capable flags
4930 for attr in self._NFLAGS:
4931 setattr(new_node, attr, getattr(self.op, attr))
4933 # notify the user about any possible mc promotion
4934 if new_node.master_candidate:
4935 self.LogInfo("Node will be a master candidate")
4937 if self.op.ndparams:
4938 new_node.ndparams = self.op.ndparams
4940 new_node.ndparams = {}
4942 # check connectivity
4943 result = self.rpc.call_version([node])[node]
4944 result.Raise("Can't get version information from node %s" % node)
4945 if constants.PROTOCOL_VERSION == result.payload:
4946 logging.info("Communication to node %s fine, sw version %s match",
4947 node, result.payload)
4949 raise errors.OpExecError("Version mismatch master version %s,"
4950 " node version %s" %
4951 (constants.PROTOCOL_VERSION, result.payload))
4953 # Add node to our /etc/hosts, and add key to known_hosts
4954 if self.cfg.GetClusterInfo().modify_etc_hosts:
4955 master_node = self.cfg.GetMasterNode()
4956 result = self.rpc.call_etc_hosts_modify(master_node,
4957 constants.ETC_HOSTS_ADD,
4960 result.Raise("Can't update hosts file with new host data")
4962 if new_node.secondary_ip != new_node.primary_ip:
4963 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4966 node_verify_list = [self.cfg.GetMasterNode()]
4967 node_verify_param = {
4968 constants.NV_NODELIST: [node],
4969 # TODO: do a node-net-test as well?
4972 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4973 self.cfg.GetClusterName())
4974 for verifier in node_verify_list:
4975 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4976 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4978 for failed in nl_payload:
4979 feedback_fn("ssh/hostname verification failed"
4980 " (checking from %s): %s" %
4981 (verifier, nl_payload[failed]))
4982 raise errors.OpExecError("ssh/hostname verification failed")
4985 _RedistributeAncillaryFiles(self)
4986 self.context.ReaddNode(new_node)
4987 # make sure we redistribute the config
4988 self.cfg.Update(new_node, feedback_fn)
4989 # and make sure the new node will not have old files around
4990 if not new_node.master_candidate:
4991 result = self.rpc.call_node_demote_from_mc(new_node.name)
4992 msg = result.fail_msg
4994 self.LogWarning("Node failed to demote itself from master"
4995 " candidate status: %s" % msg)
4997 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4998 additional_vm=self.op.vm_capable)
4999 self.context.AddNode(new_node, self.proc.GetECId())
5002 class LUNodeSetParams(LogicalUnit):
5003 """Modifies the parameters of a node.
5005 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5006 to the node role (as _ROLE_*)
5007 @cvar _R2F: a dictionary from node role to tuples of flags
5008 @cvar _FLAGS: a list of attribute names corresponding to the flags
5011 HPATH = "node-modify"
5012 HTYPE = constants.HTYPE_NODE
5014 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5016 (True, False, False): _ROLE_CANDIDATE,
5017 (False, True, False): _ROLE_DRAINED,
5018 (False, False, True): _ROLE_OFFLINE,
5019 (False, False, False): _ROLE_REGULAR,
5021 _R2F = dict((v, k) for k, v in _F2R.items())
5022 _FLAGS = ["master_candidate", "drained", "offline"]
5024 def CheckArguments(self):
5025 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5026 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5027 self.op.master_capable, self.op.vm_capable,
5028 self.op.secondary_ip, self.op.ndparams]
5029 if all_mods.count(None) == len(all_mods):
5030 raise errors.OpPrereqError("Please pass at least one modification",
5032 if all_mods.count(True) > 1:
5033 raise errors.OpPrereqError("Can't set the node into more than one"
5034 " state at the same time",
5037 # Boolean value that tells us whether we might be demoting from MC
5038 self.might_demote = (self.op.master_candidate == False or
5039 self.op.offline == True or
5040 self.op.drained == True or
5041 self.op.master_capable == False)
5043 if self.op.secondary_ip:
5044 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5045 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5046 " address" % self.op.secondary_ip,
5049 self.lock_all = self.op.auto_promote and self.might_demote
5050 self.lock_instances = self.op.secondary_ip is not None
5052 def ExpandNames(self):
5054 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5056 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5058 if self.lock_instances:
5059 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5061 def DeclareLocks(self, level):
5062 # If we have locked all instances, before waiting to lock nodes, release
5063 # all the ones living on nodes unrelated to the current operation.
5064 if level == locking.LEVEL_NODE and self.lock_instances:
5065 self.affected_instances = []
5066 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5069 # Build list of instances to release
5070 locked_i = self.glm.list_owned(locking.LEVEL_INSTANCE)
5071 for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5072 if (instance.disk_template in constants.DTS_INT_MIRROR and
5073 self.op.node_name in instance.all_nodes):
5074 instances_keep.append(instance_name)
5075 self.affected_instances.append(instance)
5077 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5079 assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
5080 set(instances_keep))
5082 def BuildHooksEnv(self):
5085 This runs on the master node.
5089 "OP_TARGET": self.op.node_name,
5090 "MASTER_CANDIDATE": str(self.op.master_candidate),
5091 "OFFLINE": str(self.op.offline),
5092 "DRAINED": str(self.op.drained),
5093 "MASTER_CAPABLE": str(self.op.master_capable),
5094 "VM_CAPABLE": str(self.op.vm_capable),
5097 def BuildHooksNodes(self):
5098 """Build hooks nodes.
5101 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5104 def CheckPrereq(self):
5105 """Check prerequisites.
5107 This only checks the instance list against the existing names.
5110 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5112 if (self.op.master_candidate is not None or
5113 self.op.drained is not None or
5114 self.op.offline is not None):
5115 # we can't change the master's node flags
5116 if self.op.node_name == self.cfg.GetMasterNode():
5117 raise errors.OpPrereqError("The master role can be changed"
5118 " only via master-failover",
5121 if self.op.master_candidate and not node.master_capable:
5122 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5123 " it a master candidate" % node.name,
5126 if self.op.vm_capable == False:
5127 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5129 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5130 " the vm_capable flag" % node.name,
5133 if node.master_candidate and self.might_demote and not self.lock_all:
5134 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5135 # check if after removing the current node, we're missing master
5137 (mc_remaining, mc_should, _) = \
5138 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5139 if mc_remaining < mc_should:
5140 raise errors.OpPrereqError("Not enough master candidates, please"
5141 " pass auto promote option to allow"
5142 " promotion", errors.ECODE_STATE)
5144 self.old_flags = old_flags = (node.master_candidate,
5145 node.drained, node.offline)
5146 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5147 self.old_role = old_role = self._F2R[old_flags]
5149 # Check for ineffective changes
5150 for attr in self._FLAGS:
5151 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5152 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5153 setattr(self.op, attr, None)
5155 # Past this point, any flag change to False means a transition
5156 # away from the respective state, as only real changes are kept
5158 # TODO: We might query the real power state if it supports OOB
5159 if _SupportsOob(self.cfg, node):
5160 if self.op.offline is False and not (node.powered or
5161 self.op.powered == True):
5162 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5163 " offline status can be reset") %
5165 elif self.op.powered is not None:
5166 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5167 " as it does not support out-of-band"
5168 " handling") % self.op.node_name)
5170 # If we're being deofflined/drained, we'll MC ourself if needed
5171 if (self.op.drained == False or self.op.offline == False or
5172 (self.op.master_capable and not node.master_capable)):
5173 if _DecideSelfPromotion(self):
5174 self.op.master_candidate = True
5175 self.LogInfo("Auto-promoting node to master candidate")
5177 # If we're no longer master capable, we'll demote ourselves from MC
5178 if self.op.master_capable == False and node.master_candidate:
5179 self.LogInfo("Demoting from master candidate")
5180 self.op.master_candidate = False
5183 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5184 if self.op.master_candidate:
5185 new_role = self._ROLE_CANDIDATE
5186 elif self.op.drained:
5187 new_role = self._ROLE_DRAINED
5188 elif self.op.offline:
5189 new_role = self._ROLE_OFFLINE
5190 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5191 # False is still in new flags, which means we're un-setting (the
5193 new_role = self._ROLE_REGULAR
5194 else: # no new flags, nothing, keep old role
5197 self.new_role = new_role
5199 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5200 # Trying to transition out of offline status
5201 result = self.rpc.call_version([node.name])[node.name]
5203 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5204 " to report its version: %s" %
5205 (node.name, result.fail_msg),
5208 self.LogWarning("Transitioning node from offline to online state"
5209 " without using re-add. Please make sure the node"
5212 if self.op.secondary_ip:
5213 # Ok even without locking, because this can't be changed by any LU
5214 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5215 master_singlehomed = master.secondary_ip == master.primary_ip
5216 if master_singlehomed and self.op.secondary_ip:
5217 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5218 " homed cluster", errors.ECODE_INVAL)
5221 if self.affected_instances:
5222 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5223 " node has instances (%s) configured"
5224 " to use it" % self.affected_instances)
5226 # On online nodes, check that no instances are running, and that
5227 # the node has the new ip and we can reach it.
5228 for instance in self.affected_instances:
5229 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5231 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5232 if master.name != node.name:
5233 # check reachability from master secondary ip to new secondary ip
5234 if not netutils.TcpPing(self.op.secondary_ip,
5235 constants.DEFAULT_NODED_PORT,
5236 source=master.secondary_ip):
5237 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5238 " based ping to node daemon port",
5239 errors.ECODE_ENVIRON)
5241 if self.op.ndparams:
5242 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5243 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5244 self.new_ndparams = new_ndparams
5246 def Exec(self, feedback_fn):
5251 old_role = self.old_role
5252 new_role = self.new_role
5256 if self.op.ndparams:
5257 node.ndparams = self.new_ndparams
5259 if self.op.powered is not None:
5260 node.powered = self.op.powered
5262 for attr in ["master_capable", "vm_capable"]:
5263 val = getattr(self.op, attr)
5265 setattr(node, attr, val)
5266 result.append((attr, str(val)))
5268 if new_role != old_role:
5269 # Tell the node to demote itself, if no longer MC and not offline
5270 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5271 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5273 self.LogWarning("Node failed to demote itself: %s", msg)
5275 new_flags = self._R2F[new_role]
5276 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5278 result.append((desc, str(nf)))
5279 (node.master_candidate, node.drained, node.offline) = new_flags
5281 # we locked all nodes, we adjust the CP before updating this node
5283 _AdjustCandidatePool(self, [node.name])
5285 if self.op.secondary_ip:
5286 node.secondary_ip = self.op.secondary_ip
5287 result.append(("secondary_ip", self.op.secondary_ip))
5289 # this will trigger configuration file update, if needed
5290 self.cfg.Update(node, feedback_fn)
5292 # this will trigger job queue propagation or cleanup if the mc
5294 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5295 self.context.ReaddNode(node)
5300 class LUNodePowercycle(NoHooksLU):
5301 """Powercycles a node.
5306 def CheckArguments(self):
5307 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5308 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5309 raise errors.OpPrereqError("The node is the master and the force"
5310 " parameter was not set",
5313 def ExpandNames(self):
5314 """Locking for PowercycleNode.
5316 This is a last-resort option and shouldn't block on other
5317 jobs. Therefore, we grab no locks.
5320 self.needed_locks = {}
5322 def Exec(self, feedback_fn):
5326 result = self.rpc.call_node_powercycle(self.op.node_name,
5327 self.cfg.GetHypervisorType())
5328 result.Raise("Failed to schedule the reboot")
5329 return result.payload
5332 class LUClusterQuery(NoHooksLU):
5333 """Query cluster configuration.
5338 def ExpandNames(self):
5339 self.needed_locks = {}
5341 def Exec(self, feedback_fn):
5342 """Return cluster config.
5345 cluster = self.cfg.GetClusterInfo()
5348 # Filter just for enabled hypervisors
5349 for os_name, hv_dict in cluster.os_hvp.items():
5350 os_hvp[os_name] = {}
5351 for hv_name, hv_params in hv_dict.items():
5352 if hv_name in cluster.enabled_hypervisors:
5353 os_hvp[os_name][hv_name] = hv_params
5355 # Convert ip_family to ip_version
5356 primary_ip_version = constants.IP4_VERSION
5357 if cluster.primary_ip_family == netutils.IP6Address.family:
5358 primary_ip_version = constants.IP6_VERSION
5361 "software_version": constants.RELEASE_VERSION,
5362 "protocol_version": constants.PROTOCOL_VERSION,
5363 "config_version": constants.CONFIG_VERSION,
5364 "os_api_version": max(constants.OS_API_VERSIONS),
5365 "export_version": constants.EXPORT_VERSION,
5366 "architecture": (platform.architecture()[0], platform.machine()),
5367 "name": cluster.cluster_name,
5368 "master": cluster.master_node,
5369 "default_hypervisor": cluster.enabled_hypervisors[0],
5370 "enabled_hypervisors": cluster.enabled_hypervisors,
5371 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5372 for hypervisor_name in cluster.enabled_hypervisors]),
5374 "beparams": cluster.beparams,
5375 "osparams": cluster.osparams,
5376 "nicparams": cluster.nicparams,
5377 "ndparams": cluster.ndparams,
5378 "candidate_pool_size": cluster.candidate_pool_size,
5379 "master_netdev": cluster.master_netdev,
5380 "volume_group_name": cluster.volume_group_name,
5381 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5382 "file_storage_dir": cluster.file_storage_dir,
5383 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5384 "maintain_node_health": cluster.maintain_node_health,
5385 "ctime": cluster.ctime,
5386 "mtime": cluster.mtime,
5387 "uuid": cluster.uuid,
5388 "tags": list(cluster.GetTags()),
5389 "uid_pool": cluster.uid_pool,
5390 "default_iallocator": cluster.default_iallocator,
5391 "reserved_lvs": cluster.reserved_lvs,
5392 "primary_ip_version": primary_ip_version,
5393 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5394 "hidden_os": cluster.hidden_os,
5395 "blacklisted_os": cluster.blacklisted_os,
5401 class LUClusterConfigQuery(NoHooksLU):
5402 """Return configuration values.
5406 _FIELDS_DYNAMIC = utils.FieldSet()
5407 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5408 "watcher_pause", "volume_group_name")
5410 def CheckArguments(self):
5411 _CheckOutputFields(static=self._FIELDS_STATIC,
5412 dynamic=self._FIELDS_DYNAMIC,
5413 selected=self.op.output_fields)
5415 def ExpandNames(self):
5416 self.needed_locks = {}
5418 def Exec(self, feedback_fn):
5419 """Dump a representation of the cluster config to the standard output.
5423 for field in self.op.output_fields:
5424 if field == "cluster_name":
5425 entry = self.cfg.GetClusterName()
5426 elif field == "master_node":
5427 entry = self.cfg.GetMasterNode()
5428 elif field == "drain_flag":
5429 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5430 elif field == "watcher_pause":
5431 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5432 elif field == "volume_group_name":
5433 entry = self.cfg.GetVGName()
5435 raise errors.ParameterError(field)
5436 values.append(entry)
5440 class LUInstanceActivateDisks(NoHooksLU):
5441 """Bring up an instance's disks.
5446 def ExpandNames(self):
5447 self._ExpandAndLockInstance()
5448 self.needed_locks[locking.LEVEL_NODE] = []
5449 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5451 def DeclareLocks(self, level):
5452 if level == locking.LEVEL_NODE:
5453 self._LockInstancesNodes()
5455 def CheckPrereq(self):
5456 """Check prerequisites.
5458 This checks that the instance is in the cluster.
5461 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5462 assert self.instance is not None, \
5463 "Cannot retrieve locked instance %s" % self.op.instance_name
5464 _CheckNodeOnline(self, self.instance.primary_node)
5466 def Exec(self, feedback_fn):
5467 """Activate the disks.
5470 disks_ok, disks_info = \
5471 _AssembleInstanceDisks(self, self.instance,
5472 ignore_size=self.op.ignore_size)
5474 raise errors.OpExecError("Cannot activate block devices")
5479 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5481 """Prepare the block devices for an instance.
5483 This sets up the block devices on all nodes.
5485 @type lu: L{LogicalUnit}
5486 @param lu: the logical unit on whose behalf we execute
5487 @type instance: L{objects.Instance}
5488 @param instance: the instance for whose disks we assemble
5489 @type disks: list of L{objects.Disk} or None
5490 @param disks: which disks to assemble (or all, if None)
5491 @type ignore_secondaries: boolean
5492 @param ignore_secondaries: if true, errors on secondary nodes
5493 won't result in an error return from the function
5494 @type ignore_size: boolean
5495 @param ignore_size: if true, the current known size of the disk
5496 will not be used during the disk activation, useful for cases
5497 when the size is wrong
5498 @return: False if the operation failed, otherwise a list of
5499 (host, instance_visible_name, node_visible_name)
5500 with the mapping from node devices to instance devices
5505 iname = instance.name
5506 disks = _ExpandCheckDisks(instance, disks)
5508 # With the two passes mechanism we try to reduce the window of
5509 # opportunity for the race condition of switching DRBD to primary
5510 # before handshaking occured, but we do not eliminate it
5512 # The proper fix would be to wait (with some limits) until the
5513 # connection has been made and drbd transitions from WFConnection
5514 # into any other network-connected state (Connected, SyncTarget,
5517 # 1st pass, assemble on all nodes in secondary mode
5518 for idx, inst_disk in enumerate(disks):
5519 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5521 node_disk = node_disk.Copy()
5522 node_disk.UnsetSize()
5523 lu.cfg.SetDiskID(node_disk, node)
5524 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5525 msg = result.fail_msg
5527 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5528 " (is_primary=False, pass=1): %s",
5529 inst_disk.iv_name, node, msg)
5530 if not ignore_secondaries:
5533 # FIXME: race condition on drbd migration to primary
5535 # 2nd pass, do only the primary node
5536 for idx, inst_disk in enumerate(disks):
5539 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5540 if node != instance.primary_node:
5543 node_disk = node_disk.Copy()
5544 node_disk.UnsetSize()
5545 lu.cfg.SetDiskID(node_disk, node)
5546 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5547 msg = result.fail_msg
5549 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5550 " (is_primary=True, pass=2): %s",
5551 inst_disk.iv_name, node, msg)
5554 dev_path = result.payload
5556 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5558 # leave the disks configured for the primary node
5559 # this is a workaround that would be fixed better by
5560 # improving the logical/physical id handling
5562 lu.cfg.SetDiskID(disk, instance.primary_node)
5564 return disks_ok, device_info
5567 def _StartInstanceDisks(lu, instance, force):
5568 """Start the disks of an instance.
5571 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5572 ignore_secondaries=force)
5574 _ShutdownInstanceDisks(lu, instance)
5575 if force is not None and not force:
5576 lu.proc.LogWarning("", hint="If the message above refers to a"
5578 " you can retry the operation using '--force'.")
5579 raise errors.OpExecError("Disk consistency error")
5582 class LUInstanceDeactivateDisks(NoHooksLU):
5583 """Shutdown an instance's disks.
5588 def ExpandNames(self):
5589 self._ExpandAndLockInstance()
5590 self.needed_locks[locking.LEVEL_NODE] = []
5591 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5593 def DeclareLocks(self, level):
5594 if level == locking.LEVEL_NODE:
5595 self._LockInstancesNodes()
5597 def CheckPrereq(self):
5598 """Check prerequisites.
5600 This checks that the instance is in the cluster.
5603 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5604 assert self.instance is not None, \
5605 "Cannot retrieve locked instance %s" % self.op.instance_name
5607 def Exec(self, feedback_fn):
5608 """Deactivate the disks
5611 instance = self.instance
5613 _ShutdownInstanceDisks(self, instance)
5615 _SafeShutdownInstanceDisks(self, instance)
5618 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5619 """Shutdown block devices of an instance.
5621 This function checks if an instance is running, before calling
5622 _ShutdownInstanceDisks.
5625 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5626 _ShutdownInstanceDisks(lu, instance, disks=disks)
5629 def _ExpandCheckDisks(instance, disks):
5630 """Return the instance disks selected by the disks list
5632 @type disks: list of L{objects.Disk} or None
5633 @param disks: selected disks
5634 @rtype: list of L{objects.Disk}
5635 @return: selected instance disks to act on
5639 return instance.disks
5641 if not set(disks).issubset(instance.disks):
5642 raise errors.ProgrammerError("Can only act on disks belonging to the"
5647 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5648 """Shutdown block devices of an instance.
5650 This does the shutdown on all nodes of the instance.
5652 If the ignore_primary is false, errors on the primary node are
5657 disks = _ExpandCheckDisks(instance, disks)
5660 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5661 lu.cfg.SetDiskID(top_disk, node)
5662 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5663 msg = result.fail_msg
5665 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5666 disk.iv_name, node, msg)
5667 if ((node == instance.primary_node and not ignore_primary) or
5668 (node != instance.primary_node and not result.offline)):
5673 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5674 """Checks if a node has enough free memory.
5676 This function check if a given node has the needed amount of free
5677 memory. In case the node has less memory or we cannot get the
5678 information from the node, this function raise an OpPrereqError
5681 @type lu: C{LogicalUnit}
5682 @param lu: a logical unit from which we get configuration data
5684 @param node: the node to check
5685 @type reason: C{str}
5686 @param reason: string to use in the error message
5687 @type requested: C{int}
5688 @param requested: the amount of memory in MiB to check for
5689 @type hypervisor_name: C{str}
5690 @param hypervisor_name: the hypervisor to ask for memory stats
5691 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5692 we cannot check the node
5695 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5696 nodeinfo[node].Raise("Can't get data from node %s" % node,
5697 prereq=True, ecode=errors.ECODE_ENVIRON)
5698 free_mem = nodeinfo[node].payload.get("memory_free", None)
5699 if not isinstance(free_mem, int):
5700 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5701 " was '%s'" % (node, free_mem),
5702 errors.ECODE_ENVIRON)
5703 if requested > free_mem:
5704 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5705 " needed %s MiB, available %s MiB" %
5706 (node, reason, requested, free_mem),
5710 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5711 """Checks if nodes have enough free disk space in the all VGs.
5713 This function check if all given nodes have the needed amount of
5714 free disk. In case any node has less disk or we cannot get the
5715 information from the node, this function raise an OpPrereqError
5718 @type lu: C{LogicalUnit}
5719 @param lu: a logical unit from which we get configuration data
5720 @type nodenames: C{list}
5721 @param nodenames: the list of node names to check
5722 @type req_sizes: C{dict}
5723 @param req_sizes: the hash of vg and corresponding amount of disk in
5725 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5726 or we cannot check the node
5729 for vg, req_size in req_sizes.items():
5730 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5733 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5734 """Checks if nodes have enough free disk space in the specified VG.
5736 This function check if all given nodes have the needed amount of
5737 free disk. In case any node has less disk or we cannot get the
5738 information from the node, this function raise an OpPrereqError
5741 @type lu: C{LogicalUnit}
5742 @param lu: a logical unit from which we get configuration data
5743 @type nodenames: C{list}
5744 @param nodenames: the list of node names to check
5746 @param vg: the volume group to check
5747 @type requested: C{int}
5748 @param requested: the amount of disk in MiB to check for
5749 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5750 or we cannot check the node
5753 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5754 for node in nodenames:
5755 info = nodeinfo[node]
5756 info.Raise("Cannot get current information from node %s" % node,
5757 prereq=True, ecode=errors.ECODE_ENVIRON)
5758 vg_free = info.payload.get("vg_free", None)
5759 if not isinstance(vg_free, int):
5760 raise errors.OpPrereqError("Can't compute free disk space on node"
5761 " %s for vg %s, result was '%s'" %
5762 (node, vg, vg_free), errors.ECODE_ENVIRON)
5763 if requested > vg_free:
5764 raise errors.OpPrereqError("Not enough disk space on target node %s"
5765 " vg %s: required %d MiB, available %d MiB" %
5766 (node, vg, requested, vg_free),
5770 class LUInstanceStartup(LogicalUnit):
5771 """Starts an instance.
5774 HPATH = "instance-start"
5775 HTYPE = constants.HTYPE_INSTANCE
5778 def CheckArguments(self):
5780 if self.op.beparams:
5781 # fill the beparams dict
5782 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5784 def ExpandNames(self):
5785 self._ExpandAndLockInstance()
5787 def BuildHooksEnv(self):
5790 This runs on master, primary and secondary nodes of the instance.
5794 "FORCE": self.op.force,
5797 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5801 def BuildHooksNodes(self):
5802 """Build hooks nodes.
5805 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5808 def CheckPrereq(self):
5809 """Check prerequisites.
5811 This checks that the instance is in the cluster.
5814 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5815 assert self.instance is not None, \
5816 "Cannot retrieve locked instance %s" % self.op.instance_name
5819 if self.op.hvparams:
5820 # check hypervisor parameter syntax (locally)
5821 cluster = self.cfg.GetClusterInfo()
5822 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5823 filled_hvp = cluster.FillHV(instance)
5824 filled_hvp.update(self.op.hvparams)
5825 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5826 hv_type.CheckParameterSyntax(filled_hvp)
5827 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5829 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5831 if self.primary_offline and self.op.ignore_offline_nodes:
5832 self.proc.LogWarning("Ignoring offline primary node")
5834 if self.op.hvparams or self.op.beparams:
5835 self.proc.LogWarning("Overridden parameters are ignored")
5837 _CheckNodeOnline(self, instance.primary_node)
5839 bep = self.cfg.GetClusterInfo().FillBE(instance)
5841 # check bridges existence
5842 _CheckInstanceBridgesExist(self, instance)
5844 remote_info = self.rpc.call_instance_info(instance.primary_node,
5846 instance.hypervisor)
5847 remote_info.Raise("Error checking node %s" % instance.primary_node,
5848 prereq=True, ecode=errors.ECODE_ENVIRON)
5849 if not remote_info.payload: # not running already
5850 _CheckNodeFreeMemory(self, instance.primary_node,
5851 "starting instance %s" % instance.name,
5852 bep[constants.BE_MEMORY], instance.hypervisor)
5854 def Exec(self, feedback_fn):
5855 """Start the instance.
5858 instance = self.instance
5859 force = self.op.force
5861 if not self.op.no_remember:
5862 self.cfg.MarkInstanceUp(instance.name)
5864 if self.primary_offline:
5865 assert self.op.ignore_offline_nodes
5866 self.proc.LogInfo("Primary node offline, marked instance as started")
5868 node_current = instance.primary_node
5870 _StartInstanceDisks(self, instance, force)
5872 result = self.rpc.call_instance_start(node_current, instance,
5873 self.op.hvparams, self.op.beparams,
5874 self.op.startup_paused)
5875 msg = result.fail_msg
5877 _ShutdownInstanceDisks(self, instance)
5878 raise errors.OpExecError("Could not start instance: %s" % msg)
5881 class LUInstanceReboot(LogicalUnit):
5882 """Reboot an instance.
5885 HPATH = "instance-reboot"
5886 HTYPE = constants.HTYPE_INSTANCE
5889 def ExpandNames(self):
5890 self._ExpandAndLockInstance()
5892 def BuildHooksEnv(self):
5895 This runs on master, primary and secondary nodes of the instance.
5899 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5900 "REBOOT_TYPE": self.op.reboot_type,
5901 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5904 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5908 def BuildHooksNodes(self):
5909 """Build hooks nodes.
5912 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5915 def CheckPrereq(self):
5916 """Check prerequisites.
5918 This checks that the instance is in the cluster.
5921 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5922 assert self.instance is not None, \
5923 "Cannot retrieve locked instance %s" % self.op.instance_name
5925 _CheckNodeOnline(self, instance.primary_node)
5927 # check bridges existence
5928 _CheckInstanceBridgesExist(self, instance)
5930 def Exec(self, feedback_fn):
5931 """Reboot the instance.
5934 instance = self.instance
5935 ignore_secondaries = self.op.ignore_secondaries
5936 reboot_type = self.op.reboot_type
5938 remote_info = self.rpc.call_instance_info(instance.primary_node,
5940 instance.hypervisor)
5941 remote_info.Raise("Error checking node %s" % instance.primary_node)
5942 instance_running = bool(remote_info.payload)
5944 node_current = instance.primary_node
5946 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5947 constants.INSTANCE_REBOOT_HARD]:
5948 for disk in instance.disks:
5949 self.cfg.SetDiskID(disk, node_current)
5950 result = self.rpc.call_instance_reboot(node_current, instance,
5952 self.op.shutdown_timeout)
5953 result.Raise("Could not reboot instance")
5955 if instance_running:
5956 result = self.rpc.call_instance_shutdown(node_current, instance,
5957 self.op.shutdown_timeout)
5958 result.Raise("Could not shutdown instance for full reboot")
5959 _ShutdownInstanceDisks(self, instance)
5961 self.LogInfo("Instance %s was already stopped, starting now",
5963 _StartInstanceDisks(self, instance, ignore_secondaries)
5964 result = self.rpc.call_instance_start(node_current, instance,
5966 msg = result.fail_msg
5968 _ShutdownInstanceDisks(self, instance)
5969 raise errors.OpExecError("Could not start instance for"
5970 " full reboot: %s" % msg)
5972 self.cfg.MarkInstanceUp(instance.name)
5975 class LUInstanceShutdown(LogicalUnit):
5976 """Shutdown an instance.
5979 HPATH = "instance-stop"
5980 HTYPE = constants.HTYPE_INSTANCE
5983 def ExpandNames(self):
5984 self._ExpandAndLockInstance()
5986 def BuildHooksEnv(self):
5989 This runs on master, primary and secondary nodes of the instance.
5992 env = _BuildInstanceHookEnvByObject(self, self.instance)
5993 env["TIMEOUT"] = self.op.timeout
5996 def BuildHooksNodes(self):
5997 """Build hooks nodes.
6000 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6003 def CheckPrereq(self):
6004 """Check prerequisites.
6006 This checks that the instance is in the cluster.
6009 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6010 assert self.instance is not None, \
6011 "Cannot retrieve locked instance %s" % self.op.instance_name
6013 self.primary_offline = \
6014 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6016 if self.primary_offline and self.op.ignore_offline_nodes:
6017 self.proc.LogWarning("Ignoring offline primary node")
6019 _CheckNodeOnline(self, self.instance.primary_node)
6021 def Exec(self, feedback_fn):
6022 """Shutdown the instance.
6025 instance = self.instance
6026 node_current = instance.primary_node
6027 timeout = self.op.timeout
6029 if not self.op.no_remember:
6030 self.cfg.MarkInstanceDown(instance.name)
6032 if self.primary_offline:
6033 assert self.op.ignore_offline_nodes
6034 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6036 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6037 msg = result.fail_msg
6039 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6041 _ShutdownInstanceDisks(self, instance)
6044 class LUInstanceReinstall(LogicalUnit):
6045 """Reinstall an instance.
6048 HPATH = "instance-reinstall"
6049 HTYPE = constants.HTYPE_INSTANCE
6052 def ExpandNames(self):
6053 self._ExpandAndLockInstance()
6055 def BuildHooksEnv(self):
6058 This runs on master, primary and secondary nodes of the instance.
6061 return _BuildInstanceHookEnvByObject(self, self.instance)
6063 def BuildHooksNodes(self):
6064 """Build hooks nodes.
6067 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6070 def CheckPrereq(self):
6071 """Check prerequisites.
6073 This checks that the instance is in the cluster and is not running.
6076 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6077 assert instance is not None, \
6078 "Cannot retrieve locked instance %s" % self.op.instance_name
6079 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6080 " offline, cannot reinstall")
6081 for node in instance.secondary_nodes:
6082 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6083 " cannot reinstall")
6085 if instance.disk_template == constants.DT_DISKLESS:
6086 raise errors.OpPrereqError("Instance '%s' has no disks" %
6087 self.op.instance_name,
6089 _CheckInstanceDown(self, instance, "cannot reinstall")
6091 if self.op.os_type is not None:
6093 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6094 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6095 instance_os = self.op.os_type
6097 instance_os = instance.os
6099 nodelist = list(instance.all_nodes)
6101 if self.op.osparams:
6102 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6103 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6104 self.os_inst = i_osdict # the new dict (without defaults)
6108 self.instance = instance
6110 def Exec(self, feedback_fn):
6111 """Reinstall the instance.
6114 inst = self.instance
6116 if self.op.os_type is not None:
6117 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6118 inst.os = self.op.os_type
6119 # Write to configuration
6120 self.cfg.Update(inst, feedback_fn)
6122 _StartInstanceDisks(self, inst, None)
6124 feedback_fn("Running the instance OS create scripts...")
6125 # FIXME: pass debug option from opcode to backend
6126 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6127 self.op.debug_level,
6128 osparams=self.os_inst)
6129 result.Raise("Could not install OS for instance %s on node %s" %
6130 (inst.name, inst.primary_node))
6132 _ShutdownInstanceDisks(self, inst)
6135 class LUInstanceRecreateDisks(LogicalUnit):
6136 """Recreate an instance's missing disks.
6139 HPATH = "instance-recreate-disks"
6140 HTYPE = constants.HTYPE_INSTANCE
6143 def CheckArguments(self):
6144 # normalise the disk list
6145 self.op.disks = sorted(frozenset(self.op.disks))
6147 def ExpandNames(self):
6148 self._ExpandAndLockInstance()
6149 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6151 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6152 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6154 self.needed_locks[locking.LEVEL_NODE] = []
6156 def DeclareLocks(self, level):
6157 if level == locking.LEVEL_NODE:
6158 # if we replace the nodes, we only need to lock the old primary,
6159 # otherwise we need to lock all nodes for disk re-creation
6160 primary_only = bool(self.op.nodes)
6161 self._LockInstancesNodes(primary_only=primary_only)
6163 def BuildHooksEnv(self):
6166 This runs on master, primary and secondary nodes of the instance.
6169 return _BuildInstanceHookEnvByObject(self, self.instance)
6171 def BuildHooksNodes(self):
6172 """Build hooks nodes.
6175 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6178 def CheckPrereq(self):
6179 """Check prerequisites.
6181 This checks that the instance is in the cluster and is not running.
6184 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6185 assert instance is not None, \
6186 "Cannot retrieve locked instance %s" % self.op.instance_name
6188 if len(self.op.nodes) != len(instance.all_nodes):
6189 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6190 " %d replacement nodes were specified" %
6191 (instance.name, len(instance.all_nodes),
6192 len(self.op.nodes)),
6194 assert instance.disk_template != constants.DT_DRBD8 or \
6195 len(self.op.nodes) == 2
6196 assert instance.disk_template != constants.DT_PLAIN or \
6197 len(self.op.nodes) == 1
6198 primary_node = self.op.nodes[0]
6200 primary_node = instance.primary_node
6201 _CheckNodeOnline(self, primary_node)
6203 if instance.disk_template == constants.DT_DISKLESS:
6204 raise errors.OpPrereqError("Instance '%s' has no disks" %
6205 self.op.instance_name, errors.ECODE_INVAL)
6206 # if we replace nodes *and* the old primary is offline, we don't
6208 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6209 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6210 if not (self.op.nodes and old_pnode.offline):
6211 _CheckInstanceDown(self, instance, "cannot recreate disks")
6213 if not self.op.disks:
6214 self.op.disks = range(len(instance.disks))
6216 for idx in self.op.disks:
6217 if idx >= len(instance.disks):
6218 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6220 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6221 raise errors.OpPrereqError("Can't recreate disks partially and"
6222 " change the nodes at the same time",
6224 self.instance = instance
6226 def Exec(self, feedback_fn):
6227 """Recreate the disks.
6230 instance = self.instance
6233 mods = [] # keeps track of needed logical_id changes
6235 for idx, disk in enumerate(instance.disks):
6236 if idx not in self.op.disks: # disk idx has not been passed in
6239 # update secondaries for disks, if needed
6241 if disk.dev_type == constants.LD_DRBD8:
6242 # need to update the nodes and minors
6243 assert len(self.op.nodes) == 2
6244 assert len(disk.logical_id) == 6 # otherwise disk internals
6246 (_, _, old_port, _, _, old_secret) = disk.logical_id
6247 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6248 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6249 new_minors[0], new_minors[1], old_secret)
6250 assert len(disk.logical_id) == len(new_id)
6251 mods.append((idx, new_id))
6253 # now that we have passed all asserts above, we can apply the mods
6254 # in a single run (to avoid partial changes)
6255 for idx, new_id in mods:
6256 instance.disks[idx].logical_id = new_id
6258 # change primary node, if needed
6260 instance.primary_node = self.op.nodes[0]
6261 self.LogWarning("Changing the instance's nodes, you will have to"
6262 " remove any disks left on the older nodes manually")
6265 self.cfg.Update(instance, feedback_fn)
6267 _CreateDisks(self, instance, to_skip=to_skip)
6270 class LUInstanceRename(LogicalUnit):
6271 """Rename an instance.
6274 HPATH = "instance-rename"
6275 HTYPE = constants.HTYPE_INSTANCE
6277 def CheckArguments(self):
6281 if self.op.ip_check and not self.op.name_check:
6282 # TODO: make the ip check more flexible and not depend on the name check
6283 raise errors.OpPrereqError("IP address check requires a name check",
6286 def BuildHooksEnv(self):
6289 This runs on master, primary and secondary nodes of the instance.
6292 env = _BuildInstanceHookEnvByObject(self, self.instance)
6293 env["INSTANCE_NEW_NAME"] = self.op.new_name
6296 def BuildHooksNodes(self):
6297 """Build hooks nodes.
6300 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6303 def CheckPrereq(self):
6304 """Check prerequisites.
6306 This checks that the instance is in the cluster and is not running.
6309 self.op.instance_name = _ExpandInstanceName(self.cfg,
6310 self.op.instance_name)
6311 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6312 assert instance is not None
6313 _CheckNodeOnline(self, instance.primary_node)
6314 _CheckInstanceDown(self, instance, "cannot rename")
6315 self.instance = instance
6317 new_name = self.op.new_name
6318 if self.op.name_check:
6319 hostname = netutils.GetHostname(name=new_name)
6320 if hostname != new_name:
6321 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6323 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6324 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6325 " same as given hostname '%s'") %
6326 (hostname.name, self.op.new_name),
6328 new_name = self.op.new_name = hostname.name
6329 if (self.op.ip_check and
6330 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6331 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6332 (hostname.ip, new_name),
6333 errors.ECODE_NOTUNIQUE)
6335 instance_list = self.cfg.GetInstanceList()
6336 if new_name in instance_list and new_name != instance.name:
6337 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6338 new_name, errors.ECODE_EXISTS)
6340 def Exec(self, feedback_fn):
6341 """Rename the instance.
6344 inst = self.instance
6345 old_name = inst.name
6347 rename_file_storage = False
6348 if (inst.disk_template in constants.DTS_FILEBASED and
6349 self.op.new_name != inst.name):
6350 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6351 rename_file_storage = True
6353 self.cfg.RenameInstance(inst.name, self.op.new_name)
6354 # Change the instance lock. This is definitely safe while we hold the BGL.
6355 # Otherwise the new lock would have to be added in acquired mode.
6357 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6358 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6360 # re-read the instance from the configuration after rename
6361 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6363 if rename_file_storage:
6364 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6365 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6366 old_file_storage_dir,
6367 new_file_storage_dir)
6368 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6369 " (but the instance has been renamed in Ganeti)" %
6370 (inst.primary_node, old_file_storage_dir,
6371 new_file_storage_dir))
6373 _StartInstanceDisks(self, inst, None)
6375 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6376 old_name, self.op.debug_level)
6377 msg = result.fail_msg
6379 msg = ("Could not run OS rename script for instance %s on node %s"
6380 " (but the instance has been renamed in Ganeti): %s" %
6381 (inst.name, inst.primary_node, msg))
6382 self.proc.LogWarning(msg)
6384 _ShutdownInstanceDisks(self, inst)
6389 class LUInstanceRemove(LogicalUnit):
6390 """Remove an instance.
6393 HPATH = "instance-remove"
6394 HTYPE = constants.HTYPE_INSTANCE
6397 def ExpandNames(self):
6398 self._ExpandAndLockInstance()
6399 self.needed_locks[locking.LEVEL_NODE] = []
6400 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6402 def DeclareLocks(self, level):
6403 if level == locking.LEVEL_NODE:
6404 self._LockInstancesNodes()
6406 def BuildHooksEnv(self):
6409 This runs on master, primary and secondary nodes of the instance.
6412 env = _BuildInstanceHookEnvByObject(self, self.instance)
6413 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6416 def BuildHooksNodes(self):
6417 """Build hooks nodes.
6420 nl = [self.cfg.GetMasterNode()]
6421 nl_post = list(self.instance.all_nodes) + nl
6422 return (nl, nl_post)
6424 def CheckPrereq(self):
6425 """Check prerequisites.
6427 This checks that the instance is in the cluster.
6430 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6431 assert self.instance is not None, \
6432 "Cannot retrieve locked instance %s" % self.op.instance_name
6434 def Exec(self, feedback_fn):
6435 """Remove the instance.
6438 instance = self.instance
6439 logging.info("Shutting down instance %s on node %s",
6440 instance.name, instance.primary_node)
6442 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6443 self.op.shutdown_timeout)
6444 msg = result.fail_msg
6446 if self.op.ignore_failures:
6447 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6449 raise errors.OpExecError("Could not shutdown instance %s on"
6451 (instance.name, instance.primary_node, msg))
6453 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6456 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6457 """Utility function to remove an instance.
6460 logging.info("Removing block devices for instance %s", instance.name)
6462 if not _RemoveDisks(lu, instance):
6463 if not ignore_failures:
6464 raise errors.OpExecError("Can't remove instance's disks")
6465 feedback_fn("Warning: can't remove instance's disks")
6467 logging.info("Removing instance %s out of cluster config", instance.name)
6469 lu.cfg.RemoveInstance(instance.name)
6471 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6472 "Instance lock removal conflict"
6474 # Remove lock for the instance
6475 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6478 class LUInstanceQuery(NoHooksLU):
6479 """Logical unit for querying instances.
6482 # pylint: disable-msg=W0142
6485 def CheckArguments(self):
6486 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6487 self.op.output_fields, self.op.use_locking)
6489 def ExpandNames(self):
6490 self.iq.ExpandNames(self)
6492 def DeclareLocks(self, level):
6493 self.iq.DeclareLocks(self, level)
6495 def Exec(self, feedback_fn):
6496 return self.iq.OldStyleQuery(self)
6499 class LUInstanceFailover(LogicalUnit):
6500 """Failover an instance.
6503 HPATH = "instance-failover"
6504 HTYPE = constants.HTYPE_INSTANCE
6507 def CheckArguments(self):
6508 """Check the arguments.
6511 self.iallocator = getattr(self.op, "iallocator", None)
6512 self.target_node = getattr(self.op, "target_node", None)
6514 def ExpandNames(self):
6515 self._ExpandAndLockInstance()
6517 if self.op.target_node is not None:
6518 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6520 self.needed_locks[locking.LEVEL_NODE] = []
6521 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6523 ignore_consistency = self.op.ignore_consistency
6524 shutdown_timeout = self.op.shutdown_timeout
6525 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6528 ignore_consistency=ignore_consistency,
6529 shutdown_timeout=shutdown_timeout)
6530 self.tasklets = [self._migrater]
6532 def DeclareLocks(self, level):
6533 if level == locking.LEVEL_NODE:
6534 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6535 if instance.disk_template in constants.DTS_EXT_MIRROR:
6536 if self.op.target_node is None:
6537 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6539 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6540 self.op.target_node]
6541 del self.recalculate_locks[locking.LEVEL_NODE]
6543 self._LockInstancesNodes()
6545 def BuildHooksEnv(self):
6548 This runs on master, primary and secondary nodes of the instance.
6551 instance = self._migrater.instance
6552 source_node = instance.primary_node
6553 target_node = self.op.target_node
6555 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6556 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6557 "OLD_PRIMARY": source_node,
6558 "NEW_PRIMARY": target_node,
6561 if instance.disk_template in constants.DTS_INT_MIRROR:
6562 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6563 env["NEW_SECONDARY"] = source_node
6565 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6567 env.update(_BuildInstanceHookEnvByObject(self, instance))
6571 def BuildHooksNodes(self):
6572 """Build hooks nodes.
6575 instance = self._migrater.instance
6576 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6577 return (nl, nl + [instance.primary_node])
6580 class LUInstanceMigrate(LogicalUnit):
6581 """Migrate an instance.
6583 This is migration without shutting down, compared to the failover,
6584 which is done with shutdown.
6587 HPATH = "instance-migrate"
6588 HTYPE = constants.HTYPE_INSTANCE
6591 def ExpandNames(self):
6592 self._ExpandAndLockInstance()
6594 if self.op.target_node is not None:
6595 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6597 self.needed_locks[locking.LEVEL_NODE] = []
6598 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6600 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6601 cleanup=self.op.cleanup,
6603 fallback=self.op.allow_failover)
6604 self.tasklets = [self._migrater]
6606 def DeclareLocks(self, level):
6607 if level == locking.LEVEL_NODE:
6608 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6609 if instance.disk_template in constants.DTS_EXT_MIRROR:
6610 if self.op.target_node is None:
6611 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6613 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6614 self.op.target_node]
6615 del self.recalculate_locks[locking.LEVEL_NODE]
6617 self._LockInstancesNodes()
6619 def BuildHooksEnv(self):
6622 This runs on master, primary and secondary nodes of the instance.
6625 instance = self._migrater.instance
6626 source_node = instance.primary_node
6627 target_node = self.op.target_node
6628 env = _BuildInstanceHookEnvByObject(self, instance)
6630 "MIGRATE_LIVE": self._migrater.live,
6631 "MIGRATE_CLEANUP": self.op.cleanup,
6632 "OLD_PRIMARY": source_node,
6633 "NEW_PRIMARY": target_node,
6636 if instance.disk_template in constants.DTS_INT_MIRROR:
6637 env["OLD_SECONDARY"] = target_node
6638 env["NEW_SECONDARY"] = source_node
6640 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6644 def BuildHooksNodes(self):
6645 """Build hooks nodes.
6648 instance = self._migrater.instance
6649 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6650 return (nl, nl + [instance.primary_node])
6653 class LUInstanceMove(LogicalUnit):
6654 """Move an instance by data-copying.
6657 HPATH = "instance-move"
6658 HTYPE = constants.HTYPE_INSTANCE
6661 def ExpandNames(self):
6662 self._ExpandAndLockInstance()
6663 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6664 self.op.target_node = target_node
6665 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6666 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6668 def DeclareLocks(self, level):
6669 if level == locking.LEVEL_NODE:
6670 self._LockInstancesNodes(primary_only=True)
6672 def BuildHooksEnv(self):
6675 This runs on master, primary and secondary nodes of the instance.
6679 "TARGET_NODE": self.op.target_node,
6680 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6682 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6685 def BuildHooksNodes(self):
6686 """Build hooks nodes.
6690 self.cfg.GetMasterNode(),
6691 self.instance.primary_node,
6692 self.op.target_node,
6696 def CheckPrereq(self):
6697 """Check prerequisites.
6699 This checks that the instance is in the cluster.
6702 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6703 assert self.instance is not None, \
6704 "Cannot retrieve locked instance %s" % self.op.instance_name
6706 node = self.cfg.GetNodeInfo(self.op.target_node)
6707 assert node is not None, \
6708 "Cannot retrieve locked node %s" % self.op.target_node
6710 self.target_node = target_node = node.name
6712 if target_node == instance.primary_node:
6713 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6714 (instance.name, target_node),
6717 bep = self.cfg.GetClusterInfo().FillBE(instance)
6719 for idx, dsk in enumerate(instance.disks):
6720 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6721 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6722 " cannot copy" % idx, errors.ECODE_STATE)
6724 _CheckNodeOnline(self, target_node)
6725 _CheckNodeNotDrained(self, target_node)
6726 _CheckNodeVmCapable(self, target_node)
6728 if instance.admin_up:
6729 # check memory requirements on the secondary node
6730 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6731 instance.name, bep[constants.BE_MEMORY],
6732 instance.hypervisor)
6734 self.LogInfo("Not checking memory on the secondary node as"
6735 " instance will not be started")
6737 # check bridge existance
6738 _CheckInstanceBridgesExist(self, instance, node=target_node)
6740 def Exec(self, feedback_fn):
6741 """Move an instance.
6743 The move is done by shutting it down on its present node, copying
6744 the data over (slow) and starting it on the new node.
6747 instance = self.instance
6749 source_node = instance.primary_node
6750 target_node = self.target_node
6752 self.LogInfo("Shutting down instance %s on source node %s",
6753 instance.name, source_node)
6755 result = self.rpc.call_instance_shutdown(source_node, instance,
6756 self.op.shutdown_timeout)
6757 msg = result.fail_msg
6759 if self.op.ignore_consistency:
6760 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6761 " Proceeding anyway. Please make sure node"
6762 " %s is down. Error details: %s",
6763 instance.name, source_node, source_node, msg)
6765 raise errors.OpExecError("Could not shutdown instance %s on"
6767 (instance.name, source_node, msg))
6769 # create the target disks
6771 _CreateDisks(self, instance, target_node=target_node)
6772 except errors.OpExecError:
6773 self.LogWarning("Device creation failed, reverting...")
6775 _RemoveDisks(self, instance, target_node=target_node)
6777 self.cfg.ReleaseDRBDMinors(instance.name)
6780 cluster_name = self.cfg.GetClusterInfo().cluster_name
6783 # activate, get path, copy the data over
6784 for idx, disk in enumerate(instance.disks):
6785 self.LogInfo("Copying data for disk %d", idx)
6786 result = self.rpc.call_blockdev_assemble(target_node, disk,
6787 instance.name, True, idx)
6789 self.LogWarning("Can't assemble newly created disk %d: %s",
6790 idx, result.fail_msg)
6791 errs.append(result.fail_msg)
6793 dev_path = result.payload
6794 result = self.rpc.call_blockdev_export(source_node, disk,
6795 target_node, dev_path,
6798 self.LogWarning("Can't copy data over for disk %d: %s",
6799 idx, result.fail_msg)
6800 errs.append(result.fail_msg)
6804 self.LogWarning("Some disks failed to copy, aborting")
6806 _RemoveDisks(self, instance, target_node=target_node)
6808 self.cfg.ReleaseDRBDMinors(instance.name)
6809 raise errors.OpExecError("Errors during disk copy: %s" %
6812 instance.primary_node = target_node
6813 self.cfg.Update(instance, feedback_fn)
6815 self.LogInfo("Removing the disks on the original node")
6816 _RemoveDisks(self, instance, target_node=source_node)
6818 # Only start the instance if it's marked as up
6819 if instance.admin_up:
6820 self.LogInfo("Starting instance %s on node %s",
6821 instance.name, target_node)
6823 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6824 ignore_secondaries=True)
6826 _ShutdownInstanceDisks(self, instance)
6827 raise errors.OpExecError("Can't activate the instance's disks")
6829 result = self.rpc.call_instance_start(target_node, instance,
6831 msg = result.fail_msg
6833 _ShutdownInstanceDisks(self, instance)
6834 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6835 (instance.name, target_node, msg))
6838 class LUNodeMigrate(LogicalUnit):
6839 """Migrate all instances from a node.
6842 HPATH = "node-migrate"
6843 HTYPE = constants.HTYPE_NODE
6846 def CheckArguments(self):
6849 def ExpandNames(self):
6850 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6852 self.share_locks = _ShareAll()
6853 self.needed_locks = {
6854 locking.LEVEL_NODE: [self.op.node_name],
6857 def BuildHooksEnv(self):
6860 This runs on the master, the primary and all the secondaries.
6864 "NODE_NAME": self.op.node_name,
6867 def BuildHooksNodes(self):
6868 """Build hooks nodes.
6871 nl = [self.cfg.GetMasterNode()]
6874 def CheckPrereq(self):
6877 def Exec(self, feedback_fn):
6878 # Prepare jobs for migration instances
6880 [opcodes.OpInstanceMigrate(instance_name=inst.name,
6883 iallocator=self.op.iallocator,
6884 target_node=self.op.target_node)]
6885 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6888 # TODO: Run iallocator in this opcode and pass correct placement options to
6889 # OpInstanceMigrate. Since other jobs can modify the cluster between
6890 # running the iallocator and the actual migration, a good consistency model
6891 # will have to be found.
6893 assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
6894 frozenset([self.op.node_name]))
6896 return ResultWithJobs(jobs)
6899 class TLMigrateInstance(Tasklet):
6900 """Tasklet class for instance migration.
6903 @ivar live: whether the migration will be done live or non-live;
6904 this variable is initalized only after CheckPrereq has run
6905 @type cleanup: boolean
6906 @ivar cleanup: Wheater we cleanup from a failed migration
6907 @type iallocator: string
6908 @ivar iallocator: The iallocator used to determine target_node
6909 @type target_node: string
6910 @ivar target_node: If given, the target_node to reallocate the instance to
6911 @type failover: boolean
6912 @ivar failover: Whether operation results in failover or migration
6913 @type fallback: boolean
6914 @ivar fallback: Whether fallback to failover is allowed if migration not
6916 @type ignore_consistency: boolean
6917 @ivar ignore_consistency: Wheter we should ignore consistency between source
6919 @type shutdown_timeout: int
6920 @ivar shutdown_timeout: In case of failover timeout of the shutdown
6923 def __init__(self, lu, instance_name, cleanup=False,
6924 failover=False, fallback=False,
6925 ignore_consistency=False,
6926 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6927 """Initializes this class.
6930 Tasklet.__init__(self, lu)
6933 self.instance_name = instance_name
6934 self.cleanup = cleanup
6935 self.live = False # will be overridden later
6936 self.failover = failover
6937 self.fallback = fallback
6938 self.ignore_consistency = ignore_consistency
6939 self.shutdown_timeout = shutdown_timeout
6941 def CheckPrereq(self):
6942 """Check prerequisites.
6944 This checks that the instance is in the cluster.
6947 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6948 instance = self.cfg.GetInstanceInfo(instance_name)
6949 assert instance is not None
6950 self.instance = instance
6952 if (not self.cleanup and not instance.admin_up and not self.failover and
6954 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6956 self.failover = True
6958 if instance.disk_template not in constants.DTS_MIRRORED:
6963 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6964 " %s" % (instance.disk_template, text),
6967 if instance.disk_template in constants.DTS_EXT_MIRROR:
6968 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6970 if self.lu.op.iallocator:
6971 self._RunAllocator()
6973 # We set set self.target_node as it is required by
6975 self.target_node = self.lu.op.target_node
6977 # self.target_node is already populated, either directly or by the
6979 target_node = self.target_node
6980 if self.target_node == instance.primary_node:
6981 raise errors.OpPrereqError("Cannot migrate instance %s"
6982 " to its primary (%s)" %
6983 (instance.name, instance.primary_node))
6985 if len(self.lu.tasklets) == 1:
6986 # It is safe to release locks only when we're the only tasklet
6988 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6989 keep=[instance.primary_node, self.target_node])
6992 secondary_nodes = instance.secondary_nodes
6993 if not secondary_nodes:
6994 raise errors.ConfigurationError("No secondary node but using"
6995 " %s disk template" %
6996 instance.disk_template)
6997 target_node = secondary_nodes[0]
6998 if self.lu.op.iallocator or (self.lu.op.target_node and
6999 self.lu.op.target_node != target_node):
7001 text = "failed over"
7004 raise errors.OpPrereqError("Instances with disk template %s cannot"
7005 " be %s to arbitrary nodes"
7006 " (neither an iallocator nor a target"
7007 " node can be passed)" %
7008 (instance.disk_template, text),
7011 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7013 # check memory requirements on the secondary node
7014 if not self.failover or instance.admin_up:
7015 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7016 instance.name, i_be[constants.BE_MEMORY],
7017 instance.hypervisor)
7019 self.lu.LogInfo("Not checking memory on the secondary node as"
7020 " instance will not be started")
7022 # check bridge existance
7023 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7025 if not self.cleanup:
7026 _CheckNodeNotDrained(self.lu, target_node)
7027 if not self.failover:
7028 result = self.rpc.call_instance_migratable(instance.primary_node,
7030 if result.fail_msg and self.fallback:
7031 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7033 self.failover = True
7035 result.Raise("Can't migrate, please use failover",
7036 prereq=True, ecode=errors.ECODE_STATE)
7038 assert not (self.failover and self.cleanup)
7040 if not self.failover:
7041 if self.lu.op.live is not None and self.lu.op.mode is not None:
7042 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7043 " parameters are accepted",
7045 if self.lu.op.live is not None:
7047 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7049 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7050 # reset the 'live' parameter to None so that repeated
7051 # invocations of CheckPrereq do not raise an exception
7052 self.lu.op.live = None
7053 elif self.lu.op.mode is None:
7054 # read the default value from the hypervisor
7055 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7057 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7059 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7061 # Failover is never live
7064 def _RunAllocator(self):
7065 """Run the allocator based on input opcode.
7068 ial = IAllocator(self.cfg, self.rpc,
7069 mode=constants.IALLOCATOR_MODE_RELOC,
7070 name=self.instance_name,
7071 # TODO See why hail breaks with a single node below
7072 relocate_from=[self.instance.primary_node,
7073 self.instance.primary_node],
7076 ial.Run(self.lu.op.iallocator)
7079 raise errors.OpPrereqError("Can't compute nodes using"
7080 " iallocator '%s': %s" %
7081 (self.lu.op.iallocator, ial.info),
7083 if len(ial.result) != ial.required_nodes:
7084 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7085 " of nodes (%s), required %s" %
7086 (self.lu.op.iallocator, len(ial.result),
7087 ial.required_nodes), errors.ECODE_FAULT)
7088 self.target_node = ial.result[0]
7089 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7090 self.instance_name, self.lu.op.iallocator,
7091 utils.CommaJoin(ial.result))
7093 def _WaitUntilSync(self):
7094 """Poll with custom rpc for disk sync.
7096 This uses our own step-based rpc call.
7099 self.feedback_fn("* wait until resync is done")
7103 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7105 self.instance.disks)
7107 for node, nres in result.items():
7108 nres.Raise("Cannot resync disks on node %s" % node)
7109 node_done, node_percent = nres.payload
7110 all_done = all_done and node_done
7111 if node_percent is not None:
7112 min_percent = min(min_percent, node_percent)
7114 if min_percent < 100:
7115 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7118 def _EnsureSecondary(self, node):
7119 """Demote a node to secondary.
7122 self.feedback_fn("* switching node %s to secondary mode" % node)
7124 for dev in self.instance.disks:
7125 self.cfg.SetDiskID(dev, node)
7127 result = self.rpc.call_blockdev_close(node, self.instance.name,
7128 self.instance.disks)
7129 result.Raise("Cannot change disk to secondary on node %s" % node)
7131 def _GoStandalone(self):
7132 """Disconnect from the network.
7135 self.feedback_fn("* changing into standalone mode")
7136 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7137 self.instance.disks)
7138 for node, nres in result.items():
7139 nres.Raise("Cannot disconnect disks node %s" % node)
7141 def _GoReconnect(self, multimaster):
7142 """Reconnect to the network.
7148 msg = "single-master"
7149 self.feedback_fn("* changing disks into %s mode" % msg)
7150 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7151 self.instance.disks,
7152 self.instance.name, multimaster)
7153 for node, nres in result.items():
7154 nres.Raise("Cannot change disks config on node %s" % node)
7156 def _ExecCleanup(self):
7157 """Try to cleanup after a failed migration.
7159 The cleanup is done by:
7160 - check that the instance is running only on one node
7161 (and update the config if needed)
7162 - change disks on its secondary node to secondary
7163 - wait until disks are fully synchronized
7164 - disconnect from the network
7165 - change disks into single-master mode
7166 - wait again until disks are fully synchronized
7169 instance = self.instance
7170 target_node = self.target_node
7171 source_node = self.source_node
7173 # check running on only one node
7174 self.feedback_fn("* checking where the instance actually runs"
7175 " (if this hangs, the hypervisor might be in"
7177 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7178 for node, result in ins_l.items():
7179 result.Raise("Can't contact node %s" % node)
7181 runningon_source = instance.name in ins_l[source_node].payload
7182 runningon_target = instance.name in ins_l[target_node].payload
7184 if runningon_source and runningon_target:
7185 raise errors.OpExecError("Instance seems to be running on two nodes,"
7186 " or the hypervisor is confused; you will have"
7187 " to ensure manually that it runs only on one"
7188 " and restart this operation")
7190 if not (runningon_source or runningon_target):
7191 raise errors.OpExecError("Instance does not seem to be running at all;"
7192 " in this case it's safer to repair by"
7193 " running 'gnt-instance stop' to ensure disk"
7194 " shutdown, and then restarting it")
7196 if runningon_target:
7197 # the migration has actually succeeded, we need to update the config
7198 self.feedback_fn("* instance running on secondary node (%s),"
7199 " updating config" % target_node)
7200 instance.primary_node = target_node
7201 self.cfg.Update(instance, self.feedback_fn)
7202 demoted_node = source_node
7204 self.feedback_fn("* instance confirmed to be running on its"
7205 " primary node (%s)" % source_node)
7206 demoted_node = target_node
7208 if instance.disk_template in constants.DTS_INT_MIRROR:
7209 self._EnsureSecondary(demoted_node)
7211 self._WaitUntilSync()
7212 except errors.OpExecError:
7213 # we ignore here errors, since if the device is standalone, it
7214 # won't be able to sync
7216 self._GoStandalone()
7217 self._GoReconnect(False)
7218 self._WaitUntilSync()
7220 self.feedback_fn("* done")
7222 def _RevertDiskStatus(self):
7223 """Try to revert the disk status after a failed migration.
7226 target_node = self.target_node
7227 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7231 self._EnsureSecondary(target_node)
7232 self._GoStandalone()
7233 self._GoReconnect(False)
7234 self._WaitUntilSync()
7235 except errors.OpExecError, err:
7236 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7237 " please try to recover the instance manually;"
7238 " error '%s'" % str(err))
7240 def _AbortMigration(self):
7241 """Call the hypervisor code to abort a started migration.
7244 instance = self.instance
7245 target_node = self.target_node
7246 migration_info = self.migration_info
7248 abort_result = self.rpc.call_finalize_migration(target_node,
7252 abort_msg = abort_result.fail_msg
7254 logging.error("Aborting migration failed on target node %s: %s",
7255 target_node, abort_msg)
7256 # Don't raise an exception here, as we stil have to try to revert the
7257 # disk status, even if this step failed.
7259 def _ExecMigration(self):
7260 """Migrate an instance.
7262 The migrate is done by:
7263 - change the disks into dual-master mode
7264 - wait until disks are fully synchronized again
7265 - migrate the instance
7266 - change disks on the new secondary node (the old primary) to secondary
7267 - wait until disks are fully synchronized
7268 - change disks into single-master mode
7271 instance = self.instance
7272 target_node = self.target_node
7273 source_node = self.source_node
7275 self.feedback_fn("* checking disk consistency between source and target")
7276 for dev in instance.disks:
7277 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7278 raise errors.OpExecError("Disk %s is degraded or not fully"
7279 " synchronized on target node,"
7280 " aborting migration" % dev.iv_name)
7282 # First get the migration information from the remote node
7283 result = self.rpc.call_migration_info(source_node, instance)
7284 msg = result.fail_msg
7286 log_err = ("Failed fetching source migration information from %s: %s" %
7288 logging.error(log_err)
7289 raise errors.OpExecError(log_err)
7291 self.migration_info = migration_info = result.payload
7293 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7294 # Then switch the disks to master/master mode
7295 self._EnsureSecondary(target_node)
7296 self._GoStandalone()
7297 self._GoReconnect(True)
7298 self._WaitUntilSync()
7300 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7301 result = self.rpc.call_accept_instance(target_node,
7304 self.nodes_ip[target_node])
7306 msg = result.fail_msg
7308 logging.error("Instance pre-migration failed, trying to revert"
7309 " disk status: %s", msg)
7310 self.feedback_fn("Pre-migration failed, aborting")
7311 self._AbortMigration()
7312 self._RevertDiskStatus()
7313 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7314 (instance.name, msg))
7316 self.feedback_fn("* migrating instance to %s" % target_node)
7317 result = self.rpc.call_instance_migrate(source_node, instance,
7318 self.nodes_ip[target_node],
7320 msg = result.fail_msg
7322 logging.error("Instance migration failed, trying to revert"
7323 " disk status: %s", msg)
7324 self.feedback_fn("Migration failed, aborting")
7325 self._AbortMigration()
7326 self._RevertDiskStatus()
7327 raise errors.OpExecError("Could not migrate instance %s: %s" %
7328 (instance.name, msg))
7330 instance.primary_node = target_node
7331 # distribute new instance config to the other nodes
7332 self.cfg.Update(instance, self.feedback_fn)
7334 result = self.rpc.call_finalize_migration(target_node,
7338 msg = result.fail_msg
7340 logging.error("Instance migration succeeded, but finalization failed:"
7342 raise errors.OpExecError("Could not finalize instance migration: %s" %
7345 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7346 self._EnsureSecondary(source_node)
7347 self._WaitUntilSync()
7348 self._GoStandalone()
7349 self._GoReconnect(False)
7350 self._WaitUntilSync()
7352 self.feedback_fn("* done")
7354 def _ExecFailover(self):
7355 """Failover an instance.
7357 The failover is done by shutting it down on its present node and
7358 starting it on the secondary.
7361 instance = self.instance
7362 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7364 source_node = instance.primary_node
7365 target_node = self.target_node
7367 if instance.admin_up:
7368 self.feedback_fn("* checking disk consistency between source and target")
7369 for dev in instance.disks:
7370 # for drbd, these are drbd over lvm
7371 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7372 if primary_node.offline:
7373 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7375 (primary_node.name, dev.iv_name, target_node))
7376 elif not self.ignore_consistency:
7377 raise errors.OpExecError("Disk %s is degraded on target node,"
7378 " aborting failover" % dev.iv_name)
7380 self.feedback_fn("* not checking disk consistency as instance is not"
7383 self.feedback_fn("* shutting down instance on source node")
7384 logging.info("Shutting down instance %s on node %s",
7385 instance.name, source_node)
7387 result = self.rpc.call_instance_shutdown(source_node, instance,
7388 self.shutdown_timeout)
7389 msg = result.fail_msg
7391 if self.ignore_consistency or primary_node.offline:
7392 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7393 " proceeding anyway; please make sure node"
7394 " %s is down; error details: %s",
7395 instance.name, source_node, source_node, msg)
7397 raise errors.OpExecError("Could not shutdown instance %s on"
7399 (instance.name, source_node, msg))
7401 self.feedback_fn("* deactivating the instance's disks on source node")
7402 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7403 raise errors.OpExecError("Can't shut down the instance's disks")
7405 instance.primary_node = target_node
7406 # distribute new instance config to the other nodes
7407 self.cfg.Update(instance, self.feedback_fn)
7409 # Only start the instance if it's marked as up
7410 if instance.admin_up:
7411 self.feedback_fn("* activating the instance's disks on target node %s" %
7413 logging.info("Starting instance %s on node %s",
7414 instance.name, target_node)
7416 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7417 ignore_secondaries=True)
7419 _ShutdownInstanceDisks(self.lu, instance)
7420 raise errors.OpExecError("Can't activate the instance's disks")
7422 self.feedback_fn("* starting the instance on the target node %s" %
7424 result = self.rpc.call_instance_start(target_node, instance, None, None,
7426 msg = result.fail_msg
7428 _ShutdownInstanceDisks(self.lu, instance)
7429 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7430 (instance.name, target_node, msg))
7432 def Exec(self, feedback_fn):
7433 """Perform the migration.
7436 self.feedback_fn = feedback_fn
7437 self.source_node = self.instance.primary_node
7439 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7440 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7441 self.target_node = self.instance.secondary_nodes[0]
7442 # Otherwise self.target_node has been populated either
7443 # directly, or through an iallocator.
7445 self.all_nodes = [self.source_node, self.target_node]
7446 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7447 in self.cfg.GetMultiNodeInfo(self.all_nodes))
7450 feedback_fn("Failover instance %s" % self.instance.name)
7451 self._ExecFailover()
7453 feedback_fn("Migrating instance %s" % self.instance.name)
7456 return self._ExecCleanup()
7458 return self._ExecMigration()
7461 def _CreateBlockDev(lu, node, instance, device, force_create,
7463 """Create a tree of block devices on a given node.
7465 If this device type has to be created on secondaries, create it and
7468 If not, just recurse to children keeping the same 'force' value.
7470 @param lu: the lu on whose behalf we execute
7471 @param node: the node on which to create the device
7472 @type instance: L{objects.Instance}
7473 @param instance: the instance which owns the device
7474 @type device: L{objects.Disk}
7475 @param device: the device to create
7476 @type force_create: boolean
7477 @param force_create: whether to force creation of this device; this
7478 will be change to True whenever we find a device which has
7479 CreateOnSecondary() attribute
7480 @param info: the extra 'metadata' we should attach to the device
7481 (this will be represented as a LVM tag)
7482 @type force_open: boolean
7483 @param force_open: this parameter will be passes to the
7484 L{backend.BlockdevCreate} function where it specifies
7485 whether we run on primary or not, and it affects both
7486 the child assembly and the device own Open() execution
7489 if device.CreateOnSecondary():
7493 for child in device.children:
7494 _CreateBlockDev(lu, node, instance, child, force_create,
7497 if not force_create:
7500 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7503 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7504 """Create a single block device on a given node.
7506 This will not recurse over children of the device, so they must be
7509 @param lu: the lu on whose behalf we execute
7510 @param node: the node on which to create the device
7511 @type instance: L{objects.Instance}
7512 @param instance: the instance which owns the device
7513 @type device: L{objects.Disk}
7514 @param device: the device to create
7515 @param info: the extra 'metadata' we should attach to the device
7516 (this will be represented as a LVM tag)
7517 @type force_open: boolean
7518 @param force_open: this parameter will be passes to the
7519 L{backend.BlockdevCreate} function where it specifies
7520 whether we run on primary or not, and it affects both
7521 the child assembly and the device own Open() execution
7524 lu.cfg.SetDiskID(device, node)
7525 result = lu.rpc.call_blockdev_create(node, device, device.size,
7526 instance.name, force_open, info)
7527 result.Raise("Can't create block device %s on"
7528 " node %s for instance %s" % (device, node, instance.name))
7529 if device.physical_id is None:
7530 device.physical_id = result.payload
7533 def _GenerateUniqueNames(lu, exts):
7534 """Generate a suitable LV name.
7536 This will generate a logical volume name for the given instance.
7541 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7542 results.append("%s%s" % (new_id, val))
7546 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7547 iv_name, p_minor, s_minor):
7548 """Generate a drbd8 device complete with its children.
7551 assert len(vgnames) == len(names) == 2
7552 port = lu.cfg.AllocatePort()
7553 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7554 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7555 logical_id=(vgnames[0], names[0]))
7556 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7557 logical_id=(vgnames[1], names[1]))
7558 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7559 logical_id=(primary, secondary, port,
7562 children=[dev_data, dev_meta],
7567 def _GenerateDiskTemplate(lu, template_name,
7568 instance_name, primary_node,
7569 secondary_nodes, disk_info,
7570 file_storage_dir, file_driver,
7571 base_index, feedback_fn):
7572 """Generate the entire disk layout for a given template type.
7575 #TODO: compute space requirements
7577 vgname = lu.cfg.GetVGName()
7578 disk_count = len(disk_info)
7580 if template_name == constants.DT_DISKLESS:
7582 elif template_name == constants.DT_PLAIN:
7583 if len(secondary_nodes) != 0:
7584 raise errors.ProgrammerError("Wrong template configuration")
7586 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7587 for i in range(disk_count)])
7588 for idx, disk in enumerate(disk_info):
7589 disk_index = idx + base_index
7590 vg = disk.get(constants.IDISK_VG, vgname)
7591 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7592 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7593 size=disk[constants.IDISK_SIZE],
7594 logical_id=(vg, names[idx]),
7595 iv_name="disk/%d" % disk_index,
7596 mode=disk[constants.IDISK_MODE])
7597 disks.append(disk_dev)
7598 elif template_name == constants.DT_DRBD8:
7599 if len(secondary_nodes) != 1:
7600 raise errors.ProgrammerError("Wrong template configuration")
7601 remote_node = secondary_nodes[0]
7602 minors = lu.cfg.AllocateDRBDMinor(
7603 [primary_node, remote_node] * len(disk_info), instance_name)
7606 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7607 for i in range(disk_count)]):
7608 names.append(lv_prefix + "_data")
7609 names.append(lv_prefix + "_meta")
7610 for idx, disk in enumerate(disk_info):
7611 disk_index = idx + base_index
7612 data_vg = disk.get(constants.IDISK_VG, vgname)
7613 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7614 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7615 disk[constants.IDISK_SIZE],
7617 names[idx * 2:idx * 2 + 2],
7618 "disk/%d" % disk_index,
7619 minors[idx * 2], minors[idx * 2 + 1])
7620 disk_dev.mode = disk[constants.IDISK_MODE]
7621 disks.append(disk_dev)
7622 elif template_name == constants.DT_FILE:
7623 if len(secondary_nodes) != 0:
7624 raise errors.ProgrammerError("Wrong template configuration")
7626 opcodes.RequireFileStorage()
7628 for idx, disk in enumerate(disk_info):
7629 disk_index = idx + base_index
7630 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7631 size=disk[constants.IDISK_SIZE],
7632 iv_name="disk/%d" % disk_index,
7633 logical_id=(file_driver,
7634 "%s/disk%d" % (file_storage_dir,
7636 mode=disk[constants.IDISK_MODE])
7637 disks.append(disk_dev)
7638 elif template_name == constants.DT_SHARED_FILE:
7639 if len(secondary_nodes) != 0:
7640 raise errors.ProgrammerError("Wrong template configuration")
7642 opcodes.RequireSharedFileStorage()
7644 for idx, disk in enumerate(disk_info):
7645 disk_index = idx + base_index
7646 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7647 size=disk[constants.IDISK_SIZE],
7648 iv_name="disk/%d" % disk_index,
7649 logical_id=(file_driver,
7650 "%s/disk%d" % (file_storage_dir,
7652 mode=disk[constants.IDISK_MODE])
7653 disks.append(disk_dev)
7654 elif template_name == constants.DT_BLOCK:
7655 if len(secondary_nodes) != 0:
7656 raise errors.ProgrammerError("Wrong template configuration")
7658 for idx, disk in enumerate(disk_info):
7659 disk_index = idx + base_index
7660 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7661 size=disk[constants.IDISK_SIZE],
7662 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7663 disk[constants.IDISK_ADOPT]),
7664 iv_name="disk/%d" % disk_index,
7665 mode=disk[constants.IDISK_MODE])
7666 disks.append(disk_dev)
7669 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7673 def _GetInstanceInfoText(instance):
7674 """Compute that text that should be added to the disk's metadata.
7677 return "originstname+%s" % instance.name
7680 def _CalcEta(time_taken, written, total_size):
7681 """Calculates the ETA based on size written and total size.
7683 @param time_taken: The time taken so far
7684 @param written: amount written so far
7685 @param total_size: The total size of data to be written
7686 @return: The remaining time in seconds
7689 avg_time = time_taken / float(written)
7690 return (total_size - written) * avg_time
7693 def _WipeDisks(lu, instance):
7694 """Wipes instance disks.
7696 @type lu: L{LogicalUnit}
7697 @param lu: the logical unit on whose behalf we execute
7698 @type instance: L{objects.Instance}
7699 @param instance: the instance whose disks we should create
7700 @return: the success of the wipe
7703 node = instance.primary_node
7705 for device in instance.disks:
7706 lu.cfg.SetDiskID(device, node)
7708 logging.info("Pause sync of instance %s disks", instance.name)
7709 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7711 for idx, success in enumerate(result.payload):
7713 logging.warn("pause-sync of instance %s for disks %d failed",
7717 for idx, device in enumerate(instance.disks):
7718 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7719 # MAX_WIPE_CHUNK at max
7720 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7721 constants.MIN_WIPE_CHUNK_PERCENT)
7722 # we _must_ make this an int, otherwise rounding errors will
7724 wipe_chunk_size = int(wipe_chunk_size)
7726 lu.LogInfo("* Wiping disk %d", idx)
7727 logging.info("Wiping disk %d for instance %s, node %s using"
7728 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7733 start_time = time.time()
7735 while offset < size:
7736 wipe_size = min(wipe_chunk_size, size - offset)
7737 logging.debug("Wiping disk %d, offset %s, chunk %s",
7738 idx, offset, wipe_size)
7739 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7740 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7741 (idx, offset, wipe_size))
7744 if now - last_output >= 60:
7745 eta = _CalcEta(now - start_time, offset, size)
7746 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7747 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7750 logging.info("Resume sync of instance %s disks", instance.name)
7752 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7754 for idx, success in enumerate(result.payload):
7756 lu.LogWarning("Resume sync of disk %d failed, please have a"
7757 " look at the status and troubleshoot the issue", idx)
7758 logging.warn("resume-sync of instance %s for disks %d failed",
7762 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7763 """Create all disks for an instance.
7765 This abstracts away some work from AddInstance.
7767 @type lu: L{LogicalUnit}
7768 @param lu: the logical unit on whose behalf we execute
7769 @type instance: L{objects.Instance}
7770 @param instance: the instance whose disks we should create
7772 @param to_skip: list of indices to skip
7773 @type target_node: string
7774 @param target_node: if passed, overrides the target node for creation
7776 @return: the success of the creation
7779 info = _GetInstanceInfoText(instance)
7780 if target_node is None:
7781 pnode = instance.primary_node
7782 all_nodes = instance.all_nodes
7787 if instance.disk_template in constants.DTS_FILEBASED:
7788 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7789 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7791 result.Raise("Failed to create directory '%s' on"
7792 " node %s" % (file_storage_dir, pnode))
7794 # Note: this needs to be kept in sync with adding of disks in
7795 # LUInstanceSetParams
7796 for idx, device in enumerate(instance.disks):
7797 if to_skip and idx in to_skip:
7799 logging.info("Creating volume %s for instance %s",
7800 device.iv_name, instance.name)
7802 for node in all_nodes:
7803 f_create = node == pnode
7804 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7807 def _RemoveDisks(lu, instance, target_node=None):
7808 """Remove all disks for an instance.
7810 This abstracts away some work from `AddInstance()` and
7811 `RemoveInstance()`. Note that in case some of the devices couldn't
7812 be removed, the removal will continue with the other ones (compare
7813 with `_CreateDisks()`).
7815 @type lu: L{LogicalUnit}
7816 @param lu: the logical unit on whose behalf we execute
7817 @type instance: L{objects.Instance}
7818 @param instance: the instance whose disks we should remove
7819 @type target_node: string
7820 @param target_node: used to override the node on which to remove the disks
7822 @return: the success of the removal
7825 logging.info("Removing block devices for instance %s", instance.name)
7828 for device in instance.disks:
7830 edata = [(target_node, device)]
7832 edata = device.ComputeNodeTree(instance.primary_node)
7833 for node, disk in edata:
7834 lu.cfg.SetDiskID(disk, node)
7835 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7837 lu.LogWarning("Could not remove block device %s on node %s,"
7838 " continuing anyway: %s", device.iv_name, node, msg)
7841 if instance.disk_template == constants.DT_FILE:
7842 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7846 tgt = instance.primary_node
7847 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7849 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7850 file_storage_dir, instance.primary_node, result.fail_msg)
7856 def _ComputeDiskSizePerVG(disk_template, disks):
7857 """Compute disk size requirements in the volume group
7860 def _compute(disks, payload):
7861 """Universal algorithm.
7866 vgs[disk[constants.IDISK_VG]] = \
7867 vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
7871 # Required free disk space as a function of disk and swap space
7873 constants.DT_DISKLESS: {},
7874 constants.DT_PLAIN: _compute(disks, 0),
7875 # 128 MB are added for drbd metadata for each disk
7876 constants.DT_DRBD8: _compute(disks, 128),
7877 constants.DT_FILE: {},
7878 constants.DT_SHARED_FILE: {},
7881 if disk_template not in req_size_dict:
7882 raise errors.ProgrammerError("Disk template '%s' size requirement"
7883 " is unknown" % disk_template)
7885 return req_size_dict[disk_template]
7888 def _ComputeDiskSize(disk_template, disks):
7889 """Compute disk size requirements in the volume group
7892 # Required free disk space as a function of disk and swap space
7894 constants.DT_DISKLESS: None,
7895 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7896 # 128 MB are added for drbd metadata for each disk
7897 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7898 constants.DT_FILE: None,
7899 constants.DT_SHARED_FILE: 0,
7900 constants.DT_BLOCK: 0,
7903 if disk_template not in req_size_dict:
7904 raise errors.ProgrammerError("Disk template '%s' size requirement"
7905 " is unknown" % disk_template)
7907 return req_size_dict[disk_template]
7910 def _FilterVmNodes(lu, nodenames):
7911 """Filters out non-vm_capable nodes from a list.
7913 @type lu: L{LogicalUnit}
7914 @param lu: the logical unit for which we check
7915 @type nodenames: list
7916 @param nodenames: the list of nodes on which we should check
7918 @return: the list of vm-capable nodes
7921 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7922 return [name for name in nodenames if name not in vm_nodes]
7925 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7926 """Hypervisor parameter validation.
7928 This function abstract the hypervisor parameter validation to be
7929 used in both instance create and instance modify.
7931 @type lu: L{LogicalUnit}
7932 @param lu: the logical unit for which we check
7933 @type nodenames: list
7934 @param nodenames: the list of nodes on which we should check
7935 @type hvname: string
7936 @param hvname: the name of the hypervisor we should use
7937 @type hvparams: dict
7938 @param hvparams: the parameters which we need to check
7939 @raise errors.OpPrereqError: if the parameters are not valid
7942 nodenames = _FilterVmNodes(lu, nodenames)
7943 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7946 for node in nodenames:
7950 info.Raise("Hypervisor parameter validation failed on node %s" % node)
7953 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7954 """OS parameters validation.
7956 @type lu: L{LogicalUnit}
7957 @param lu: the logical unit for which we check
7958 @type required: boolean
7959 @param required: whether the validation should fail if the OS is not
7961 @type nodenames: list
7962 @param nodenames: the list of nodes on which we should check
7963 @type osname: string
7964 @param osname: the name of the hypervisor we should use
7965 @type osparams: dict
7966 @param osparams: the parameters which we need to check
7967 @raise errors.OpPrereqError: if the parameters are not valid
7970 nodenames = _FilterVmNodes(lu, nodenames)
7971 result = lu.rpc.call_os_validate(required, nodenames, osname,
7972 [constants.OS_VALIDATE_PARAMETERS],
7974 for node, nres in result.items():
7975 # we don't check for offline cases since this should be run only
7976 # against the master node and/or an instance's nodes
7977 nres.Raise("OS Parameters validation failed on node %s" % node)
7978 if not nres.payload:
7979 lu.LogInfo("OS %s not found on node %s, validation skipped",
7983 class LUInstanceCreate(LogicalUnit):
7984 """Create an instance.
7987 HPATH = "instance-add"
7988 HTYPE = constants.HTYPE_INSTANCE
7991 def CheckArguments(self):
7995 # do not require name_check to ease forward/backward compatibility
7997 if self.op.no_install and self.op.start:
7998 self.LogInfo("No-installation mode selected, disabling startup")
7999 self.op.start = False
8000 # validate/normalize the instance name
8001 self.op.instance_name = \
8002 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8004 if self.op.ip_check and not self.op.name_check:
8005 # TODO: make the ip check more flexible and not depend on the name check
8006 raise errors.OpPrereqError("Cannot do IP address check without a name"
8007 " check", errors.ECODE_INVAL)
8009 # check nics' parameter names
8010 for nic in self.op.nics:
8011 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8013 # check disks. parameter names and consistent adopt/no-adopt strategy
8014 has_adopt = has_no_adopt = False
8015 for disk in self.op.disks:
8016 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8017 if constants.IDISK_ADOPT in disk:
8021 if has_adopt and has_no_adopt:
8022 raise errors.OpPrereqError("Either all disks are adopted or none is",
8025 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8026 raise errors.OpPrereqError("Disk adoption is not supported for the"
8027 " '%s' disk template" %
8028 self.op.disk_template,
8030 if self.op.iallocator is not None:
8031 raise errors.OpPrereqError("Disk adoption not allowed with an"
8032 " iallocator script", errors.ECODE_INVAL)
8033 if self.op.mode == constants.INSTANCE_IMPORT:
8034 raise errors.OpPrereqError("Disk adoption not allowed for"
8035 " instance import", errors.ECODE_INVAL)
8037 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8038 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8039 " but no 'adopt' parameter given" %
8040 self.op.disk_template,
8043 self.adopt_disks = has_adopt
8045 # instance name verification
8046 if self.op.name_check:
8047 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8048 self.op.instance_name = self.hostname1.name
8049 # used in CheckPrereq for ip ping check
8050 self.check_ip = self.hostname1.ip
8052 self.check_ip = None
8054 # file storage checks
8055 if (self.op.file_driver and
8056 not self.op.file_driver in constants.FILE_DRIVER):
8057 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8058 self.op.file_driver, errors.ECODE_INVAL)
8060 if self.op.disk_template == constants.DT_FILE:
8061 opcodes.RequireFileStorage()
8062 elif self.op.disk_template == constants.DT_SHARED_FILE:
8063 opcodes.RequireSharedFileStorage()
8065 ### Node/iallocator related checks
8066 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8068 if self.op.pnode is not None:
8069 if self.op.disk_template in constants.DTS_INT_MIRROR:
8070 if self.op.snode is None:
8071 raise errors.OpPrereqError("The networked disk templates need"
8072 " a mirror node", errors.ECODE_INVAL)
8074 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8076 self.op.snode = None
8078 self._cds = _GetClusterDomainSecret()
8080 if self.op.mode == constants.INSTANCE_IMPORT:
8081 # On import force_variant must be True, because if we forced it at
8082 # initial install, our only chance when importing it back is that it
8084 self.op.force_variant = True
8086 if self.op.no_install:
8087 self.LogInfo("No-installation mode has no effect during import")
8089 elif self.op.mode == constants.INSTANCE_CREATE:
8090 if self.op.os_type is None:
8091 raise errors.OpPrereqError("No guest OS specified",
8093 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8094 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8095 " installation" % self.op.os_type,
8097 if self.op.disk_template is None:
8098 raise errors.OpPrereqError("No disk template specified",
8101 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8102 # Check handshake to ensure both clusters have the same domain secret
8103 src_handshake = self.op.source_handshake
8104 if not src_handshake:
8105 raise errors.OpPrereqError("Missing source handshake",
8108 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8111 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8114 # Load and check source CA
8115 self.source_x509_ca_pem = self.op.source_x509_ca
8116 if not self.source_x509_ca_pem:
8117 raise errors.OpPrereqError("Missing source X509 CA",
8121 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8123 except OpenSSL.crypto.Error, err:
8124 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8125 (err, ), errors.ECODE_INVAL)
8127 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8128 if errcode is not None:
8129 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8132 self.source_x509_ca = cert
8134 src_instance_name = self.op.source_instance_name
8135 if not src_instance_name:
8136 raise errors.OpPrereqError("Missing source instance name",
8139 self.source_instance_name = \
8140 netutils.GetHostname(name=src_instance_name).name
8143 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8144 self.op.mode, errors.ECODE_INVAL)
8146 def ExpandNames(self):
8147 """ExpandNames for CreateInstance.
8149 Figure out the right locks for instance creation.
8152 self.needed_locks = {}
8154 instance_name = self.op.instance_name
8155 # this is just a preventive check, but someone might still add this
8156 # instance in the meantime, and creation will fail at lock-add time
8157 if instance_name in self.cfg.GetInstanceList():
8158 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8159 instance_name, errors.ECODE_EXISTS)
8161 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8163 if self.op.iallocator:
8164 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8166 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8167 nodelist = [self.op.pnode]
8168 if self.op.snode is not None:
8169 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8170 nodelist.append(self.op.snode)
8171 self.needed_locks[locking.LEVEL_NODE] = nodelist
8173 # in case of import lock the source node too
8174 if self.op.mode == constants.INSTANCE_IMPORT:
8175 src_node = self.op.src_node
8176 src_path = self.op.src_path
8178 if src_path is None:
8179 self.op.src_path = src_path = self.op.instance_name
8181 if src_node is None:
8182 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8183 self.op.src_node = None
8184 if os.path.isabs(src_path):
8185 raise errors.OpPrereqError("Importing an instance from an absolute"
8186 " path requires a source node option",
8189 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8190 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8191 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8192 if not os.path.isabs(src_path):
8193 self.op.src_path = src_path = \
8194 utils.PathJoin(constants.EXPORT_DIR, src_path)
8196 def _RunAllocator(self):
8197 """Run the allocator based on input opcode.
8200 nics = [n.ToDict() for n in self.nics]
8201 ial = IAllocator(self.cfg, self.rpc,
8202 mode=constants.IALLOCATOR_MODE_ALLOC,
8203 name=self.op.instance_name,
8204 disk_template=self.op.disk_template,
8207 vcpus=self.be_full[constants.BE_VCPUS],
8208 memory=self.be_full[constants.BE_MEMORY],
8211 hypervisor=self.op.hypervisor,
8214 ial.Run(self.op.iallocator)
8217 raise errors.OpPrereqError("Can't compute nodes using"
8218 " iallocator '%s': %s" %
8219 (self.op.iallocator, ial.info),
8221 if len(ial.result) != ial.required_nodes:
8222 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8223 " of nodes (%s), required %s" %
8224 (self.op.iallocator, len(ial.result),
8225 ial.required_nodes), errors.ECODE_FAULT)
8226 self.op.pnode = ial.result[0]
8227 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8228 self.op.instance_name, self.op.iallocator,
8229 utils.CommaJoin(ial.result))
8230 if ial.required_nodes == 2:
8231 self.op.snode = ial.result[1]
8233 def BuildHooksEnv(self):
8236 This runs on master, primary and secondary nodes of the instance.
8240 "ADD_MODE": self.op.mode,
8242 if self.op.mode == constants.INSTANCE_IMPORT:
8243 env["SRC_NODE"] = self.op.src_node
8244 env["SRC_PATH"] = self.op.src_path
8245 env["SRC_IMAGES"] = self.src_images
8247 env.update(_BuildInstanceHookEnv(
8248 name=self.op.instance_name,
8249 primary_node=self.op.pnode,
8250 secondary_nodes=self.secondaries,
8251 status=self.op.start,
8252 os_type=self.op.os_type,
8253 memory=self.be_full[constants.BE_MEMORY],
8254 vcpus=self.be_full[constants.BE_VCPUS],
8255 nics=_NICListToTuple(self, self.nics),
8256 disk_template=self.op.disk_template,
8257 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8258 for d in self.disks],
8261 hypervisor_name=self.op.hypervisor,
8267 def BuildHooksNodes(self):
8268 """Build hooks nodes.
8271 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8274 def _ReadExportInfo(self):
8275 """Reads the export information from disk.
8277 It will override the opcode source node and path with the actual
8278 information, if these two were not specified before.
8280 @return: the export information
8283 assert self.op.mode == constants.INSTANCE_IMPORT
8285 src_node = self.op.src_node
8286 src_path = self.op.src_path
8288 if src_node is None:
8289 locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
8290 exp_list = self.rpc.call_export_list(locked_nodes)
8292 for node in exp_list:
8293 if exp_list[node].fail_msg:
8295 if src_path in exp_list[node].payload:
8297 self.op.src_node = src_node = node
8298 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8302 raise errors.OpPrereqError("No export found for relative path %s" %
8303 src_path, errors.ECODE_INVAL)
8305 _CheckNodeOnline(self, src_node)
8306 result = self.rpc.call_export_info(src_node, src_path)
8307 result.Raise("No export or invalid export found in dir %s" % src_path)
8309 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8310 if not export_info.has_section(constants.INISECT_EXP):
8311 raise errors.ProgrammerError("Corrupted export config",
8312 errors.ECODE_ENVIRON)
8314 ei_version = export_info.get(constants.INISECT_EXP, "version")
8315 if (int(ei_version) != constants.EXPORT_VERSION):
8316 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8317 (ei_version, constants.EXPORT_VERSION),
8318 errors.ECODE_ENVIRON)
8321 def _ReadExportParams(self, einfo):
8322 """Use export parameters as defaults.
8324 In case the opcode doesn't specify (as in override) some instance
8325 parameters, then try to use them from the export information, if
8329 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8331 if self.op.disk_template is None:
8332 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8333 self.op.disk_template = einfo.get(constants.INISECT_INS,
8336 raise errors.OpPrereqError("No disk template specified and the export"
8337 " is missing the disk_template information",
8340 if not self.op.disks:
8341 if einfo.has_option(constants.INISECT_INS, "disk_count"):
8343 # TODO: import the disk iv_name too
8344 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8345 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8346 disks.append({constants.IDISK_SIZE: disk_sz})
8347 self.op.disks = disks
8349 raise errors.OpPrereqError("No disk info specified and the export"
8350 " is missing the disk information",
8353 if (not self.op.nics and
8354 einfo.has_option(constants.INISECT_INS, "nic_count")):
8356 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8358 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8359 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8364 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8365 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8367 if (self.op.hypervisor is None and
8368 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8369 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8371 if einfo.has_section(constants.INISECT_HYP):
8372 # use the export parameters but do not override the ones
8373 # specified by the user
8374 for name, value in einfo.items(constants.INISECT_HYP):
8375 if name not in self.op.hvparams:
8376 self.op.hvparams[name] = value
8378 if einfo.has_section(constants.INISECT_BEP):
8379 # use the parameters, without overriding
8380 for name, value in einfo.items(constants.INISECT_BEP):
8381 if name not in self.op.beparams:
8382 self.op.beparams[name] = value
8384 # try to read the parameters old style, from the main section
8385 for name in constants.BES_PARAMETERS:
8386 if (name not in self.op.beparams and
8387 einfo.has_option(constants.INISECT_INS, name)):
8388 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8390 if einfo.has_section(constants.INISECT_OSP):
8391 # use the parameters, without overriding
8392 for name, value in einfo.items(constants.INISECT_OSP):
8393 if name not in self.op.osparams:
8394 self.op.osparams[name] = value
8396 def _RevertToDefaults(self, cluster):
8397 """Revert the instance parameters to the default values.
8401 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8402 for name in self.op.hvparams.keys():
8403 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8404 del self.op.hvparams[name]
8406 be_defs = cluster.SimpleFillBE({})
8407 for name in self.op.beparams.keys():
8408 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8409 del self.op.beparams[name]
8411 nic_defs = cluster.SimpleFillNIC({})
8412 for nic in self.op.nics:
8413 for name in constants.NICS_PARAMETERS:
8414 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8417 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8418 for name in self.op.osparams.keys():
8419 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8420 del self.op.osparams[name]
8422 def _CalculateFileStorageDir(self):
8423 """Calculate final instance file storage dir.
8426 # file storage dir calculation/check
8427 self.instance_file_storage_dir = None
8428 if self.op.disk_template in constants.DTS_FILEBASED:
8429 # build the full file storage dir path
8432 if self.op.disk_template == constants.DT_SHARED_FILE:
8433 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8435 get_fsd_fn = self.cfg.GetFileStorageDir
8437 cfg_storagedir = get_fsd_fn()
8438 if not cfg_storagedir:
8439 raise errors.OpPrereqError("Cluster file storage dir not defined")
8440 joinargs.append(cfg_storagedir)
8442 if self.op.file_storage_dir is not None:
8443 joinargs.append(self.op.file_storage_dir)
8445 joinargs.append(self.op.instance_name)
8447 # pylint: disable-msg=W0142
8448 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8450 def CheckPrereq(self):
8451 """Check prerequisites.
8454 self._CalculateFileStorageDir()
8456 if self.op.mode == constants.INSTANCE_IMPORT:
8457 export_info = self._ReadExportInfo()
8458 self._ReadExportParams(export_info)
8460 if (not self.cfg.GetVGName() and
8461 self.op.disk_template not in constants.DTS_NOT_LVM):
8462 raise errors.OpPrereqError("Cluster does not support lvm-based"
8463 " instances", errors.ECODE_STATE)
8465 if self.op.hypervisor is None:
8466 self.op.hypervisor = self.cfg.GetHypervisorType()
8468 cluster = self.cfg.GetClusterInfo()
8469 enabled_hvs = cluster.enabled_hypervisors
8470 if self.op.hypervisor not in enabled_hvs:
8471 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8472 " cluster (%s)" % (self.op.hypervisor,
8473 ",".join(enabled_hvs)),
8476 # Check tag validity
8477 for tag in self.op.tags:
8478 objects.TaggableObject.ValidateTag(tag)
8480 # check hypervisor parameter syntax (locally)
8481 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8482 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8484 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8485 hv_type.CheckParameterSyntax(filled_hvp)
8486 self.hv_full = filled_hvp
8487 # check that we don't specify global parameters on an instance
8488 _CheckGlobalHvParams(self.op.hvparams)
8490 # fill and remember the beparams dict
8491 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8492 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8494 # build os parameters
8495 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8497 # now that hvp/bep are in final format, let's reset to defaults,
8499 if self.op.identify_defaults:
8500 self._RevertToDefaults(cluster)
8504 for idx, nic in enumerate(self.op.nics):
8505 nic_mode_req = nic.get(constants.INIC_MODE, None)
8506 nic_mode = nic_mode_req
8507 if nic_mode is None:
8508 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8510 # in routed mode, for the first nic, the default ip is 'auto'
8511 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8512 default_ip_mode = constants.VALUE_AUTO
8514 default_ip_mode = constants.VALUE_NONE
8516 # ip validity checks
8517 ip = nic.get(constants.INIC_IP, default_ip_mode)
8518 if ip is None or ip.lower() == constants.VALUE_NONE:
8520 elif ip.lower() == constants.VALUE_AUTO:
8521 if not self.op.name_check:
8522 raise errors.OpPrereqError("IP address set to auto but name checks"
8523 " have been skipped",
8525 nic_ip = self.hostname1.ip
8527 if not netutils.IPAddress.IsValid(ip):
8528 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8532 # TODO: check the ip address for uniqueness
8533 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8534 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8537 # MAC address verification
8538 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8539 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8540 mac = utils.NormalizeAndValidateMac(mac)
8543 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8544 except errors.ReservationError:
8545 raise errors.OpPrereqError("MAC address %s already in use"
8546 " in cluster" % mac,
8547 errors.ECODE_NOTUNIQUE)
8549 # Build nic parameters
8550 link = nic.get(constants.INIC_LINK, None)
8553 nicparams[constants.NIC_MODE] = nic_mode_req
8555 nicparams[constants.NIC_LINK] = link
8557 check_params = cluster.SimpleFillNIC(nicparams)
8558 objects.NIC.CheckParameterSyntax(check_params)
8559 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8561 # disk checks/pre-build
8562 default_vg = self.cfg.GetVGName()
8564 for disk in self.op.disks:
8565 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8566 if mode not in constants.DISK_ACCESS_SET:
8567 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8568 mode, errors.ECODE_INVAL)
8569 size = disk.get(constants.IDISK_SIZE, None)
8571 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8574 except (TypeError, ValueError):
8575 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8578 data_vg = disk.get(constants.IDISK_VG, default_vg)
8580 constants.IDISK_SIZE: size,
8581 constants.IDISK_MODE: mode,
8582 constants.IDISK_VG: data_vg,
8583 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8585 if constants.IDISK_ADOPT in disk:
8586 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8587 self.disks.append(new_disk)
8589 if self.op.mode == constants.INSTANCE_IMPORT:
8591 # Check that the new instance doesn't have less disks than the export
8592 instance_disks = len(self.disks)
8593 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8594 if instance_disks < export_disks:
8595 raise errors.OpPrereqError("Not enough disks to import."
8596 " (instance: %d, export: %d)" %
8597 (instance_disks, export_disks),
8601 for idx in range(export_disks):
8602 option = "disk%d_dump" % idx
8603 if export_info.has_option(constants.INISECT_INS, option):
8604 # FIXME: are the old os-es, disk sizes, etc. useful?
8605 export_name = export_info.get(constants.INISECT_INS, option)
8606 image = utils.PathJoin(self.op.src_path, export_name)
8607 disk_images.append(image)
8609 disk_images.append(False)
8611 self.src_images = disk_images
8613 old_name = export_info.get(constants.INISECT_INS, "name")
8615 exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8616 except (TypeError, ValueError), err:
8617 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8618 " an integer: %s" % str(err),
8620 if self.op.instance_name == old_name:
8621 for idx, nic in enumerate(self.nics):
8622 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8623 nic_mac_ini = "nic%d_mac" % idx
8624 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8626 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8628 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8629 if self.op.ip_check:
8630 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8631 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8632 (self.check_ip, self.op.instance_name),
8633 errors.ECODE_NOTUNIQUE)
8635 #### mac address generation
8636 # By generating here the mac address both the allocator and the hooks get
8637 # the real final mac address rather than the 'auto' or 'generate' value.
8638 # There is a race condition between the generation and the instance object
8639 # creation, which means that we know the mac is valid now, but we're not
8640 # sure it will be when we actually add the instance. If things go bad
8641 # adding the instance will abort because of a duplicate mac, and the
8642 # creation job will fail.
8643 for nic in self.nics:
8644 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8645 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8649 if self.op.iallocator is not None:
8650 self._RunAllocator()
8652 #### node related checks
8654 # check primary node
8655 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8656 assert self.pnode is not None, \
8657 "Cannot retrieve locked node %s" % self.op.pnode
8659 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8660 pnode.name, errors.ECODE_STATE)
8662 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8663 pnode.name, errors.ECODE_STATE)
8664 if not pnode.vm_capable:
8665 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8666 " '%s'" % pnode.name, errors.ECODE_STATE)
8668 self.secondaries = []
8670 # mirror node verification
8671 if self.op.disk_template in constants.DTS_INT_MIRROR:
8672 if self.op.snode == pnode.name:
8673 raise errors.OpPrereqError("The secondary node cannot be the"
8674 " primary node", errors.ECODE_INVAL)
8675 _CheckNodeOnline(self, self.op.snode)
8676 _CheckNodeNotDrained(self, self.op.snode)
8677 _CheckNodeVmCapable(self, self.op.snode)
8678 self.secondaries.append(self.op.snode)
8680 nodenames = [pnode.name] + self.secondaries
8682 if not self.adopt_disks:
8683 # Check lv size requirements, if not adopting
8684 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8685 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8687 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8688 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8689 disk[constants.IDISK_ADOPT])
8690 for disk in self.disks])
8691 if len(all_lvs) != len(self.disks):
8692 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8694 for lv_name in all_lvs:
8696 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8697 # to ReserveLV uses the same syntax
8698 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8699 except errors.ReservationError:
8700 raise errors.OpPrereqError("LV named %s used by another instance" %
8701 lv_name, errors.ECODE_NOTUNIQUE)
8703 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8704 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8706 node_lvs = self.rpc.call_lv_list([pnode.name],
8707 vg_names.payload.keys())[pnode.name]
8708 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8709 node_lvs = node_lvs.payload
8711 delta = all_lvs.difference(node_lvs.keys())
8713 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8714 utils.CommaJoin(delta),
8716 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8718 raise errors.OpPrereqError("Online logical volumes found, cannot"
8719 " adopt: %s" % utils.CommaJoin(online_lvs),
8721 # update the size of disk based on what is found
8722 for dsk in self.disks:
8723 dsk[constants.IDISK_SIZE] = \
8724 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8725 dsk[constants.IDISK_ADOPT])][0]))
8727 elif self.op.disk_template == constants.DT_BLOCK:
8728 # Normalize and de-duplicate device paths
8729 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8730 for disk in self.disks])
8731 if len(all_disks) != len(self.disks):
8732 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8734 baddisks = [d for d in all_disks
8735 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8737 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8738 " cannot be adopted" %
8739 (", ".join(baddisks),
8740 constants.ADOPTABLE_BLOCKDEV_ROOT),
8743 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8744 list(all_disks))[pnode.name]
8745 node_disks.Raise("Cannot get block device information from node %s" %
8747 node_disks = node_disks.payload
8748 delta = all_disks.difference(node_disks.keys())
8750 raise errors.OpPrereqError("Missing block device(s): %s" %
8751 utils.CommaJoin(delta),
8753 for dsk in self.disks:
8754 dsk[constants.IDISK_SIZE] = \
8755 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8757 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8759 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8760 # check OS parameters (remotely)
8761 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8763 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8765 # memory check on primary node
8767 _CheckNodeFreeMemory(self, self.pnode.name,
8768 "creating instance %s" % self.op.instance_name,
8769 self.be_full[constants.BE_MEMORY],
8772 self.dry_run_result = list(nodenames)
8774 def Exec(self, feedback_fn):
8775 """Create and add the instance to the cluster.
8778 instance = self.op.instance_name
8779 pnode_name = self.pnode.name
8781 ht_kind = self.op.hypervisor
8782 if ht_kind in constants.HTS_REQ_PORT:
8783 network_port = self.cfg.AllocatePort()
8787 disks = _GenerateDiskTemplate(self,
8788 self.op.disk_template,
8789 instance, pnode_name,
8792 self.instance_file_storage_dir,
8793 self.op.file_driver,
8797 iobj = objects.Instance(name=instance, os=self.op.os_type,
8798 primary_node=pnode_name,
8799 nics=self.nics, disks=disks,
8800 disk_template=self.op.disk_template,
8802 network_port=network_port,
8803 beparams=self.op.beparams,
8804 hvparams=self.op.hvparams,
8805 hypervisor=self.op.hypervisor,
8806 osparams=self.op.osparams,
8810 for tag in self.op.tags:
8813 if self.adopt_disks:
8814 if self.op.disk_template == constants.DT_PLAIN:
8815 # rename LVs to the newly-generated names; we need to construct
8816 # 'fake' LV disks with the old data, plus the new unique_id
8817 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8819 for t_dsk, a_dsk in zip (tmp_disks, self.disks):
8820 rename_to.append(t_dsk.logical_id)
8821 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8822 self.cfg.SetDiskID(t_dsk, pnode_name)
8823 result = self.rpc.call_blockdev_rename(pnode_name,
8824 zip(tmp_disks, rename_to))
8825 result.Raise("Failed to rename adoped LVs")
8827 feedback_fn("* creating instance disks...")
8829 _CreateDisks(self, iobj)
8830 except errors.OpExecError:
8831 self.LogWarning("Device creation failed, reverting...")
8833 _RemoveDisks(self, iobj)
8835 self.cfg.ReleaseDRBDMinors(instance)
8838 feedback_fn("adding instance %s to cluster config" % instance)
8840 self.cfg.AddInstance(iobj, self.proc.GetECId())
8842 # Declare that we don't want to remove the instance lock anymore, as we've
8843 # added the instance to the config
8844 del self.remove_locks[locking.LEVEL_INSTANCE]
8846 if self.op.mode == constants.INSTANCE_IMPORT:
8847 # Release unused nodes
8848 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8851 _ReleaseLocks(self, locking.LEVEL_NODE)
8854 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8855 feedback_fn("* wiping instance disks...")
8857 _WipeDisks(self, iobj)
8858 except errors.OpExecError, err:
8859 logging.exception("Wiping disks failed")
8860 self.LogWarning("Wiping instance disks failed (%s)", err)
8864 # Something is already wrong with the disks, don't do anything else
8866 elif self.op.wait_for_sync:
8867 disk_abort = not _WaitForSync(self, iobj)
8868 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8869 # make sure the disks are not degraded (still sync-ing is ok)
8871 feedback_fn("* checking mirrors status")
8872 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8877 _RemoveDisks(self, iobj)
8878 self.cfg.RemoveInstance(iobj.name)
8879 # Make sure the instance lock gets removed
8880 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8881 raise errors.OpExecError("There are some degraded disks for"
8884 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8885 if self.op.mode == constants.INSTANCE_CREATE:
8886 if not self.op.no_install:
8887 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
8888 not self.op.wait_for_sync)
8890 feedback_fn("* pausing disk sync to install instance OS")
8891 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
8893 for idx, success in enumerate(result.payload):
8895 logging.warn("pause-sync of instance %s for disk %d failed",
8898 feedback_fn("* running the instance OS create scripts...")
8899 # FIXME: pass debug option from opcode to backend
8900 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8901 self.op.debug_level)
8903 feedback_fn("* resuming disk sync")
8904 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
8906 for idx, success in enumerate(result.payload):
8908 logging.warn("resume-sync of instance %s for disk %d failed",
8911 result.Raise("Could not add os for instance %s"
8912 " on node %s" % (instance, pnode_name))
8914 elif self.op.mode == constants.INSTANCE_IMPORT:
8915 feedback_fn("* running the instance OS import scripts...")
8919 for idx, image in enumerate(self.src_images):
8923 # FIXME: pass debug option from opcode to backend
8924 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8925 constants.IEIO_FILE, (image, ),
8926 constants.IEIO_SCRIPT,
8927 (iobj.disks[idx], idx),
8929 transfers.append(dt)
8932 masterd.instance.TransferInstanceData(self, feedback_fn,
8933 self.op.src_node, pnode_name,
8934 self.pnode.secondary_ip,
8936 if not compat.all(import_result):
8937 self.LogWarning("Some disks for instance %s on node %s were not"
8938 " imported successfully" % (instance, pnode_name))
8940 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8941 feedback_fn("* preparing remote import...")
8942 # The source cluster will stop the instance before attempting to make a
8943 # connection. In some cases stopping an instance can take a long time,
8944 # hence the shutdown timeout is added to the connection timeout.
8945 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8946 self.op.source_shutdown_timeout)
8947 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8949 assert iobj.primary_node == self.pnode.name
8951 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8952 self.source_x509_ca,
8953 self._cds, timeouts)
8954 if not compat.all(disk_results):
8955 # TODO: Should the instance still be started, even if some disks
8956 # failed to import (valid for local imports, too)?
8957 self.LogWarning("Some disks for instance %s on node %s were not"
8958 " imported successfully" % (instance, pnode_name))
8960 # Run rename script on newly imported instance
8961 assert iobj.name == instance
8962 feedback_fn("Running rename script for %s" % instance)
8963 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8964 self.source_instance_name,
8965 self.op.debug_level)
8967 self.LogWarning("Failed to run rename script for %s on node"
8968 " %s: %s" % (instance, pnode_name, result.fail_msg))
8971 # also checked in the prereq part
8972 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8976 iobj.admin_up = True
8977 self.cfg.Update(iobj, feedback_fn)
8978 logging.info("Starting instance %s on node %s", instance, pnode_name)
8979 feedback_fn("* starting instance...")
8980 result = self.rpc.call_instance_start(pnode_name, iobj,
8982 result.Raise("Could not start instance")
8984 return list(iobj.all_nodes)
8987 class LUInstanceConsole(NoHooksLU):
8988 """Connect to an instance's console.
8990 This is somewhat special in that it returns the command line that
8991 you need to run on the master node in order to connect to the
8997 def ExpandNames(self):
8998 self._ExpandAndLockInstance()
9000 def CheckPrereq(self):
9001 """Check prerequisites.
9003 This checks that the instance is in the cluster.
9006 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9007 assert self.instance is not None, \
9008 "Cannot retrieve locked instance %s" % self.op.instance_name
9009 _CheckNodeOnline(self, self.instance.primary_node)
9011 def Exec(self, feedback_fn):
9012 """Connect to the console of an instance
9015 instance = self.instance
9016 node = instance.primary_node
9018 node_insts = self.rpc.call_instance_list([node],
9019 [instance.hypervisor])[node]
9020 node_insts.Raise("Can't get node information from %s" % node)
9022 if instance.name not in node_insts.payload:
9023 if instance.admin_up:
9024 state = constants.INSTST_ERRORDOWN
9026 state = constants.INSTST_ADMINDOWN
9027 raise errors.OpExecError("Instance %s is not running (state %s)" %
9028 (instance.name, state))
9030 logging.debug("Connecting to console of %s on %s", instance.name, node)
9032 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9035 def _GetInstanceConsole(cluster, instance):
9036 """Returns console information for an instance.
9038 @type cluster: L{objects.Cluster}
9039 @type instance: L{objects.Instance}
9043 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9044 # beparams and hvparams are passed separately, to avoid editing the
9045 # instance and then saving the defaults in the instance itself.
9046 hvparams = cluster.FillHV(instance)
9047 beparams = cluster.FillBE(instance)
9048 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9050 assert console.instance == instance.name
9051 assert console.Validate()
9053 return console.ToDict()
9056 class LUInstanceReplaceDisks(LogicalUnit):
9057 """Replace the disks of an instance.
9060 HPATH = "mirrors-replace"
9061 HTYPE = constants.HTYPE_INSTANCE
9064 def CheckArguments(self):
9065 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9068 def ExpandNames(self):
9069 self._ExpandAndLockInstance()
9071 assert locking.LEVEL_NODE not in self.needed_locks
9072 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9074 assert self.op.iallocator is None or self.op.remote_node is None, \
9075 "Conflicting options"
9077 if self.op.remote_node is not None:
9078 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9080 # Warning: do not remove the locking of the new secondary here
9081 # unless DRBD8.AddChildren is changed to work in parallel;
9082 # currently it doesn't since parallel invocations of
9083 # FindUnusedMinor will conflict
9084 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9085 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9087 self.needed_locks[locking.LEVEL_NODE] = []
9088 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9090 if self.op.iallocator is not None:
9091 # iallocator will select a new node in the same group
9092 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9094 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9095 self.op.iallocator, self.op.remote_node,
9096 self.op.disks, False, self.op.early_release)
9098 self.tasklets = [self.replacer]
9100 def DeclareLocks(self, level):
9101 if level == locking.LEVEL_NODEGROUP:
9102 assert self.op.remote_node is None
9103 assert self.op.iallocator is not None
9104 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9106 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9107 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9108 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9110 elif level == locking.LEVEL_NODE:
9111 if self.op.iallocator is not None:
9112 assert self.op.remote_node is None
9113 assert not self.needed_locks[locking.LEVEL_NODE]
9115 # Lock member nodes of all locked groups
9116 self.needed_locks[locking.LEVEL_NODE] = [node_name
9117 for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
9118 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9120 self._LockInstancesNodes()
9122 def BuildHooksEnv(self):
9125 This runs on the master, the primary and all the secondaries.
9128 instance = self.replacer.instance
9130 "MODE": self.op.mode,
9131 "NEW_SECONDARY": self.op.remote_node,
9132 "OLD_SECONDARY": instance.secondary_nodes[0],
9134 env.update(_BuildInstanceHookEnvByObject(self, instance))
9137 def BuildHooksNodes(self):
9138 """Build hooks nodes.
9141 instance = self.replacer.instance
9143 self.cfg.GetMasterNode(),
9144 instance.primary_node,
9146 if self.op.remote_node is not None:
9147 nl.append(self.op.remote_node)
9150 def CheckPrereq(self):
9151 """Check prerequisites.
9154 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9155 self.op.iallocator is None)
9157 owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
9159 groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9160 if owned_groups != groups:
9161 raise errors.OpExecError("Node groups used by instance '%s' changed"
9162 " since lock was acquired, current list is %r,"
9163 " used to be '%s'" %
9164 (self.op.instance_name,
9165 utils.CommaJoin(groups),
9166 utils.CommaJoin(owned_groups)))
9168 return LogicalUnit.CheckPrereq(self)
9171 class TLReplaceDisks(Tasklet):
9172 """Replaces disks for an instance.
9174 Note: Locking is not within the scope of this class.
9177 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9178 disks, delay_iallocator, early_release):
9179 """Initializes this class.
9182 Tasklet.__init__(self, lu)
9185 self.instance_name = instance_name
9187 self.iallocator_name = iallocator_name
9188 self.remote_node = remote_node
9190 self.delay_iallocator = delay_iallocator
9191 self.early_release = early_release
9194 self.instance = None
9195 self.new_node = None
9196 self.target_node = None
9197 self.other_node = None
9198 self.remote_node_info = None
9199 self.node_secondary_ip = None
9202 def CheckArguments(mode, remote_node, iallocator):
9203 """Helper function for users of this class.
9206 # check for valid parameter combination
9207 if mode == constants.REPLACE_DISK_CHG:
9208 if remote_node is None and iallocator is None:
9209 raise errors.OpPrereqError("When changing the secondary either an"
9210 " iallocator script must be used or the"
9211 " new node given", errors.ECODE_INVAL)
9213 if remote_node is not None and iallocator is not None:
9214 raise errors.OpPrereqError("Give either the iallocator or the new"
9215 " secondary, not both", errors.ECODE_INVAL)
9217 elif remote_node is not None or iallocator is not None:
9218 # Not replacing the secondary
9219 raise errors.OpPrereqError("The iallocator and new node options can"
9220 " only be used when changing the"
9221 " secondary node", errors.ECODE_INVAL)
9224 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9225 """Compute a new secondary node using an IAllocator.
9228 ial = IAllocator(lu.cfg, lu.rpc,
9229 mode=constants.IALLOCATOR_MODE_RELOC,
9231 relocate_from=list(relocate_from))
9233 ial.Run(iallocator_name)
9236 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9237 " %s" % (iallocator_name, ial.info),
9240 if len(ial.result) != ial.required_nodes:
9241 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9242 " of nodes (%s), required %s" %
9244 len(ial.result), ial.required_nodes),
9247 remote_node_name = ial.result[0]
9249 lu.LogInfo("Selected new secondary for instance '%s': %s",
9250 instance_name, remote_node_name)
9252 return remote_node_name
9254 def _FindFaultyDisks(self, node_name):
9255 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9258 def _CheckDisksActivated(self, instance):
9259 """Checks if the instance disks are activated.
9261 @param instance: The instance to check disks
9262 @return: True if they are activated, False otherwise
9265 nodes = instance.all_nodes
9267 for idx, dev in enumerate(instance.disks):
9269 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9270 self.cfg.SetDiskID(dev, node)
9272 result = self.rpc.call_blockdev_find(node, dev)
9276 elif result.fail_msg or not result.payload:
9281 def CheckPrereq(self):
9282 """Check prerequisites.
9284 This checks that the instance is in the cluster.
9287 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9288 assert instance is not None, \
9289 "Cannot retrieve locked instance %s" % self.instance_name
9291 if instance.disk_template != constants.DT_DRBD8:
9292 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9293 " instances", errors.ECODE_INVAL)
9295 if len(instance.secondary_nodes) != 1:
9296 raise errors.OpPrereqError("The instance has a strange layout,"
9297 " expected one secondary but found %d" %
9298 len(instance.secondary_nodes),
9301 if not self.delay_iallocator:
9302 self._CheckPrereq2()
9304 def _CheckPrereq2(self):
9305 """Check prerequisites, second part.
9307 This function should always be part of CheckPrereq. It was separated and is
9308 now called from Exec because during node evacuation iallocator was only
9309 called with an unmodified cluster model, not taking planned changes into
9313 instance = self.instance
9314 secondary_node = instance.secondary_nodes[0]
9316 if self.iallocator_name is None:
9317 remote_node = self.remote_node
9319 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9320 instance.name, instance.secondary_nodes)
9322 if remote_node is None:
9323 self.remote_node_info = None
9325 assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
9326 "Remote node '%s' is not locked" % remote_node
9328 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9329 assert self.remote_node_info is not None, \
9330 "Cannot retrieve locked node %s" % remote_node
9332 if remote_node == self.instance.primary_node:
9333 raise errors.OpPrereqError("The specified node is the primary node of"
9334 " the instance", errors.ECODE_INVAL)
9336 if remote_node == secondary_node:
9337 raise errors.OpPrereqError("The specified node is already the"
9338 " secondary node of the instance",
9341 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9342 constants.REPLACE_DISK_CHG):
9343 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9346 if self.mode == constants.REPLACE_DISK_AUTO:
9347 if not self._CheckDisksActivated(instance):
9348 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9349 " first" % self.instance_name,
9351 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9352 faulty_secondary = self._FindFaultyDisks(secondary_node)
9354 if faulty_primary and faulty_secondary:
9355 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9356 " one node and can not be repaired"
9357 " automatically" % self.instance_name,
9361 self.disks = faulty_primary
9362 self.target_node = instance.primary_node
9363 self.other_node = secondary_node
9364 check_nodes = [self.target_node, self.other_node]
9365 elif faulty_secondary:
9366 self.disks = faulty_secondary
9367 self.target_node = secondary_node
9368 self.other_node = instance.primary_node
9369 check_nodes = [self.target_node, self.other_node]
9375 # Non-automatic modes
9376 if self.mode == constants.REPLACE_DISK_PRI:
9377 self.target_node = instance.primary_node
9378 self.other_node = secondary_node
9379 check_nodes = [self.target_node, self.other_node]
9381 elif self.mode == constants.REPLACE_DISK_SEC:
9382 self.target_node = secondary_node
9383 self.other_node = instance.primary_node
9384 check_nodes = [self.target_node, self.other_node]
9386 elif self.mode == constants.REPLACE_DISK_CHG:
9387 self.new_node = remote_node
9388 self.other_node = instance.primary_node
9389 self.target_node = secondary_node
9390 check_nodes = [self.new_node, self.other_node]
9392 _CheckNodeNotDrained(self.lu, remote_node)
9393 _CheckNodeVmCapable(self.lu, remote_node)
9395 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9396 assert old_node_info is not None
9397 if old_node_info.offline and not self.early_release:
9398 # doesn't make sense to delay the release
9399 self.early_release = True
9400 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9401 " early-release mode", secondary_node)
9404 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9407 # If not specified all disks should be replaced
9409 self.disks = range(len(self.instance.disks))
9411 for node in check_nodes:
9412 _CheckNodeOnline(self.lu, node)
9414 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9417 if node_name is not None)
9419 # Release unneeded node locks
9420 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9422 # Release any owned node group
9423 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9424 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9426 # Check whether disks are valid
9427 for disk_idx in self.disks:
9428 instance.FindDisk(disk_idx)
9430 # Get secondary node IP addresses
9431 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9432 in self.cfg.GetMultiNodeInfo(touched_nodes))
9434 def Exec(self, feedback_fn):
9435 """Execute disk replacement.
9437 This dispatches the disk replacement to the appropriate handler.
9440 if self.delay_iallocator:
9441 self._CheckPrereq2()
9444 # Verify owned locks before starting operation
9445 owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9446 assert set(owned_locks) == set(self.node_secondary_ip), \
9447 ("Incorrect node locks, owning %s, expected %s" %
9448 (owned_locks, self.node_secondary_ip.keys()))
9450 owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
9451 assert list(owned_locks) == [self.instance_name], \
9452 "Instance '%s' not locked" % self.instance_name
9454 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9455 "Should not own any node group lock at this point"
9458 feedback_fn("No disks need replacement")
9461 feedback_fn("Replacing disk(s) %s for %s" %
9462 (utils.CommaJoin(self.disks), self.instance.name))
9464 activate_disks = (not self.instance.admin_up)
9466 # Activate the instance disks if we're replacing them on a down instance
9468 _StartInstanceDisks(self.lu, self.instance, True)
9471 # Should we replace the secondary node?
9472 if self.new_node is not None:
9473 fn = self._ExecDrbd8Secondary
9475 fn = self._ExecDrbd8DiskOnly
9477 result = fn(feedback_fn)
9479 # Deactivate the instance disks if we're replacing them on a
9482 _SafeShutdownInstanceDisks(self.lu, self.instance)
9485 # Verify owned locks
9486 owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9487 nodes = frozenset(self.node_secondary_ip)
9488 assert ((self.early_release and not owned_locks) or
9489 (not self.early_release and not (set(owned_locks) - nodes))), \
9490 ("Not owning the correct locks, early_release=%s, owned=%r,"
9491 " nodes=%r" % (self.early_release, owned_locks, nodes))
9495 def _CheckVolumeGroup(self, nodes):
9496 self.lu.LogInfo("Checking volume groups")
9498 vgname = self.cfg.GetVGName()
9500 # Make sure volume group exists on all involved nodes
9501 results = self.rpc.call_vg_list(nodes)
9503 raise errors.OpExecError("Can't list volume groups on the nodes")
9507 res.Raise("Error checking node %s" % node)
9508 if vgname not in res.payload:
9509 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9512 def _CheckDisksExistence(self, nodes):
9513 # Check disk existence
9514 for idx, dev in enumerate(self.instance.disks):
9515 if idx not in self.disks:
9519 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9520 self.cfg.SetDiskID(dev, node)
9522 result = self.rpc.call_blockdev_find(node, dev)
9524 msg = result.fail_msg
9525 if msg or not result.payload:
9527 msg = "disk not found"
9528 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9531 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9532 for idx, dev in enumerate(self.instance.disks):
9533 if idx not in self.disks:
9536 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9539 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9541 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9542 " replace disks for instance %s" %
9543 (node_name, self.instance.name))
9545 def _CreateNewStorage(self, node_name):
9546 """Create new storage on the primary or secondary node.
9548 This is only used for same-node replaces, not for changing the
9549 secondary node, hence we don't want to modify the existing disk.
9554 for idx, dev in enumerate(self.instance.disks):
9555 if idx not in self.disks:
9558 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9560 self.cfg.SetDiskID(dev, node_name)
9562 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9563 names = _GenerateUniqueNames(self.lu, lv_names)
9565 vg_data = dev.children[0].logical_id[0]
9566 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9567 logical_id=(vg_data, names[0]))
9568 vg_meta = dev.children[1].logical_id[0]
9569 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9570 logical_id=(vg_meta, names[1]))
9572 new_lvs = [lv_data, lv_meta]
9573 old_lvs = [child.Copy() for child in dev.children]
9574 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9576 # we pass force_create=True to force the LVM creation
9577 for new_lv in new_lvs:
9578 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9579 _GetInstanceInfoText(self.instance), False)
9583 def _CheckDevices(self, node_name, iv_names):
9584 for name, (dev, _, _) in iv_names.iteritems():
9585 self.cfg.SetDiskID(dev, node_name)
9587 result = self.rpc.call_blockdev_find(node_name, dev)
9589 msg = result.fail_msg
9590 if msg or not result.payload:
9592 msg = "disk not found"
9593 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9596 if result.payload.is_degraded:
9597 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9599 def _RemoveOldStorage(self, node_name, iv_names):
9600 for name, (_, old_lvs, _) in iv_names.iteritems():
9601 self.lu.LogInfo("Remove logical volumes for %s" % name)
9604 self.cfg.SetDiskID(lv, node_name)
9606 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9608 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9609 hint="remove unused LVs manually")
9611 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
9612 """Replace a disk on the primary or secondary for DRBD 8.
9614 The algorithm for replace is quite complicated:
9616 1. for each disk to be replaced:
9618 1. create new LVs on the target node with unique names
9619 1. detach old LVs from the drbd device
9620 1. rename old LVs to name_replaced.<time_t>
9621 1. rename new LVs to old LVs
9622 1. attach the new LVs (with the old names now) to the drbd device
9624 1. wait for sync across all devices
9626 1. for each modified disk:
9628 1. remove old LVs (which have the name name_replaces.<time_t>)
9630 Failures are not very well handled.
9635 # Step: check device activation
9636 self.lu.LogStep(1, steps_total, "Check device existence")
9637 self._CheckDisksExistence([self.other_node, self.target_node])
9638 self._CheckVolumeGroup([self.target_node, self.other_node])
9640 # Step: check other node consistency
9641 self.lu.LogStep(2, steps_total, "Check peer consistency")
9642 self._CheckDisksConsistency(self.other_node,
9643 self.other_node == self.instance.primary_node,
9646 # Step: create new storage
9647 self.lu.LogStep(3, steps_total, "Allocate new storage")
9648 iv_names = self._CreateNewStorage(self.target_node)
9650 # Step: for each lv, detach+rename*2+attach
9651 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9652 for dev, old_lvs, new_lvs in iv_names.itervalues():
9653 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9655 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9657 result.Raise("Can't detach drbd from local storage on node"
9658 " %s for device %s" % (self.target_node, dev.iv_name))
9660 #cfg.Update(instance)
9662 # ok, we created the new LVs, so now we know we have the needed
9663 # storage; as such, we proceed on the target node to rename
9664 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9665 # using the assumption that logical_id == physical_id (which in
9666 # turn is the unique_id on that node)
9668 # FIXME(iustin): use a better name for the replaced LVs
9669 temp_suffix = int(time.time())
9670 ren_fn = lambda d, suff: (d.physical_id[0],
9671 d.physical_id[1] + "_replaced-%s" % suff)
9673 # Build the rename list based on what LVs exist on the node
9674 rename_old_to_new = []
9675 for to_ren in old_lvs:
9676 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9677 if not result.fail_msg and result.payload:
9679 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9681 self.lu.LogInfo("Renaming the old LVs on the target node")
9682 result = self.rpc.call_blockdev_rename(self.target_node,
9684 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9686 # Now we rename the new LVs to the old LVs
9687 self.lu.LogInfo("Renaming the new LVs on the target node")
9688 rename_new_to_old = [(new, old.physical_id)
9689 for old, new in zip(old_lvs, new_lvs)]
9690 result = self.rpc.call_blockdev_rename(self.target_node,
9692 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9694 # Intermediate steps of in memory modifications
9695 for old, new in zip(old_lvs, new_lvs):
9696 new.logical_id = old.logical_id
9697 self.cfg.SetDiskID(new, self.target_node)
9699 # We need to modify old_lvs so that removal later removes the
9700 # right LVs, not the newly added ones; note that old_lvs is a
9702 for disk in old_lvs:
9703 disk.logical_id = ren_fn(disk, temp_suffix)
9704 self.cfg.SetDiskID(disk, self.target_node)
9706 # Now that the new lvs have the old name, we can add them to the device
9707 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9708 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9710 msg = result.fail_msg
9712 for new_lv in new_lvs:
9713 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9716 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9717 hint=("cleanup manually the unused logical"
9719 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9722 if self.early_release:
9723 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9725 self._RemoveOldStorage(self.target_node, iv_names)
9726 # WARNING: we release both node locks here, do not do other RPCs
9727 # than WaitForSync to the primary node
9728 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9729 names=[self.target_node, self.other_node])
9732 # This can fail as the old devices are degraded and _WaitForSync
9733 # does a combined result over all disks, so we don't check its return value
9734 self.lu.LogStep(cstep, steps_total, "Sync devices")
9736 _WaitForSync(self.lu, self.instance)
9738 # Check all devices manually
9739 self._CheckDevices(self.instance.primary_node, iv_names)
9741 # Step: remove old storage
9742 if not self.early_release:
9743 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9745 self._RemoveOldStorage(self.target_node, iv_names)
9747 def _ExecDrbd8Secondary(self, feedback_fn):
9748 """Replace the secondary node for DRBD 8.
9750 The algorithm for replace is quite complicated:
9751 - for all disks of the instance:
9752 - create new LVs on the new node with same names
9753 - shutdown the drbd device on the old secondary
9754 - disconnect the drbd network on the primary
9755 - create the drbd device on the new secondary
9756 - network attach the drbd on the primary, using an artifice:
9757 the drbd code for Attach() will connect to the network if it
9758 finds a device which is connected to the good local disks but
9760 - wait for sync across all devices
9761 - remove all disks from the old secondary
9763 Failures are not very well handled.
9768 # Step: check device activation
9769 self.lu.LogStep(1, steps_total, "Check device existence")
9770 self._CheckDisksExistence([self.instance.primary_node])
9771 self._CheckVolumeGroup([self.instance.primary_node])
9773 # Step: check other node consistency
9774 self.lu.LogStep(2, steps_total, "Check peer consistency")
9775 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9777 # Step: create new storage
9778 self.lu.LogStep(3, steps_total, "Allocate new storage")
9779 for idx, dev in enumerate(self.instance.disks):
9780 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9781 (self.new_node, idx))
9782 # we pass force_create=True to force LVM creation
9783 for new_lv in dev.children:
9784 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9785 _GetInstanceInfoText(self.instance), False)
9787 # Step 4: dbrd minors and drbd setups changes
9788 # after this, we must manually remove the drbd minors on both the
9789 # error and the success paths
9790 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9791 minors = self.cfg.AllocateDRBDMinor([self.new_node
9792 for dev in self.instance.disks],
9794 logging.debug("Allocated minors %r", minors)
9797 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9798 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9799 (self.new_node, idx))
9800 # create new devices on new_node; note that we create two IDs:
9801 # one without port, so the drbd will be activated without
9802 # networking information on the new node at this stage, and one
9803 # with network, for the latter activation in step 4
9804 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9805 if self.instance.primary_node == o_node1:
9808 assert self.instance.primary_node == o_node2, "Three-node instance?"
9811 new_alone_id = (self.instance.primary_node, self.new_node, None,
9812 p_minor, new_minor, o_secret)
9813 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9814 p_minor, new_minor, o_secret)
9816 iv_names[idx] = (dev, dev.children, new_net_id)
9817 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9819 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9820 logical_id=new_alone_id,
9821 children=dev.children,
9824 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9825 _GetInstanceInfoText(self.instance), False)
9826 except errors.GenericError:
9827 self.cfg.ReleaseDRBDMinors(self.instance.name)
9830 # We have new devices, shutdown the drbd on the old secondary
9831 for idx, dev in enumerate(self.instance.disks):
9832 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9833 self.cfg.SetDiskID(dev, self.target_node)
9834 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9836 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9837 "node: %s" % (idx, msg),
9838 hint=("Please cleanup this device manually as"
9839 " soon as possible"))
9841 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9842 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9843 self.node_secondary_ip,
9844 self.instance.disks)\
9845 [self.instance.primary_node]
9847 msg = result.fail_msg
9849 # detaches didn't succeed (unlikely)
9850 self.cfg.ReleaseDRBDMinors(self.instance.name)
9851 raise errors.OpExecError("Can't detach the disks from the network on"
9852 " old node: %s" % (msg,))
9854 # if we managed to detach at least one, we update all the disks of
9855 # the instance to point to the new secondary
9856 self.lu.LogInfo("Updating instance configuration")
9857 for dev, _, new_logical_id in iv_names.itervalues():
9858 dev.logical_id = new_logical_id
9859 self.cfg.SetDiskID(dev, self.instance.primary_node)
9861 self.cfg.Update(self.instance, feedback_fn)
9863 # and now perform the drbd attach
9864 self.lu.LogInfo("Attaching primary drbds to new secondary"
9865 " (standalone => connected)")
9866 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9868 self.node_secondary_ip,
9869 self.instance.disks,
9872 for to_node, to_result in result.items():
9873 msg = to_result.fail_msg
9875 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9877 hint=("please do a gnt-instance info to see the"
9878 " status of disks"))
9880 if self.early_release:
9881 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9883 self._RemoveOldStorage(self.target_node, iv_names)
9884 # WARNING: we release all node locks here, do not do other RPCs
9885 # than WaitForSync to the primary node
9886 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9887 names=[self.instance.primary_node,
9892 # This can fail as the old devices are degraded and _WaitForSync
9893 # does a combined result over all disks, so we don't check its return value
9894 self.lu.LogStep(cstep, steps_total, "Sync devices")
9896 _WaitForSync(self.lu, self.instance)
9898 # Check all devices manually
9899 self._CheckDevices(self.instance.primary_node, iv_names)
9901 # Step: remove old storage
9902 if not self.early_release:
9903 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9904 self._RemoveOldStorage(self.target_node, iv_names)
9907 class LURepairNodeStorage(NoHooksLU):
9908 """Repairs the volume group on a node.
9913 def CheckArguments(self):
9914 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9916 storage_type = self.op.storage_type
9918 if (constants.SO_FIX_CONSISTENCY not in
9919 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9920 raise errors.OpPrereqError("Storage units of type '%s' can not be"
9921 " repaired" % storage_type,
9924 def ExpandNames(self):
9925 self.needed_locks = {
9926 locking.LEVEL_NODE: [self.op.node_name],
9929 def _CheckFaultyDisks(self, instance, node_name):
9930 """Ensure faulty disks abort the opcode or at least warn."""
9932 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9934 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9935 " node '%s'" % (instance.name, node_name),
9937 except errors.OpPrereqError, err:
9938 if self.op.ignore_consistency:
9939 self.proc.LogWarning(str(err.args[0]))
9943 def CheckPrereq(self):
9944 """Check prerequisites.
9947 # Check whether any instance on this node has faulty disks
9948 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9949 if not inst.admin_up:
9951 check_nodes = set(inst.all_nodes)
9952 check_nodes.discard(self.op.node_name)
9953 for inst_node_name in check_nodes:
9954 self._CheckFaultyDisks(inst, inst_node_name)
9956 def Exec(self, feedback_fn):
9957 feedback_fn("Repairing storage unit '%s' on %s ..." %
9958 (self.op.name, self.op.node_name))
9960 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9961 result = self.rpc.call_storage_execute(self.op.node_name,
9962 self.op.storage_type, st_args,
9964 constants.SO_FIX_CONSISTENCY)
9965 result.Raise("Failed to repair storage unit '%s' on %s" %
9966 (self.op.name, self.op.node_name))
9969 class LUNodeEvacuate(NoHooksLU):
9970 """Evacuates instances off a list of nodes.
9975 def CheckArguments(self):
9976 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9978 def ExpandNames(self):
9979 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9981 if self.op.remote_node is not None:
9982 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9983 assert self.op.remote_node
9985 if self.op.remote_node == self.op.node_name:
9986 raise errors.OpPrereqError("Can not use evacuated node as a new"
9987 " secondary node", errors.ECODE_INVAL)
9989 if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
9990 raise errors.OpPrereqError("Without the use of an iallocator only"
9991 " secondary instances can be evacuated",
9995 self.share_locks = _ShareAll()
9996 self.needed_locks = {
9997 locking.LEVEL_INSTANCE: [],
9998 locking.LEVEL_NODEGROUP: [],
9999 locking.LEVEL_NODE: [],
10002 if self.op.remote_node is None:
10003 # Iallocator will choose any node(s) in the same group
10004 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10006 group_nodes = frozenset([self.op.remote_node])
10008 # Determine nodes to be locked
10009 self.lock_nodes = set([self.op.node_name]) | group_nodes
10011 def _DetermineInstances(self):
10012 """Builds list of instances to operate on.
10015 assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10017 if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10018 # Primary instances only
10019 inst_fn = _GetNodePrimaryInstances
10020 assert self.op.remote_node is None, \
10021 "Evacuating primary instances requires iallocator"
10022 elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10023 # Secondary instances only
10024 inst_fn = _GetNodeSecondaryInstances
10027 assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10028 inst_fn = _GetNodeInstances
10030 return inst_fn(self.cfg, self.op.node_name)
10032 def DeclareLocks(self, level):
10033 if level == locking.LEVEL_INSTANCE:
10034 # Lock instances optimistically, needs verification once node and group
10035 # locks have been acquired
10036 self.needed_locks[locking.LEVEL_INSTANCE] = \
10037 set(i.name for i in self._DetermineInstances())
10039 elif level == locking.LEVEL_NODEGROUP:
10040 # Lock node groups optimistically, needs verification once nodes have
10042 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10043 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10045 elif level == locking.LEVEL_NODE:
10046 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10048 def CheckPrereq(self):
10050 owned_instances = self.glm.list_owned(locking.LEVEL_INSTANCE)
10051 owned_nodes = self.glm.list_owned(locking.LEVEL_NODE)
10052 owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
10054 assert owned_nodes == self.lock_nodes
10056 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10057 if owned_groups != wanted_groups:
10058 raise errors.OpExecError("Node groups changed since locks were acquired,"
10059 " current groups are '%s', used to be '%s'" %
10060 (utils.CommaJoin(wanted_groups),
10061 utils.CommaJoin(owned_groups)))
10063 # Determine affected instances
10064 self.instances = self._DetermineInstances()
10065 self.instance_names = [i.name for i in self.instances]
10067 if set(self.instance_names) != owned_instances:
10068 raise errors.OpExecError("Instances on node '%s' changed since locks"
10069 " were acquired, current instances are '%s',"
10070 " used to be '%s'" %
10071 (self.op.node_name,
10072 utils.CommaJoin(self.instance_names),
10073 utils.CommaJoin(owned_instances)))
10075 if self.instance_names:
10076 self.LogInfo("Evacuating instances from node '%s': %s",
10078 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10080 self.LogInfo("No instances to evacuate from node '%s'",
10083 if self.op.remote_node is not None:
10084 for i in self.instances:
10085 if i.primary_node == self.op.remote_node:
10086 raise errors.OpPrereqError("Node %s is the primary node of"
10087 " instance %s, cannot use it as"
10089 (self.op.remote_node, i.name),
10090 errors.ECODE_INVAL)
10092 def Exec(self, feedback_fn):
10093 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10095 if not self.instance_names:
10096 # No instances to evacuate
10099 elif self.op.iallocator is not None:
10100 # TODO: Implement relocation to other group
10101 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10102 evac_mode=self.op.mode,
10103 instances=list(self.instance_names))
10105 ial.Run(self.op.iallocator)
10107 if not ial.success:
10108 raise errors.OpPrereqError("Can't compute node evacuation using"
10109 " iallocator '%s': %s" %
10110 (self.op.iallocator, ial.info),
10111 errors.ECODE_NORES)
10113 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10115 elif self.op.remote_node is not None:
10116 assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10118 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10119 remote_node=self.op.remote_node,
10121 mode=constants.REPLACE_DISK_CHG,
10122 early_release=self.op.early_release)]
10123 for instance_name in self.instance_names
10127 raise errors.ProgrammerError("No iallocator or remote node")
10129 return ResultWithJobs(jobs)
10132 def _SetOpEarlyRelease(early_release, op):
10133 """Sets C{early_release} flag on opcodes if available.
10137 op.early_release = early_release
10138 except AttributeError:
10139 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10144 def _NodeEvacDest(use_nodes, group, nodes):
10145 """Returns group or nodes depending on caller's choice.
10149 return utils.CommaJoin(nodes)
10154 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10155 """Unpacks the result of change-group and node-evacuate iallocator requests.
10157 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10158 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10160 @type lu: L{LogicalUnit}
10161 @param lu: Logical unit instance
10162 @type alloc_result: tuple/list
10163 @param alloc_result: Result from iallocator
10164 @type early_release: bool
10165 @param early_release: Whether to release locks early if possible
10166 @type use_nodes: bool
10167 @param use_nodes: Whether to display node names instead of groups
10170 (moved, failed, jobs) = alloc_result
10173 lu.LogWarning("Unable to evacuate instances %s",
10174 utils.CommaJoin("%s (%s)" % (name, reason)
10175 for (name, reason) in failed))
10178 lu.LogInfo("Instances to be moved: %s",
10179 utils.CommaJoin("%s (to %s)" %
10180 (name, _NodeEvacDest(use_nodes, group, nodes))
10181 for (name, group, nodes) in moved))
10183 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10184 map(opcodes.OpCode.LoadOpCode, ops))
10188 class LUInstanceGrowDisk(LogicalUnit):
10189 """Grow a disk of an instance.
10192 HPATH = "disk-grow"
10193 HTYPE = constants.HTYPE_INSTANCE
10196 def ExpandNames(self):
10197 self._ExpandAndLockInstance()
10198 self.needed_locks[locking.LEVEL_NODE] = []
10199 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10201 def DeclareLocks(self, level):
10202 if level == locking.LEVEL_NODE:
10203 self._LockInstancesNodes()
10205 def BuildHooksEnv(self):
10206 """Build hooks env.
10208 This runs on the master, the primary and all the secondaries.
10212 "DISK": self.op.disk,
10213 "AMOUNT": self.op.amount,
10215 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10218 def BuildHooksNodes(self):
10219 """Build hooks nodes.
10222 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10225 def CheckPrereq(self):
10226 """Check prerequisites.
10228 This checks that the instance is in the cluster.
10231 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10232 assert instance is not None, \
10233 "Cannot retrieve locked instance %s" % self.op.instance_name
10234 nodenames = list(instance.all_nodes)
10235 for node in nodenames:
10236 _CheckNodeOnline(self, node)
10238 self.instance = instance
10240 if instance.disk_template not in constants.DTS_GROWABLE:
10241 raise errors.OpPrereqError("Instance's disk layout does not support"
10242 " growing", errors.ECODE_INVAL)
10244 self.disk = instance.FindDisk(self.op.disk)
10246 if instance.disk_template not in (constants.DT_FILE,
10247 constants.DT_SHARED_FILE):
10248 # TODO: check the free disk space for file, when that feature will be
10250 _CheckNodesFreeDiskPerVG(self, nodenames,
10251 self.disk.ComputeGrowth(self.op.amount))
10253 def Exec(self, feedback_fn):
10254 """Execute disk grow.
10257 instance = self.instance
10260 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10262 raise errors.OpExecError("Cannot activate block device to grow")
10264 # First run all grow ops in dry-run mode
10265 for node in instance.all_nodes:
10266 self.cfg.SetDiskID(disk, node)
10267 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10268 result.Raise("Grow request failed to node %s" % node)
10270 # We know that (as far as we can test) operations across different
10271 # nodes will succeed, time to run it for real
10272 for node in instance.all_nodes:
10273 self.cfg.SetDiskID(disk, node)
10274 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10275 result.Raise("Grow request failed to node %s" % node)
10277 # TODO: Rewrite code to work properly
10278 # DRBD goes into sync mode for a short amount of time after executing the
10279 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10280 # calling "resize" in sync mode fails. Sleeping for a short amount of
10281 # time is a work-around.
10284 disk.RecordGrow(self.op.amount)
10285 self.cfg.Update(instance, feedback_fn)
10286 if self.op.wait_for_sync:
10287 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10289 self.proc.LogWarning("Disk sync-ing has not returned a good"
10290 " status; please check the instance")
10291 if not instance.admin_up:
10292 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10293 elif not instance.admin_up:
10294 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10295 " not supposed to be running because no wait for"
10296 " sync mode was requested")
10299 class LUInstanceQueryData(NoHooksLU):
10300 """Query runtime instance data.
10305 def ExpandNames(self):
10306 self.needed_locks = {}
10308 # Use locking if requested or when non-static information is wanted
10309 if not (self.op.static or self.op.use_locking):
10310 self.LogWarning("Non-static data requested, locks need to be acquired")
10311 self.op.use_locking = True
10313 if self.op.instances or not self.op.use_locking:
10314 # Expand instance names right here
10315 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10317 # Will use acquired locks
10318 self.wanted_names = None
10320 if self.op.use_locking:
10321 self.share_locks = _ShareAll()
10323 if self.wanted_names is None:
10324 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10326 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10328 self.needed_locks[locking.LEVEL_NODE] = []
10329 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10331 def DeclareLocks(self, level):
10332 if self.op.use_locking and level == locking.LEVEL_NODE:
10333 self._LockInstancesNodes()
10335 def CheckPrereq(self):
10336 """Check prerequisites.
10338 This only checks the optional instance list against the existing names.
10341 if self.wanted_names is None:
10342 assert self.op.use_locking, "Locking was not used"
10343 self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
10345 self.wanted_instances = \
10346 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10348 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10349 """Returns the status of a block device
10352 if self.op.static or not node:
10355 self.cfg.SetDiskID(dev, node)
10357 result = self.rpc.call_blockdev_find(node, dev)
10361 result.Raise("Can't compute disk status for %s" % instance_name)
10363 status = result.payload
10367 return (status.dev_path, status.major, status.minor,
10368 status.sync_percent, status.estimated_time,
10369 status.is_degraded, status.ldisk_status)
10371 def _ComputeDiskStatus(self, instance, snode, dev):
10372 """Compute block device status.
10375 if dev.dev_type in constants.LDS_DRBD:
10376 # we change the snode then (otherwise we use the one passed in)
10377 if dev.logical_id[0] == instance.primary_node:
10378 snode = dev.logical_id[1]
10380 snode = dev.logical_id[0]
10382 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10383 instance.name, dev)
10384 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10387 dev_children = map(compat.partial(self._ComputeDiskStatus,
10394 "iv_name": dev.iv_name,
10395 "dev_type": dev.dev_type,
10396 "logical_id": dev.logical_id,
10397 "physical_id": dev.physical_id,
10398 "pstatus": dev_pstatus,
10399 "sstatus": dev_sstatus,
10400 "children": dev_children,
10405 def Exec(self, feedback_fn):
10406 """Gather and return data"""
10409 cluster = self.cfg.GetClusterInfo()
10411 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10412 for i in self.wanted_instances)
10413 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10414 if self.op.static or pnode.offline:
10415 remote_state = None
10417 self.LogWarning("Primary node %s is marked offline, returning static"
10418 " information only for instance %s" %
10419 (pnode.name, instance.name))
10421 remote_info = self.rpc.call_instance_info(instance.primary_node,
10423 instance.hypervisor)
10424 remote_info.Raise("Error checking node %s" % instance.primary_node)
10425 remote_info = remote_info.payload
10426 if remote_info and "state" in remote_info:
10427 remote_state = "up"
10429 remote_state = "down"
10431 if instance.admin_up:
10432 config_state = "up"
10434 config_state = "down"
10436 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10439 result[instance.name] = {
10440 "name": instance.name,
10441 "config_state": config_state,
10442 "run_state": remote_state,
10443 "pnode": instance.primary_node,
10444 "snodes": instance.secondary_nodes,
10446 # this happens to be the same format used for hooks
10447 "nics": _NICListToTuple(self, instance.nics),
10448 "disk_template": instance.disk_template,
10450 "hypervisor": instance.hypervisor,
10451 "network_port": instance.network_port,
10452 "hv_instance": instance.hvparams,
10453 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10454 "be_instance": instance.beparams,
10455 "be_actual": cluster.FillBE(instance),
10456 "os_instance": instance.osparams,
10457 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10458 "serial_no": instance.serial_no,
10459 "mtime": instance.mtime,
10460 "ctime": instance.ctime,
10461 "uuid": instance.uuid,
10467 class LUInstanceSetParams(LogicalUnit):
10468 """Modifies an instances's parameters.
10471 HPATH = "instance-modify"
10472 HTYPE = constants.HTYPE_INSTANCE
10475 def CheckArguments(self):
10476 if not (self.op.nics or self.op.disks or self.op.disk_template or
10477 self.op.hvparams or self.op.beparams or self.op.os_name):
10478 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10480 if self.op.hvparams:
10481 _CheckGlobalHvParams(self.op.hvparams)
10485 for disk_op, disk_dict in self.op.disks:
10486 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10487 if disk_op == constants.DDM_REMOVE:
10488 disk_addremove += 1
10490 elif disk_op == constants.DDM_ADD:
10491 disk_addremove += 1
10493 if not isinstance(disk_op, int):
10494 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10495 if not isinstance(disk_dict, dict):
10496 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10497 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10499 if disk_op == constants.DDM_ADD:
10500 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10501 if mode not in constants.DISK_ACCESS_SET:
10502 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10503 errors.ECODE_INVAL)
10504 size = disk_dict.get(constants.IDISK_SIZE, None)
10506 raise errors.OpPrereqError("Required disk parameter size missing",
10507 errors.ECODE_INVAL)
10510 except (TypeError, ValueError), err:
10511 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10512 str(err), errors.ECODE_INVAL)
10513 disk_dict[constants.IDISK_SIZE] = size
10515 # modification of disk
10516 if constants.IDISK_SIZE in disk_dict:
10517 raise errors.OpPrereqError("Disk size change not possible, use"
10518 " grow-disk", errors.ECODE_INVAL)
10520 if disk_addremove > 1:
10521 raise errors.OpPrereqError("Only one disk add or remove operation"
10522 " supported at a time", errors.ECODE_INVAL)
10524 if self.op.disks and self.op.disk_template is not None:
10525 raise errors.OpPrereqError("Disk template conversion and other disk"
10526 " changes not supported at the same time",
10527 errors.ECODE_INVAL)
10529 if (self.op.disk_template and
10530 self.op.disk_template in constants.DTS_INT_MIRROR and
10531 self.op.remote_node is None):
10532 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10533 " one requires specifying a secondary node",
10534 errors.ECODE_INVAL)
10538 for nic_op, nic_dict in self.op.nics:
10539 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10540 if nic_op == constants.DDM_REMOVE:
10543 elif nic_op == constants.DDM_ADD:
10546 if not isinstance(nic_op, int):
10547 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10548 if not isinstance(nic_dict, dict):
10549 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10550 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10552 # nic_dict should be a dict
10553 nic_ip = nic_dict.get(constants.INIC_IP, None)
10554 if nic_ip is not None:
10555 if nic_ip.lower() == constants.VALUE_NONE:
10556 nic_dict[constants.INIC_IP] = None
10558 if not netutils.IPAddress.IsValid(nic_ip):
10559 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10560 errors.ECODE_INVAL)
10562 nic_bridge = nic_dict.get("bridge", None)
10563 nic_link = nic_dict.get(constants.INIC_LINK, None)
10564 if nic_bridge and nic_link:
10565 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10566 " at the same time", errors.ECODE_INVAL)
10567 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10568 nic_dict["bridge"] = None
10569 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10570 nic_dict[constants.INIC_LINK] = None
10572 if nic_op == constants.DDM_ADD:
10573 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10574 if nic_mac is None:
10575 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10577 if constants.INIC_MAC in nic_dict:
10578 nic_mac = nic_dict[constants.INIC_MAC]
10579 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10580 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10582 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10583 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10584 " modifying an existing nic",
10585 errors.ECODE_INVAL)
10587 if nic_addremove > 1:
10588 raise errors.OpPrereqError("Only one NIC add or remove operation"
10589 " supported at a time", errors.ECODE_INVAL)
10591 def ExpandNames(self):
10592 self._ExpandAndLockInstance()
10593 self.needed_locks[locking.LEVEL_NODE] = []
10594 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10596 def DeclareLocks(self, level):
10597 if level == locking.LEVEL_NODE:
10598 self._LockInstancesNodes()
10599 if self.op.disk_template and self.op.remote_node:
10600 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10601 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10603 def BuildHooksEnv(self):
10604 """Build hooks env.
10606 This runs on the master, primary and secondaries.
10610 if constants.BE_MEMORY in self.be_new:
10611 args["memory"] = self.be_new[constants.BE_MEMORY]
10612 if constants.BE_VCPUS in self.be_new:
10613 args["vcpus"] = self.be_new[constants.BE_VCPUS]
10614 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10615 # information at all.
10618 nic_override = dict(self.op.nics)
10619 for idx, nic in enumerate(self.instance.nics):
10620 if idx in nic_override:
10621 this_nic_override = nic_override[idx]
10623 this_nic_override = {}
10624 if constants.INIC_IP in this_nic_override:
10625 ip = this_nic_override[constants.INIC_IP]
10628 if constants.INIC_MAC in this_nic_override:
10629 mac = this_nic_override[constants.INIC_MAC]
10632 if idx in self.nic_pnew:
10633 nicparams = self.nic_pnew[idx]
10635 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10636 mode = nicparams[constants.NIC_MODE]
10637 link = nicparams[constants.NIC_LINK]
10638 args["nics"].append((ip, mac, mode, link))
10639 if constants.DDM_ADD in nic_override:
10640 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10641 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10642 nicparams = self.nic_pnew[constants.DDM_ADD]
10643 mode = nicparams[constants.NIC_MODE]
10644 link = nicparams[constants.NIC_LINK]
10645 args["nics"].append((ip, mac, mode, link))
10646 elif constants.DDM_REMOVE in nic_override:
10647 del args["nics"][-1]
10649 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10650 if self.op.disk_template:
10651 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10655 def BuildHooksNodes(self):
10656 """Build hooks nodes.
10659 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10662 def CheckPrereq(self):
10663 """Check prerequisites.
10665 This only checks the instance list against the existing names.
10668 # checking the new params on the primary/secondary nodes
10670 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10671 cluster = self.cluster = self.cfg.GetClusterInfo()
10672 assert self.instance is not None, \
10673 "Cannot retrieve locked instance %s" % self.op.instance_name
10674 pnode = instance.primary_node
10675 nodelist = list(instance.all_nodes)
10678 if self.op.os_name and not self.op.force:
10679 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10680 self.op.force_variant)
10681 instance_os = self.op.os_name
10683 instance_os = instance.os
10685 if self.op.disk_template:
10686 if instance.disk_template == self.op.disk_template:
10687 raise errors.OpPrereqError("Instance already has disk template %s" %
10688 instance.disk_template, errors.ECODE_INVAL)
10690 if (instance.disk_template,
10691 self.op.disk_template) not in self._DISK_CONVERSIONS:
10692 raise errors.OpPrereqError("Unsupported disk template conversion from"
10693 " %s to %s" % (instance.disk_template,
10694 self.op.disk_template),
10695 errors.ECODE_INVAL)
10696 _CheckInstanceDown(self, instance, "cannot change disk template")
10697 if self.op.disk_template in constants.DTS_INT_MIRROR:
10698 if self.op.remote_node == pnode:
10699 raise errors.OpPrereqError("Given new secondary node %s is the same"
10700 " as the primary node of the instance" %
10701 self.op.remote_node, errors.ECODE_STATE)
10702 _CheckNodeOnline(self, self.op.remote_node)
10703 _CheckNodeNotDrained(self, self.op.remote_node)
10704 # FIXME: here we assume that the old instance type is DT_PLAIN
10705 assert instance.disk_template == constants.DT_PLAIN
10706 disks = [{constants.IDISK_SIZE: d.size,
10707 constants.IDISK_VG: d.logical_id[0]}
10708 for d in instance.disks]
10709 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10710 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10712 # hvparams processing
10713 if self.op.hvparams:
10714 hv_type = instance.hypervisor
10715 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10716 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10717 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10720 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10721 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10722 self.hv_new = hv_new # the new actual values
10723 self.hv_inst = i_hvdict # the new dict (without defaults)
10725 self.hv_new = self.hv_inst = {}
10727 # beparams processing
10728 if self.op.beparams:
10729 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10731 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10732 be_new = cluster.SimpleFillBE(i_bedict)
10733 self.be_new = be_new # the new actual values
10734 self.be_inst = i_bedict # the new dict (without defaults)
10736 self.be_new = self.be_inst = {}
10737 be_old = cluster.FillBE(instance)
10739 # osparams processing
10740 if self.op.osparams:
10741 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10742 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10743 self.os_inst = i_osdict # the new dict (without defaults)
10749 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10750 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10751 mem_check_list = [pnode]
10752 if be_new[constants.BE_AUTO_BALANCE]:
10753 # either we changed auto_balance to yes or it was from before
10754 mem_check_list.extend(instance.secondary_nodes)
10755 instance_info = self.rpc.call_instance_info(pnode, instance.name,
10756 instance.hypervisor)
10757 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10758 instance.hypervisor)
10759 pninfo = nodeinfo[pnode]
10760 msg = pninfo.fail_msg
10762 # Assume the primary node is unreachable and go ahead
10763 self.warn.append("Can't get info from primary node %s: %s" %
10765 elif not isinstance(pninfo.payload.get("memory_free", None), int):
10766 self.warn.append("Node data from primary node %s doesn't contain"
10767 " free memory information" % pnode)
10768 elif instance_info.fail_msg:
10769 self.warn.append("Can't get instance runtime information: %s" %
10770 instance_info.fail_msg)
10772 if instance_info.payload:
10773 current_mem = int(instance_info.payload["memory"])
10775 # Assume instance not running
10776 # (there is a slight race condition here, but it's not very probable,
10777 # and we have no other way to check)
10779 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10780 pninfo.payload["memory_free"])
10782 raise errors.OpPrereqError("This change will prevent the instance"
10783 " from starting, due to %d MB of memory"
10784 " missing on its primary node" % miss_mem,
10785 errors.ECODE_NORES)
10787 if be_new[constants.BE_AUTO_BALANCE]:
10788 for node, nres in nodeinfo.items():
10789 if node not in instance.secondary_nodes:
10791 nres.Raise("Can't get info from secondary node %s" % node,
10792 prereq=True, ecode=errors.ECODE_STATE)
10793 if not isinstance(nres.payload.get("memory_free", None), int):
10794 raise errors.OpPrereqError("Secondary node %s didn't return free"
10795 " memory information" % node,
10796 errors.ECODE_STATE)
10797 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10798 raise errors.OpPrereqError("This change will prevent the instance"
10799 " from failover to its secondary node"
10800 " %s, due to not enough memory" % node,
10801 errors.ECODE_STATE)
10805 self.nic_pinst = {}
10806 for nic_op, nic_dict in self.op.nics:
10807 if nic_op == constants.DDM_REMOVE:
10808 if not instance.nics:
10809 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10810 errors.ECODE_INVAL)
10812 if nic_op != constants.DDM_ADD:
10814 if not instance.nics:
10815 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10816 " no NICs" % nic_op,
10817 errors.ECODE_INVAL)
10818 if nic_op < 0 or nic_op >= len(instance.nics):
10819 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10821 (nic_op, len(instance.nics) - 1),
10822 errors.ECODE_INVAL)
10823 old_nic_params = instance.nics[nic_op].nicparams
10824 old_nic_ip = instance.nics[nic_op].ip
10826 old_nic_params = {}
10829 update_params_dict = dict([(key, nic_dict[key])
10830 for key in constants.NICS_PARAMETERS
10831 if key in nic_dict])
10833 if "bridge" in nic_dict:
10834 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10836 new_nic_params = _GetUpdatedParams(old_nic_params,
10837 update_params_dict)
10838 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10839 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10840 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10841 self.nic_pinst[nic_op] = new_nic_params
10842 self.nic_pnew[nic_op] = new_filled_nic_params
10843 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10845 if new_nic_mode == constants.NIC_MODE_BRIDGED:
10846 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10847 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10849 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10851 self.warn.append(msg)
10853 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10854 if new_nic_mode == constants.NIC_MODE_ROUTED:
10855 if constants.INIC_IP in nic_dict:
10856 nic_ip = nic_dict[constants.INIC_IP]
10858 nic_ip = old_nic_ip
10860 raise errors.OpPrereqError("Cannot set the nic ip to None"
10861 " on a routed nic", errors.ECODE_INVAL)
10862 if constants.INIC_MAC in nic_dict:
10863 nic_mac = nic_dict[constants.INIC_MAC]
10864 if nic_mac is None:
10865 raise errors.OpPrereqError("Cannot set the nic mac to None",
10866 errors.ECODE_INVAL)
10867 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10868 # otherwise generate the mac
10869 nic_dict[constants.INIC_MAC] = \
10870 self.cfg.GenerateMAC(self.proc.GetECId())
10872 # or validate/reserve the current one
10874 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10875 except errors.ReservationError:
10876 raise errors.OpPrereqError("MAC address %s already in use"
10877 " in cluster" % nic_mac,
10878 errors.ECODE_NOTUNIQUE)
10881 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10882 raise errors.OpPrereqError("Disk operations not supported for"
10883 " diskless instances",
10884 errors.ECODE_INVAL)
10885 for disk_op, _ in self.op.disks:
10886 if disk_op == constants.DDM_REMOVE:
10887 if len(instance.disks) == 1:
10888 raise errors.OpPrereqError("Cannot remove the last disk of"
10889 " an instance", errors.ECODE_INVAL)
10890 _CheckInstanceDown(self, instance, "cannot remove disks")
10892 if (disk_op == constants.DDM_ADD and
10893 len(instance.disks) >= constants.MAX_DISKS):
10894 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10895 " add more" % constants.MAX_DISKS,
10896 errors.ECODE_STATE)
10897 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10899 if disk_op < 0 or disk_op >= len(instance.disks):
10900 raise errors.OpPrereqError("Invalid disk index %s, valid values"
10902 (disk_op, len(instance.disks)),
10903 errors.ECODE_INVAL)
10907 def _ConvertPlainToDrbd(self, feedback_fn):
10908 """Converts an instance from plain to drbd.
10911 feedback_fn("Converting template to drbd")
10912 instance = self.instance
10913 pnode = instance.primary_node
10914 snode = self.op.remote_node
10916 # create a fake disk info for _GenerateDiskTemplate
10917 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10918 constants.IDISK_VG: d.logical_id[0]}
10919 for d in instance.disks]
10920 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10921 instance.name, pnode, [snode],
10922 disk_info, None, None, 0, feedback_fn)
10923 info = _GetInstanceInfoText(instance)
10924 feedback_fn("Creating aditional volumes...")
10925 # first, create the missing data and meta devices
10926 for disk in new_disks:
10927 # unfortunately this is... not too nice
10928 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10930 for child in disk.children:
10931 _CreateSingleBlockDev(self, snode, instance, child, info, True)
10932 # at this stage, all new LVs have been created, we can rename the
10934 feedback_fn("Renaming original volumes...")
10935 rename_list = [(o, n.children[0].logical_id)
10936 for (o, n) in zip(instance.disks, new_disks)]
10937 result = self.rpc.call_blockdev_rename(pnode, rename_list)
10938 result.Raise("Failed to rename original LVs")
10940 feedback_fn("Initializing DRBD devices...")
10941 # all child devices are in place, we can now create the DRBD devices
10942 for disk in new_disks:
10943 for node in [pnode, snode]:
10944 f_create = node == pnode
10945 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10947 # at this point, the instance has been modified
10948 instance.disk_template = constants.DT_DRBD8
10949 instance.disks = new_disks
10950 self.cfg.Update(instance, feedback_fn)
10952 # disks are created, waiting for sync
10953 disk_abort = not _WaitForSync(self, instance,
10954 oneshot=not self.op.wait_for_sync)
10956 raise errors.OpExecError("There are some degraded disks for"
10957 " this instance, please cleanup manually")
10959 def _ConvertDrbdToPlain(self, feedback_fn):
10960 """Converts an instance from drbd to plain.
10963 instance = self.instance
10964 assert len(instance.secondary_nodes) == 1
10965 pnode = instance.primary_node
10966 snode = instance.secondary_nodes[0]
10967 feedback_fn("Converting template to plain")
10969 old_disks = instance.disks
10970 new_disks = [d.children[0] for d in old_disks]
10972 # copy over size and mode
10973 for parent, child in zip(old_disks, new_disks):
10974 child.size = parent.size
10975 child.mode = parent.mode
10977 # update instance structure
10978 instance.disks = new_disks
10979 instance.disk_template = constants.DT_PLAIN
10980 self.cfg.Update(instance, feedback_fn)
10982 feedback_fn("Removing volumes on the secondary node...")
10983 for disk in old_disks:
10984 self.cfg.SetDiskID(disk, snode)
10985 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10987 self.LogWarning("Could not remove block device %s on node %s,"
10988 " continuing anyway: %s", disk.iv_name, snode, msg)
10990 feedback_fn("Removing unneeded volumes on the primary node...")
10991 for idx, disk in enumerate(old_disks):
10992 meta = disk.children[1]
10993 self.cfg.SetDiskID(meta, pnode)
10994 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10996 self.LogWarning("Could not remove metadata for disk %d on node %s,"
10997 " continuing anyway: %s", idx, pnode, msg)
10999 def Exec(self, feedback_fn):
11000 """Modifies an instance.
11002 All parameters take effect only at the next restart of the instance.
11005 # Process here the warnings from CheckPrereq, as we don't have a
11006 # feedback_fn there.
11007 for warn in self.warn:
11008 feedback_fn("WARNING: %s" % warn)
11011 instance = self.instance
11013 for disk_op, disk_dict in self.op.disks:
11014 if disk_op == constants.DDM_REMOVE:
11015 # remove the last disk
11016 device = instance.disks.pop()
11017 device_idx = len(instance.disks)
11018 for node, disk in device.ComputeNodeTree(instance.primary_node):
11019 self.cfg.SetDiskID(disk, node)
11020 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11022 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11023 " continuing anyway", device_idx, node, msg)
11024 result.append(("disk/%d" % device_idx, "remove"))
11025 elif disk_op == constants.DDM_ADD:
11027 if instance.disk_template in (constants.DT_FILE,
11028 constants.DT_SHARED_FILE):
11029 file_driver, file_path = instance.disks[0].logical_id
11030 file_path = os.path.dirname(file_path)
11032 file_driver = file_path = None
11033 disk_idx_base = len(instance.disks)
11034 new_disk = _GenerateDiskTemplate(self,
11035 instance.disk_template,
11036 instance.name, instance.primary_node,
11037 instance.secondary_nodes,
11041 disk_idx_base, feedback_fn)[0]
11042 instance.disks.append(new_disk)
11043 info = _GetInstanceInfoText(instance)
11045 logging.info("Creating volume %s for instance %s",
11046 new_disk.iv_name, instance.name)
11047 # Note: this needs to be kept in sync with _CreateDisks
11049 for node in instance.all_nodes:
11050 f_create = node == instance.primary_node
11052 _CreateBlockDev(self, node, instance, new_disk,
11053 f_create, info, f_create)
11054 except errors.OpExecError, err:
11055 self.LogWarning("Failed to create volume %s (%s) on"
11057 new_disk.iv_name, new_disk, node, err)
11058 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11059 (new_disk.size, new_disk.mode)))
11061 # change a given disk
11062 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11063 result.append(("disk.mode/%d" % disk_op,
11064 disk_dict[constants.IDISK_MODE]))
11066 if self.op.disk_template:
11067 r_shut = _ShutdownInstanceDisks(self, instance)
11069 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11070 " proceed with disk template conversion")
11071 mode = (instance.disk_template, self.op.disk_template)
11073 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11075 self.cfg.ReleaseDRBDMinors(instance.name)
11077 result.append(("disk_template", self.op.disk_template))
11080 for nic_op, nic_dict in self.op.nics:
11081 if nic_op == constants.DDM_REMOVE:
11082 # remove the last nic
11083 del instance.nics[-1]
11084 result.append(("nic.%d" % len(instance.nics), "remove"))
11085 elif nic_op == constants.DDM_ADD:
11086 # mac and bridge should be set, by now
11087 mac = nic_dict[constants.INIC_MAC]
11088 ip = nic_dict.get(constants.INIC_IP, None)
11089 nicparams = self.nic_pinst[constants.DDM_ADD]
11090 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11091 instance.nics.append(new_nic)
11092 result.append(("nic.%d" % (len(instance.nics) - 1),
11093 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11094 (new_nic.mac, new_nic.ip,
11095 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11096 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11099 for key in (constants.INIC_MAC, constants.INIC_IP):
11100 if key in nic_dict:
11101 setattr(instance.nics[nic_op], key, nic_dict[key])
11102 if nic_op in self.nic_pinst:
11103 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11104 for key, val in nic_dict.iteritems():
11105 result.append(("nic.%s/%d" % (key, nic_op), val))
11108 if self.op.hvparams:
11109 instance.hvparams = self.hv_inst
11110 for key, val in self.op.hvparams.iteritems():
11111 result.append(("hv/%s" % key, val))
11114 if self.op.beparams:
11115 instance.beparams = self.be_inst
11116 for key, val in self.op.beparams.iteritems():
11117 result.append(("be/%s" % key, val))
11120 if self.op.os_name:
11121 instance.os = self.op.os_name
11124 if self.op.osparams:
11125 instance.osparams = self.os_inst
11126 for key, val in self.op.osparams.iteritems():
11127 result.append(("os/%s" % key, val))
11129 self.cfg.Update(instance, feedback_fn)
11133 _DISK_CONVERSIONS = {
11134 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11135 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11139 class LUBackupQuery(NoHooksLU):
11140 """Query the exports list
11145 def ExpandNames(self):
11146 self.needed_locks = {}
11147 self.share_locks[locking.LEVEL_NODE] = 1
11148 if not self.op.nodes:
11149 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11151 self.needed_locks[locking.LEVEL_NODE] = \
11152 _GetWantedNodes(self, self.op.nodes)
11154 def Exec(self, feedback_fn):
11155 """Compute the list of all the exported system images.
11158 @return: a dictionary with the structure node->(export-list)
11159 where export-list is a list of the instances exported on
11163 self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
11164 rpcresult = self.rpc.call_export_list(self.nodes)
11166 for node in rpcresult:
11167 if rpcresult[node].fail_msg:
11168 result[node] = False
11170 result[node] = rpcresult[node].payload
11175 class LUBackupPrepare(NoHooksLU):
11176 """Prepares an instance for an export and returns useful information.
11181 def ExpandNames(self):
11182 self._ExpandAndLockInstance()
11184 def CheckPrereq(self):
11185 """Check prerequisites.
11188 instance_name = self.op.instance_name
11190 self.instance = self.cfg.GetInstanceInfo(instance_name)
11191 assert self.instance is not None, \
11192 "Cannot retrieve locked instance %s" % self.op.instance_name
11193 _CheckNodeOnline(self, self.instance.primary_node)
11195 self._cds = _GetClusterDomainSecret()
11197 def Exec(self, feedback_fn):
11198 """Prepares an instance for an export.
11201 instance = self.instance
11203 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11204 salt = utils.GenerateSecret(8)
11206 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11207 result = self.rpc.call_x509_cert_create(instance.primary_node,
11208 constants.RIE_CERT_VALIDITY)
11209 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11211 (name, cert_pem) = result.payload
11213 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11217 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11218 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11220 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11226 class LUBackupExport(LogicalUnit):
11227 """Export an instance to an image in the cluster.
11230 HPATH = "instance-export"
11231 HTYPE = constants.HTYPE_INSTANCE
11234 def CheckArguments(self):
11235 """Check the arguments.
11238 self.x509_key_name = self.op.x509_key_name
11239 self.dest_x509_ca_pem = self.op.destination_x509_ca
11241 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11242 if not self.x509_key_name:
11243 raise errors.OpPrereqError("Missing X509 key name for encryption",
11244 errors.ECODE_INVAL)
11246 if not self.dest_x509_ca_pem:
11247 raise errors.OpPrereqError("Missing destination X509 CA",
11248 errors.ECODE_INVAL)
11250 def ExpandNames(self):
11251 self._ExpandAndLockInstance()
11253 # Lock all nodes for local exports
11254 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11255 # FIXME: lock only instance primary and destination node
11257 # Sad but true, for now we have do lock all nodes, as we don't know where
11258 # the previous export might be, and in this LU we search for it and
11259 # remove it from its current node. In the future we could fix this by:
11260 # - making a tasklet to search (share-lock all), then create the
11261 # new one, then one to remove, after
11262 # - removing the removal operation altogether
11263 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11265 def DeclareLocks(self, level):
11266 """Last minute lock declaration."""
11267 # All nodes are locked anyway, so nothing to do here.
11269 def BuildHooksEnv(self):
11270 """Build hooks env.
11272 This will run on the master, primary node and target node.
11276 "EXPORT_MODE": self.op.mode,
11277 "EXPORT_NODE": self.op.target_node,
11278 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11279 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11280 # TODO: Generic function for boolean env variables
11281 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11284 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11288 def BuildHooksNodes(self):
11289 """Build hooks nodes.
11292 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11294 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11295 nl.append(self.op.target_node)
11299 def CheckPrereq(self):
11300 """Check prerequisites.
11302 This checks that the instance and node names are valid.
11305 instance_name = self.op.instance_name
11307 self.instance = self.cfg.GetInstanceInfo(instance_name)
11308 assert self.instance is not None, \
11309 "Cannot retrieve locked instance %s" % self.op.instance_name
11310 _CheckNodeOnline(self, self.instance.primary_node)
11312 if (self.op.remove_instance and self.instance.admin_up and
11313 not self.op.shutdown):
11314 raise errors.OpPrereqError("Can not remove instance without shutting it"
11317 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11318 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11319 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11320 assert self.dst_node is not None
11322 _CheckNodeOnline(self, self.dst_node.name)
11323 _CheckNodeNotDrained(self, self.dst_node.name)
11326 self.dest_disk_info = None
11327 self.dest_x509_ca = None
11329 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11330 self.dst_node = None
11332 if len(self.op.target_node) != len(self.instance.disks):
11333 raise errors.OpPrereqError(("Received destination information for %s"
11334 " disks, but instance %s has %s disks") %
11335 (len(self.op.target_node), instance_name,
11336 len(self.instance.disks)),
11337 errors.ECODE_INVAL)
11339 cds = _GetClusterDomainSecret()
11341 # Check X509 key name
11343 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11344 except (TypeError, ValueError), err:
11345 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11347 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11348 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11349 errors.ECODE_INVAL)
11351 # Load and verify CA
11353 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11354 except OpenSSL.crypto.Error, err:
11355 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11356 (err, ), errors.ECODE_INVAL)
11358 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11359 if errcode is not None:
11360 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11361 (msg, ), errors.ECODE_INVAL)
11363 self.dest_x509_ca = cert
11365 # Verify target information
11367 for idx, disk_data in enumerate(self.op.target_node):
11369 (host, port, magic) = \
11370 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11371 except errors.GenericError, err:
11372 raise errors.OpPrereqError("Target info for disk %s: %s" %
11373 (idx, err), errors.ECODE_INVAL)
11375 disk_info.append((host, port, magic))
11377 assert len(disk_info) == len(self.op.target_node)
11378 self.dest_disk_info = disk_info
11381 raise errors.ProgrammerError("Unhandled export mode %r" %
11384 # instance disk type verification
11385 # TODO: Implement export support for file-based disks
11386 for disk in self.instance.disks:
11387 if disk.dev_type == constants.LD_FILE:
11388 raise errors.OpPrereqError("Export not supported for instances with"
11389 " file-based disks", errors.ECODE_INVAL)
11391 def _CleanupExports(self, feedback_fn):
11392 """Removes exports of current instance from all other nodes.
11394 If an instance in a cluster with nodes A..D was exported to node C, its
11395 exports will be removed from the nodes A, B and D.
11398 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11400 nodelist = self.cfg.GetNodeList()
11401 nodelist.remove(self.dst_node.name)
11403 # on one-node clusters nodelist will be empty after the removal
11404 # if we proceed the backup would be removed because OpBackupQuery
11405 # substitutes an empty list with the full cluster node list.
11406 iname = self.instance.name
11408 feedback_fn("Removing old exports for instance %s" % iname)
11409 exportlist = self.rpc.call_export_list(nodelist)
11410 for node in exportlist:
11411 if exportlist[node].fail_msg:
11413 if iname in exportlist[node].payload:
11414 msg = self.rpc.call_export_remove(node, iname).fail_msg
11416 self.LogWarning("Could not remove older export for instance %s"
11417 " on node %s: %s", iname, node, msg)
11419 def Exec(self, feedback_fn):
11420 """Export an instance to an image in the cluster.
11423 assert self.op.mode in constants.EXPORT_MODES
11425 instance = self.instance
11426 src_node = instance.primary_node
11428 if self.op.shutdown:
11429 # shutdown the instance, but not the disks
11430 feedback_fn("Shutting down instance %s" % instance.name)
11431 result = self.rpc.call_instance_shutdown(src_node, instance,
11432 self.op.shutdown_timeout)
11433 # TODO: Maybe ignore failures if ignore_remove_failures is set
11434 result.Raise("Could not shutdown instance %s on"
11435 " node %s" % (instance.name, src_node))
11437 # set the disks ID correctly since call_instance_start needs the
11438 # correct drbd minor to create the symlinks
11439 for disk in instance.disks:
11440 self.cfg.SetDiskID(disk, src_node)
11442 activate_disks = (not instance.admin_up)
11445 # Activate the instance disks if we'exporting a stopped instance
11446 feedback_fn("Activating disks for %s" % instance.name)
11447 _StartInstanceDisks(self, instance, None)
11450 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11453 helper.CreateSnapshots()
11455 if (self.op.shutdown and instance.admin_up and
11456 not self.op.remove_instance):
11457 assert not activate_disks
11458 feedback_fn("Starting instance %s" % instance.name)
11459 result = self.rpc.call_instance_start(src_node, instance,
11461 msg = result.fail_msg
11463 feedback_fn("Failed to start instance: %s" % msg)
11464 _ShutdownInstanceDisks(self, instance)
11465 raise errors.OpExecError("Could not start instance: %s" % msg)
11467 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11468 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11469 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11470 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11471 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11473 (key_name, _, _) = self.x509_key_name
11476 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11479 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11480 key_name, dest_ca_pem,
11485 # Check for backwards compatibility
11486 assert len(dresults) == len(instance.disks)
11487 assert compat.all(isinstance(i, bool) for i in dresults), \
11488 "Not all results are boolean: %r" % dresults
11492 feedback_fn("Deactivating disks for %s" % instance.name)
11493 _ShutdownInstanceDisks(self, instance)
11495 if not (compat.all(dresults) and fin_resu):
11498 failures.append("export finalization")
11499 if not compat.all(dresults):
11500 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11502 failures.append("disk export: disk(s) %s" % fdsk)
11504 raise errors.OpExecError("Export failed, errors in %s" %
11505 utils.CommaJoin(failures))
11507 # At this point, the export was successful, we can cleanup/finish
11509 # Remove instance if requested
11510 if self.op.remove_instance:
11511 feedback_fn("Removing instance %s" % instance.name)
11512 _RemoveInstance(self, feedback_fn, instance,
11513 self.op.ignore_remove_failures)
11515 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11516 self._CleanupExports(feedback_fn)
11518 return fin_resu, dresults
11521 class LUBackupRemove(NoHooksLU):
11522 """Remove exports related to the named instance.
11527 def ExpandNames(self):
11528 self.needed_locks = {}
11529 # We need all nodes to be locked in order for RemoveExport to work, but we
11530 # don't need to lock the instance itself, as nothing will happen to it (and
11531 # we can remove exports also for a removed instance)
11532 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11534 def Exec(self, feedback_fn):
11535 """Remove any export.
11538 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11539 # If the instance was not found we'll try with the name that was passed in.
11540 # This will only work if it was an FQDN, though.
11542 if not instance_name:
11544 instance_name = self.op.instance_name
11546 locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
11547 exportlist = self.rpc.call_export_list(locked_nodes)
11549 for node in exportlist:
11550 msg = exportlist[node].fail_msg
11552 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11554 if instance_name in exportlist[node].payload:
11556 result = self.rpc.call_export_remove(node, instance_name)
11557 msg = result.fail_msg
11559 logging.error("Could not remove export for instance %s"
11560 " on node %s: %s", instance_name, node, msg)
11562 if fqdn_warn and not found:
11563 feedback_fn("Export not found. If trying to remove an export belonging"
11564 " to a deleted instance please use its Fully Qualified"
11568 class LUGroupAdd(LogicalUnit):
11569 """Logical unit for creating node groups.
11572 HPATH = "group-add"
11573 HTYPE = constants.HTYPE_GROUP
11576 def ExpandNames(self):
11577 # We need the new group's UUID here so that we can create and acquire the
11578 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11579 # that it should not check whether the UUID exists in the configuration.
11580 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11581 self.needed_locks = {}
11582 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11584 def CheckPrereq(self):
11585 """Check prerequisites.
11587 This checks that the given group name is not an existing node group
11592 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11593 except errors.OpPrereqError:
11596 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11597 " node group (UUID: %s)" %
11598 (self.op.group_name, existing_uuid),
11599 errors.ECODE_EXISTS)
11601 if self.op.ndparams:
11602 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11604 def BuildHooksEnv(self):
11605 """Build hooks env.
11609 "GROUP_NAME": self.op.group_name,
11612 def BuildHooksNodes(self):
11613 """Build hooks nodes.
11616 mn = self.cfg.GetMasterNode()
11617 return ([mn], [mn])
11619 def Exec(self, feedback_fn):
11620 """Add the node group to the cluster.
11623 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11624 uuid=self.group_uuid,
11625 alloc_policy=self.op.alloc_policy,
11626 ndparams=self.op.ndparams)
11628 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11629 del self.remove_locks[locking.LEVEL_NODEGROUP]
11632 class LUGroupAssignNodes(NoHooksLU):
11633 """Logical unit for assigning nodes to groups.
11638 def ExpandNames(self):
11639 # These raise errors.OpPrereqError on their own:
11640 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11641 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11643 # We want to lock all the affected nodes and groups. We have readily
11644 # available the list of nodes, and the *destination* group. To gather the
11645 # list of "source" groups, we need to fetch node information later on.
11646 self.needed_locks = {
11647 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11648 locking.LEVEL_NODE: self.op.nodes,
11651 def DeclareLocks(self, level):
11652 if level == locking.LEVEL_NODEGROUP:
11653 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11655 # Try to get all affected nodes' groups without having the group or node
11656 # lock yet. Needs verification later in the code flow.
11657 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11659 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11661 def CheckPrereq(self):
11662 """Check prerequisites.
11665 assert self.needed_locks[locking.LEVEL_NODEGROUP]
11666 assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
11667 frozenset(self.op.nodes))
11669 expected_locks = (set([self.group_uuid]) |
11670 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11671 actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
11672 if actual_locks != expected_locks:
11673 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11674 " current groups are '%s', used to be '%s'" %
11675 (utils.CommaJoin(expected_locks),
11676 utils.CommaJoin(actual_locks)))
11678 self.node_data = self.cfg.GetAllNodesInfo()
11679 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11680 instance_data = self.cfg.GetAllInstancesInfo()
11682 if self.group is None:
11683 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11684 (self.op.group_name, self.group_uuid))
11686 (new_splits, previous_splits) = \
11687 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11688 for node in self.op.nodes],
11689 self.node_data, instance_data)
11692 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11694 if not self.op.force:
11695 raise errors.OpExecError("The following instances get split by this"
11696 " change and --force was not given: %s" %
11699 self.LogWarning("This operation will split the following instances: %s",
11702 if previous_splits:
11703 self.LogWarning("In addition, these already-split instances continue"
11704 " to be split across groups: %s",
11705 utils.CommaJoin(utils.NiceSort(previous_splits)))
11707 def Exec(self, feedback_fn):
11708 """Assign nodes to a new group.
11711 for node in self.op.nodes:
11712 self.node_data[node].group = self.group_uuid
11714 # FIXME: Depends on side-effects of modifying the result of
11715 # C{cfg.GetAllNodesInfo}
11717 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11720 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11721 """Check for split instances after a node assignment.
11723 This method considers a series of node assignments as an atomic operation,
11724 and returns information about split instances after applying the set of
11727 In particular, it returns information about newly split instances, and
11728 instances that were already split, and remain so after the change.
11730 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11733 @type changes: list of (node_name, new_group_uuid) pairs.
11734 @param changes: list of node assignments to consider.
11735 @param node_data: a dict with data for all nodes
11736 @param instance_data: a dict with all instances to consider
11737 @rtype: a two-tuple
11738 @return: a list of instances that were previously okay and result split as a
11739 consequence of this change, and a list of instances that were previously
11740 split and this change does not fix.
11743 changed_nodes = dict((node, group) for node, group in changes
11744 if node_data[node].group != group)
11746 all_split_instances = set()
11747 previously_split_instances = set()
11749 def InstanceNodes(instance):
11750 return [instance.primary_node] + list(instance.secondary_nodes)
11752 for inst in instance_data.values():
11753 if inst.disk_template not in constants.DTS_INT_MIRROR:
11756 instance_nodes = InstanceNodes(inst)
11758 if len(set(node_data[node].group for node in instance_nodes)) > 1:
11759 previously_split_instances.add(inst.name)
11761 if len(set(changed_nodes.get(node, node_data[node].group)
11762 for node in instance_nodes)) > 1:
11763 all_split_instances.add(inst.name)
11765 return (list(all_split_instances - previously_split_instances),
11766 list(previously_split_instances & all_split_instances))
11769 class _GroupQuery(_QueryBase):
11770 FIELDS = query.GROUP_FIELDS
11772 def ExpandNames(self, lu):
11773 lu.needed_locks = {}
11775 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11776 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11779 self.wanted = [name_to_uuid[name]
11780 for name in utils.NiceSort(name_to_uuid.keys())]
11782 # Accept names to be either names or UUIDs.
11785 all_uuid = frozenset(self._all_groups.keys())
11787 for name in self.names:
11788 if name in all_uuid:
11789 self.wanted.append(name)
11790 elif name in name_to_uuid:
11791 self.wanted.append(name_to_uuid[name])
11793 missing.append(name)
11796 raise errors.OpPrereqError("Some groups do not exist: %s" %
11797 utils.CommaJoin(missing),
11798 errors.ECODE_NOENT)
11800 def DeclareLocks(self, lu, level):
11803 def _GetQueryData(self, lu):
11804 """Computes the list of node groups and their attributes.
11807 do_nodes = query.GQ_NODE in self.requested_data
11808 do_instances = query.GQ_INST in self.requested_data
11810 group_to_nodes = None
11811 group_to_instances = None
11813 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11814 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11815 # latter GetAllInstancesInfo() is not enough, for we have to go through
11816 # instance->node. Hence, we will need to process nodes even if we only need
11817 # instance information.
11818 if do_nodes or do_instances:
11819 all_nodes = lu.cfg.GetAllNodesInfo()
11820 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11823 for node in all_nodes.values():
11824 if node.group in group_to_nodes:
11825 group_to_nodes[node.group].append(node.name)
11826 node_to_group[node.name] = node.group
11829 all_instances = lu.cfg.GetAllInstancesInfo()
11830 group_to_instances = dict((uuid, []) for uuid in self.wanted)
11832 for instance in all_instances.values():
11833 node = instance.primary_node
11834 if node in node_to_group:
11835 group_to_instances[node_to_group[node]].append(instance.name)
11838 # Do not pass on node information if it was not requested.
11839 group_to_nodes = None
11841 return query.GroupQueryData([self._all_groups[uuid]
11842 for uuid in self.wanted],
11843 group_to_nodes, group_to_instances)
11846 class LUGroupQuery(NoHooksLU):
11847 """Logical unit for querying node groups.
11852 def CheckArguments(self):
11853 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
11854 self.op.output_fields, False)
11856 def ExpandNames(self):
11857 self.gq.ExpandNames(self)
11859 def Exec(self, feedback_fn):
11860 return self.gq.OldStyleQuery(self)
11863 class LUGroupSetParams(LogicalUnit):
11864 """Modifies the parameters of a node group.
11867 HPATH = "group-modify"
11868 HTYPE = constants.HTYPE_GROUP
11871 def CheckArguments(self):
11874 self.op.alloc_policy,
11877 if all_changes.count(None) == len(all_changes):
11878 raise errors.OpPrereqError("Please pass at least one modification",
11879 errors.ECODE_INVAL)
11881 def ExpandNames(self):
11882 # This raises errors.OpPrereqError on its own:
11883 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11885 self.needed_locks = {
11886 locking.LEVEL_NODEGROUP: [self.group_uuid],
11889 def CheckPrereq(self):
11890 """Check prerequisites.
11893 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11895 if self.group is None:
11896 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11897 (self.op.group_name, self.group_uuid))
11899 if self.op.ndparams:
11900 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11901 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11902 self.new_ndparams = new_ndparams
11904 def BuildHooksEnv(self):
11905 """Build hooks env.
11909 "GROUP_NAME": self.op.group_name,
11910 "NEW_ALLOC_POLICY": self.op.alloc_policy,
11913 def BuildHooksNodes(self):
11914 """Build hooks nodes.
11917 mn = self.cfg.GetMasterNode()
11918 return ([mn], [mn])
11920 def Exec(self, feedback_fn):
11921 """Modifies the node group.
11926 if self.op.ndparams:
11927 self.group.ndparams = self.new_ndparams
11928 result.append(("ndparams", str(self.group.ndparams)))
11930 if self.op.alloc_policy:
11931 self.group.alloc_policy = self.op.alloc_policy
11933 self.cfg.Update(self.group, feedback_fn)
11938 class LUGroupRemove(LogicalUnit):
11939 HPATH = "group-remove"
11940 HTYPE = constants.HTYPE_GROUP
11943 def ExpandNames(self):
11944 # This will raises errors.OpPrereqError on its own:
11945 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11946 self.needed_locks = {
11947 locking.LEVEL_NODEGROUP: [self.group_uuid],
11950 def CheckPrereq(self):
11951 """Check prerequisites.
11953 This checks that the given group name exists as a node group, that is
11954 empty (i.e., contains no nodes), and that is not the last group of the
11958 # Verify that the group is empty.
11959 group_nodes = [node.name
11960 for node in self.cfg.GetAllNodesInfo().values()
11961 if node.group == self.group_uuid]
11964 raise errors.OpPrereqError("Group '%s' not empty, has the following"
11966 (self.op.group_name,
11967 utils.CommaJoin(utils.NiceSort(group_nodes))),
11968 errors.ECODE_STATE)
11970 # Verify the cluster would not be left group-less.
11971 if len(self.cfg.GetNodeGroupList()) == 1:
11972 raise errors.OpPrereqError("Group '%s' is the only group,"
11973 " cannot be removed" %
11974 self.op.group_name,
11975 errors.ECODE_STATE)
11977 def BuildHooksEnv(self):
11978 """Build hooks env.
11982 "GROUP_NAME": self.op.group_name,
11985 def BuildHooksNodes(self):
11986 """Build hooks nodes.
11989 mn = self.cfg.GetMasterNode()
11990 return ([mn], [mn])
11992 def Exec(self, feedback_fn):
11993 """Remove the node group.
11997 self.cfg.RemoveNodeGroup(self.group_uuid)
11998 except errors.ConfigurationError:
11999 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12000 (self.op.group_name, self.group_uuid))
12002 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12005 class LUGroupRename(LogicalUnit):
12006 HPATH = "group-rename"
12007 HTYPE = constants.HTYPE_GROUP
12010 def ExpandNames(self):
12011 # This raises errors.OpPrereqError on its own:
12012 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12014 self.needed_locks = {
12015 locking.LEVEL_NODEGROUP: [self.group_uuid],
12018 def CheckPrereq(self):
12019 """Check prerequisites.
12021 Ensures requested new name is not yet used.
12025 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12026 except errors.OpPrereqError:
12029 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12030 " node group (UUID: %s)" %
12031 (self.op.new_name, new_name_uuid),
12032 errors.ECODE_EXISTS)
12034 def BuildHooksEnv(self):
12035 """Build hooks env.
12039 "OLD_NAME": self.op.group_name,
12040 "NEW_NAME": self.op.new_name,
12043 def BuildHooksNodes(self):
12044 """Build hooks nodes.
12047 mn = self.cfg.GetMasterNode()
12049 all_nodes = self.cfg.GetAllNodesInfo()
12050 all_nodes.pop(mn, None)
12053 run_nodes.extend(node.name for node in all_nodes.values()
12054 if node.group == self.group_uuid)
12056 return (run_nodes, run_nodes)
12058 def Exec(self, feedback_fn):
12059 """Rename the node group.
12062 group = self.cfg.GetNodeGroup(self.group_uuid)
12065 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12066 (self.op.group_name, self.group_uuid))
12068 group.name = self.op.new_name
12069 self.cfg.Update(group, feedback_fn)
12071 return self.op.new_name
12074 class LUGroupEvacuate(LogicalUnit):
12075 HPATH = "group-evacuate"
12076 HTYPE = constants.HTYPE_GROUP
12079 def ExpandNames(self):
12080 # This raises errors.OpPrereqError on its own:
12081 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12083 if self.op.target_groups:
12084 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12085 self.op.target_groups)
12087 self.req_target_uuids = []
12089 if self.group_uuid in self.req_target_uuids:
12090 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12091 " as a target group (targets are %s)" %
12093 utils.CommaJoin(self.req_target_uuids)),
12094 errors.ECODE_INVAL)
12096 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12098 self.share_locks = _ShareAll()
12099 self.needed_locks = {
12100 locking.LEVEL_INSTANCE: [],
12101 locking.LEVEL_NODEGROUP: [],
12102 locking.LEVEL_NODE: [],
12105 def DeclareLocks(self, level):
12106 if level == locking.LEVEL_INSTANCE:
12107 assert not self.needed_locks[locking.LEVEL_INSTANCE]
12109 # Lock instances optimistically, needs verification once node and group
12110 # locks have been acquired
12111 self.needed_locks[locking.LEVEL_INSTANCE] = \
12112 self.cfg.GetNodeGroupInstances(self.group_uuid)
12114 elif level == locking.LEVEL_NODEGROUP:
12115 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12117 if self.req_target_uuids:
12118 lock_groups = set([self.group_uuid] + self.req_target_uuids)
12120 # Lock all groups used by instances optimistically; this requires going
12121 # via the node before it's locked, requiring verification later on
12122 lock_groups.update(group_uuid
12123 for instance_name in
12124 self.glm.list_owned(locking.LEVEL_INSTANCE)
12126 self.cfg.GetInstanceNodeGroups(instance_name))
12128 # No target groups, need to lock all of them
12129 lock_groups = locking.ALL_SET
12131 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12133 elif level == locking.LEVEL_NODE:
12134 # This will only lock the nodes in the group to be evacuated which
12135 # contain actual instances
12136 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12137 self._LockInstancesNodes()
12139 # Lock all nodes in group to be evacuated
12140 assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
12141 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
12142 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12144 def CheckPrereq(self):
12145 owned_instances = frozenset(self.glm.list_owned(locking.LEVEL_INSTANCE))
12146 owned_groups = frozenset(self.glm.list_owned(locking.LEVEL_NODEGROUP))
12147 owned_nodes = frozenset(self.glm.list_owned(locking.LEVEL_NODE))
12149 assert owned_groups.issuperset(self.req_target_uuids)
12150 assert self.group_uuid in owned_groups
12152 # Check if locked instances are still correct
12153 wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
12154 if owned_instances != wanted_instances:
12155 raise errors.OpPrereqError("Instances in node group to be evacuated (%s)"
12156 " changed since locks were acquired, wanted"
12157 " %s, have %s; retry the operation" %
12159 utils.CommaJoin(wanted_instances),
12160 utils.CommaJoin(owned_instances)),
12161 errors.ECODE_STATE)
12163 # Get instance information
12164 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12166 # Check if node groups for locked instances are still correct
12167 for instance_name in owned_instances:
12168 inst = self.instances[instance_name]
12169 assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
12170 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12171 assert owned_nodes.issuperset(inst.all_nodes), \
12172 "Instance %s's nodes changed while we kept the lock" % instance_name
12174 inst_groups = self.cfg.GetInstanceNodeGroups(instance_name)
12175 if not owned_groups.issuperset(inst_groups):
12176 raise errors.OpPrereqError("Instance %s's node groups changed since"
12177 " locks were acquired, current groups"
12178 " are '%s', owning groups '%s'; retry the"
12181 utils.CommaJoin(inst_groups),
12182 utils.CommaJoin(owned_groups)),
12183 errors.ECODE_STATE)
12185 if self.req_target_uuids:
12186 # User requested specific target groups
12187 self.target_uuids = self.req_target_uuids
12189 # All groups except the one to be evacuated are potential targets
12190 self.target_uuids = [group_uuid for group_uuid in owned_groups
12191 if group_uuid != self.group_uuid]
12193 if not self.target_uuids:
12194 raise errors.OpPrereqError("There are no possible target groups",
12195 errors.ECODE_INVAL)
12197 def BuildHooksEnv(self):
12198 """Build hooks env.
12202 "GROUP_NAME": self.op.group_name,
12203 "TARGET_GROUPS": " ".join(self.target_uuids),
12206 def BuildHooksNodes(self):
12207 """Build hooks nodes.
12210 mn = self.cfg.GetMasterNode()
12212 assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
12214 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12216 return (run_nodes, run_nodes)
12218 def Exec(self, feedback_fn):
12219 instances = list(self.glm.list_owned(locking.LEVEL_INSTANCE))
12221 assert self.group_uuid not in self.target_uuids
12223 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12224 instances=instances, target_groups=self.target_uuids)
12226 ial.Run(self.op.iallocator)
12228 if not ial.success:
12229 raise errors.OpPrereqError("Can't compute group evacuation using"
12230 " iallocator '%s': %s" %
12231 (self.op.iallocator, ial.info),
12232 errors.ECODE_NORES)
12234 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12236 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12237 len(jobs), self.op.group_name)
12239 return ResultWithJobs(jobs)
12242 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
12243 """Generic tags LU.
12245 This is an abstract class which is the parent of all the other tags LUs.
12248 def ExpandNames(self):
12249 self.group_uuid = None
12250 self.needed_locks = {}
12251 if self.op.kind == constants.TAG_NODE:
12252 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12253 self.needed_locks[locking.LEVEL_NODE] = self.op.name
12254 elif self.op.kind == constants.TAG_INSTANCE:
12255 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12256 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12257 elif self.op.kind == constants.TAG_NODEGROUP:
12258 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12260 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12261 # not possible to acquire the BGL based on opcode parameters)
12263 def CheckPrereq(self):
12264 """Check prerequisites.
12267 if self.op.kind == constants.TAG_CLUSTER:
12268 self.target = self.cfg.GetClusterInfo()
12269 elif self.op.kind == constants.TAG_NODE:
12270 self.target = self.cfg.GetNodeInfo(self.op.name)
12271 elif self.op.kind == constants.TAG_INSTANCE:
12272 self.target = self.cfg.GetInstanceInfo(self.op.name)
12273 elif self.op.kind == constants.TAG_NODEGROUP:
12274 self.target = self.cfg.GetNodeGroup(self.group_uuid)
12276 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12277 str(self.op.kind), errors.ECODE_INVAL)
12280 class LUTagsGet(TagsLU):
12281 """Returns the tags of a given object.
12286 def ExpandNames(self):
12287 TagsLU.ExpandNames(self)
12289 # Share locks as this is only a read operation
12290 self.share_locks = _ShareAll()
12292 def Exec(self, feedback_fn):
12293 """Returns the tag list.
12296 return list(self.target.GetTags())
12299 class LUTagsSearch(NoHooksLU):
12300 """Searches the tags for a given pattern.
12305 def ExpandNames(self):
12306 self.needed_locks = {}
12308 def CheckPrereq(self):
12309 """Check prerequisites.
12311 This checks the pattern passed for validity by compiling it.
12315 self.re = re.compile(self.op.pattern)
12316 except re.error, err:
12317 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12318 (self.op.pattern, err), errors.ECODE_INVAL)
12320 def Exec(self, feedback_fn):
12321 """Returns the tag list.
12325 tgts = [("/cluster", cfg.GetClusterInfo())]
12326 ilist = cfg.GetAllInstancesInfo().values()
12327 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12328 nlist = cfg.GetAllNodesInfo().values()
12329 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12330 tgts.extend(("/nodegroup/%s" % n.name, n)
12331 for n in cfg.GetAllNodeGroupsInfo().values())
12333 for path, target in tgts:
12334 for tag in target.GetTags():
12335 if self.re.search(tag):
12336 results.append((path, tag))
12340 class LUTagsSet(TagsLU):
12341 """Sets a tag on a given object.
12346 def CheckPrereq(self):
12347 """Check prerequisites.
12349 This checks the type and length of the tag name and value.
12352 TagsLU.CheckPrereq(self)
12353 for tag in self.op.tags:
12354 objects.TaggableObject.ValidateTag(tag)
12356 def Exec(self, feedback_fn):
12361 for tag in self.op.tags:
12362 self.target.AddTag(tag)
12363 except errors.TagError, err:
12364 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12365 self.cfg.Update(self.target, feedback_fn)
12368 class LUTagsDel(TagsLU):
12369 """Delete a list of tags from a given object.
12374 def CheckPrereq(self):
12375 """Check prerequisites.
12377 This checks that we have the given tag.
12380 TagsLU.CheckPrereq(self)
12381 for tag in self.op.tags:
12382 objects.TaggableObject.ValidateTag(tag)
12383 del_tags = frozenset(self.op.tags)
12384 cur_tags = self.target.GetTags()
12386 diff_tags = del_tags - cur_tags
12388 diff_names = ("'%s'" % i for i in sorted(diff_tags))
12389 raise errors.OpPrereqError("Tag(s) %s not found" %
12390 (utils.CommaJoin(diff_names), ),
12391 errors.ECODE_NOENT)
12393 def Exec(self, feedback_fn):
12394 """Remove the tag from the object.
12397 for tag in self.op.tags:
12398 self.target.RemoveTag(tag)
12399 self.cfg.Update(self.target, feedback_fn)
12402 class LUTestDelay(NoHooksLU):
12403 """Sleep for a specified amount of time.
12405 This LU sleeps on the master and/or nodes for a specified amount of
12411 def ExpandNames(self):
12412 """Expand names and set required locks.
12414 This expands the node list, if any.
12417 self.needed_locks = {}
12418 if self.op.on_nodes:
12419 # _GetWantedNodes can be used here, but is not always appropriate to use
12420 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12421 # more information.
12422 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12423 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12425 def _TestDelay(self):
12426 """Do the actual sleep.
12429 if self.op.on_master:
12430 if not utils.TestDelay(self.op.duration):
12431 raise errors.OpExecError("Error during master delay test")
12432 if self.op.on_nodes:
12433 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12434 for node, node_result in result.items():
12435 node_result.Raise("Failure during rpc call to node %s" % node)
12437 def Exec(self, feedback_fn):
12438 """Execute the test delay opcode, with the wanted repetitions.
12441 if self.op.repeat == 0:
12444 top_value = self.op.repeat - 1
12445 for i in range(self.op.repeat):
12446 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12450 class LUTestJqueue(NoHooksLU):
12451 """Utility LU to test some aspects of the job queue.
12456 # Must be lower than default timeout for WaitForJobChange to see whether it
12457 # notices changed jobs
12458 _CLIENT_CONNECT_TIMEOUT = 20.0
12459 _CLIENT_CONFIRM_TIMEOUT = 60.0
12462 def _NotifyUsingSocket(cls, cb, errcls):
12463 """Opens a Unix socket and waits for another program to connect.
12466 @param cb: Callback to send socket name to client
12467 @type errcls: class
12468 @param errcls: Exception class to use for errors
12471 # Using a temporary directory as there's no easy way to create temporary
12472 # sockets without writing a custom loop around tempfile.mktemp and
12474 tmpdir = tempfile.mkdtemp()
12476 tmpsock = utils.PathJoin(tmpdir, "sock")
12478 logging.debug("Creating temporary socket at %s", tmpsock)
12479 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12484 # Send details to client
12487 # Wait for client to connect before continuing
12488 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12490 (conn, _) = sock.accept()
12491 except socket.error, err:
12492 raise errcls("Client didn't connect in time (%s)" % err)
12496 # Remove as soon as client is connected
12497 shutil.rmtree(tmpdir)
12499 # Wait for client to close
12502 # pylint: disable-msg=E1101
12503 # Instance of '_socketobject' has no ... member
12504 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12506 except socket.error, err:
12507 raise errcls("Client failed to confirm notification (%s)" % err)
12511 def _SendNotification(self, test, arg, sockname):
12512 """Sends a notification to the client.
12515 @param test: Test name
12516 @param arg: Test argument (depends on test)
12517 @type sockname: string
12518 @param sockname: Socket path
12521 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12523 def _Notify(self, prereq, test, arg):
12524 """Notifies the client of a test.
12527 @param prereq: Whether this is a prereq-phase test
12529 @param test: Test name
12530 @param arg: Test argument (depends on test)
12534 errcls = errors.OpPrereqError
12536 errcls = errors.OpExecError
12538 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12542 def CheckArguments(self):
12543 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12544 self.expandnames_calls = 0
12546 def ExpandNames(self):
12547 checkargs_calls = getattr(self, "checkargs_calls", 0)
12548 if checkargs_calls < 1:
12549 raise errors.ProgrammerError("CheckArguments was not called")
12551 self.expandnames_calls += 1
12553 if self.op.notify_waitlock:
12554 self._Notify(True, constants.JQT_EXPANDNAMES, None)
12556 self.LogInfo("Expanding names")
12558 # Get lock on master node (just to get a lock, not for a particular reason)
12559 self.needed_locks = {
12560 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12563 def Exec(self, feedback_fn):
12564 if self.expandnames_calls < 1:
12565 raise errors.ProgrammerError("ExpandNames was not called")
12567 if self.op.notify_exec:
12568 self._Notify(False, constants.JQT_EXEC, None)
12570 self.LogInfo("Executing")
12572 if self.op.log_messages:
12573 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12574 for idx, msg in enumerate(self.op.log_messages):
12575 self.LogInfo("Sending log message %s", idx + 1)
12576 feedback_fn(constants.JQT_MSGPREFIX + msg)
12577 # Report how many test messages have been sent
12578 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12581 raise errors.OpExecError("Opcode failure was requested")
12586 class IAllocator(object):
12587 """IAllocator framework.
12589 An IAllocator instance has three sets of attributes:
12590 - cfg that is needed to query the cluster
12591 - input data (all members of the _KEYS class attribute are required)
12592 - four buffer attributes (in|out_data|text), that represent the
12593 input (to the external script) in text and data structure format,
12594 and the output from it, again in two formats
12595 - the result variables from the script (success, info, nodes) for
12599 # pylint: disable-msg=R0902
12600 # lots of instance attributes
12602 def __init__(self, cfg, rpc, mode, **kwargs):
12605 # init buffer variables
12606 self.in_text = self.out_text = self.in_data = self.out_data = None
12607 # init all input fields so that pylint is happy
12609 self.memory = self.disks = self.disk_template = None
12610 self.os = self.tags = self.nics = self.vcpus = None
12611 self.hypervisor = None
12612 self.relocate_from = None
12614 self.evac_nodes = None
12615 self.instances = None
12616 self.evac_mode = None
12617 self.target_groups = []
12619 self.required_nodes = None
12620 # init result fields
12621 self.success = self.info = self.result = None
12624 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12626 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12627 " IAllocator" % self.mode)
12629 keyset = [n for (n, _) in keydata]
12632 if key not in keyset:
12633 raise errors.ProgrammerError("Invalid input parameter '%s' to"
12634 " IAllocator" % key)
12635 setattr(self, key, kwargs[key])
12638 if key not in kwargs:
12639 raise errors.ProgrammerError("Missing input parameter '%s' to"
12640 " IAllocator" % key)
12641 self._BuildInputData(compat.partial(fn, self), keydata)
12643 def _ComputeClusterData(self):
12644 """Compute the generic allocator input data.
12646 This is the data that is independent of the actual operation.
12650 cluster_info = cfg.GetClusterInfo()
12653 "version": constants.IALLOCATOR_VERSION,
12654 "cluster_name": cfg.GetClusterName(),
12655 "cluster_tags": list(cluster_info.GetTags()),
12656 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12657 # we don't have job IDs
12659 ninfo = cfg.GetAllNodesInfo()
12660 iinfo = cfg.GetAllInstancesInfo().values()
12661 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12664 node_list = [n.name for n in ninfo.values() if n.vm_capable]
12666 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12667 hypervisor_name = self.hypervisor
12668 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12669 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12671 hypervisor_name = cluster_info.enabled_hypervisors[0]
12673 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12676 self.rpc.call_all_instances_info(node_list,
12677 cluster_info.enabled_hypervisors)
12679 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12681 config_ndata = self._ComputeBasicNodeData(ninfo)
12682 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12683 i_list, config_ndata)
12684 assert len(data["nodes"]) == len(ninfo), \
12685 "Incomplete node data computed"
12687 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12689 self.in_data = data
12692 def _ComputeNodeGroupData(cfg):
12693 """Compute node groups data.
12696 ng = dict((guuid, {
12697 "name": gdata.name,
12698 "alloc_policy": gdata.alloc_policy,
12700 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12705 def _ComputeBasicNodeData(node_cfg):
12706 """Compute global node data.
12709 @returns: a dict of name: (node dict, node config)
12712 # fill in static (config-based) values
12713 node_results = dict((ninfo.name, {
12714 "tags": list(ninfo.GetTags()),
12715 "primary_ip": ninfo.primary_ip,
12716 "secondary_ip": ninfo.secondary_ip,
12717 "offline": ninfo.offline,
12718 "drained": ninfo.drained,
12719 "master_candidate": ninfo.master_candidate,
12720 "group": ninfo.group,
12721 "master_capable": ninfo.master_capable,
12722 "vm_capable": ninfo.vm_capable,
12724 for ninfo in node_cfg.values())
12726 return node_results
12729 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
12731 """Compute global node data.
12733 @param node_results: the basic node structures as filled from the config
12736 # make a copy of the current dict
12737 node_results = dict(node_results)
12738 for nname, nresult in node_data.items():
12739 assert nname in node_results, "Missing basic data for node %s" % nname
12740 ninfo = node_cfg[nname]
12742 if not (ninfo.offline or ninfo.drained):
12743 nresult.Raise("Can't get data for node %s" % nname)
12744 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
12746 remote_info = nresult.payload
12748 for attr in ["memory_total", "memory_free", "memory_dom0",
12749 "vg_size", "vg_free", "cpu_total"]:
12750 if attr not in remote_info:
12751 raise errors.OpExecError("Node '%s' didn't return attribute"
12752 " '%s'" % (nname, attr))
12753 if not isinstance(remote_info[attr], int):
12754 raise errors.OpExecError("Node '%s' returned invalid value"
12756 (nname, attr, remote_info[attr]))
12757 # compute memory used by primary instances
12758 i_p_mem = i_p_up_mem = 0
12759 for iinfo, beinfo in i_list:
12760 if iinfo.primary_node == nname:
12761 i_p_mem += beinfo[constants.BE_MEMORY]
12762 if iinfo.name not in node_iinfo[nname].payload:
12765 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
12766 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
12767 remote_info["memory_free"] -= max(0, i_mem_diff)
12770 i_p_up_mem += beinfo[constants.BE_MEMORY]
12772 # compute memory used by instances
12774 "total_memory": remote_info["memory_total"],
12775 "reserved_memory": remote_info["memory_dom0"],
12776 "free_memory": remote_info["memory_free"],
12777 "total_disk": remote_info["vg_size"],
12778 "free_disk": remote_info["vg_free"],
12779 "total_cpus": remote_info["cpu_total"],
12780 "i_pri_memory": i_p_mem,
12781 "i_pri_up_memory": i_p_up_mem,
12783 pnr_dyn.update(node_results[nname])
12784 node_results[nname] = pnr_dyn
12786 return node_results
12789 def _ComputeInstanceData(cluster_info, i_list):
12790 """Compute global instance data.
12794 for iinfo, beinfo in i_list:
12796 for nic in iinfo.nics:
12797 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
12801 "mode": filled_params[constants.NIC_MODE],
12802 "link": filled_params[constants.NIC_LINK],
12804 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
12805 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
12806 nic_data.append(nic_dict)
12808 "tags": list(iinfo.GetTags()),
12809 "admin_up": iinfo.admin_up,
12810 "vcpus": beinfo[constants.BE_VCPUS],
12811 "memory": beinfo[constants.BE_MEMORY],
12813 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
12815 "disks": [{constants.IDISK_SIZE: dsk.size,
12816 constants.IDISK_MODE: dsk.mode}
12817 for dsk in iinfo.disks],
12818 "disk_template": iinfo.disk_template,
12819 "hypervisor": iinfo.hypervisor,
12821 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
12823 instance_data[iinfo.name] = pir
12825 return instance_data
12827 def _AddNewInstance(self):
12828 """Add new instance data to allocator structure.
12830 This in combination with _AllocatorGetClusterData will create the
12831 correct structure needed as input for the allocator.
12833 The checks for the completeness of the opcode must have already been
12837 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
12839 if self.disk_template in constants.DTS_INT_MIRROR:
12840 self.required_nodes = 2
12842 self.required_nodes = 1
12846 "disk_template": self.disk_template,
12849 "vcpus": self.vcpus,
12850 "memory": self.memory,
12851 "disks": self.disks,
12852 "disk_space_total": disk_space,
12854 "required_nodes": self.required_nodes,
12855 "hypervisor": self.hypervisor,
12860 def _AddRelocateInstance(self):
12861 """Add relocate instance data to allocator structure.
12863 This in combination with _IAllocatorGetClusterData will create the
12864 correct structure needed as input for the allocator.
12866 The checks for the completeness of the opcode must have already been
12870 instance = self.cfg.GetInstanceInfo(self.name)
12871 if instance is None:
12872 raise errors.ProgrammerError("Unknown instance '%s' passed to"
12873 " IAllocator" % self.name)
12875 if instance.disk_template not in constants.DTS_MIRRORED:
12876 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
12877 errors.ECODE_INVAL)
12879 if instance.disk_template in constants.DTS_INT_MIRROR and \
12880 len(instance.secondary_nodes) != 1:
12881 raise errors.OpPrereqError("Instance has not exactly one secondary node",
12882 errors.ECODE_STATE)
12884 self.required_nodes = 1
12885 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
12886 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
12890 "disk_space_total": disk_space,
12891 "required_nodes": self.required_nodes,
12892 "relocate_from": self.relocate_from,
12896 def _AddEvacuateNodes(self):
12897 """Add evacuate nodes data to allocator structure.
12901 "evac_nodes": self.evac_nodes
12905 def _AddNodeEvacuate(self):
12906 """Get data for node-evacuate requests.
12910 "instances": self.instances,
12911 "evac_mode": self.evac_mode,
12914 def _AddChangeGroup(self):
12915 """Get data for node-evacuate requests.
12919 "instances": self.instances,
12920 "target_groups": self.target_groups,
12923 def _BuildInputData(self, fn, keydata):
12924 """Build input data structures.
12927 self._ComputeClusterData()
12930 request["type"] = self.mode
12931 for keyname, keytype in keydata:
12932 if keyname not in request:
12933 raise errors.ProgrammerError("Request parameter %s is missing" %
12935 val = request[keyname]
12936 if not keytype(val):
12937 raise errors.ProgrammerError("Request parameter %s doesn't pass"
12938 " validation, value %s, expected"
12939 " type %s" % (keyname, val, keytype))
12940 self.in_data["request"] = request
12942 self.in_text = serializer.Dump(self.in_data)
12944 _STRING_LIST = ht.TListOf(ht.TString)
12945 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
12946 # pylint: disable-msg=E1101
12947 # Class '...' has no 'OP_ID' member
12948 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
12949 opcodes.OpInstanceMigrate.OP_ID,
12950 opcodes.OpInstanceReplaceDisks.OP_ID])
12954 ht.TListOf(ht.TAnd(ht.TIsLength(3),
12955 ht.TItems([ht.TNonEmptyString,
12956 ht.TNonEmptyString,
12957 ht.TListOf(ht.TNonEmptyString),
12960 ht.TListOf(ht.TAnd(ht.TIsLength(2),
12961 ht.TItems([ht.TNonEmptyString,
12964 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
12965 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
12968 constants.IALLOCATOR_MODE_ALLOC:
12971 ("name", ht.TString),
12972 ("memory", ht.TInt),
12973 ("disks", ht.TListOf(ht.TDict)),
12974 ("disk_template", ht.TString),
12975 ("os", ht.TString),
12976 ("tags", _STRING_LIST),
12977 ("nics", ht.TListOf(ht.TDict)),
12978 ("vcpus", ht.TInt),
12979 ("hypervisor", ht.TString),
12981 constants.IALLOCATOR_MODE_RELOC:
12982 (_AddRelocateInstance,
12983 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
12985 constants.IALLOCATOR_MODE_MEVAC:
12986 (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
12987 ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
12988 constants.IALLOCATOR_MODE_NODE_EVAC:
12989 (_AddNodeEvacuate, [
12990 ("instances", _STRING_LIST),
12991 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
12993 constants.IALLOCATOR_MODE_CHG_GROUP:
12994 (_AddChangeGroup, [
12995 ("instances", _STRING_LIST),
12996 ("target_groups", _STRING_LIST),
13000 def Run(self, name, validate=True, call_fn=None):
13001 """Run an instance allocator and return the results.
13004 if call_fn is None:
13005 call_fn = self.rpc.call_iallocator_runner
13007 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13008 result.Raise("Failure while running the iallocator script")
13010 self.out_text = result.payload
13012 self._ValidateResult()
13014 def _ValidateResult(self):
13015 """Process the allocator results.
13017 This will process and if successful save the result in
13018 self.out_data and the other parameters.
13022 rdict = serializer.Load(self.out_text)
13023 except Exception, err:
13024 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13026 if not isinstance(rdict, dict):
13027 raise errors.OpExecError("Can't parse iallocator results: not a dict")
13029 # TODO: remove backwards compatiblity in later versions
13030 if "nodes" in rdict and "result" not in rdict:
13031 rdict["result"] = rdict["nodes"]
13034 for key in "success", "info", "result":
13035 if key not in rdict:
13036 raise errors.OpExecError("Can't parse iallocator results:"
13037 " missing key '%s'" % key)
13038 setattr(self, key, rdict[key])
13040 if not self._result_check(self.result):
13041 raise errors.OpExecError("Iallocator returned invalid result,"
13042 " expected %s, got %s" %
13043 (self._result_check, self.result),
13044 errors.ECODE_INVAL)
13046 if self.mode in (constants.IALLOCATOR_MODE_RELOC,
13047 constants.IALLOCATOR_MODE_MEVAC):
13048 node2group = dict((name, ndata["group"])
13049 for (name, ndata) in self.in_data["nodes"].items())
13051 fn = compat.partial(self._NodesToGroups, node2group,
13052 self.in_data["nodegroups"])
13054 if self.mode == constants.IALLOCATOR_MODE_RELOC:
13055 assert self.relocate_from is not None
13056 assert self.required_nodes == 1
13058 request_groups = fn(self.relocate_from)
13059 result_groups = fn(rdict["result"])
13061 if result_groups != request_groups:
13062 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13063 " differ from original groups (%s)" %
13064 (utils.CommaJoin(result_groups),
13065 utils.CommaJoin(request_groups)))
13066 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
13067 request_groups = fn(self.evac_nodes)
13068 for (instance_name, secnode) in self.result:
13069 result_groups = fn([secnode])
13070 if result_groups != request_groups:
13071 raise errors.OpExecError("Iallocator returned new secondary node"
13072 " '%s' (group '%s') for instance '%s'"
13073 " which is not in original group '%s'" %
13074 (secnode, utils.CommaJoin(result_groups),
13076 utils.CommaJoin(request_groups)))
13078 raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)
13080 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13081 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13083 self.out_data = rdict
13086 def _NodesToGroups(node2group, groups, nodes):
13087 """Returns a list of unique group names for a list of nodes.
13089 @type node2group: dict
13090 @param node2group: Map from node name to group UUID
13092 @param groups: Group information
13094 @param nodes: Node names
13101 group_uuid = node2group[node]
13103 # Ignore unknown node
13107 group = groups[group_uuid]
13109 # Can't find group, let's use UUID
13110 group_name = group_uuid
13112 group_name = group["name"]
13114 result.add(group_name)
13116 return sorted(result)
13119 class LUTestAllocator(NoHooksLU):
13120 """Run allocator tests.
13122 This LU runs the allocator tests
13125 def CheckPrereq(self):
13126 """Check prerequisites.
13128 This checks the opcode parameters depending on the director and mode test.
13131 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13132 for attr in ["memory", "disks", "disk_template",
13133 "os", "tags", "nics", "vcpus"]:
13134 if not hasattr(self.op, attr):
13135 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13136 attr, errors.ECODE_INVAL)
13137 iname = self.cfg.ExpandInstanceName(self.op.name)
13138 if iname is not None:
13139 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13140 iname, errors.ECODE_EXISTS)
13141 if not isinstance(self.op.nics, list):
13142 raise errors.OpPrereqError("Invalid parameter 'nics'",
13143 errors.ECODE_INVAL)
13144 if not isinstance(self.op.disks, list):
13145 raise errors.OpPrereqError("Invalid parameter 'disks'",
13146 errors.ECODE_INVAL)
13147 for row in self.op.disks:
13148 if (not isinstance(row, dict) or
13149 constants.IDISK_SIZE not in row or
13150 not isinstance(row[constants.IDISK_SIZE], int) or
13151 constants.IDISK_MODE not in row or
13152 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13153 raise errors.OpPrereqError("Invalid contents of the 'disks'"
13154 " parameter", errors.ECODE_INVAL)
13155 if self.op.hypervisor is None:
13156 self.op.hypervisor = self.cfg.GetHypervisorType()
13157 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13158 fname = _ExpandInstanceName(self.cfg, self.op.name)
13159 self.op.name = fname
13160 self.relocate_from = \
13161 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13162 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
13163 if not hasattr(self.op, "evac_nodes"):
13164 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
13165 " opcode input", errors.ECODE_INVAL)
13166 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13167 constants.IALLOCATOR_MODE_NODE_EVAC):
13168 if not self.op.instances:
13169 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13170 self.op.instances = _GetWantedInstances(self, self.op.instances)
13172 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13173 self.op.mode, errors.ECODE_INVAL)
13175 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13176 if self.op.allocator is None:
13177 raise errors.OpPrereqError("Missing allocator name",
13178 errors.ECODE_INVAL)
13179 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13180 raise errors.OpPrereqError("Wrong allocator test '%s'" %
13181 self.op.direction, errors.ECODE_INVAL)
13183 def Exec(self, feedback_fn):
13184 """Run the allocator test.
13187 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13188 ial = IAllocator(self.cfg, self.rpc,
13191 memory=self.op.memory,
13192 disks=self.op.disks,
13193 disk_template=self.op.disk_template,
13197 vcpus=self.op.vcpus,
13198 hypervisor=self.op.hypervisor,
13200 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13201 ial = IAllocator(self.cfg, self.rpc,
13204 relocate_from=list(self.relocate_from),
13206 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
13207 ial = IAllocator(self.cfg, self.rpc,
13209 evac_nodes=self.op.evac_nodes)
13210 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13211 ial = IAllocator(self.cfg, self.rpc,
13213 instances=self.op.instances,
13214 target_groups=self.op.target_groups)
13215 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13216 ial = IAllocator(self.cfg, self.rpc,
13218 instances=self.op.instances,
13219 evac_mode=self.op.evac_mode)
13221 raise errors.ProgrammerError("Uncatched mode %s in"
13222 " LUTestAllocator.Exec", self.op.mode)
13224 if self.op.direction == constants.IALLOCATOR_DIR_IN:
13225 result = ial.in_text
13227 ial.Run(self.op.allocator, validate=False)
13228 result = ial.out_text
13232 #: Query type implementations
13234 constants.QR_INSTANCE: _InstanceQuery,
13235 constants.QR_NODE: _NodeQuery,
13236 constants.QR_GROUP: _GroupQuery,
13237 constants.QR_OS: _OsQuery,
13240 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13243 def _GetQueryImplementation(name):
13244 """Returns the implemtnation for a query type.
13246 @param name: Query type, must be one of L{constants.QR_VIA_OP}
13250 return _QUERY_IMPL[name]
13252 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13253 errors.ECODE_INVAL)