4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
63 import ganeti.masterd.instance # pylint: disable-msg=W0611
66 def _SupportsOob(cfg, node):
67 """Tells if node supports OOB.
69 @type cfg: L{config.ConfigWriter}
70 @param cfg: The cluster configuration
71 @type node: L{objects.Node}
73 @return: The OOB script if supported or an empty string otherwise
76 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
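# Illustrative sketch (not part of the original module; the error message and
# error code are hypothetical): callers typically treat the returned OOB
# program path as a truthy/falsy value, roughly like
#   oob_program = _SupportsOob(self.cfg, node)
#   if not oob_program:
#     raise errors.OpPrereqError("OOB is not supported for node %s" % node.name,
#                                errors.ECODE_STATE)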
80 """Data container for LU results with jobs.
82 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
83 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
84 contained in the C{jobs} attribute and include the job IDs in the opcode result.
88 def __init__(self, jobs, **kwargs):
89 """Initializes this class.
91 Additional return values can be specified as keyword arguments.
93 @type jobs: list of lists of L{opcodes.OpCode}
94 @param jobs: A list of lists of opcode objects
101 class LogicalUnit(object):
102 """Logical Unit base class.
104 Subclasses must follow these rules:
105 - implement ExpandNames
106 - implement CheckPrereq (except when tasklets are used)
107 - implement Exec (except when tasklets are used)
108 - implement BuildHooksEnv
109 - implement BuildHooksNodes
110 - redefine HPATH and HTYPE
111 - optionally redefine their run requirements:
112 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
114 Note that all commands require root permissions.
116 @ivar dry_run_result: the value (if any) that will be returned to the caller
117 in dry-run mode (signalled by opcode dry_run parameter)
124 def __init__(self, processor, op, context, rpc):
125 """Constructor for LogicalUnit.
127 This needs to be overridden in derived classes in order to check op validity.
131 self.proc = processor
133 self.cfg = context.cfg
134 self.glm = context.glm
135 self.context = context
137 # Dicts used to declare locking needs to mcpu
138 self.needed_locks = None
139 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
141 self.remove_locks = {}
142 # Used to force good behavior when calling helper functions
143 self.recalculate_locks = {}
145 self.Log = processor.Log # pylint: disable-msg=C0103
146 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
147 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
148 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
149 # support for dry-run
150 self.dry_run_result = None
151 # support for generic debug attribute
152 if (not hasattr(self.op, "debug_level") or
153 not isinstance(self.op.debug_level, int)):
154 self.op.debug_level = 0
159 # Validate opcode parameters and set defaults
160 self.op.Validate(True)
162 self.CheckArguments()
164 def CheckArguments(self):
165 """Check syntactic validity for the opcode arguments.
167 This method is for doing a simple syntactic check and ensuring the
168 validity of opcode parameters, without any cluster-related
169 checks. While the same can be accomplished in ExpandNames and/or
170 CheckPrereq, doing it separately is better because:
172 - ExpandNames is left as a purely lock-related function
173 - CheckPrereq is run after we have acquired locks (and possible
176 The function is allowed to change the self.op attribute so that
177 later methods need no longer worry about missing parameters.
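# A minimal, hypothetical CheckArguments sketch (not taken from this module;
# the opcode fields "iallocator" and "remote_node" are assumed only for the
# sake of the example), showing the kind of purely syntactic check meant here:
#   def CheckArguments(self):
#     if self.op.iallocator and self.op.remote_node:
#       raise errors.OpPrereqError("Give either an iallocator or a remote node,"
#                                  " not both", errors.ECODE_INVAL)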
182 def ExpandNames(self):
183 """Expand names for this LU.
185 This method is called before starting to execute the opcode, and it should
186 update all the parameters of the opcode to their canonical form (e.g. a
187 short node name must be fully expanded after this method has successfully
188 completed). This way locking, hooks, logging, etc. can work correctly.
190 LUs which implement this method must also populate the self.needed_locks
191 member, as a dict with lock levels as keys, and a list of needed lock names
194 - use an empty dict if you don't need any lock
195 - if you don't need any lock at a particular level omit that level
196 - don't put anything for the BGL level
197 - if you want all locks at a level use locking.ALL_SET as a value
199 If you need to share locks (rather than acquire them exclusively) at one
200 level you can modify self.share_locks, setting a true value (usually 1) for
201 that level. By default locks are not shared.
203 This function can also define a list of tasklets, which then will be
204 executed in order instead of the usual LU-level CheckPrereq and Exec
205 functions, if those are not defined by the LU.
209 # Acquire all nodes and one instance
210 self.needed_locks = {
211 locking.LEVEL_NODE: locking.ALL_SET,
212 locking.LEVEL_INSTANCE: ['instance1.example.com'],
214 # Acquire just two nodes
215 self.needed_locks = {
216 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
219 self.needed_locks = {} # No, you can't leave it to the default value None
222 # The implementation of this method is mandatory only if the new LU is
223 # concurrent, so that old LUs don't need to be changed all at the same time.
226 self.needed_locks = {} # Exclusive LUs don't need locks.
228 raise NotImplementedError
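# Hypothetical sketch of a concurrent LU's ExpandNames (the combination of
# helpers shown is an illustrative assumption, not a prescribed pattern):
# expand the instance name, then defer the node lock computation to
# DeclareLocks while sharing the node locks:
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#     self.share_locks[locking.LEVEL_NODE] = 1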
230 def DeclareLocks(self, level):
231 """Declare LU locking needs for a level
233 While most LUs can just declare their locking needs at ExpandNames time,
234 sometimes there's the need to calculate some locks after having acquired
235 the ones before. This function is called just before acquiring locks at a
236 particular level, but after acquiring the ones at lower levels, and permits
237 such calculations. It can be used to modify self.needed_locks, and by
238 default it does nothing.
240 This function is only called if you have something already set in
241 self.needed_locks for the level.
243 @param level: Locking level which is going to be locked
244 @type level: member of ganeti.locking.LEVELS
248 def CheckPrereq(self):
249 """Check prerequisites for this LU.
251 This method should check that the prerequisites for the execution
252 of this LU are fulfilled. It can do internode communication, but
253 it should be idempotent - no cluster or system changes are allowed.
256 The method should raise errors.OpPrereqError in case something is
257 not fulfilled. Its return value is ignored.
259 This method should also update all the parameters of the opcode to
260 their canonical form if it hasn't been done by ExpandNames before.
263 if self.tasklets is not None:
264 for (idx, tl) in enumerate(self.tasklets):
265 logging.debug("Checking prerequisites for tasklet %s/%s",
266 idx + 1, len(self.tasklets))
271 def Exec(self, feedback_fn):
274 This method should implement the actual work. It should raise
275 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
279 if self.tasklets is not None:
280 for (idx, tl) in enumerate(self.tasklets):
281 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
284 raise NotImplementedError
286 def BuildHooksEnv(self):
287 """Build hooks environment for this LU.
290 @return: Dictionary containing the environment that will be used for
291 running the hooks for this LU. The keys of the dict must not be prefixed
292 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
293 will extend the environment with additional variables. If no environment
294 should be defined, an empty dictionary should be returned (not C{None}).
295 @note: If the C{HPATH} attribute of the LU class is C{None}, this function will not be called.
299 raise NotImplementedError
301 def BuildHooksNodes(self):
302 """Build list of nodes to run LU's hooks.
304 @rtype: tuple; (list, list)
305 @return: Tuple containing a list of node names on which the hook
306 should run before the execution and a list of node names on which the
307 hook should run after the execution. If there are no nodes to return,
308 an empty list should be used (and not None).
309 @note: If the C{HPATH} attribute of the LU class is C{None}, this function will not be called.
313 raise NotImplementedError
315 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
316 """Notify the LU about the results of its hooks.
318 This method is called every time a hooks phase is executed, and notifies
319 the Logical Unit about the hooks' result. The LU can then use it to alter
320 its result based on the hooks. By default the method does nothing and the
321 previous result is passed back unchanged but any LU can define it if it
322 wants to use the local cluster hook-scripts somehow.
324 @param phase: one of L{constants.HOOKS_PHASE_POST} or
325 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
326 @param hook_results: the results of the multi-node hooks rpc call
327 @param feedback_fn: function used to send feedback back to the caller
328 @param lu_result: the previous Exec result this LU had, or None
330 @return: the new Exec result, based on the previous result
334 # API must be kept, thus we ignore the "unused argument" and "could
335 # be a function" warnings
336 # pylint: disable-msg=W0613,R0201
339 def _ExpandAndLockInstance(self):
340 """Helper function to expand and lock an instance.
342 Many LUs that work on an instance take its name in self.op.instance_name
343 and need to expand it and then declare the expanded name for locking. This
344 function does it, and then updates self.op.instance_name to the expanded
345 name. It also initializes needed_locks as a dict, if this hasn't been done before.
349 if self.needed_locks is None:
350 self.needed_locks = {}
352 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
353 "_ExpandAndLockInstance called with instance-level locks set"
354 self.op.instance_name = _ExpandInstanceName(self.cfg,
355 self.op.instance_name)
356 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
358 def _LockInstancesNodes(self, primary_only=False):
359 """Helper function to declare instances' nodes for locking.
361 This function should be called after locking one or more instances to lock
362 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
363 with all primary or secondary nodes for instances already locked and
364 present in self.needed_locks[locking.LEVEL_INSTANCE].
366 It should be called from DeclareLocks, and for safety only works if
367 self.recalculate_locks[locking.LEVEL_NODE] is set.
369 In the future it may grow parameters to lock just some instances' nodes, or
370 to lock just primary or secondary nodes, if needed.
372 It should be called in DeclareLocks in a way similar to::
374 if level == locking.LEVEL_NODE:
375 self._LockInstancesNodes()
377 @type primary_only: boolean
378 @param primary_only: only lock primary nodes of locked instances
381 assert locking.LEVEL_NODE in self.recalculate_locks, \
382 "_LockInstancesNodes helper function called with no nodes to recalculate"
384 # TODO: check if we've really been called with the instance locks held
386 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
387 # future we might want to have different behaviors depending on the value
388 # of self.recalculate_locks[locking.LEVEL_NODE]
390 for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
391 instance = self.context.cfg.GetInstanceInfo(instance_name)
392 wanted_nodes.append(instance.primary_node)
394 wanted_nodes.extend(instance.secondary_nodes)
396 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
397 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
398 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
399 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
401 del self.recalculate_locks[locking.LEVEL_NODE]
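# Hedged usage sketch (hypothetical LU, not from this module): an LU that set
# recalculate_locks in ExpandNames would typically pair it with a DeclareLocks
# such as
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       # e.g. only the primary nodes matter for a start-up style operation
#       self._LockInstancesNodes(primary_only=True)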
404 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
405 """Simple LU which runs no hooks.
407 This LU is intended as a parent for other LogicalUnits which will
408 run no hooks, in order to reduce duplicate code.
414 def BuildHooksEnv(self):
415 """Empty BuildHooksEnv for NoHooksLu.
417 This just raises an error.
420 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
422 def BuildHooksNodes(self):
423 """Empty BuildHooksNodes for NoHooksLU.
426 raise AssertionError("BuildHooksNodes called for NoHooksLU")
430 """Tasklet base class.
432 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
433 they can mix legacy code with tasklets. Locking needs to be done in the LU,
434 tasklets know nothing about locks.
436 Subclasses must follow these rules:
437 - Implement CheckPrereq
441 def __init__(self, lu):
448 def CheckPrereq(self):
449 """Check prerequisites for this tasklets.
451 This method should check whether the prerequisites for the execution of
452 this tasklet are fulfilled. It can do internode communication, but it
453 should be idempotent - no cluster or system changes are allowed.
455 The method should raise errors.OpPrereqError in case something is not
456 fulfilled. Its return value is ignored.
458 This method should also update all parameters to their canonical form if it
459 hasn't been done before.
464 def Exec(self, feedback_fn):
465 """Execute the tasklet.
467 This method should implement the actual work. It should raise
468 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
472 raise NotImplementedError
476 """Base for query utility classes.
479 #: Attribute holding field definitions
482 def __init__(self, filter_, fields, use_locking):
483 """Initializes this class.
486 self.use_locking = use_locking
488 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
490 self.requested_data = self.query.RequestedData()
491 self.names = self.query.RequestedNames()
493 # Sort only if no names were requested
494 self.sort_by_name = not self.names
496 self.do_locking = None
499 def _GetNames(self, lu, all_names, lock_level):
500 """Helper function to determine names asked for in the query.
504 names = lu.glm.list_owned(lock_level)
508 if self.wanted == locking.ALL_SET:
509 assert not self.names
510 # caller didn't specify names, so ordering is not important
511 return utils.NiceSort(names)
513 # caller specified names and we must keep the same order
515 assert not self.do_locking or lu.glm.is_owned(lock_level)
517 missing = set(self.wanted).difference(names)
519 raise errors.OpExecError("Some items were removed before retrieving"
520 " their data: %s" % missing)
522 # Return expanded names
525 def ExpandNames(self, lu):
526 """Expand names for this query.
528 See L{LogicalUnit.ExpandNames}.
531 raise NotImplementedError()
533 def DeclareLocks(self, lu, level):
534 """Declare locks for this query.
536 See L{LogicalUnit.DeclareLocks}.
539 raise NotImplementedError()
541 def _GetQueryData(self, lu):
542 """Collects all data for this query.
544 @return: Query data object
547 raise NotImplementedError()
549 def NewStyleQuery(self, lu):
550 """Collect data and execute query.
553 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
554 sort_by_name=self.sort_by_name)
556 def OldStyleQuery(self, lu):
557 """Collect data and execute query.
560 return self.query.OldStyleQuery(self._GetQueryData(lu),
561 sort_by_name=self.sort_by_name)
564 def _GetWantedNodes(lu, nodes):
565 """Returns list of checked and expanded node names.
567 @type lu: L{LogicalUnit}
568 @param lu: the logical unit on whose behalf we execute
570 @param nodes: list of node names or None for all nodes
572 @return: the list of nodes, sorted
573 @raise errors.ProgrammerError: if the nodes parameter is wrong type
577 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
579 return utils.NiceSort(lu.cfg.GetNodeList())
582 def _GetWantedInstances(lu, instances):
583 """Returns list of checked and expanded instance names.
585 @type lu: L{LogicalUnit}
586 @param lu: the logical unit on whose behalf we execute
587 @type instances: list
588 @param instances: list of instance names or None for all instances
590 @return: the list of instances, sorted
591 @raise errors.OpPrereqError: if the instances parameter is wrong type
592 @raise errors.OpPrereqError: if any of the passed instances is not found
596 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
598 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
602 def _GetUpdatedParams(old_params, update_dict,
603 use_default=True, use_none=False):
604 """Return the new version of a parameter dictionary.
606 @type old_params: dict
607 @param old_params: old parameters
608 @type update_dict: dict
609 @param update_dict: dict containing new parameter values, or
610 constants.VALUE_DEFAULT to reset the parameter to its default
612 @type use_default: boolean
613 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
614 values as 'to be deleted' values
615 @type use_none: boolean
616 @param use_none: whether to recognise C{None} values as 'to be
619 @return: the new parameter dictionary
622 params_copy = copy.deepcopy(old_params)
623 for key, val in update_dict.iteritems():
624 if ((use_default and val == constants.VALUE_DEFAULT) or
625 (use_none and val is None)):
631 params_copy[key] = val
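# Worked example (all values hypothetical): with
#   old_params  = {"mem": 128, "vcpus": 1}
#   update_dict = {"mem": constants.VALUE_DEFAULT, "vcpus": 2, "extra": "x"}
# and the default use_default=True, the result is {"vcpus": 2, "extra": "x"}:
# "mem" is dropped (reset to its default), "vcpus" is updated and "extra" added.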
635 def _ReleaseLocks(lu, level, names=None, keep=None):
636 """Releases locks owned by an LU.
638 @type lu: L{LogicalUnit}
639 @param level: Lock level
640 @type names: list or None
641 @param names: Names of locks to release
642 @type keep: list or None
643 @param keep: Names of locks to retain
646 assert not (keep is not None and names is not None), \
647 "Only one of the 'names' and the 'keep' parameters can be given"
649 if names is not None:
650 should_release = names.__contains__
652 should_release = lambda name: name not in keep
654 should_release = None
660 # Determine which locks to release
661 for name in lu.glm.list_owned(level):
662 if should_release(name):
667 assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
669 # Release just some locks
670 lu.glm.release(level, names=release)
672 assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
675 lu.glm.release(level)
677 assert not lu.glm.is_owned(level), "No locks should be owned"
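# Illustrative calls (lock names are hypothetical): once an LU has narrowed
# down the nodes it really needs, it can give the rest back, e.g.
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
# or release an explicit set of names:
#   _ReleaseLocks(self, locking.LEVEL_NODE, names=unused_nodes)
# Passing neither names nor keep releases every lock held at that level.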
680 def _RunPostHook(lu, node_name):
681 """Runs the post-hook for an opcode on a single node.
684 hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
686 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
688 # pylint: disable-msg=W0702
689 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
692 def _CheckOutputFields(static, dynamic, selected):
693 """Checks whether all selected fields are valid.
695 @type static: L{utils.FieldSet}
696 @param static: static fields set
697 @type dynamic: L{utils.FieldSet}
698 @param dynamic: dynamic fields set
705 delta = f.NonMatching(selected)
707 raise errors.OpPrereqError("Unknown output fields selected: %s"
708 % ",".join(delta), errors.ECODE_INVAL)
711 def _CheckGlobalHvParams(params):
712 """Validates that given hypervisor params are not global ones.
714 This will ensure that instances don't get customised versions of global parameters.
718 used_globals = constants.HVC_GLOBALS.intersection(params)
720 msg = ("The following hypervisor parameters are global and cannot"
721 " be customized at instance level, please modify them at"
722 " cluster level: %s" % utils.CommaJoin(used_globals))
723 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
726 def _CheckNodeOnline(lu, node, msg=None):
727 """Ensure that a given node is online.
729 @param lu: the LU on behalf of which we make the check
730 @param node: the node to check
731 @param msg: if passed, should be a message to replace the default one
732 @raise errors.OpPrereqError: if the node is offline
736 msg = "Can't use offline node"
737 if lu.cfg.GetNodeInfo(node).offline:
738 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
741 def _CheckNodeNotDrained(lu, node):
742 """Ensure that a given node is not drained.
744 @param lu: the LU on behalf of which we make the check
745 @param node: the node to check
746 @raise errors.OpPrereqError: if the node is drained
749 if lu.cfg.GetNodeInfo(node).drained:
750 raise errors.OpPrereqError("Can't use drained node %s" % node,
754 def _CheckNodeVmCapable(lu, node):
755 """Ensure that a given node is vm capable.
757 @param lu: the LU on behalf of which we make the check
758 @param node: the node to check
759 @raise errors.OpPrereqError: if the node is not vm capable
762 if not lu.cfg.GetNodeInfo(node).vm_capable:
763 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
767 def _CheckNodeHasOS(lu, node, os_name, force_variant):
768 """Ensure that a node supports a given OS.
770 @param lu: the LU on behalf of which we make the check
771 @param node: the node to check
772 @param os_name: the OS to query about
773 @param force_variant: whether to ignore variant errors
774 @raise errors.OpPrereqError: if the node is not supporting the OS
777 result = lu.rpc.call_os_get(node, os_name)
778 result.Raise("OS '%s' not in supported OS list for node %s" %
780 prereq=True, ecode=errors.ECODE_INVAL)
781 if not force_variant:
782 _CheckOSVariant(result.payload, os_name)
785 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
786 """Ensure that a node has the given secondary ip.
788 @type lu: L{LogicalUnit}
789 @param lu: the LU on behalf of which we make the check
791 @param node: the node to check
792 @type secondary_ip: string
793 @param secondary_ip: the ip to check
794 @type prereq: boolean
795 @param prereq: whether to throw a prerequisite or an execute error
796 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
797 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
800 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
801 result.Raise("Failure checking secondary ip on node %s" % node,
802 prereq=prereq, ecode=errors.ECODE_ENVIRON)
803 if not result.payload:
804 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
805 " please fix and re-run this command" % secondary_ip)
807 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
809 raise errors.OpExecError(msg)
812 def _GetClusterDomainSecret():
813 """Reads the cluster domain secret.
816 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
820 def _CheckInstanceDown(lu, instance, reason):
821 """Ensure that an instance is not running."""
822 if instance.admin_up:
823 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
824 (instance.name, reason), errors.ECODE_STATE)
826 pnode = instance.primary_node
827 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
828 ins_l.Raise("Can't contact node %s for instance information" % pnode,
829 prereq=True, ecode=errors.ECODE_ENVIRON)
831 if instance.name in ins_l.payload:
832 raise errors.OpPrereqError("Instance %s is running, %s" %
833 (instance.name, reason), errors.ECODE_STATE)
836 def _ExpandItemName(fn, name, kind):
837 """Expand an item name.
839 @param fn: the function to use for expansion
840 @param name: requested item name
841 @param kind: text description ('Node' or 'Instance')
842 @return: the resolved (full) name
843 @raise errors.OpPrereqError: if the item is not found
847 if full_name is None:
848 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
853 def _ExpandNodeName(cfg, name):
854 """Wrapper over L{_ExpandItemName} for nodes."""
855 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
858 def _ExpandInstanceName(cfg, name):
859 """Wrapper over L{_ExpandItemName} for instance."""
860 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
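# Illustrative example (instance names are hypothetical): assuming the cluster
# knows an instance "web1.example.com", both calls below would return the full
# name, while an unknown name raises errors.OpPrereqError:
#   _ExpandInstanceName(self.cfg, "web1")
#   _ExpandInstanceName(self.cfg, "web1.example.com")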
863 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
864 memory, vcpus, nics, disk_template, disks,
865 bep, hvp, hypervisor_name, tags):
866 """Builds instance related env variables for hooks
868 This builds the hook environment from individual variables.
871 @param name: the name of the instance
872 @type primary_node: string
873 @param primary_node: the name of the instance's primary node
874 @type secondary_nodes: list
875 @param secondary_nodes: list of secondary nodes as strings
876 @type os_type: string
877 @param os_type: the name of the instance's OS
878 @type status: boolean
879 @param status: the should_run status of the instance
881 @param memory: the memory size of the instance
883 @param vcpus: the count of VCPUs the instance has
885 @param nics: list of tuples (ip, mac, mode, link) representing
886 the NICs the instance has
887 @type disk_template: string
888 @param disk_template: the disk template of the instance
890 @param disks: the list of (size, mode) pairs
892 @param bep: the backend parameters for the instance
894 @param hvp: the hypervisor parameters for the instance
895 @type hypervisor_name: string
896 @param hypervisor_name: the hypervisor for the instance
898 @param tags: list of instance tags as strings
900 @return: the hook environment for this instance
909 "INSTANCE_NAME": name,
910 "INSTANCE_PRIMARY": primary_node,
911 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
912 "INSTANCE_OS_TYPE": os_type,
913 "INSTANCE_STATUS": str_status,
914 "INSTANCE_MEMORY": memory,
915 "INSTANCE_VCPUS": vcpus,
916 "INSTANCE_DISK_TEMPLATE": disk_template,
917 "INSTANCE_HYPERVISOR": hypervisor_name,
921 nic_count = len(nics)
922 for idx, (ip, mac, mode, link) in enumerate(nics):
925 env["INSTANCE_NIC%d_IP" % idx] = ip
926 env["INSTANCE_NIC%d_MAC" % idx] = mac
927 env["INSTANCE_NIC%d_MODE" % idx] = mode
928 env["INSTANCE_NIC%d_LINK" % idx] = link
929 if mode == constants.NIC_MODE_BRIDGED:
930 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
934 env["INSTANCE_NIC_COUNT"] = nic_count
937 disk_count = len(disks)
938 for idx, (size, mode) in enumerate(disks):
939 env["INSTANCE_DISK%d_SIZE" % idx] = size
940 env["INSTANCE_DISK%d_MODE" % idx] = mode
944 env["INSTANCE_DISK_COUNT"] = disk_count
949 env["INSTANCE_TAGS"] = " ".join(tags)
951 for source, kind in [(bep, "BE"), (hvp, "HV")]:
952 for key, value in source.items():
953 env["INSTANCE_%s_%s" % (kind, key)] = value
958 def _NICListToTuple(lu, nics):
959 """Build a list of nic information tuples.
961 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
962 value in LUInstanceQueryData.
964 @type lu: L{LogicalUnit}
965 @param lu: the logical unit on whose behalf we execute
966 @type nics: list of L{objects.NIC}
967 @param nics: list of nics to convert to hooks tuples
971 cluster = lu.cfg.GetClusterInfo()
975 filled_params = cluster.SimpleFillNIC(nic.nicparams)
976 mode = filled_params[constants.NIC_MODE]
977 link = filled_params[constants.NIC_LINK]
978 hooks_nics.append((ip, mac, mode, link))
982 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
983 """Builds instance related env variables for hooks from an object.
985 @type lu: L{LogicalUnit}
986 @param lu: the logical unit on whose behalf we execute
987 @type instance: L{objects.Instance}
988 @param instance: the instance for which we should build the
991 @param override: dictionary with key/values that will override
994 @return: the hook environment dictionary
997 cluster = lu.cfg.GetClusterInfo()
998 bep = cluster.FillBE(instance)
999 hvp = cluster.FillHV(instance)
1001 "name": instance.name,
1002 "primary_node": instance.primary_node,
1003 "secondary_nodes": instance.secondary_nodes,
1004 "os_type": instance.os,
1005 "status": instance.admin_up,
1006 "memory": bep[constants.BE_MEMORY],
1007 "vcpus": bep[constants.BE_VCPUS],
1008 "nics": _NICListToTuple(lu, instance.nics),
1009 "disk_template": instance.disk_template,
1010 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1013 "hypervisor_name": instance.hypervisor,
1014 "tags": instance.tags,
1017 args.update(override)
1018 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1021 def _AdjustCandidatePool(lu, exceptions):
1022 """Adjust the candidate pool after node operations.
1025 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1027 lu.LogInfo("Promoted nodes to master candidate role: %s",
1028 utils.CommaJoin(node.name for node in mod_list))
1029 for name in mod_list:
1030 lu.context.ReaddNode(name)
1031 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1033 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1037 def _DecideSelfPromotion(lu, exceptions=None):
1038 """Decide whether I should promote myself as a master candidate.
1041 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1042 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1043 # the new node will increase mc_max by one, so:
1044 mc_should = min(mc_should + 1, cp_size)
1045 return mc_now < mc_should
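# Example of the arithmetic (numbers are hypothetical): with
# candidate_pool_size=10 and GetMasterCandidateStats reporting mc_now=3,
# mc_should=3, the new node bumps the target to min(3 + 1, 10) = 4, so
# 3 < 4 and the function returns True, i.e. the node should promote itself.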
1048 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1049 """Check that the brigdes needed by a list of nics exist.
1052 cluster = lu.cfg.GetClusterInfo()
1053 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1054 brlist = [params[constants.NIC_LINK] for params in paramslist
1055 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1057 result = lu.rpc.call_bridges_exist(target_node, brlist)
1058 result.Raise("Error checking bridges on destination node '%s'" %
1059 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1062 def _CheckInstanceBridgesExist(lu, instance, node=None):
1063 """Check that the brigdes needed by an instance exist.
1067 node = instance.primary_node
1068 _CheckNicsBridgesExist(lu, instance.nics, node)
1071 def _CheckOSVariant(os_obj, name):
1072 """Check whether an OS name conforms to the os variants specification.
1074 @type os_obj: L{objects.OS}
1075 @param os_obj: OS object to check
1077 @param name: OS name passed by the user, to check for validity
1080 if not os_obj.supported_variants:
1082 variant = objects.OS.GetVariant(name)
1084 raise errors.OpPrereqError("OS name must include a variant",
1087 if variant not in os_obj.supported_variants:
1088 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1091 def _GetNodeInstancesInner(cfg, fn):
1092 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1095 def _GetNodeInstances(cfg, node_name):
1096 """Returns a list of all primary and secondary instances on a node.
1100 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1103 def _GetNodePrimaryInstances(cfg, node_name):
1104 """Returns primary instances on a node.
1107 return _GetNodeInstancesInner(cfg,
1108 lambda inst: node_name == inst.primary_node)
1111 def _GetNodeSecondaryInstances(cfg, node_name):
1112 """Returns secondary instances on a node.
1115 return _GetNodeInstancesInner(cfg,
1116 lambda inst: node_name in inst.secondary_nodes)
1119 def _GetStorageTypeArgs(cfg, storage_type):
1120 """Returns the arguments for a storage type.
1123 # Special case for file storage
1124 if storage_type == constants.ST_FILE:
1125 # storage.FileStorage wants a list of storage directories
1126 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1131 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1134 for dev in instance.disks:
1135 cfg.SetDiskID(dev, node_name)
1137 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1138 result.Raise("Failed to get disk status from node %s" % node_name,
1139 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1141 for idx, bdev_status in enumerate(result.payload):
1142 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1148 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1149 """Check the sanity of iallocator and node arguments and use the
1150 cluster-wide iallocator if appropriate.
1152 Check that at most one of (iallocator, node) is specified. If none is
1153 specified, then the LU's opcode's iallocator slot is filled with the
1154 cluster-wide default iallocator.
1156 @type iallocator_slot: string
1157 @param iallocator_slot: the name of the opcode iallocator slot
1158 @type node_slot: string
1159 @param node_slot: the name of the opcode target node slot
1162 node = getattr(lu.op, node_slot, None)
1163 iallocator = getattr(lu.op, iallocator_slot, None)
1165 if node is not None and iallocator is not None:
1166 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1168 elif node is None and iallocator is None:
1169 default_iallocator = lu.cfg.GetDefaultIAllocator()
1170 if default_iallocator:
1171 setattr(lu.op, iallocator_slot, default_iallocator)
1173 raise errors.OpPrereqError("No iallocator or node given and no"
1174 " cluster-wide default iallocator found;"
1175 " please specify either an iallocator or a"
1176 " node, or set a cluster-wide default"
1180 class LUClusterPostInit(LogicalUnit):
1181 """Logical unit for running hooks after cluster initialization.
1184 HPATH = "cluster-init"
1185 HTYPE = constants.HTYPE_CLUSTER
1187 def BuildHooksEnv(self):
1192 "OP_TARGET": self.cfg.GetClusterName(),
1195 def BuildHooksNodes(self):
1196 """Build hooks nodes.
1199 return ([], [self.cfg.GetMasterNode()])
1201 def Exec(self, feedback_fn):
1208 class LUClusterDestroy(LogicalUnit):
1209 """Logical unit for destroying the cluster.
1212 HPATH = "cluster-destroy"
1213 HTYPE = constants.HTYPE_CLUSTER
1215 def BuildHooksEnv(self):
1220 "OP_TARGET": self.cfg.GetClusterName(),
1223 def BuildHooksNodes(self):
1224 """Build hooks nodes.
1229 def CheckPrereq(self):
1230 """Check prerequisites.
1232 This checks whether the cluster is empty.
1234 Any errors are signaled by raising errors.OpPrereqError.
1237 master = self.cfg.GetMasterNode()
1239 nodelist = self.cfg.GetNodeList()
1240 if len(nodelist) != 1 or nodelist[0] != master:
1241 raise errors.OpPrereqError("There are still %d node(s) in"
1242 " this cluster." % (len(nodelist) - 1),
1244 instancelist = self.cfg.GetInstanceList()
1246 raise errors.OpPrereqError("There are still %d instance(s) in"
1247 " this cluster." % len(instancelist),
1250 def Exec(self, feedback_fn):
1251 """Destroys the cluster.
1254 master = self.cfg.GetMasterNode()
1256 # Run post hooks on master node before it's removed
1257 _RunPostHook(self, master)
1259 result = self.rpc.call_node_stop_master(master, False)
1260 result.Raise("Could not disable the master role")
1265 def _VerifyCertificate(filename):
1266 """Verifies a certificate for L{LUClusterVerifyConfig}.
1268 @type filename: string
1269 @param filename: Path to PEM file
1273 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1274 utils.ReadFile(filename))
1275 except Exception, err: # pylint: disable-msg=W0703
1276 return (LUClusterVerifyConfig.ETYPE_ERROR,
1277 "Failed to load X509 certificate %s: %s" % (filename, err))
1280 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1281 constants.SSL_CERT_EXPIRATION_ERROR)
1284 fnamemsg = "While verifying %s: %s" % (filename, msg)
1289 return (None, fnamemsg)
1290 elif errcode == utils.CERT_WARNING:
1291 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1292 elif errcode == utils.CERT_ERROR:
1293 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1295 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1298 def _GetAllHypervisorParameters(cluster, instances):
1299 """Compute the set of all hypervisor parameters.
1301 @type cluster: L{objects.Cluster}
1302 @param cluster: the cluster object
1303 @type instances: list of L{objects.Instance}
1304 @param instances: additional instances from which to obtain parameters
1305 @rtype: list of (origin, hypervisor, parameters)
1306 @return: a list with all parameters found, indicating the hypervisor they
1307 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1312 for hv_name in cluster.enabled_hypervisors:
1313 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1315 for os_name, os_hvp in cluster.os_hvp.items():
1316 for hv_name, hv_params in os_hvp.items():
1318 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1319 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1321 # TODO: collapse identical parameter values into a single one
1322 for instance in instances:
1323 if instance.hvparams:
1324 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1325 cluster.FillHV(instance)))
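# Rough shape of the returned list (hypervisor, OS and instance names are
# hypothetical):
#   [("cluster", "kvm", {...cluster-level kvm defaults...}),
#    ("os debian-image", "kvm", {...defaults merged with os_hvp overrides...}),
#    ("instance web1.example.com", "kvm", {...fully filled instance params...})]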
1330 class _VerifyErrors(object):
1331 """Mix-in for cluster/group verify LUs.
1333 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1334 self.op and self._feedback_fn to be available.)
1337 TCLUSTER = "cluster"
1339 TINSTANCE = "instance"
1341 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1342 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1343 ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1344 ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1345 ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1346 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1347 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1348 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1349 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1350 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1351 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1352 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1353 ENODEDRBD = (TNODE, "ENODEDRBD")
1354 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1355 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1356 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1357 ENODEHV = (TNODE, "ENODEHV")
1358 ENODELVM = (TNODE, "ENODELVM")
1359 ENODEN1 = (TNODE, "ENODEN1")
1360 ENODENET = (TNODE, "ENODENET")
1361 ENODEOS = (TNODE, "ENODEOS")
1362 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1363 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1364 ENODERPC = (TNODE, "ENODERPC")
1365 ENODESSH = (TNODE, "ENODESSH")
1366 ENODEVERSION = (TNODE, "ENODEVERSION")
1367 ENODESETUP = (TNODE, "ENODESETUP")
1368 ENODETIME = (TNODE, "ENODETIME")
1369 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1371 ETYPE_FIELD = "code"
1372 ETYPE_ERROR = "ERROR"
1373 ETYPE_WARNING = "WARNING"
1375 def _Error(self, ecode, item, msg, *args, **kwargs):
1376 """Format an error message.
1378 Based on the opcode's error_codes parameter, either format a
1379 parseable error code, or a simpler error string.
1381 This must be called only from Exec and functions called from Exec.
1384 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1386 # first complete the msg
1389 # then format the whole message
1390 if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
1391 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1397 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1398 # and finally report it via the feedback_fn
1399 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable-msg=E1101
1401 def _ErrorIf(self, cond, *args, **kwargs):
1402 """Log an error message if the passed condition is True.
1406 or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
1408 self._Error(*args, **kwargs)
1409 # do not mark the operation as failed for WARN-only cases
1410 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1411 self.bad = self.bad or cond
1414 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1415 """Verifies the cluster config.
1420 def _VerifyHVP(self, hvp_data):
1421 """Verifies locally the syntax of the hypervisor parameters.
1424 for item, hv_name, hv_params in hvp_data:
1425 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1428 hv_class = hypervisor.GetHypervisor(hv_name)
1429 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1430 hv_class.CheckParameterSyntax(hv_params)
1431 except errors.GenericError, err:
1432 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1434 def ExpandNames(self):
1435 # Information can be safely retrieved as the BGL is acquired in exclusive
1437 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1438 self.all_node_info = self.cfg.GetAllNodesInfo()
1439 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1440 self.needed_locks = {}
1442 def Exec(self, feedback_fn):
1443 """Verify integrity of cluster, performing various test on nodes.
1447 self._feedback_fn = feedback_fn
1449 feedback_fn("* Verifying cluster config")
1451 for msg in self.cfg.VerifyConfig():
1452 self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1454 feedback_fn("* Verifying cluster certificate files")
1456 for cert_filename in constants.ALL_CERT_FILES:
1457 (errcode, msg) = _VerifyCertificate(cert_filename)
1458 self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1460 feedback_fn("* Verifying hypervisor parameters")
1462 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1463 self.all_inst_info.values()))
1465 feedback_fn("* Verifying all nodes belong to an existing group")
1467 # We do this verification here because, should this bogus circumstance
1468 # occur, it would never be caught by VerifyGroup, which only acts on
1469 # nodes/instances reachable from existing node groups.
1471 dangling_nodes = set(node.name for node in self.all_node_info.values()
1472 if node.group not in self.all_group_info)
1474 dangling_instances = {}
1475 no_node_instances = []
1477 for inst in self.all_inst_info.values():
1478 if inst.primary_node in dangling_nodes:
1479 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1480 elif inst.primary_node not in self.all_node_info:
1481 no_node_instances.append(inst.name)
1486 utils.CommaJoin(dangling_instances.get(node.name,
1488 for node in dangling_nodes]
1490 self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1491 "the following nodes (and their instances) belong to a non"
1492 " existing group: %s", utils.CommaJoin(pretty_dangling))
1494 self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1495 "the following instances have a non-existing primary-node:"
1496 " %s", utils.CommaJoin(no_node_instances))
1498 return (not self.bad, [g.name for g in self.all_group_info.values()])
1501 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1502 """Verifies the status of a node group.
1505 HPATH = "cluster-verify"
1506 HTYPE = constants.HTYPE_CLUSTER
1509 _HOOKS_INDENT_RE = re.compile("^", re.M)
1511 class NodeImage(object):
1512 """A class representing the logical and physical status of a node.
1515 @ivar name: the node name to which this object refers
1516 @ivar volumes: a structure as returned from
1517 L{ganeti.backend.GetVolumeList} (runtime)
1518 @ivar instances: a list of running instances (runtime)
1519 @ivar pinst: list of configured primary instances (config)
1520 @ivar sinst: list of configured secondary instances (config)
1521 @ivar sbp: dictionary of {primary-node: list of instances} for all
1522 instances for which this node is secondary (config)
1523 @ivar mfree: free memory, as reported by hypervisor (runtime)
1524 @ivar dfree: free disk, as reported by the node (runtime)
1525 @ivar offline: the offline status (config)
1526 @type rpc_fail: boolean
1527 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1528 not whether the individual keys were correct) (runtime)
1529 @type lvm_fail: boolean
1530 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1531 @type hyp_fail: boolean
1532 @ivar hyp_fail: whether the RPC call didn't return the instance list
1533 @type ghost: boolean
1534 @ivar ghost: whether this is a known node or not (config)
1535 @type os_fail: boolean
1536 @ivar os_fail: whether the RPC call didn't return valid OS data
1538 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1539 @type vm_capable: boolean
1540 @ivar vm_capable: whether the node can host instances
1543 def __init__(self, offline=False, name=None, vm_capable=True):
1552 self.offline = offline
1553 self.vm_capable = vm_capable
1554 self.rpc_fail = False
1555 self.lvm_fail = False
1556 self.hyp_fail = False
1558 self.os_fail = False
1561 def ExpandNames(self):
1562 # This raises errors.OpPrereqError on its own:
1563 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1565 # Get instances in node group; this is unsafe and needs verification later
1566 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1568 self.needed_locks = {
1569 locking.LEVEL_INSTANCE: inst_names,
1570 locking.LEVEL_NODEGROUP: [self.group_uuid],
1571 locking.LEVEL_NODE: [],
1574 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1576 def DeclareLocks(self, level):
1577 if level == locking.LEVEL_NODE:
1578 # Get members of node group; this is unsafe and needs verification later
1579 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1581 all_inst_info = self.cfg.GetAllInstancesInfo()
1583 # In Exec(), we warn about mirrored instances that have primary and
1584 # secondary living in separate node groups. To fully verify that
1585 # volumes for these instances are healthy, we will need to do an
1586 # extra call to their secondaries. We ensure here those nodes will
1588 for inst in self.glm.list_owned(locking.LEVEL_INSTANCE):
1589 # Important: access only the instances whose lock is owned
1590 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1591 nodes.update(all_inst_info[inst].secondary_nodes)
1593 self.needed_locks[locking.LEVEL_NODE] = nodes
1595 def CheckPrereq(self):
1596 group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1597 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1600 group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1602 unlocked_instances = \
1603 group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))
1606 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1607 utils.CommaJoin(unlocked_nodes))
1609 if unlocked_instances:
1610 raise errors.OpPrereqError("Missing lock for instances: %s" %
1611 utils.CommaJoin(unlocked_instances))
1613 self.all_node_info = self.cfg.GetAllNodesInfo()
1614 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1616 self.my_node_names = utils.NiceSort(group_nodes)
1617 self.my_inst_names = utils.NiceSort(group_instances)
1619 self.my_node_info = dict((name, self.all_node_info[name])
1620 for name in self.my_node_names)
1622 self.my_inst_info = dict((name, self.all_inst_info[name])
1623 for name in self.my_inst_names)
1625 # We detect here the nodes that will need the extra RPC calls for verifying
1626 # split LV volumes; they should be locked.
1627 extra_lv_nodes = set()
1629 for inst in self.my_inst_info.values():
1630 if inst.disk_template in constants.DTS_INT_MIRROR:
1631 group = self.my_node_info[inst.primary_node].group
1632 for nname in inst.secondary_nodes:
1633 if self.all_node_info[nname].group != group:
1634 extra_lv_nodes.add(nname)
1636 unlocked_lv_nodes = \
1637 extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1639 if unlocked_lv_nodes:
1640 raise errors.OpPrereqError("these nodes could be locked: %s" %
1641 utils.CommaJoin(unlocked_lv_nodes))
1642 self.extra_lv_nodes = list(extra_lv_nodes)
1644 def _VerifyNode(self, ninfo, nresult):
1645 """Perform some basic validation on data returned from a node.
1647 - check the result data structure is well formed and has all the required fields
1649 - check ganeti version
1651 @type ninfo: L{objects.Node}
1652 @param ninfo: the node to check
1653 @param nresult: the results from the node
1655 @return: whether overall this call was successful (and we can expect
1656 reasonable values in the response)
1660 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1662 # main result, nresult should be a non-empty dict
1663 test = not nresult or not isinstance(nresult, dict)
1664 _ErrorIf(test, self.ENODERPC, node,
1665 "unable to verify node: no data returned")
1669 # compares ganeti version
1670 local_version = constants.PROTOCOL_VERSION
1671 remote_version = nresult.get("version", None)
1672 test = not (remote_version and
1673 isinstance(remote_version, (list, tuple)) and
1674 len(remote_version) == 2)
1675 _ErrorIf(test, self.ENODERPC, node,
1676 "connection to node returned invalid data")
1680 test = local_version != remote_version[0]
1681 _ErrorIf(test, self.ENODEVERSION, node,
1682 "incompatible protocol versions: master %s,"
1683 " node %s", local_version, remote_version[0])
1687 # node seems compatible, we can actually try to look into its results
1689 # full package version
1690 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1691 self.ENODEVERSION, node,
1692 "software version mismatch: master %s, node %s",
1693 constants.RELEASE_VERSION, remote_version[1],
1694 code=self.ETYPE_WARNING)
1696 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1697 if ninfo.vm_capable and isinstance(hyp_result, dict):
1698 for hv_name, hv_result in hyp_result.iteritems():
1699 test = hv_result is not None
1700 _ErrorIf(test, self.ENODEHV, node,
1701 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1703 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1704 if ninfo.vm_capable and isinstance(hvp_result, list):
1705 for item, hv_name, hv_result in hvp_result:
1706 _ErrorIf(True, self.ENODEHV, node,
1707 "hypervisor %s parameter verify failure (source %s): %s",
1708 hv_name, item, hv_result)
1710 test = nresult.get(constants.NV_NODESETUP,
1711 ["Missing NODESETUP results"])
1712 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1717 def _VerifyNodeTime(self, ninfo, nresult,
1718 nvinfo_starttime, nvinfo_endtime):
1719 """Check the node time.
1721 @type ninfo: L{objects.Node}
1722 @param ninfo: the node to check
1723 @param nresult: the remote results for the node
1724 @param nvinfo_starttime: the start time of the RPC call
1725 @param nvinfo_endtime: the end time of the RPC call
1729 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1731 ntime = nresult.get(constants.NV_TIME, None)
1733 ntime_merged = utils.MergeTime(ntime)
1734 except (ValueError, TypeError):
1735 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1738 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1739 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1740 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1741 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1745 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1746 "Node time diverges by at least %s from master node time",
1749 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1750 """Check the node LVM results.
1752 @type ninfo: L{objects.Node}
1753 @param ninfo: the node to check
1754 @param nresult: the remote results for the node
1755 @param vg_name: the configured VG name
1762 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1764 # checks vg existence and size > 20G
1765 vglist = nresult.get(constants.NV_VGLIST, None)
1767 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1769 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1770 constants.MIN_VG_SIZE)
1771 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1774 pvlist = nresult.get(constants.NV_PVLIST, None)
1775 test = pvlist is None
1776 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1778 # check that ':' is not present in PV names, since it's a
1779 # special character for lvcreate (denotes the range of PEs to use on this PV)
1781 for _, pvname, owner_vg in pvlist:
1782 test = ":" in pvname
1783 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1784 " '%s' of VG '%s'", pvname, owner_vg)
1786 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1787 """Check the node bridges.
1789 @type ninfo: L{objects.Node}
1790 @param ninfo: the node to check
1791 @param nresult: the remote results for the node
1792 @param bridges: the expected list of bridges
1799 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1801 missing = nresult.get(constants.NV_BRIDGES, None)
1802 test = not isinstance(missing, list)
1803 _ErrorIf(test, self.ENODENET, node,
1804 "did not return valid bridge information")
1806 _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1807 utils.CommaJoin(sorted(missing)))
1809 def _VerifyNodeNetwork(self, ninfo, nresult):
1810 """Check the node network connectivity results.
1812 @type ninfo: L{objects.Node}
1813 @param ninfo: the node to check
1814 @param nresult: the remote results for the node
1818 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1820 test = constants.NV_NODELIST not in nresult
1821 _ErrorIf(test, self.ENODESSH, node,
1822 "node hasn't returned node ssh connectivity data")
1824 if nresult[constants.NV_NODELIST]:
1825 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1826 _ErrorIf(True, self.ENODESSH, node,
1827 "ssh communication with node '%s': %s", a_node, a_msg)
1829 test = constants.NV_NODENETTEST not in nresult
1830 _ErrorIf(test, self.ENODENET, node,
1831 "node hasn't returned node tcp connectivity data")
1833 if nresult[constants.NV_NODENETTEST]:
1834 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1836 _ErrorIf(True, self.ENODENET, node,
1837 "tcp communication with node '%s': %s",
1838 anode, nresult[constants.NV_NODENETTEST][anode])
1840 test = constants.NV_MASTERIP not in nresult
1841 _ErrorIf(test, self.ENODENET, node,
1842 "node hasn't returned node master IP reachability data")
1844 if not nresult[constants.NV_MASTERIP]:
1845 if node == self.master_node:
1846 msg = "the master node cannot reach the master IP (not configured?)"
1848 msg = "cannot reach the master IP"
1849 _ErrorIf(True, self.ENODENET, node, msg)
1851 def _VerifyInstance(self, instance, instanceconfig, node_image,
1853 """Verify an instance.
1855 This function checks to see if the required block devices are
1856 available on the instance's node.
1859 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1860 node_current = instanceconfig.primary_node
1862 node_vol_should = {}
1863 instanceconfig.MapLVsByNode(node_vol_should)
1865 for node in node_vol_should:
1866 n_img = node_image[node]
1867 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1868 # ignore missing volumes on offline or broken nodes
1870 for volume in node_vol_should[node]:
1871 test = volume not in n_img.volumes
1872 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1873 "volume %s missing on node %s", volume, node)
1875 if instanceconfig.admin_up:
1876 pri_img = node_image[node_current]
1877 test = instance not in pri_img.instances and not pri_img.offline
1878 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1879 "instance not running on its primary node %s",
1882 diskdata = [(nname, success, status, idx)
1883 for (nname, disks) in diskstatus.items()
1884 for idx, (success, status) in enumerate(disks)]
1886 for nname, success, bdev_status, idx in diskdata:
1887 # the 'ghost node' construction in Exec() ensures that we have a
1889 snode = node_image[nname]
1890 bad_snode = snode.ghost or snode.offline
1891 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1892 self.EINSTANCEFAULTYDISK, instance,
1893 "couldn't retrieve status for disk/%s on %s: %s",
1894 idx, nname, bdev_status)
1895 _ErrorIf((instanceconfig.admin_up and success and
1896 bdev_status.ldisk_status == constants.LDS_FAULTY),
1897 self.EINSTANCEFAULTYDISK, instance,
1898 "disk/%s on %s is faulty", idx, nname)
1900 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1901 """Verify if there are any unknown volumes in the cluster.
1903 The .os, .swap and backup volumes are ignored. All other volumes are
1904 reported as unknown.
1906 @type reserved: L{ganeti.utils.FieldSet}
1907 @param reserved: a FieldSet of reserved volume names
1910 for node, n_img in node_image.items():
1911 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1912 # skip non-healthy nodes
1914 for volume in n_img.volumes:
1915 test = ((node not in node_vol_should or
1916 volume not in node_vol_should[node]) and
1917 not reserved.Matches(volume))
1918 self._ErrorIf(test, self.ENODEORPHANLV, node,
1919 "volume %s is unknown", volume)
1921 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1922 """Verify N+1 Memory Resilience.
1924 Check that if one single node dies we can still start all the
1925 instances it was primary for.
1928 cluster_info = self.cfg.GetClusterInfo()
1929 for node, n_img in node_image.items():
1930 # This code checks that every node which is now listed as
1931 # secondary has enough memory to host all instances it is
1932 # supposed to, should a single other node in the cluster fail.
1933 # FIXME: not ready for failover to an arbitrary node
1934 # FIXME: does not support file-backed instances
1935 # WARNING: we currently take into account down instances as well
1936 # as up ones, considering that even if they're down someone
1937 # might want to start them even in the event of a node failure.
1939 # we're skipping offline nodes from the N+1 warning, since
1940 # most likely we don't have good memory information from them;
1941 # we already list instances living on such nodes, and that's warning enough
1944 for prinode, instances in n_img.sbp.items():
1946 for instance in instances:
1947 bep = cluster_info.FillBE(instance_cfg[instance])
1948 if bep[constants.BE_AUTO_BALANCE]:
1949 needed_mem += bep[constants.BE_MEMORY]
1950 test = n_img.mfree < needed_mem
1951 self._ErrorIf(test, self.ENODEN1, node,
1952 "not enough memory to accomodate instance failovers"
1953 " should node %s fail (%dMiB needed, %dMiB available)",
1954 prinode, needed_mem, n_img.mfree)
1957 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1958 (files_all, files_all_opt, files_mc, files_vm)):
1959 """Verifies file checksums collected from all nodes.
1961 @param errorif: Callback for reporting errors
1962 @param nodeinfo: List of L{objects.Node} objects
1963 @param master_node: Name of master node
1964 @param all_nvinfo: RPC results
1967 node_names = frozenset(node.name for node in nodeinfo)
1969 assert master_node in node_names
1970 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1971 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1972 "Found file listed in more than one file list"
1974 # Define functions determining which nodes to consider for a file
1975 file2nodefn = dict([(filename, fn)
1976 for (files, fn) in [(files_all, None),
1977 (files_all_opt, None),
1978 (files_mc, lambda node: (node.master_candidate or
1979 node.name == master_node)),
1980 (files_vm, lambda node: node.vm_capable)]
1981 for filename in files])
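# a callback of None means the file is expected on every node; the
# master-candidate and vm-capable callbacks narrow the check accordingly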
1983 fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1985 for node in nodeinfo:
1986 nresult = all_nvinfo[node.name]
1988 if nresult.fail_msg or not nresult.payload:
1991 node_files = nresult.payload.get(constants.NV_FILELIST, None)
1993 test = not (node_files and isinstance(node_files, dict))
1994 errorif(test, cls.ENODEFILECHECK, node.name,
1995 "Node did not return file checksum data")
1999 for (filename, checksum) in node_files.items():
2000 # Check if the file should be considered for a node
2001 fn = file2nodefn[filename]
2002 if fn is None or fn(node):
2003 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2005 for (filename, checksums) in fileinfo.items():
2006 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2008 # Nodes having the file
2009 with_file = frozenset(node_name
2010 for nodes in fileinfo[filename].values()
2011 for node_name in nodes)
2013 # Nodes missing file
2014 missing_file = node_names - with_file
2016 if filename in files_all_opt:
2018 errorif(missing_file and missing_file != node_names,
2019 cls.ECLUSTERFILECHECK, None,
2020 "File %s is optional, but it must exist on all or no"
2021 " nodes (not found on %s)",
2022 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2024 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2025 "File %s is missing from node(s) %s", filename,
2026 utils.CommaJoin(utils.NiceSort(missing_file)))
2028 # See if there are multiple versions of the file
2029 test = len(checksums) > 1
2031 variants = ["variant %s on %s" %
2032 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2033 for (idx, (checksum, nodes)) in
2034 enumerate(sorted(checksums.items()))]
2038 errorif(test, cls.ECLUSTERFILECHECK, None,
2039 "File %s found with %s different checksums (%s)",
2040 filename, len(checksums), "; ".join(variants))
2042 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2044 """Verifies and the node DRBD status.
2046 @type ninfo: L{objects.Node}
2047 @param ninfo: the node to check
2048 @param nresult: the remote results for the node
2049 @param instanceinfo: the dict of instances
2050 @param drbd_helper: the configured DRBD usermode helper
2051 @param drbd_map: the DRBD map as returned by
2052 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2056 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2059 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2060 test = (helper_result is None)
2061 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2062 "no drbd usermode helper returned")
2064 status, payload = helper_result
2066 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2067 "drbd usermode helper check unsuccessful: %s", payload)
2068 test = status and (payload != drbd_helper)
2069 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2070 "wrong drbd usermode helper: %s", payload)
2072 # compute the DRBD minors
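# node_drbd maps each DRBD minor to (instance name, must_exist); minors of
# ghost instances get must_exist=False so we do not warn about them twice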
2074 for minor, instance in drbd_map[node].items():
2075 test = instance not in instanceinfo
2076 _ErrorIf(test, self.ECLUSTERCFG, None,
2077 "ghost instance '%s' in temporary DRBD map", instance)
2078 # ghost instance should not be running, but otherwise we
2079 # don't give double warnings (both ghost instance and
2080 # unallocated minor in use)
2082 node_drbd[minor] = (instance, False)
2084 instance = instanceinfo[instance]
2085 node_drbd[minor] = (instance.name, instance.admin_up)
2087 # and now check them
2088 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2089 test = not isinstance(used_minors, (tuple, list))
2090 _ErrorIf(test, self.ENODEDRBD, node,
2091 "cannot parse drbd status file: %s", str(used_minors))
2093 # we cannot check drbd status
2096 for minor, (iname, must_exist) in node_drbd.items():
2097 test = minor not in used_minors and must_exist
2098 _ErrorIf(test, self.ENODEDRBD, node,
2099 "drbd minor %d of instance %s is not active", minor, iname)
2100 for minor in used_minors:
2101 test = minor not in node_drbd
2102 _ErrorIf(test, self.ENODEDRBD, node,
2103 "unallocated drbd minor %d is in use", minor)
2105 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2106 """Builds the node OS structures.
2108 @type ninfo: L{objects.Node}
2109 @param ninfo: the node to check
2110 @param nresult: the remote results for the node
2111 @param nimg: the node image object
2115 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2117 remote_os = nresult.get(constants.NV_OSLIST, None)
2118 test = (not isinstance(remote_os, list) or
2119 not compat.all(isinstance(v, list) and len(v) == 7
2120 for v in remote_os))
2122 _ErrorIf(test, self.ENODEOS, node,
2123 "node hasn't returned valid OS data")
2132 for (name, os_path, status, diagnose,
2133 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2135 if name not in os_dict:
2138 # parameters is a list of lists instead of list of tuples due to
2139 # JSON lacking a real tuple type, fix it:
2140 parameters = [tuple(v) for v in parameters]
2141 os_dict[name].append((os_path, status, diagnose,
2142 set(variants), set(parameters), set(api_ver)))
2144 nimg.oslist = os_dict
2146 def _VerifyNodeOS(self, ninfo, nimg, base):
2147 """Verifies the node OS list.
2149 @type ninfo: L{objects.Node}
2150 @param ninfo: the node to check
2151 @param nimg: the node image object
2152 @param base: the 'template' node we match against (e.g. from the master)
2156 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2158 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
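# turn a list of (key, value) parameter pairs into readable "key: value"
# strings for the comparison against the reference node below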
2160 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2161 for os_name, os_data in nimg.oslist.items():
2162 assert os_data, "Empty OS status for OS %s?!" % os_name
2163 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2164 _ErrorIf(not f_status, self.ENODEOS, node,
2165 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2166 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2167 "OS '%s' has multiple entries (first one shadows the rest): %s",
2168 os_name, utils.CommaJoin([v[0] for v in os_data]))
2169 # this will be caught in the backend too
2170 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
2171 and not f_var, self.ENODEOS, node,
2172 "OS %s with API at least %d does not declare any variant",
2173 os_name, constants.OS_API_V15)
2174 # comparisons with the 'base' image
2175 test = os_name not in base.oslist
2176 _ErrorIf(test, self.ENODEOS, node,
2177 "Extra OS %s not present on reference node (%s)",
2181 assert base.oslist[os_name], "Base node has empty OS status?"
2182 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2184 # base OS is invalid, skipping
2186 for kind, a, b in [("API version", f_api, b_api),
2187 ("variants list", f_var, b_var),
2188 ("parameters", beautify_params(f_param),
2189 beautify_params(b_param))]:
2190 _ErrorIf(a != b, self.ENODEOS, node,
2191 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2192 kind, os_name, base.name,
2193 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2195 # check any missing OSes
2196 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2197 _ErrorIf(missing, self.ENODEOS, node,
2198 "OSes present on reference node %s but missing on this node: %s",
2199 base.name, utils.CommaJoin(missing))
2201 def _VerifyOob(self, ninfo, nresult):
2202 """Verifies out of band functionality of a node.
2204 @type ninfo: L{objects.Node}
2205 @param ninfo: the node to check
2206 @param nresult: the remote results for the node
2210 # We just have to verify the paths on master and/or master candidates
2211 # as the oob helper is invoked on the master
2212 if ((ninfo.master_candidate or ninfo.master_capable) and
2213 constants.NV_OOB_PATHS in nresult):
2214 for path_result in nresult[constants.NV_OOB_PATHS]:
2215 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2217 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2218 """Verifies and updates the node volume data.
2220 This function will update a L{NodeImage}'s internal structures
2221 with data from the remote call.
2223 @type ninfo: L{objects.Node}
2224 @param ninfo: the node to check
2225 @param nresult: the remote results for the node
2226 @param nimg: the node image object
2227 @param vg_name: the configured VG name
2231 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2233 nimg.lvm_fail = True
2234 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2237 elif isinstance(lvdata, basestring):
2238 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2239 utils.SafeEncode(lvdata))
2240 elif not isinstance(lvdata, dict):
2241 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2243 nimg.volumes = lvdata
2244 nimg.lvm_fail = False
2246 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2247 """Verifies and updates the node instance list.
2249 If the listing was successful, then updates this node's instance
2250 list. Otherwise, it marks the RPC call as failed for the instance list.
2253 @type ninfo: L{objects.Node}
2254 @param ninfo: the node to check
2255 @param nresult: the remote results for the node
2256 @param nimg: the node image object
2259 idata = nresult.get(constants.NV_INSTANCELIST, None)
2260 test = not isinstance(idata, list)
2261 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2262 " (instancelist): %s", utils.SafeEncode(str(idata)))
2264 nimg.hyp_fail = True
2266 nimg.instances = idata
2268 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2269 """Verifies and computes a node information map
2271 @type ninfo: L{objects.Node}
2272 @param ninfo: the node to check
2273 @param nresult: the remote results for the node
2274 @param nimg: the node image object
2275 @param vg_name: the configured VG name
2279 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2281 # try to read free memory (from the hypervisor)
2282 hv_info = nresult.get(constants.NV_HVINFO, None)
2283 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2284 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2287 nimg.mfree = int(hv_info["memory_free"])
2288 except (ValueError, TypeError):
2289 _ErrorIf(True, self.ENODERPC, node,
2290 "node returned invalid nodeinfo, check hypervisor")
2292 # FIXME: devise a free space model for file based instances as well
2293 if vg_name is not None:
2294 test = (constants.NV_VGLIST not in nresult or
2295 vg_name not in nresult[constants.NV_VGLIST])
2296 _ErrorIf(test, self.ENODELVM, node,
2297 "node didn't return data for the volume group '%s'"
2298 " - it is either missing or broken", vg_name)
2301 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2302 except (ValueError, TypeError):
2303 _ErrorIf(True, self.ENODERPC, node,
2304 "node returned invalid LVM info, check LVM status")
2306 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2307 """Gets per-disk status information for all instances.
2309 @type nodelist: list of strings
2310 @param nodelist: Node names
2311 @type node_image: dict of (name, L{objects.Node})
2312 @param node_image: Node objects
2313 @type instanceinfo: dict of (name, L{objects.Instance})
2314 @param instanceinfo: Instance objects
2315 @rtype: {instance: {node: [(success, payload)]}}
2316 @return: a dictionary of per-instance dictionaries with nodes as
2317 keys and disk information as values; the disk information is a
2318 list of tuples (success, payload)
2321 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2324 node_disks_devonly = {}
2325 diskless_instances = set()
2326 diskless = constants.DT_DISKLESS
2328 for nname in nodelist:
2329 node_instances = list(itertools.chain(node_image[nname].pinst,
2330 node_image[nname].sinst))
2331 diskless_instances.update(inst for inst in node_instances
2332 if instanceinfo[inst].disk_template == diskless)
2333 disks = [(inst, disk)
2334 for inst in node_instances
2335 for disk in instanceinfo[inst].disks]
2338 # No need to collect data
2341 node_disks[nname] = disks
2343 # Creating copies as SetDiskID below will modify the objects and that can
2344 # lead to incorrect data returned from nodes
2345 devonly = [dev.Copy() for (_, dev) in disks]
2348 self.cfg.SetDiskID(dev, nname)
2350 node_disks_devonly[nname] = devonly
2352 assert len(node_disks) == len(node_disks_devonly)
2354 # Collect data from all nodes with disks
2355 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2358 assert len(result) == len(node_disks)
2362 for (nname, nres) in result.items():
2363 disks = node_disks[nname]
2366 # No data from this node
2367 data = len(disks) * [(False, "node offline")]
2370 _ErrorIf(msg, self.ENODERPC, nname,
2371 "while getting disk information: %s", msg)
2373 # No data from this node
2374 data = len(disks) * [(False, msg)]
2377 for idx, i in enumerate(nres.payload):
2378 if isinstance(i, (tuple, list)) and len(i) == 2:
2381 logging.warning("Invalid result from node %s, entry %d: %s",
2383 data.append((False, "Invalid result from the remote node"))
2385 for ((inst, _), status) in zip(disks, data):
2386 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2388 # Add empty entries for diskless instances.
2389 for inst in diskless_instances:
2390 assert inst not in instdisk
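# consistency checks on the collected data: each instance has one
# (success, payload) tuple per disk for every node that reported, and no more
# reporting nodes than the instance actually has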
2393 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2394 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2395 compat.all(isinstance(s, (tuple, list)) and
2396 len(s) == 2 for s in statuses)
2397 for inst, nnames in instdisk.items()
2398 for nname, statuses in nnames.items())
2399 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2403 def BuildHooksEnv(self):
2406 Cluster-Verify hooks just run in the post phase and their failure makes
2407 the output be logged in the verify output and the verification fail.
2411 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2414 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2415 for node in self.my_node_info.values())
2419 def BuildHooksNodes(self):
2420 """Build hooks nodes.
2423 return ([], self.my_node_names)
2425 def Exec(self, feedback_fn):
2426 """Verify integrity of the node group, performing various test on nodes.
2429 # This method has too many local variables. pylint: disable-msg=R0914
2431 if not self.my_node_names:
2433 feedback_fn("* Empty node group, skipping verification")
2437 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2438 verbose = self.op.verbose
2439 self._feedback_fn = feedback_fn
2441 vg_name = self.cfg.GetVGName()
2442 drbd_helper = self.cfg.GetDRBDHelper()
2443 cluster = self.cfg.GetClusterInfo()
2444 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2445 hypervisors = cluster.enabled_hypervisors
2446 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2448 i_non_redundant = [] # Non redundant instances
2449 i_non_a_balanced = [] # Non auto-balanced instances
2450 n_offline = 0 # Count of offline nodes
2451 n_drained = 0 # Count of nodes being drained
2452 node_vol_should = {}
2454 # FIXME: verify OS list
2457 filemap = _ComputeAncillaryFiles(cluster, False)
2459 # do local checksums
2460 master_node = self.master_node = self.cfg.GetMasterNode()
2461 master_ip = self.cfg.GetMasterIP()
2463 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2465 # We will make nodes contact all nodes in their group, and one node from
2466 # every other group.
2467 # TODO: should it be a *random* node, different every time?
2468 online_nodes = [node.name for node in node_data_list if not node.offline]
2469 other_group_nodes = {}
2471 for name in sorted(self.all_node_info):
2472 node = self.all_node_info[name]
2473 if (node.group not in other_group_nodes
2474 and node.group != self.group_uuid
2475 and not node.offline):
2476 other_group_nodes[node.group] = node.name
2478 node_verify_param = {
2479 constants.NV_FILELIST:
2480 utils.UniqueSequence(filename
2481 for files in filemap
2482 for filename in files),
2483 constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
2484 constants.NV_HYPERVISOR: hypervisors,
2485 constants.NV_HVPARAMS:
2486 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2487 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2488 for node in node_data_list
2489 if not node.offline],
2490 constants.NV_INSTANCELIST: hypervisors,
2491 constants.NV_VERSION: None,
2492 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2493 constants.NV_NODESETUP: None,
2494 constants.NV_TIME: None,
2495 constants.NV_MASTERIP: (master_node, master_ip),
2496 constants.NV_OSLIST: None,
2497 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2500 if vg_name is not None:
2501 node_verify_param[constants.NV_VGLIST] = None
2502 node_verify_param[constants.NV_LVLIST] = vg_name
2503 node_verify_param[constants.NV_PVLIST] = [vg_name]
2504 node_verify_param[constants.NV_DRBDLIST] = None
2507 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2510 # FIXME: this needs to be changed per node-group, not cluster-wide
2512 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2513 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2514 bridges.add(default_nicpp[constants.NIC_LINK])
2515 for instance in self.my_inst_info.values():
2516 for nic in instance.nics:
2517 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2518 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2519 bridges.add(full_nic[constants.NIC_LINK])
2522 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2524 # Build our expected cluster state
2525 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2527 vm_capable=node.vm_capable))
2528 for node in node_data_list)
2532 for node in self.all_node_info.values():
2533 path = _SupportsOob(self.cfg, node)
2534 if path and path not in oob_paths:
2535 oob_paths.append(path)
2538 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2540 for instance in self.my_inst_names:
2541 inst_config = self.my_inst_info[instance]
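# make sure every node the instance touches has a node image; nodes outside
# this group get a placeholder, flagged as "ghost" when they are not known to
# the cluster configuration at all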
2543 for nname in inst_config.all_nodes:
2544 if nname not in node_image:
2545 gnode = self.NodeImage(name=nname)
2546 gnode.ghost = (nname not in self.all_node_info)
2547 node_image[nname] = gnode
2549 inst_config.MapLVsByNode(node_vol_should)
2551 pnode = inst_config.primary_node
2552 node_image[pnode].pinst.append(instance)
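# record the instance on each of its secondary nodes, both in sinst and in
# sbp (secondary instances grouped by primary node), which the N+1 memory
# check uses later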
2554 for snode in inst_config.secondary_nodes:
2555 nimg = node_image[snode]
2556 nimg.sinst.append(instance)
2557 if pnode not in nimg.sbp:
2558 nimg.sbp[pnode] = []
2559 nimg.sbp[pnode].append(instance)
2561 # At this point, we have the in-memory data structures complete,
2562 # except for the runtime information, which we'll gather next
2564 # Due to the way our RPC system works, exact response times cannot be
2565 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2566 # time before and after executing the request, we can at least have a time window.
2568 nvinfo_starttime = time.time()
2569 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2571 self.cfg.GetClusterName())
2572 nvinfo_endtime = time.time()
2574 if self.extra_lv_nodes and vg_name is not None:
2576 self.rpc.call_node_verify(self.extra_lv_nodes,
2577 {constants.NV_LVLIST: vg_name},
2578 self.cfg.GetClusterName())
2580 extra_lv_nvinfo = {}
2582 all_drbd_map = self.cfg.ComputeDRBDMap()
2584 feedback_fn("* Gathering disk information (%s nodes)" %
2585 len(self.my_node_names))
2586 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2589 feedback_fn("* Verifying configuration file consistency")
2591 # If not all nodes are being checked, we need to make sure the master node
2592 # and a non-checked vm_capable node are in the list.
2593 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2595 vf_nvinfo = all_nvinfo.copy()
2596 vf_node_info = list(self.my_node_info.values())
2597 additional_nodes = []
2598 if master_node not in self.my_node_info:
2599 additional_nodes.append(master_node)
2600 vf_node_info.append(self.all_node_info[master_node])
2601 # Add the first vm_capable node we find which is not included
2602 for node in absent_nodes:
2603 nodeinfo = self.all_node_info[node]
2604 if nodeinfo.vm_capable and not nodeinfo.offline:
2605 additional_nodes.append(node)
2606 vf_node_info.append(self.all_node_info[node])
2608 key = constants.NV_FILELIST
2609 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2610 {key: node_verify_param[key]},
2611 self.cfg.GetClusterName()))
2613 vf_nvinfo = all_nvinfo
2614 vf_node_info = self.my_node_info.values()
2616 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2618 feedback_fn("* Verifying node status")
2622 for node_i in node_data_list:
2624 nimg = node_image[node]
2628 feedback_fn("* Skipping offline node %s" % (node,))
2632 if node == master_node:
2634 elif node_i.master_candidate:
2635 ntype = "master candidate"
2636 elif node_i.drained:
2642 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2644 msg = all_nvinfo[node].fail_msg
2645 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2647 nimg.rpc_fail = True
2650 nresult = all_nvinfo[node].payload
2652 nimg.call_ok = self._VerifyNode(node_i, nresult)
2653 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2654 self._VerifyNodeNetwork(node_i, nresult)
2655 self._VerifyOob(node_i, nresult)
2658 self._VerifyNodeLVM(node_i, nresult, vg_name)
2659 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2662 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2663 self._UpdateNodeInstances(node_i, nresult, nimg)
2664 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2665 self._UpdateNodeOS(node_i, nresult, nimg)
2667 if not nimg.os_fail:
2668 if refos_img is None:
2670 self._VerifyNodeOS(node_i, nimg, refos_img)
2671 self._VerifyNodeBridges(node_i, nresult, bridges)
2673 # Check whether all running instances are primary for the node. (This
2674 # can no longer be done from _VerifyInstance below, since some of the
2675 # wrong instances could be from other node groups.)
2676 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2678 for inst in non_primary_inst:
2679 test = inst in self.all_inst_info
2680 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2681 "instance should not run on node %s", node_i.name)
2682 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2683 "node is running unknown instance %s", inst)
2685 for node, result in extra_lv_nvinfo.items():
2686 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2687 node_image[node], vg_name)
2689 feedback_fn("* Verifying instance status")
2690 for instance in self.my_inst_names:
2692 feedback_fn("* Verifying instance %s" % instance)
2693 inst_config = self.my_inst_info[instance]
2694 self._VerifyInstance(instance, inst_config, node_image,
2696 inst_nodes_offline = []
2698 pnode = inst_config.primary_node
2699 pnode_img = node_image[pnode]
2700 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2701 self.ENODERPC, pnode, "instance %s, connection to"
2702 " primary node failed", instance)
2704 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2705 self.EINSTANCEBADNODE, instance,
2706 "instance is marked as running and lives on offline node %s",
2707 inst_config.primary_node)
2709 # If the instance is non-redundant we cannot survive losing its primary
2710 # node, so we are not N+1 compliant. On the other hand we have no disk
2711 # templates with more than one secondary so that situation is not well supported either.
2713 # FIXME: does not support file-backed instances
2714 if not inst_config.secondary_nodes:
2715 i_non_redundant.append(instance)
2717 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2718 instance, "instance has multiple secondary nodes: %s",
2719 utils.CommaJoin(inst_config.secondary_nodes),
2720 code=self.ETYPE_WARNING)
2722 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2723 pnode = inst_config.primary_node
2724 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2725 instance_groups = {}
2727 for node in instance_nodes:
2728 instance_groups.setdefault(self.all_node_info[node].group,
2732 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2733 # Sort so that we always list the primary node first.
2734 for group, nodes in sorted(instance_groups.items(),
2735 key=lambda (_, nodes): pnode in nodes,
2738 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2739 instance, "instance has primary and secondary nodes in"
2740 " different groups: %s", utils.CommaJoin(pretty_list),
2741 code=self.ETYPE_WARNING)
2743 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2744 i_non_a_balanced.append(instance)
2746 for snode in inst_config.secondary_nodes:
2747 s_img = node_image[snode]
2748 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2749 "instance %s, connection to secondary node failed", instance)
2752 inst_nodes_offline.append(snode)
2754 # warn that the instance lives on offline nodes
2755 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2756 "instance has offline secondary node(s) %s",
2757 utils.CommaJoin(inst_nodes_offline))
2758 # ... or ghost/non-vm_capable nodes
2759 for node in inst_config.all_nodes:
2760 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2761 "instance lives on ghost node %s", node)
2762 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2763 instance, "instance lives on non-vm_capable node %s", node)
2765 feedback_fn("* Verifying orphan volumes")
2766 reserved = utils.FieldSet(*cluster.reserved_lvs)
2768 # We will get spurious "unknown volume" warnings if any node of this group
2769 # is secondary for an instance whose primary is in another group. To avoid
2770 # them, we find these instances and add their volumes to node_vol_should.
2771 for inst in self.all_inst_info.values():
2772 for secondary in inst.secondary_nodes:
2773 if (secondary in self.my_node_info
2774 and inst.name not in self.my_inst_info):
2775 inst.MapLVsByNode(node_vol_should)
2778 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2780 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2781 feedback_fn("* Verifying N+1 Memory redundancy")
2782 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2784 feedback_fn("* Other Notes")
2786 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2787 % len(i_non_redundant))
2789 if i_non_a_balanced:
2790 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2791 % len(i_non_a_balanced))
2794 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2797 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2801 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2802 """Analyze the post-hooks' result
2804 This method analyses the hook result, handles it, and sends some
2805 nicely-formatted feedback back to the user.
2807 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2808 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2809 @param hooks_results: the results of the multi-node hooks rpc call
2810 @param feedback_fn: function used to send feedback back to the caller
2811 @param lu_result: previous Exec result
2812 @return: the new Exec result, based on the previous result
2816 # We only really run POST phase hooks, only for non-empty groups,
2817 # and are only interested in their results
2818 if not self.my_node_names:
2821 elif phase == constants.HOOKS_PHASE_POST:
2822 # Used to change hooks' output to proper indentation
2823 feedback_fn("* Hooks Results")
2824 assert hooks_results, "invalid result from hooks"
2826 for node_name in hooks_results:
2827 res = hooks_results[node_name]
2829 test = msg and not res.offline
2830 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2831 "Communication failure in hooks execution: %s", msg)
2832 if res.offline or msg:
2833 # No need to investigate payload if node is offline or gave an error.
2834 # manually override lu_result here, as _ErrorIf only
2835 # overrides self.bad
2838 for script, hkr, output in res.payload:
2839 test = hkr == constants.HKR_FAIL
2840 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2841 "Script %s failed, output:", script)
2843 output = self._HOOKS_INDENT_RE.sub(" ", output)
2844 feedback_fn("%s" % output)
2850 class LUClusterVerifyDisks(NoHooksLU):
2851 """Verifies the cluster disks status.
2856 def ExpandNames(self):
2857 self.needed_locks = {
2858 locking.LEVEL_NODE: locking.ALL_SET,
2859 locking.LEVEL_INSTANCE: locking.ALL_SET,
2861 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2863 def Exec(self, feedback_fn):
2864 """Verify integrity of cluster disks.
2866 @rtype: tuple of three items
2867 @return: a tuple of (dict of node-to-node_error, list of instances
2868 which need activate-disks, dict of instance: (node, volume) for missing volumes)
2872 result = res_nodes, res_instances, res_missing = {}, [], {}
2874 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2875 instances = self.cfg.GetAllInstancesInfo().values()
2878 for inst in instances:
2880 if not inst.admin_up:
2882 inst.MapLVsByNode(inst_lvs)
2883 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2884 for node, vol_list in inst_lvs.iteritems():
2885 for vol in vol_list:
2886 nv_dict[(node, vol)] = inst
2891 node_lvs = self.rpc.call_lv_list(nodes, [])
2892 for node, node_res in node_lvs.items():
2893 if node_res.offline:
2895 msg = node_res.fail_msg
2897 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2898 res_nodes[node] = msg
2901 lvs = node_res.payload
2902 for lv_name, (_, _, lv_online) in lvs.items():
2903 inst = nv_dict.pop((node, lv_name), None)
2904 if (not lv_online and inst is not None
2905 and inst.name not in res_instances):
2906 res_instances.append(inst.name)
2908 # any leftover items in nv_dict are missing LVs, let's arrange the
2910 for key, inst in nv_dict.iteritems():
2911 if inst.name not in res_missing:
2912 res_missing[inst.name] = []
2913 res_missing[inst.name].append(key)
2918 class LUClusterRepairDiskSizes(NoHooksLU):
2919 """Verifies the cluster disks sizes.
2924 def ExpandNames(self):
2925 if self.op.instances:
2926 self.wanted_names = _GetWantedInstances(self, self.op.instances)
2927 self.needed_locks = {
2928 locking.LEVEL_NODE: [],
2929 locking.LEVEL_INSTANCE: self.wanted_names,
2931 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2933 self.wanted_names = None
2934 self.needed_locks = {
2935 locking.LEVEL_NODE: locking.ALL_SET,
2936 locking.LEVEL_INSTANCE: locking.ALL_SET,
2938 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2940 def DeclareLocks(self, level):
2941 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2942 self._LockInstancesNodes(primary_only=True)
2944 def CheckPrereq(self):
2945 """Check prerequisites.
2947 This only checks the optional instance list against the existing names.
2950 if self.wanted_names is None:
2951 self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
2953 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2954 in self.wanted_names]
2956 def _EnsureChildSizes(self, disk):
2957 """Ensure children of the disk have the needed disk size.
2959 This is valid mainly for DRBD8 and fixes an issue where the
2960 children have a smaller disk size.
2962 @param disk: an L{ganeti.objects.Disk} object
2965 if disk.dev_type == constants.LD_DRBD8:
2966 assert disk.children, "Empty children for DRBD8?"
2967 fchild = disk.children[0]
2968 mismatch = fchild.size < disk.size
2970 self.LogInfo("Child disk has size %d, parent %d, fixing",
2971 fchild.size, disk.size)
2972 fchild.size = disk.size
2974 # and we recurse on this child only, not on the metadev
2975 return self._EnsureChildSizes(fchild) or mismatch
2979 def Exec(self, feedback_fn):
2980 """Verify the size of cluster disks.
2983 # TODO: check child disks too
2984 # TODO: check differences in size between primary/secondary nodes
2986 for instance in self.wanted_instances:
2987 pnode = instance.primary_node
2988 if pnode not in per_node_disks:
2989 per_node_disks[pnode] = []
2990 for idx, disk in enumerate(instance.disks):
2991 per_node_disks[pnode].append((instance, idx, disk))
2994 for node, dskl in per_node_disks.items():
2995 newl = [v[2].Copy() for v in dskl]
2997 self.cfg.SetDiskID(dsk, node)
2998 result = self.rpc.call_blockdev_getsize(node, newl)
3000 self.LogWarning("Failure in blockdev_getsize call to node"
3001 " %s, ignoring", node)
3003 if len(result.payload) != len(dskl):
3004 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3005 " result.payload=%s", node, len(dskl), result.payload)
3006 self.LogWarning("Invalid result from node %s, ignoring node results",
3009 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3011 self.LogWarning("Disk %d of instance %s did not return size"
3012 " information, ignoring", idx, instance.name)
3014 if not isinstance(size, (int, long)):
3015 self.LogWarning("Disk %d of instance %s did not return valid"
3016 " size information, ignoring", idx, instance.name)
3019 if size != disk.size:
3020 self.LogInfo("Disk %d of instance %s has mismatched size,"
3021 " correcting: recorded %d, actual %d", idx,
3022 instance.name, disk.size, size)
3024 self.cfg.Update(instance, feedback_fn)
3025 changed.append((instance.name, idx, size))
3026 if self._EnsureChildSizes(disk):
3027 self.cfg.Update(instance, feedback_fn)
3028 changed.append((instance.name, idx, disk.size))
3032 class LUClusterRename(LogicalUnit):
3033 """Rename the cluster.
3036 HPATH = "cluster-rename"
3037 HTYPE = constants.HTYPE_CLUSTER
3039 def BuildHooksEnv(self):
3044 "OP_TARGET": self.cfg.GetClusterName(),
3045 "NEW_NAME": self.op.name,
3048 def BuildHooksNodes(self):
3049 """Build hooks nodes.
3052 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3054 def CheckPrereq(self):
3055 """Verify that the passed name is a valid one.
3058 hostname = netutils.GetHostname(name=self.op.name,
3059 family=self.cfg.GetPrimaryIPFamily())
3061 new_name = hostname.name
3062 self.ip = new_ip = hostname.ip
3063 old_name = self.cfg.GetClusterName()
3064 old_ip = self.cfg.GetMasterIP()
3065 if new_name == old_name and new_ip == old_ip:
3066 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3067 " cluster has changed",
3069 if new_ip != old_ip:
3070 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3071 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3072 " reachable on the network" %
3073 new_ip, errors.ECODE_NOTUNIQUE)
3075 self.op.name = new_name
3077 def Exec(self, feedback_fn):
3078 """Rename the cluster.
3081 clustername = self.op.name
3084 # shutdown the master IP
3085 master = self.cfg.GetMasterNode()
3086 result = self.rpc.call_node_stop_master(master, False)
3087 result.Raise("Could not disable the master role")
3090 cluster = self.cfg.GetClusterInfo()
3091 cluster.cluster_name = clustername
3092 cluster.master_ip = ip
3093 self.cfg.Update(cluster, feedback_fn)
3095 # update the known hosts file
3096 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3097 node_list = self.cfg.GetOnlineNodeList()
3099 node_list.remove(master)
3102 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3104 result = self.rpc.call_node_start_master(master, False, False)
3105 msg = result.fail_msg
3107 self.LogWarning("Could not re-enable the master role on"
3108 " the master, please restart manually: %s", msg)
3113 class LUClusterSetParams(LogicalUnit):
3114 """Change the parameters of the cluster.
3117 HPATH = "cluster-modify"
3118 HTYPE = constants.HTYPE_CLUSTER
3121 def CheckArguments(self):
3125 if self.op.uid_pool:
3126 uidpool.CheckUidPool(self.op.uid_pool)
3128 if self.op.add_uids:
3129 uidpool.CheckUidPool(self.op.add_uids)
3131 if self.op.remove_uids:
3132 uidpool.CheckUidPool(self.op.remove_uids)
3134 def ExpandNames(self):
3135 # FIXME: in the future maybe other cluster params won't require checking on
3136 # all nodes to be modified.
3137 self.needed_locks = {
3138 locking.LEVEL_NODE: locking.ALL_SET,
3140 self.share_locks[locking.LEVEL_NODE] = 1
3142 def BuildHooksEnv(self):
3147 "OP_TARGET": self.cfg.GetClusterName(),
3148 "NEW_VG_NAME": self.op.vg_name,
3151 def BuildHooksNodes(self):
3152 """Build hooks nodes.
3155 mn = self.cfg.GetMasterNode()
3158 def CheckPrereq(self):
3159 """Check prerequisites.
3161 This checks whether the given params don't conflict and
3162 if the given volume group is valid.
3165 if self.op.vg_name is not None and not self.op.vg_name:
3166 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3167 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3168 " instances exist", errors.ECODE_INVAL)
3170 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3171 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3172 raise errors.OpPrereqError("Cannot disable drbd helper while"
3173 " drbd-based instances exist",
3176 node_list = self.glm.list_owned(locking.LEVEL_NODE)
3178 # if vg_name not None, checks given volume group on all nodes
3180 vglist = self.rpc.call_vg_list(node_list)
3181 for node in node_list:
3182 msg = vglist[node].fail_msg
3184 # ignoring down node
3185 self.LogWarning("Error while gathering data on node %s"
3186 " (ignoring node): %s", node, msg)
3188 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3190 constants.MIN_VG_SIZE)
3192 raise errors.OpPrereqError("Error on node '%s': %s" %
3193 (node, vgstatus), errors.ECODE_ENVIRON)
3195 if self.op.drbd_helper:
3196 # checks given drbd helper on all nodes
3197 helpers = self.rpc.call_drbd_helper(node_list)
3198 for node in node_list:
3199 ninfo = self.cfg.GetNodeInfo(node)
3201 self.LogInfo("Not checking drbd helper on offline node %s", node)
3203 msg = helpers[node].fail_msg
3205 raise errors.OpPrereqError("Error checking drbd helper on node"
3206 " '%s': %s" % (node, msg),
3207 errors.ECODE_ENVIRON)
3208 node_helper = helpers[node].payload
3209 if node_helper != self.op.drbd_helper:
3210 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3211 (node, node_helper), errors.ECODE_ENVIRON)
3213 self.cluster = cluster = self.cfg.GetClusterInfo()
3214 # validate params changes
3215 if self.op.beparams:
3216 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3217 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3219 if self.op.ndparams:
3220 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3221 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3223 # TODO: we need a more general way to handle resetting
3224 # cluster-level parameters to default values
3225 if self.new_ndparams["oob_program"] == "":
3226 self.new_ndparams["oob_program"] = \
3227 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3229 if self.op.nicparams:
3230 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3231 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3232 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3235 # check all instances for consistency
3236 for instance in self.cfg.GetAllInstancesInfo().values():
3237 for nic_idx, nic in enumerate(instance.nics):
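# compute the effective NIC parameters after the change: the proposed
# cluster-level defaults overlaid with this NIC's own overrides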
3238 params_copy = copy.deepcopy(nic.nicparams)
3239 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3241 # check parameter syntax
3243 objects.NIC.CheckParameterSyntax(params_filled)
3244 except errors.ConfigurationError, err:
3245 nic_errors.append("Instance %s, nic/%d: %s" %
3246 (instance.name, nic_idx, err))
3248 # if we're moving instances to routed, check that they have an ip
3249 target_mode = params_filled[constants.NIC_MODE]
3250 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3251 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3252 " address" % (instance.name, nic_idx))
3254 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3255 "\n".join(nic_errors))
3257 # hypervisor list/parameters
3258 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3259 if self.op.hvparams:
3260 for hv_name, hv_dict in self.op.hvparams.items():
3261 if hv_name not in self.new_hvparams:
3262 self.new_hvparams[hv_name] = hv_dict
3264 self.new_hvparams[hv_name].update(hv_dict)
3266 # os hypervisor parameters
3267 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3269 for os_name, hvs in self.op.os_hvp.items():
3270 if os_name not in self.new_os_hvp:
3271 self.new_os_hvp[os_name] = hvs
3273 for hv_name, hv_dict in hvs.items():
3274 if hv_name not in self.new_os_hvp[os_name]:
3275 self.new_os_hvp[os_name][hv_name] = hv_dict
3277 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3280 self.new_osp = objects.FillDict(cluster.osparams, {})
3281 if self.op.osparams:
3282 for os_name, osp in self.op.osparams.items():
3283 if os_name not in self.new_osp:
3284 self.new_osp[os_name] = {}
3286 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3289 if not self.new_osp[os_name]:
3290 # we removed all parameters
3291 del self.new_osp[os_name]
3293 # check the parameter validity (remote check)
3294 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3295 os_name, self.new_osp[os_name])
3297 # changes to the hypervisor list
3298 if self.op.enabled_hypervisors is not None:
3299 self.hv_list = self.op.enabled_hypervisors
3300 for hv in self.hv_list:
3301 # if the hypervisor doesn't already exist in the cluster
3302 # hvparams, we initialize it to empty, and then (in both
3303 # cases) we make sure to fill the defaults, as we might not
3304 # have a complete defaults list if the hypervisor wasn't
3306 if hv not in new_hvp:
3308 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3309 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3311 self.hv_list = cluster.enabled_hypervisors
3313 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3314 # either the enabled list has changed, or the parameters have, validate
3315 for hv_name, hv_params in self.new_hvparams.items():
3316 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3317 (self.op.enabled_hypervisors and
3318 hv_name in self.op.enabled_hypervisors)):
3319 # either this is a new hypervisor, or its parameters have changed
3320 hv_class = hypervisor.GetHypervisor(hv_name)
3321 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3322 hv_class.CheckParameterSyntax(hv_params)
3323 _CheckHVParams(self, node_list, hv_name, hv_params)
3326 # no need to check any newly-enabled hypervisors, since the
3327 # defaults have already been checked in the above code-block
3328 for os_name, os_hvp in self.new_os_hvp.items():
3329 for hv_name, hv_params in os_hvp.items():
3330 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3331 # we need to fill in the new os_hvp on top of the actual hv_p
3332 cluster_defaults = self.new_hvparams.get(hv_name, {})
3333 new_osp = objects.FillDict(cluster_defaults, hv_params)
3334 hv_class = hypervisor.GetHypervisor(hv_name)
3335 hv_class.CheckParameterSyntax(new_osp)
3336 _CheckHVParams(self, node_list, hv_name, new_osp)
3338 if self.op.default_iallocator:
3339 alloc_script = utils.FindFile(self.op.default_iallocator,
3340 constants.IALLOCATOR_SEARCH_PATH,
3342 if alloc_script is None:
3343 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3344 " specified" % self.op.default_iallocator,
3347 def Exec(self, feedback_fn):
3348 """Change the parameters of the cluster.
3351 if self.op.vg_name is not None:
3352 new_volume = self.op.vg_name
3355 if new_volume != self.cfg.GetVGName():
3356 self.cfg.SetVGName(new_volume)
3358 feedback_fn("Cluster LVM configuration already in desired"
3359 " state, not changing")
3360 if self.op.drbd_helper is not None:
3361 new_helper = self.op.drbd_helper
3364 if new_helper != self.cfg.GetDRBDHelper():
3365 self.cfg.SetDRBDHelper(new_helper)
3367 feedback_fn("Cluster DRBD helper already in desired state,"
3369 if self.op.hvparams:
3370 self.cluster.hvparams = self.new_hvparams
3372 self.cluster.os_hvp = self.new_os_hvp
3373 if self.op.enabled_hypervisors is not None:
3374 self.cluster.hvparams = self.new_hvparams
3375 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3376 if self.op.beparams:
3377 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3378 if self.op.nicparams:
3379 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3380 if self.op.osparams:
3381 self.cluster.osparams = self.new_osp
3382 if self.op.ndparams:
3383 self.cluster.ndparams = self.new_ndparams
3385 if self.op.candidate_pool_size is not None:
3386 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3387 # we need to update the pool size here, otherwise the save will fail
3388 _AdjustCandidatePool(self, [])
3390 if self.op.maintain_node_health is not None:
3391 self.cluster.maintain_node_health = self.op.maintain_node_health
3393 if self.op.prealloc_wipe_disks is not None:
3394 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3396 if self.op.add_uids is not None:
3397 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3399 if self.op.remove_uids is not None:
3400 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3402 if self.op.uid_pool is not None:
3403 self.cluster.uid_pool = self.op.uid_pool
3405 if self.op.default_iallocator is not None:
3406 self.cluster.default_iallocator = self.op.default_iallocator
3408 if self.op.reserved_lvs is not None:
3409 self.cluster.reserved_lvs = self.op.reserved_lvs
3411 def helper_os(aname, mods, desc):
3413 lst = getattr(self.cluster, aname)
3414 for key, val in mods:
3415 if key == constants.DDM_ADD:
3417 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3420 elif key == constants.DDM_REMOVE:
3424 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3426 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3428 if self.op.hidden_os:
3429 helper_os("hidden_os", self.op.hidden_os, "hidden")
3431 if self.op.blacklisted_os:
3432 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3434 if self.op.master_netdev:
3435 master = self.cfg.GetMasterNode()
3436 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3437 self.cluster.master_netdev)
3438 result = self.rpc.call_node_stop_master(master, False)
3439 result.Raise("Could not disable the master ip")
3440 feedback_fn("Changing master_netdev from %s to %s" %
3441 (self.cluster.master_netdev, self.op.master_netdev))
3442 self.cluster.master_netdev = self.op.master_netdev
3444 self.cfg.Update(self.cluster, feedback_fn)
3446 if self.op.master_netdev:
3447 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3448 self.op.master_netdev)
3449 result = self.rpc.call_node_start_master(master, False, False)
3451 self.LogWarning("Could not re-enable the master ip on"
3452 " the master, please restart manually: %s",
3456 def _UploadHelper(lu, nodes, fname):
3457 """Helper for uploading a file and showing warnings.
3460 if os.path.exists(fname):
3461 result = lu.rpc.call_upload_file(nodes, fname)
3462 for to_node, to_result in result.items():
3463 msg = to_result.fail_msg
3465 msg = ("Copy of file %s to node %s failed: %s" %
3466 (fname, to_node, msg))
3467 lu.proc.LogWarning(msg)
3470 def _ComputeAncillaryFiles(cluster, redist):
3471 """Compute files external to Ganeti which need to be consistent.
3473 @type redist: boolean
3474 @param redist: Whether to include files which need to be redistributed
3477 # Compute files for all nodes
3479 constants.SSH_KNOWN_HOSTS_FILE,
3480 constants.CONFD_HMAC_KEY,
3481 constants.CLUSTER_DOMAIN_SECRET_FILE,
3485 files_all.update(constants.ALL_CERT_FILES)
3486 files_all.update(ssconf.SimpleStore().GetFileList())
3488 if cluster.modify_etc_hosts:
3489 files_all.add(constants.ETC_HOSTS)
3491 # Files which must either exist on all nodes or on none
3492 files_all_opt = set([
3493 constants.RAPI_USERS_FILE,
3496 # Files which should only be on master candidates
3499 files_mc.add(constants.CLUSTER_CONF_FILE)
3501 # Files which should only be on VM-capable nodes
3502 files_vm = set(filename
3503 for hv_name in cluster.enabled_hypervisors
3504 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3506 # Filenames must be unique
3507 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3508 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3509 "Found file listed in more than one file list"
3511 return (files_all, files_all_opt, files_mc, files_vm)
3514 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3515 """Distribute additional files which are part of the cluster configuration.
3517 ConfigWriter takes care of distributing the config and ssconf files, but
3518 there are more files which should be distributed to all nodes. This function
3519 makes sure those are copied.
3521 @param lu: calling logical unit
3522 @param additional_nodes: list of nodes not in the config to distribute to
3523 @type additional_vm: boolean
3524 @param additional_vm: whether the additional nodes are vm-capable or not
3527 # Gather target nodes
3528 cluster = lu.cfg.GetClusterInfo()
3529 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3531 online_nodes = lu.cfg.GetOnlineNodeList()
3532 vm_nodes = lu.cfg.GetVmCapableNodeList()
3534 if additional_nodes is not None:
3535 online_nodes.extend(additional_nodes)
3537 vm_nodes.extend(additional_nodes)
3539 # Never distribute to master node
3540 for nodelist in [online_nodes, vm_nodes]:
3541 if master_info.name in nodelist:
3542 nodelist.remove(master_info.name)
3545 (files_all, files_all_opt, files_mc, files_vm) = \
3546 _ComputeAncillaryFiles(cluster, True)
3548 # Never re-distribute configuration file from here
3549 assert not (constants.CLUSTER_CONF_FILE in files_all or
3550 constants.CLUSTER_CONF_FILE in files_vm)
3551 assert not files_mc, "Master candidates not handled in this function"
3554 (online_nodes, files_all),
3555 (online_nodes, files_all_opt),
3556 (vm_nodes, files_vm),
3560 for (node_list, files) in filemap:
3562 _UploadHelper(lu, node_list, fname)
3565 class LUClusterRedistConf(NoHooksLU):
3566 """Force the redistribution of cluster configuration.
3568 This is a very simple LU.
3573 def ExpandNames(self):
3574 self.needed_locks = {
3575 locking.LEVEL_NODE: locking.ALL_SET,
3577 self.share_locks[locking.LEVEL_NODE] = 1
3579 def Exec(self, feedback_fn):
3580 """Redistribute the configuration.
3583 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3584 _RedistributeAncillaryFiles(self)
3587 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3588 """Sleep and poll for an instance's disk to sync.
3591 if not instance.disks or disks is not None and not disks:
3594 disks = _ExpandCheckDisks(instance, disks)
3597 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3599 node = instance.primary_node
3602 lu.cfg.SetDiskID(dev, node)
3604 # TODO: Convert to utils.Retry
3607 degr_retries = 10 # in seconds, as we sleep 1 second each time
3611 cumul_degraded = False
3612 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3613 msg = rstats.fail_msg
3615 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3618 raise errors.RemoteError("Can't contact node %s for mirror data,"
3619 " aborting." % node)
3622 rstats = rstats.payload
3624 for i, mstat in enumerate(rstats):
3626 lu.LogWarning("Can't compute data for node %s/%s",
3627 node, disks[i].iv_name)
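# a device only counts towards overall degradation if it is degraded and
# reports no sync progress at all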
3630 cumul_degraded = (cumul_degraded or
3631 (mstat.is_degraded and mstat.sync_percent is None))
3632 if mstat.sync_percent is not None:
3634 if mstat.estimated_time is not None:
3635 rem_time = ("%s remaining (estimated)" %
3636 utils.FormatSeconds(mstat.estimated_time))
3637 max_time = mstat.estimated_time
3639 rem_time = "no time estimate"
3640 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3641 (disks[i].iv_name, mstat.sync_percent, rem_time))
3643 # if we're done but degraded, let's do a few small retries, to
3644 # make sure we see a stable and not transient situation; therefore
3645 # we force restart of the loop
3646 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3647 logging.info("Degraded disks found, %d retries left", degr_retries)
3655 time.sleep(min(60, max_time))
3658 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3659 return not cumul_degraded
3662 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3663 """Check that mirrors are not degraded.
3665 The ldisk parameter, if True, will change the test from the
3666 is_degraded attribute (which represents overall non-ok status for
3667 the device(s)) to the ldisk (representing the local storage status).
3670 lu.cfg.SetDiskID(dev, node)
3674 if on_primary or dev.AssembleOnSecondary():
3675 rstats = lu.rpc.call_blockdev_find(node, dev)
3676 msg = rstats.fail_msg
3678 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3680 elif not rstats.payload:
3681 lu.LogWarning("Can't find disk on node %s", node)
3685 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3687 result = result and not rstats.payload.is_degraded
3690 for child in dev.children:
3691 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3696 class LUOobCommand(NoHooksLU):
3697 """Logical unit for OOB handling.
3701 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3703 def ExpandNames(self):
3704 """Gather locks we need.
3707 if self.op.node_names:
3708 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3709 lock_names = self.op.node_names
3711 lock_names = locking.ALL_SET
3713 self.needed_locks = {
3714 locking.LEVEL_NODE: lock_names,
3717 def CheckPrereq(self):
3718 """Check prerequisites.
3721 - the node exists in the configuration
3724 Any errors are signaled by raising errors.OpPrereqError.
3728 self.master_node = self.cfg.GetMasterNode()
3730 assert self.op.power_delay >= 0.0
3732 if self.op.node_names:
3733 if (self.op.command in self._SKIP_MASTER and
3734 self.master_node in self.op.node_names):
3735 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3736 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3738 if master_oob_handler:
3739 additional_text = ("run '%s %s %s' if you want to operate on the"
3740 " master regardless") % (master_oob_handler,
3744 additional_text = "it does not support out-of-band operations"
3746 raise errors.OpPrereqError(("Operating on the master node %s is not"
3747 " allowed for %s; %s") %
3748 (self.master_node, self.op.command,
3749 additional_text), errors.ECODE_INVAL)
3751 self.op.node_names = self.cfg.GetNodeList()
3752 if self.op.command in self._SKIP_MASTER:
3753 self.op.node_names.remove(self.master_node)
3755 if self.op.command in self._SKIP_MASTER:
3756 assert self.master_node not in self.op.node_names
3758 for node_name in self.op.node_names:
3759 node = self.cfg.GetNodeInfo(node_name)
3762 raise errors.OpPrereqError("Node %s not found" % node_name,
3765 self.nodes.append(node)
3767 if (not self.op.ignore_status and
3768 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3769 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3770 " not marked offline") % node_name,
3773 def Exec(self, feedback_fn):
3774 """Execute OOB and return result if we expect any.
3777 master_node = self.master_node
3780 for idx, node in enumerate(utils.NiceSort(self.nodes,
3781 key=lambda node: node.name)):
3782 node_entry = [(constants.RS_NORMAL, node.name)]
3783 ret.append(node_entry)
3785 oob_program = _SupportsOob(self.cfg, node)
3788 node_entry.append((constants.RS_UNAVAIL, None))
3791 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3792 self.op.command, oob_program, node.name)
3793 result = self.rpc.call_run_oob(master_node, oob_program,
3794 self.op.command, node.name,
3798 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3799 node.name, result.fail_msg)
3800 node_entry.append((constants.RS_NODATA, None))
3803 self._CheckPayload(result)
3804 except errors.OpExecError, err:
3805 self.LogWarning("Payload returned by node '%s' is not valid: %s",
3807 node_entry.append((constants.RS_NODATA, None))
3809 if self.op.command == constants.OOB_HEALTH:
3810 # For health we should log important events
3811 for item, status in result.payload:
3812 if status in [constants.OOB_STATUS_WARNING,
3813 constants.OOB_STATUS_CRITICAL]:
3814 self.LogWarning("Item '%s' on node '%s' has status '%s'",
3815 item, node.name, status)
3817 if self.op.command == constants.OOB_POWER_ON:
3819 elif self.op.command == constants.OOB_POWER_OFF:
3820 node.powered = False
3821 elif self.op.command == constants.OOB_POWER_STATUS:
3822 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3823 if powered != node.powered:
3824 logging.warning(("Recorded power state (%s) of node '%s' does not"
3825 " match actual power state (%s)"), node.powered,
3828 # For configuration changing commands we should update the node
3829 if self.op.command in (constants.OOB_POWER_ON,
3830 constants.OOB_POWER_OFF):
3831 self.cfg.Update(node, feedback_fn)
3833 node_entry.append((constants.RS_NORMAL, result.payload))
3835 if (self.op.command == constants.OOB_POWER_ON and
3836 idx < len(self.nodes) - 1):
3837 time.sleep(self.op.power_delay)
3841 def _CheckPayload(self, result):
3842 """Checks if the payload is valid.
3844 @param result: RPC result
3845 @raises errors.OpExecError: If payload is not valid
3849 if self.op.command == constants.OOB_HEALTH:
3850 if not isinstance(result.payload, list):
3851 errs.append("command 'health' is expected to return a list but got %s" %
3852 type(result.payload))
3854 for item, status in result.payload:
3855 if status not in constants.OOB_STATUSES:
3856 errs.append("health item '%s' has invalid status '%s'" %
3859 if self.op.command == constants.OOB_POWER_STATUS:
3860 if not isinstance(result.payload, dict):
3861 errs.append("power-status is expected to return a dict but got %s" %
3862 type(result.payload))
3864 if self.op.command in [
3865 constants.OOB_POWER_ON,
3866 constants.OOB_POWER_OFF,
3867 constants.OOB_POWER_CYCLE,
3869 if result.payload is not None:
3870 errs.append("%s is expected to not return payload but got '%s'" %
3871 (self.op.command, result.payload))
3874 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3875 utils.CommaJoin(errs))
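# For reference, the payload shapes accepted by _CheckPayload above
# (illustrative item names; the constants are the ones used in Exec):
#
#   OOB_HEALTH:       [("disk0", constants.OOB_STATUS_WARNING),
#                      ("fan1", constants.OOB_STATUS_CRITICAL)]
#   OOB_POWER_STATUS: {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON, OOB_POWER_OFF, OOB_POWER_CYCLE: no payload (None)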
3877 class _OsQuery(_QueryBase):
3878 FIELDS = query.OS_FIELDS
3880 def ExpandNames(self, lu):
3881 # Lock all nodes in shared mode
3882 # Temporary removal of locks, should be reverted later
3883 # TODO: reintroduce locks when they are lighter-weight
3884 lu.needed_locks = {}
3885 #self.share_locks[locking.LEVEL_NODE] = 1
3886 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3888 # The following variables interact with _QueryBase._GetNames
3890 self.wanted = self.names
3892 self.wanted = locking.ALL_SET
3894 self.do_locking = self.use_locking
3896 def DeclareLocks(self, lu, level):
3900 def _DiagnoseByOS(rlist):
3901 """Remaps a per-node return list into an a per-os per-node dictionary
3903 @param rlist: a map with node names as keys and OS objects as values
3906 @return: a dictionary with osnames as keys and as value another
3907 map, with nodes as keys and tuples of (path, status, diagnose,
3908 variants, parameters, api_versions) as values, eg::
3910 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3911 (/srv/..., False, "invalid api")],
3912 "node2": [(/srv/..., True, "", [], [])]}
3917 # we build here the list of nodes that didn't fail the RPC (at RPC
3918 # level), so that nodes with a non-responding node daemon don't
3919 # make all OSes invalid
3920 good_nodes = [node_name for node_name in rlist
3921 if not rlist[node_name].fail_msg]
3922 for node_name, nr in rlist.items():
3923 if nr.fail_msg or not nr.payload:
3925 for (name, path, status, diagnose, variants,
3926 params, api_versions) in nr.payload:
3927 if name not in all_os:
3928 # build a list of nodes for this os containing empty lists
3929 # for each node in node_list
3931 for nname in good_nodes:
3932 all_os[name][nname] = []
3933 # convert params from [name, help] to (name, help)
3934 params = [tuple(v) for v in params]
3935 all_os[name][node_name].append((path, status, diagnose,
3936 variants, params, api_versions))
3939 def _GetQueryData(self, lu):
3940 """Computes the list of nodes and their attributes.
3943 # Locking is not used
3944 assert not (compat.any(lu.glm.is_owned(level)
3945 for level in locking.LEVELS
3946 if level != locking.LEVEL_CLUSTER) or
3947 self.do_locking or self.use_locking)
3949 valid_nodes = [node.name
3950 for node in lu.cfg.GetAllNodesInfo().values()
3951 if not node.offline and node.vm_capable]
3952 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3953 cluster = lu.cfg.GetClusterInfo()
3957 for (os_name, os_data) in pol.items():
3958 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3959 hidden=(os_name in cluster.hidden_os),
3960 blacklisted=(os_name in cluster.blacklisted_os))
3964 api_versions = set()
3966 for idx, osl in enumerate(os_data.values()):
3967 info.valid = bool(info.valid and osl and osl[0][1])
3971 (node_variants, node_params, node_api) = osl[0][3:6]
3974 variants.update(node_variants)
3975 parameters.update(node_params)
3976 api_versions.update(node_api)
3978 # Filter out inconsistent values
3979 variants.intersection_update(node_variants)
3980 parameters.intersection_update(node_params)
3981 api_versions.intersection_update(node_api)
3983 info.variants = list(variants)
3984 info.parameters = list(parameters)
3985 info.api_versions = list(api_versions)
3987 data[os_name] = info
3989 # Prepare data in requested order
3990 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3994 class LUOsDiagnose(NoHooksLU):
3995 """Logical unit for OS diagnose/query.
4001 def _BuildFilter(fields, names):
4002 """Builds a filter for querying OSes.
4005 name_filter = qlang.MakeSimpleFilter("name", names)
4007 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4008 # respective field is not requested
4009 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4010 for fname in ["hidden", "blacklisted"]
4011 if fname not in fields]
4012 if "valid" not in fields:
4013 status_filter.append([qlang.OP_TRUE, "valid"])
4016 status_filter.insert(0, qlang.OP_AND)
4018 status_filter = None
4020 if name_filter and status_filter:
4021 return [qlang.OP_AND, name_filter, status_filter]
4025 return status_filter
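  # Example (illustrative): with the default output fields, i.e. neither
  # "hidden", "blacklisted" nor "valid" requested, and names=["debian-etch"],
  # the filter built above is roughly:
  #
  #   [qlang.OP_AND,
  #    <name filter from qlang.MakeSimpleFilter("name", ["debian-etch"])>,
  #    [qlang.OP_AND,
  #     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
  #     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
  #     [qlang.OP_TRUE, "valid"]]]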
4027 def CheckArguments(self):
4028 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4029 self.op.output_fields, False)
4031 def ExpandNames(self):
4032 self.oq.ExpandNames(self)
4034 def Exec(self, feedback_fn):
4035 return self.oq.OldStyleQuery(self)
4038 class LUNodeRemove(LogicalUnit):
4039 """Logical unit for removing a node.
4042 HPATH = "node-remove"
4043 HTYPE = constants.HTYPE_NODE
4045 def BuildHooksEnv(self):
4048 This doesn't run on the target node in the pre phase as a failed
4049 node would then be impossible to remove.
4053 "OP_TARGET": self.op.node_name,
4054 "NODE_NAME": self.op.node_name,
4057 def BuildHooksNodes(self):
4058 """Build hooks nodes.
4061 all_nodes = self.cfg.GetNodeList()
4063 all_nodes.remove(self.op.node_name)
4065 logging.warning("Node '%s', which is about to be removed, was not found"
4066 " in the list of all nodes", self.op.node_name)
4067 return (all_nodes, all_nodes)
4069 def CheckPrereq(self):
4070 """Check prerequisites.
4073 - the node exists in the configuration
4074 - it does not have primary or secondary instances
4075 - it's not the master
4077 Any errors are signaled by raising errors.OpPrereqError.
4080 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4081 node = self.cfg.GetNodeInfo(self.op.node_name)
4082 assert node is not None
4084 instance_list = self.cfg.GetInstanceList()
4086 masternode = self.cfg.GetMasterNode()
4087 if node.name == masternode:
4088 raise errors.OpPrereqError("Node is the master node, failover to another"
4089 " node is required", errors.ECODE_INVAL)
4091 for instance_name in instance_list:
4092 instance = self.cfg.GetInstanceInfo(instance_name)
4093 if node.name in instance.all_nodes:
4094 raise errors.OpPrereqError("Instance %s is still running on the node,"
4095 " please remove first" % instance_name,
4097 self.op.node_name = node.name
4100 def Exec(self, feedback_fn):
4101 """Removes the node from the cluster.
4105 logging.info("Stopping the node daemon and removing configs from node %s",
4108 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4110 # Promote nodes to master candidate as needed
4111 _AdjustCandidatePool(self, exceptions=[node.name])
4112 self.context.RemoveNode(node.name)
4114 # Run post hooks on the node before it's removed
4115 _RunPostHook(self, node.name)
4117 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4118 msg = result.fail_msg
4120 self.LogWarning("Errors encountered on the remote node while leaving"
4121 " the cluster: %s", msg)
4123 # Remove node from our /etc/hosts
4124 if self.cfg.GetClusterInfo().modify_etc_hosts:
4125 master_node = self.cfg.GetMasterNode()
4126 result = self.rpc.call_etc_hosts_modify(master_node,
4127 constants.ETC_HOSTS_REMOVE,
4129 result.Raise("Can't update hosts file with new host data")
4130 _RedistributeAncillaryFiles(self)
4133 class _NodeQuery(_QueryBase):
4134 FIELDS = query.NODE_FIELDS
4136 def ExpandNames(self, lu):
4137 lu.needed_locks = {}
4138 lu.share_locks[locking.LEVEL_NODE] = 1
4141 self.wanted = _GetWantedNodes(lu, self.names)
4143 self.wanted = locking.ALL_SET
4145 self.do_locking = (self.use_locking and
4146 query.NQ_LIVE in self.requested_data)
4149 # if we don't request only static fields, we need to lock the nodes
4150 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4152 def DeclareLocks(self, lu, level):
4155 def _GetQueryData(self, lu):
4156 """Computes the list of nodes and their attributes.
4159 all_info = lu.cfg.GetAllNodesInfo()
4161 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4163 # Gather data as requested
4164 if query.NQ_LIVE in self.requested_data:
4165 # filter out non-vm_capable nodes
4166 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4168 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4169 lu.cfg.GetHypervisorType())
4170 live_data = dict((name, nresult.payload)
4171 for (name, nresult) in node_data.items()
4172 if not nresult.fail_msg and nresult.payload)
4176 if query.NQ_INST in self.requested_data:
4177 node_to_primary = dict([(name, set()) for name in nodenames])
4178 node_to_secondary = dict([(name, set()) for name in nodenames])
4180 inst_data = lu.cfg.GetAllInstancesInfo()
4182 for inst in inst_data.values():
4183 if inst.primary_node in node_to_primary:
4184 node_to_primary[inst.primary_node].add(inst.name)
4185 for secnode in inst.secondary_nodes:
4186 if secnode in node_to_secondary:
4187 node_to_secondary[secnode].add(inst.name)
4189 node_to_primary = None
4190 node_to_secondary = None
4192 if query.NQ_OOB in self.requested_data:
4193 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4194 for name, node in all_info.iteritems())
4198 if query.NQ_GROUP in self.requested_data:
4199 groups = lu.cfg.GetAllNodeGroupsInfo()
4203 return query.NodeQueryData([all_info[name] for name in nodenames],
4204 live_data, lu.cfg.GetMasterNode(),
4205 node_to_primary, node_to_secondary, groups,
4206 oob_support, lu.cfg.GetClusterInfo())
4209 class LUNodeQuery(NoHooksLU):
4210 """Logical unit for querying nodes.
4213 # pylint: disable-msg=W0142
4216 def CheckArguments(self):
4217 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4218 self.op.output_fields, self.op.use_locking)
4220 def ExpandNames(self):
4221 self.nq.ExpandNames(self)
4223 def Exec(self, feedback_fn):
4224 return self.nq.OldStyleQuery(self)
4227 class LUNodeQueryvols(NoHooksLU):
4228 """Logical unit for getting volumes on node(s).
4232 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4233 _FIELDS_STATIC = utils.FieldSet("node")
4235 def CheckArguments(self):
4236 _CheckOutputFields(static=self._FIELDS_STATIC,
4237 dynamic=self._FIELDS_DYNAMIC,
4238 selected=self.op.output_fields)
4240 def ExpandNames(self):
4241 self.needed_locks = {}
4242 self.share_locks[locking.LEVEL_NODE] = 1
4243 if not self.op.nodes:
4244 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4246 self.needed_locks[locking.LEVEL_NODE] = \
4247 _GetWantedNodes(self, self.op.nodes)
4249 def Exec(self, feedback_fn):
4250 """Computes the list of nodes and their attributes.
4253 nodenames = self.glm.list_owned(locking.LEVEL_NODE)
4254 volumes = self.rpc.call_node_volumes(nodenames)
4256 ilist = self.cfg.GetAllInstancesInfo()
4258 vol2inst = dict(((node, vol), inst.name)
4259 for inst in ilist.values()
4260 for (node, vols) in inst.MapLVsByNode().items()
4264 for node in nodenames:
4265 nresult = volumes[node]
4268 msg = nresult.fail_msg
4270 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4273 node_vols = sorted(nresult.payload,
4274 key=operator.itemgetter("dev"))
4276 for vol in node_vols:
4278 for field in self.op.output_fields:
4281 elif field == "phys":
4285 elif field == "name":
4287 elif field == "size":
4288 val = int(float(vol["size"]))
4289 elif field == "instance":
4290 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4292 raise errors.ParameterError(field)
4293 node_output.append(str(val))
4295 output.append(node_output)
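    # For illustration (hypothetical names): vol2inst maps
    # (node_name, "<vg>/<lv>") keys to instance names, e.g.
    # ("node1.example.com", "xenvg/disk0") -> "instance1", which is why the
    # lookup above joins vol["vg"] and vol["name"] with a "/".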
4300 class LUNodeQueryStorage(NoHooksLU):
4301 """Logical unit for getting information on storage units on node(s).
4304 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4307 def CheckArguments(self):
4308 _CheckOutputFields(static=self._FIELDS_STATIC,
4309 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4310 selected=self.op.output_fields)
4312 def ExpandNames(self):
4313 self.needed_locks = {}
4314 self.share_locks[locking.LEVEL_NODE] = 1
4317 self.needed_locks[locking.LEVEL_NODE] = \
4318 _GetWantedNodes(self, self.op.nodes)
4320 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4322 def Exec(self, feedback_fn):
4323 """Computes the list of nodes and their attributes.
4326 self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
4328 # Always get name to sort by
4329 if constants.SF_NAME in self.op.output_fields:
4330 fields = self.op.output_fields[:]
4332 fields = [constants.SF_NAME] + self.op.output_fields
4334 # Never ask for node or type as it's only known to the LU
4335 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4336 while extra in fields:
4337 fields.remove(extra)
4339 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4340 name_idx = field_idx[constants.SF_NAME]
4342 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4343 data = self.rpc.call_storage_list(self.nodes,
4344 self.op.storage_type, st_args,
4345 self.op.name, fields)
4349 for node in utils.NiceSort(self.nodes):
4350 nresult = data[node]
4354 msg = nresult.fail_msg
4356 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4359 rows = dict([(row[name_idx], row) for row in nresult.payload])
4361 for name in utils.NiceSort(rows.keys()):
4366 for field in self.op.output_fields:
4367 if field == constants.SF_NODE:
4369 elif field == constants.SF_TYPE:
4370 val = self.op.storage_type
4371 elif field in field_idx:
4372 val = row[field_idx[field]]
4374 raise errors.ParameterError(field)
4383 class _InstanceQuery(_QueryBase):
4384 FIELDS = query.INSTANCE_FIELDS
4386 def ExpandNames(self, lu):
4387 lu.needed_locks = {}
4388 lu.share_locks[locking.LEVEL_INSTANCE] = 1
4389 lu.share_locks[locking.LEVEL_NODE] = 1
4392 self.wanted = _GetWantedInstances(lu, self.names)
4394 self.wanted = locking.ALL_SET
4396 self.do_locking = (self.use_locking and
4397 query.IQ_LIVE in self.requested_data)
4399 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4400 lu.needed_locks[locking.LEVEL_NODE] = []
4401 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4403 def DeclareLocks(self, lu, level):
4404 if level == locking.LEVEL_NODE and self.do_locking:
4405 lu._LockInstancesNodes() # pylint: disable-msg=W0212
4407 def _GetQueryData(self, lu):
4408 """Computes the list of instances and their attributes.
4411 cluster = lu.cfg.GetClusterInfo()
4412 all_info = lu.cfg.GetAllInstancesInfo()
4414 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4416 instance_list = [all_info[name] for name in instance_names]
4417 nodes = frozenset(itertools.chain(*(inst.all_nodes
4418 for inst in instance_list)))
4419 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4422 wrongnode_inst = set()
4424 # Gather data as requested
4425 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4427 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4429 result = node_data[name]
4431 # offline nodes will be in both lists
4432 assert result.fail_msg
4433 offline_nodes.append(name)
4435 bad_nodes.append(name)
4436 elif result.payload:
4437 for inst in result.payload:
4438 if inst in all_info:
4439 if all_info[inst].primary_node == name:
4440 live_data.update(result.payload)
4442 wrongnode_inst.add(inst)
4444 # orphan instance; we don't list it here as we don't
4445 # handle this case yet in the output of instance listing
4446 logging.warning("Orphan instance '%s' found on node %s",
4448 # else no instance is alive
4452 if query.IQ_DISKUSAGE in self.requested_data:
4453 disk_usage = dict((inst.name,
4454 _ComputeDiskSize(inst.disk_template,
4455 [{constants.IDISK_SIZE: disk.size}
4456 for disk in inst.disks]))
4457 for inst in instance_list)
4461 if query.IQ_CONSOLE in self.requested_data:
4463 for inst in instance_list:
4464 if inst.name in live_data:
4465 # Instance is running
4466 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4468 consinfo[inst.name] = None
4469 assert set(consinfo.keys()) == set(instance_names)
4473 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4474 disk_usage, offline_nodes, bad_nodes,
4475 live_data, wrongnode_inst, consinfo)
4478 class LUQuery(NoHooksLU):
4479 """Query for resources/items of a certain kind.
4482 # pylint: disable-msg=W0142
4485 def CheckArguments(self):
4486 qcls = _GetQueryImplementation(self.op.what)
4488 self.impl = qcls(self.op.filter, self.op.fields, False)
4490 def ExpandNames(self):
4491 self.impl.ExpandNames(self)
4493 def DeclareLocks(self, level):
4494 self.impl.DeclareLocks(self, level)
4496 def Exec(self, feedback_fn):
4497 return self.impl.NewStyleQuery(self)
4500 class LUQueryFields(NoHooksLU):
4501 """Query for resources/items of a certain kind.
4504 # pylint: disable-msg=W0142
4507 def CheckArguments(self):
4508 self.qcls = _GetQueryImplementation(self.op.what)
4510 def ExpandNames(self):
4511 self.needed_locks = {}
4513 def Exec(self, feedback_fn):
4514 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4517 class LUNodeModifyStorage(NoHooksLU):
4518 """Logical unit for modifying a storage volume on a node.
4523 def CheckArguments(self):
4524 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4526 storage_type = self.op.storage_type
4529 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4531 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4532 " modified" % storage_type,
4535 diff = set(self.op.changes.keys()) - modifiable
4537 raise errors.OpPrereqError("The following fields can not be modified for"
4538 " storage units of type '%s': %r" %
4539 (storage_type, list(diff)),
4542 def ExpandNames(self):
4543 self.needed_locks = {
4544 locking.LEVEL_NODE: self.op.node_name,
4547 def Exec(self, feedback_fn):
4548 """Computes the list of nodes and their attributes.
4551 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4552 result = self.rpc.call_storage_modify(self.op.node_name,
4553 self.op.storage_type, st_args,
4554 self.op.name, self.op.changes)
4555 result.Raise("Failed to modify storage unit '%s' on %s" %
4556 (self.op.name, self.op.node_name))
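# Illustrative example of the CheckArguments check above (the field name is
# an assumption for the example, not taken from constants): if a storage type
# only allows modifying an "allocatable" field, then
#
#   modifiable = frozenset(["allocatable"])
#   changes = {"allocatable": False, "size": 10}
#   set(changes.keys()) - modifiable  # -> set(["size"])
#
# and the non-empty difference triggers the OpPrereqError listing ["size"].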
4559 class LUNodeAdd(LogicalUnit):
4560 """Logical unit for adding node to the cluster.
4564 HTYPE = constants.HTYPE_NODE
4565 _NFLAGS = ["master_capable", "vm_capable"]
4567 def CheckArguments(self):
4568 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4569 # validate/normalize the node name
4570 self.hostname = netutils.GetHostname(name=self.op.node_name,
4571 family=self.primary_ip_family)
4572 self.op.node_name = self.hostname.name
4574 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4575 raise errors.OpPrereqError("Cannot readd the master node",
4578 if self.op.readd and self.op.group:
4579 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4580 " being readded", errors.ECODE_INVAL)
4582 def BuildHooksEnv(self):
4585 This will run on all nodes before, and on all nodes + the new node after.
4589 "OP_TARGET": self.op.node_name,
4590 "NODE_NAME": self.op.node_name,
4591 "NODE_PIP": self.op.primary_ip,
4592 "NODE_SIP": self.op.secondary_ip,
4593 "MASTER_CAPABLE": str(self.op.master_capable),
4594 "VM_CAPABLE": str(self.op.vm_capable),
4597 def BuildHooksNodes(self):
4598 """Build hooks nodes.
4601 # Exclude added node
4602 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4603 post_nodes = pre_nodes + [self.op.node_name, ]
4605 return (pre_nodes, post_nodes)
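    # Example (hypothetical node names): adding "node4" to a cluster made of
    # node1..node3 gives pre_nodes == ["node1", "node2", "node3"] and
    # post_nodes == pre_nodes + ["node4"]: the pre hooks skip the node being
    # added, the post hooks include it.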
4607 def CheckPrereq(self):
4608 """Check prerequisites.
4611 - the new node is not already in the config
4613 - its parameters (single/dual homed) match the cluster
4615 Any errors are signaled by raising errors.OpPrereqError.
4619 hostname = self.hostname
4620 node = hostname.name
4621 primary_ip = self.op.primary_ip = hostname.ip
4622 if self.op.secondary_ip is None:
4623 if self.primary_ip_family == netutils.IP6Address.family:
4624 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4625 " IPv4 address must be given as secondary",
4627 self.op.secondary_ip = primary_ip
4629 secondary_ip = self.op.secondary_ip
4630 if not netutils.IP4Address.IsValid(secondary_ip):
4631 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4632 " address" % secondary_ip, errors.ECODE_INVAL)
4634 node_list = cfg.GetNodeList()
4635 if not self.op.readd and node in node_list:
4636 raise errors.OpPrereqError("Node %s is already in the configuration" %
4637 node, errors.ECODE_EXISTS)
4638 elif self.op.readd and node not in node_list:
4639 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4642 self.changed_primary_ip = False
4644 for existing_node_name in node_list:
4645 existing_node = cfg.GetNodeInfo(existing_node_name)
4647 if self.op.readd and node == existing_node_name:
4648 if existing_node.secondary_ip != secondary_ip:
4649 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4650 " address configuration as before",
4652 if existing_node.primary_ip != primary_ip:
4653 self.changed_primary_ip = True
4657 if (existing_node.primary_ip == primary_ip or
4658 existing_node.secondary_ip == primary_ip or
4659 existing_node.primary_ip == secondary_ip or
4660 existing_node.secondary_ip == secondary_ip):
4661 raise errors.OpPrereqError("New node ip address(es) conflict with"
4662 " existing node %s" % existing_node.name,
4663 errors.ECODE_NOTUNIQUE)
4665 # After this 'if' block, None is no longer a valid value for the
4666 # _capable op attributes
4668 old_node = self.cfg.GetNodeInfo(node)
4669 assert old_node is not None, "Can't retrieve locked node %s" % node
4670 for attr in self._NFLAGS:
4671 if getattr(self.op, attr) is None:
4672 setattr(self.op, attr, getattr(old_node, attr))
4674 for attr in self._NFLAGS:
4675 if getattr(self.op, attr) is None:
4676 setattr(self.op, attr, True)
4678 if self.op.readd and not self.op.vm_capable:
4679 pri, sec = cfg.GetNodeInstances(node)
4681 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4682 " flag set to false, but it already holds"
4683 " instances" % node,
4686 # check that the type of the node (single versus dual homed) is the
4687 # same as for the master
4688 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4689 master_singlehomed = myself.secondary_ip == myself.primary_ip
4690 newbie_singlehomed = secondary_ip == primary_ip
4691 if master_singlehomed != newbie_singlehomed:
4692 if master_singlehomed:
4693 raise errors.OpPrereqError("The master has no secondary ip but the"
4694 " new node has one",
4697 raise errors.OpPrereqError("The master has a secondary ip but the"
4698 " new node doesn't have one",
4701 # checks reachability
4702 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4703 raise errors.OpPrereqError("Node not reachable by ping",
4704 errors.ECODE_ENVIRON)
4706 if not newbie_singlehomed:
4707 # check reachability from my secondary ip to newbie's secondary ip
4708 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4709 source=myself.secondary_ip):
4710 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4711 " based ping to node daemon port",
4712 errors.ECODE_ENVIRON)
4719 if self.op.master_capable:
4720 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4722 self.master_candidate = False
4725 self.new_node = old_node
4727 node_group = cfg.LookupNodeGroup(self.op.group)
4728 self.new_node = objects.Node(name=node,
4729 primary_ip=primary_ip,
4730 secondary_ip=secondary_ip,
4731 master_candidate=self.master_candidate,
4732 offline=False, drained=False,
4735 if self.op.ndparams:
4736 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4738 def Exec(self, feedback_fn):
4739 """Adds the new node to the cluster.
4742 new_node = self.new_node
4743 node = new_node.name
4745 # We are adding a new node, so we assume it's powered
4746 new_node.powered = True
4748 # for re-adds, reset the offline/drained/master-candidate flags;
4749 # we need to reset here, otherwise offline would prevent RPC calls
4750 # later in the procedure; this also means that if the re-add
4751 # fails, we are left with a non-offlined, broken node
4753 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4754 self.LogInfo("Readding a node, the offline/drained flags were reset")
4755 # if we demote the node, we do cleanup later in the procedure
4756 new_node.master_candidate = self.master_candidate
4757 if self.changed_primary_ip:
4758 new_node.primary_ip = self.op.primary_ip
4760 # copy the master/vm_capable flags
4761 for attr in self._NFLAGS:
4762 setattr(new_node, attr, getattr(self.op, attr))
4764 # notify the user about any possible mc promotion
4765 if new_node.master_candidate:
4766 self.LogInfo("Node will be a master candidate")
4768 if self.op.ndparams:
4769 new_node.ndparams = self.op.ndparams
4771 new_node.ndparams = {}
4773 # check connectivity
4774 result = self.rpc.call_version([node])[node]
4775 result.Raise("Can't get version information from node %s" % node)
4776 if constants.PROTOCOL_VERSION == result.payload:
4777 logging.info("Communication to node %s fine, sw version %s match",
4778 node, result.payload)
4780 raise errors.OpExecError("Version mismatch master version %s,"
4781 " node version %s" %
4782 (constants.PROTOCOL_VERSION, result.payload))
4784 # Add node to our /etc/hosts, and add key to known_hosts
4785 if self.cfg.GetClusterInfo().modify_etc_hosts:
4786 master_node = self.cfg.GetMasterNode()
4787 result = self.rpc.call_etc_hosts_modify(master_node,
4788 constants.ETC_HOSTS_ADD,
4791 result.Raise("Can't update hosts file with new host data")
4793 if new_node.secondary_ip != new_node.primary_ip:
4794 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4797 node_verify_list = [self.cfg.GetMasterNode()]
4798 node_verify_param = {
4799 constants.NV_NODELIST: [node],
4800 # TODO: do a node-net-test as well?
4803 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4804 self.cfg.GetClusterName())
4805 for verifier in node_verify_list:
4806 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4807 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4809 for failed in nl_payload:
4810 feedback_fn("ssh/hostname verification failed"
4811 " (checking from %s): %s" %
4812 (verifier, nl_payload[failed]))
4813 raise errors.OpExecError("ssh/hostname verification failed")
4816 _RedistributeAncillaryFiles(self)
4817 self.context.ReaddNode(new_node)
4818 # make sure we redistribute the config
4819 self.cfg.Update(new_node, feedback_fn)
4820 # and make sure the new node will not have old files around
4821 if not new_node.master_candidate:
4822 result = self.rpc.call_node_demote_from_mc(new_node.name)
4823 msg = result.fail_msg
4825 self.LogWarning("Node failed to demote itself from master"
4826 " candidate status: %s" % msg)
4828 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4829 additional_vm=self.op.vm_capable)
4830 self.context.AddNode(new_node, self.proc.GetECId())
4833 class LUNodeSetParams(LogicalUnit):
4834 """Modifies the parameters of a node.
4836 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4837 to the node role (as _ROLE_*)
4838 @cvar _R2F: a dictionary from node role to tuples of flags
4839 @cvar _FLAGS: a list of attribute names corresponding to the flags
4842 HPATH = "node-modify"
4843 HTYPE = constants.HTYPE_NODE
4845 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4847 (True, False, False): _ROLE_CANDIDATE,
4848 (False, True, False): _ROLE_DRAINED,
4849 (False, False, True): _ROLE_OFFLINE,
4850 (False, False, False): _ROLE_REGULAR,
4852 _R2F = dict((v, k) for k, v in _F2R.items())
4853 _FLAGS = ["master_candidate", "drained", "offline"]
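  # Example of the mapping above: a node whose
  # (master_candidate, drained, offline) flags are (True, False, False) has
  # role _ROLE_CANDIDATE, so _F2R[(True, False, False)] == _ROLE_CANDIDATE
  # and _R2F[_ROLE_CANDIDATE] == (True, False, False); the all-False tuple
  # maps to _ROLE_REGULAR.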
4855 def CheckArguments(self):
4856 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4857 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4858 self.op.master_capable, self.op.vm_capable,
4859 self.op.secondary_ip, self.op.ndparams]
4860 if all_mods.count(None) == len(all_mods):
4861 raise errors.OpPrereqError("Please pass at least one modification",
4863 if all_mods.count(True) > 1:
4864 raise errors.OpPrereqError("Can't set the node into more than one"
4865 " state at the same time",
4868 # Boolean value that tells us whether we might be demoting from MC
4869 self.might_demote = (self.op.master_candidate == False or
4870 self.op.offline == True or
4871 self.op.drained == True or
4872 self.op.master_capable == False)
4874 if self.op.secondary_ip:
4875 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4876 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4877 " address" % self.op.secondary_ip,
4880 self.lock_all = self.op.auto_promote and self.might_demote
4881 self.lock_instances = self.op.secondary_ip is not None
4883 def ExpandNames(self):
4885 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4887 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4889 if self.lock_instances:
4890 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4892 def DeclareLocks(self, level):
4893 # If we have locked all instances, before waiting to lock nodes, release
4894 # all the ones living on nodes unrelated to the current operation.
4895 if level == locking.LEVEL_NODE and self.lock_instances:
4896 self.affected_instances = []
4897 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4900 # Build list of instances to release
4901 for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
4902 instance = self.context.cfg.GetInstanceInfo(instance_name)
4903 if (instance.disk_template in constants.DTS_INT_MIRROR and
4904 self.op.node_name in instance.all_nodes):
4905 instances_keep.append(instance_name)
4906 self.affected_instances.append(instance)
4908 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
4910 assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
4911 set(instances_keep))
4913 def BuildHooksEnv(self):
4916 This runs on the master node.
4920 "OP_TARGET": self.op.node_name,
4921 "MASTER_CANDIDATE": str(self.op.master_candidate),
4922 "OFFLINE": str(self.op.offline),
4923 "DRAINED": str(self.op.drained),
4924 "MASTER_CAPABLE": str(self.op.master_capable),
4925 "VM_CAPABLE": str(self.op.vm_capable),
4928 def BuildHooksNodes(self):
4929 """Build hooks nodes.
4932 nl = [self.cfg.GetMasterNode(), self.op.node_name]
4935 def CheckPrereq(self):
4936 """Check prerequisites.
4938 This checks the requested node flag and parameter changes for consistency.
4941 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4943 if (self.op.master_candidate is not None or
4944 self.op.drained is not None or
4945 self.op.offline is not None):
4946 # we can't change the master's node flags
4947 if self.op.node_name == self.cfg.GetMasterNode():
4948 raise errors.OpPrereqError("The master role can be changed"
4949 " only via master-failover",
4952 if self.op.master_candidate and not node.master_capable:
4953 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4954 " it a master candidate" % node.name,
4957 if self.op.vm_capable == False:
4958 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4960 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4961 " the vm_capable flag" % node.name,
4964 if node.master_candidate and self.might_demote and not self.lock_all:
4965 assert not self.op.auto_promote, "auto_promote set but lock_all not"
4966 # check if after removing the current node, we're missing master
4968 (mc_remaining, mc_should, _) = \
4969 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4970 if mc_remaining < mc_should:
4971 raise errors.OpPrereqError("Not enough master candidates, please"
4972 " pass auto promote option to allow"
4973 " promotion", errors.ECODE_STATE)
4975 self.old_flags = old_flags = (node.master_candidate,
4976 node.drained, node.offline)
4977 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4978 self.old_role = old_role = self._F2R[old_flags]
4980 # Check for ineffective changes
4981 for attr in self._FLAGS:
4982 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4983 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4984 setattr(self.op, attr, None)
4986 # Past this point, any flag change to False means a transition
4987 # away from the respective state, as only real changes are kept
4989 # TODO: We might query the real power state if it supports OOB
4990 if _SupportsOob(self.cfg, node):
4991 if self.op.offline is False and not (node.powered or
4992 self.op.powered == True):
4993 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
4994 " offline status can be reset") %
4996 elif self.op.powered is not None:
4997 raise errors.OpPrereqError(("Unable to change powered state for node %s"
4998 " as it does not support out-of-band"
4999 " handling") % self.op.node_name)
5001 # If we're being deofflined/drained, we'll MC ourself if needed
5002 if (self.op.drained == False or self.op.offline == False or
5003 (self.op.master_capable and not node.master_capable)):
5004 if _DecideSelfPromotion(self):
5005 self.op.master_candidate = True
5006 self.LogInfo("Auto-promoting node to master candidate")
5008 # If we're no longer master capable, we'll demote ourselves from MC
5009 if self.op.master_capable == False and node.master_candidate:
5010 self.LogInfo("Demoting from master candidate")
5011 self.op.master_candidate = False
5014 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5015 if self.op.master_candidate:
5016 new_role = self._ROLE_CANDIDATE
5017 elif self.op.drained:
5018 new_role = self._ROLE_DRAINED
5019 elif self.op.offline:
5020 new_role = self._ROLE_OFFLINE
5021 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5022 # False is still in new flags, which means we're un-setting (the current) flags
5024 new_role = self._ROLE_REGULAR
5025 else: # no new flags, nothing, keep old role
5028 self.new_role = new_role
5030 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5031 # Trying to transition out of offline status
5032 result = self.rpc.call_version([node.name])[node.name]
5034 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5035 " to report its version: %s" %
5036 (node.name, result.fail_msg),
5039 self.LogWarning("Transitioning node from offline to online state"
5040 " without using re-add. Please make sure the node"
5043 if self.op.secondary_ip:
5044 # Ok even without locking, because this can't be changed by any LU
5045 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5046 master_singlehomed = master.secondary_ip == master.primary_ip
5047 if master_singlehomed and self.op.secondary_ip:
5048 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5049 " homed cluster", errors.ECODE_INVAL)
5052 if self.affected_instances:
5053 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5054 " node has instances (%s) configured"
5055 " to use it" % self.affected_instances)
5057 # On online nodes, check that no instances are running, and that
5058 # the node has the new ip and we can reach it.
5059 for instance in self.affected_instances:
5060 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5062 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5063 if master.name != node.name:
5064 # check reachability from master secondary ip to new secondary ip
5065 if not netutils.TcpPing(self.op.secondary_ip,
5066 constants.DEFAULT_NODED_PORT,
5067 source=master.secondary_ip):
5068 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5069 " based ping to node daemon port",
5070 errors.ECODE_ENVIRON)
5072 if self.op.ndparams:
5073 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5074 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5075 self.new_ndparams = new_ndparams
5077 def Exec(self, feedback_fn):
5082 old_role = self.old_role
5083 new_role = self.new_role
5087 if self.op.ndparams:
5088 node.ndparams = self.new_ndparams
5090 if self.op.powered is not None:
5091 node.powered = self.op.powered
5093 for attr in ["master_capable", "vm_capable"]:
5094 val = getattr(self.op, attr)
5096 setattr(node, attr, val)
5097 result.append((attr, str(val)))
5099 if new_role != old_role:
5100 # Tell the node to demote itself, if no longer MC and not offline
5101 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5102 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5104 self.LogWarning("Node failed to demote itself: %s", msg)
5106 new_flags = self._R2F[new_role]
5107 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5109 result.append((desc, str(nf)))
5110 (node.master_candidate, node.drained, node.offline) = new_flags
5112 # we locked all nodes, we adjust the CP before updating this node
5114 _AdjustCandidatePool(self, [node.name])
5116 if self.op.secondary_ip:
5117 node.secondary_ip = self.op.secondary_ip
5118 result.append(("secondary_ip", self.op.secondary_ip))
5120 # this will trigger configuration file update, if needed
5121 self.cfg.Update(node, feedback_fn)
5123 # this will trigger job queue propagation or cleanup if the mc
5125 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5126 self.context.ReaddNode(node)
5131 class LUNodePowercycle(NoHooksLU):
5132 """Powercycles a node.
5137 def CheckArguments(self):
5138 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5139 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5140 raise errors.OpPrereqError("The node is the master and the force"
5141 " parameter was not set",
5144 def ExpandNames(self):
5145 """Locking for PowercycleNode.
5147 This is a last-resort option and shouldn't block on other
5148 jobs. Therefore, we grab no locks.
5151 self.needed_locks = {}
5153 def Exec(self, feedback_fn):
5157 result = self.rpc.call_node_powercycle(self.op.node_name,
5158 self.cfg.GetHypervisorType())
5159 result.Raise("Failed to schedule the reboot")
5160 return result.payload
5163 class LUClusterQuery(NoHooksLU):
5164 """Query cluster configuration.
5169 def ExpandNames(self):
5170 self.needed_locks = {}
5172 def Exec(self, feedback_fn):
5173 """Return cluster config.
5176 cluster = self.cfg.GetClusterInfo()
5179 # Filter just for enabled hypervisors
5180 for os_name, hv_dict in cluster.os_hvp.items():
5181 os_hvp[os_name] = {}
5182 for hv_name, hv_params in hv_dict.items():
5183 if hv_name in cluster.enabled_hypervisors:
5184 os_hvp[os_name][hv_name] = hv_params
5186 # Convert ip_family to ip_version
5187 primary_ip_version = constants.IP4_VERSION
5188 if cluster.primary_ip_family == netutils.IP6Address.family:
5189 primary_ip_version = constants.IP6_VERSION
5192 "software_version": constants.RELEASE_VERSION,
5193 "protocol_version": constants.PROTOCOL_VERSION,
5194 "config_version": constants.CONFIG_VERSION,
5195 "os_api_version": max(constants.OS_API_VERSIONS),
5196 "export_version": constants.EXPORT_VERSION,
5197 "architecture": (platform.architecture()[0], platform.machine()),
5198 "name": cluster.cluster_name,
5199 "master": cluster.master_node,
5200 "default_hypervisor": cluster.enabled_hypervisors[0],
5201 "enabled_hypervisors": cluster.enabled_hypervisors,
5202 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5203 for hypervisor_name in cluster.enabled_hypervisors]),
5205 "beparams": cluster.beparams,
5206 "osparams": cluster.osparams,
5207 "nicparams": cluster.nicparams,
5208 "ndparams": cluster.ndparams,
5209 "candidate_pool_size": cluster.candidate_pool_size,
5210 "master_netdev": cluster.master_netdev,
5211 "volume_group_name": cluster.volume_group_name,
5212 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5213 "file_storage_dir": cluster.file_storage_dir,
5214 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5215 "maintain_node_health": cluster.maintain_node_health,
5216 "ctime": cluster.ctime,
5217 "mtime": cluster.mtime,
5218 "uuid": cluster.uuid,
5219 "tags": list(cluster.GetTags()),
5220 "uid_pool": cluster.uid_pool,
5221 "default_iallocator": cluster.default_iallocator,
5222 "reserved_lvs": cluster.reserved_lvs,
5223 "primary_ip_version": primary_ip_version,
5224 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5225 "hidden_os": cluster.hidden_os,
5226 "blacklisted_os": cluster.blacklisted_os,
5232 class LUClusterConfigQuery(NoHooksLU):
5233 """Return configuration values.
5237 _FIELDS_DYNAMIC = utils.FieldSet()
5238 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5239 "watcher_pause", "volume_group_name")
5241 def CheckArguments(self):
5242 _CheckOutputFields(static=self._FIELDS_STATIC,
5243 dynamic=self._FIELDS_DYNAMIC,
5244 selected=self.op.output_fields)
5246 def ExpandNames(self):
5247 self.needed_locks = {}
5249 def Exec(self, feedback_fn):
5250 """Dump a representation of the cluster config to the standard output.
5254 for field in self.op.output_fields:
5255 if field == "cluster_name":
5256 entry = self.cfg.GetClusterName()
5257 elif field == "master_node":
5258 entry = self.cfg.GetMasterNode()
5259 elif field == "drain_flag":
5260 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5261 elif field == "watcher_pause":
5262 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5263 elif field == "volume_group_name":
5264 entry = self.cfg.GetVGName()
5266 raise errors.ParameterError(field)
5267 values.append(entry)
5271 class LUInstanceActivateDisks(NoHooksLU):
5272 """Bring up an instance's disks.
5277 def ExpandNames(self):
5278 self._ExpandAndLockInstance()
5279 self.needed_locks[locking.LEVEL_NODE] = []
5280 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5282 def DeclareLocks(self, level):
5283 if level == locking.LEVEL_NODE:
5284 self._LockInstancesNodes()
5286 def CheckPrereq(self):
5287 """Check prerequisites.
5289 This checks that the instance is in the cluster.
5292 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5293 assert self.instance is not None, \
5294 "Cannot retrieve locked instance %s" % self.op.instance_name
5295 _CheckNodeOnline(self, self.instance.primary_node)
5297 def Exec(self, feedback_fn):
5298 """Activate the disks.
5301 disks_ok, disks_info = \
5302 _AssembleInstanceDisks(self, self.instance,
5303 ignore_size=self.op.ignore_size)
5305 raise errors.OpExecError("Cannot activate block devices")
5310 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5312 """Prepare the block devices for an instance.
5314 This sets up the block devices on all nodes.
5316 @type lu: L{LogicalUnit}
5317 @param lu: the logical unit on whose behalf we execute
5318 @type instance: L{objects.Instance}
5319 @param instance: the instance for whose disks we assemble
5320 @type disks: list of L{objects.Disk} or None
5321 @param disks: which disks to assemble (or all, if None)
5322 @type ignore_secondaries: boolean
5323 @param ignore_secondaries: if true, errors on secondary nodes
5324 won't result in an error return from the function
5325 @type ignore_size: boolean
5326 @param ignore_size: if true, the current known size of the disk
5327 will not be used during the disk activation, useful for cases
5328 when the size is wrong
5329 @return: a tuple (disks_ok, device_info); disks_ok is False if the
5330 operation failed, and device_info is a list of
5331 (host, instance_visible_name, node_visible_name) tuples mapping node devices to instance devices
5336 iname = instance.name
5337 disks = _ExpandCheckDisks(instance, disks)
5339 # With the two-pass mechanism we try to reduce the window of
5340 # opportunity for the race condition of switching DRBD to primary
5341 # before handshaking has occurred, but we do not eliminate it
5343 # The proper fix would be to wait (with some limits) until the
5344 # connection has been made and drbd transitions from WFConnection
5345 # into any other network-connected state (Connected, SyncTarget,
5348 # 1st pass, assemble on all nodes in secondary mode
5349 for idx, inst_disk in enumerate(disks):
5350 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5352 node_disk = node_disk.Copy()
5353 node_disk.UnsetSize()
5354 lu.cfg.SetDiskID(node_disk, node)
5355 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5356 msg = result.fail_msg
5358 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5359 " (is_primary=False, pass=1): %s",
5360 inst_disk.iv_name, node, msg)
5361 if not ignore_secondaries:
5364 # FIXME: race condition on drbd migration to primary
5366 # 2nd pass, do only the primary node
5367 for idx, inst_disk in enumerate(disks):
5370 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5371 if node != instance.primary_node:
5374 node_disk = node_disk.Copy()
5375 node_disk.UnsetSize()
5376 lu.cfg.SetDiskID(node_disk, node)
5377 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5378 msg = result.fail_msg
5380 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5381 " (is_primary=True, pass=2): %s",
5382 inst_disk.iv_name, node, msg)
5385 dev_path = result.payload
5387 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5389 # leave the disks configured for the primary node
5390 # this is a workaround that would be fixed better by
5391 # improving the logical/physical id handling
5393 lu.cfg.SetDiskID(disk, instance.primary_node)
5395 return disks_ok, device_info
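# Illustrative return value of _AssembleInstanceDisks (node name, iv_names
# and device paths are hypothetical): for an instance with two DRBD disks it
# could be
#
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0"),
#           ("node1.example.com", "disk/1", "/dev/drbd1")])
#
# i.e. disks_ok plus one (primary node, iv_name, device path) tuple per disk.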
5398 def _StartInstanceDisks(lu, instance, force):
5399 """Start the disks of an instance.
5402 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5403 ignore_secondaries=force)
5405 _ShutdownInstanceDisks(lu, instance)
5406 if force is not None and not force:
5407 lu.proc.LogWarning("", hint="If the message above refers to a"
5409 " you can retry the operation using '--force'.")
5410 raise errors.OpExecError("Disk consistency error")
5413 class LUInstanceDeactivateDisks(NoHooksLU):
5414 """Shutdown an instance's disks.
5419 def ExpandNames(self):
5420 self._ExpandAndLockInstance()
5421 self.needed_locks[locking.LEVEL_NODE] = []
5422 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5424 def DeclareLocks(self, level):
5425 if level == locking.LEVEL_NODE:
5426 self._LockInstancesNodes()
5428 def CheckPrereq(self):
5429 """Check prerequisites.
5431 This checks that the instance is in the cluster.
5434 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5435 assert self.instance is not None, \
5436 "Cannot retrieve locked instance %s" % self.op.instance_name
5438 def Exec(self, feedback_fn):
5439 """Deactivate the disks
5442 instance = self.instance
5444 _ShutdownInstanceDisks(self, instance)
5446 _SafeShutdownInstanceDisks(self, instance)
5449 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5450 """Shutdown block devices of an instance.
5452 This function checks if an instance is running, before calling
5453 _ShutdownInstanceDisks.
5456 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5457 _ShutdownInstanceDisks(lu, instance, disks=disks)
5460 def _ExpandCheckDisks(instance, disks):
5461 """Return the instance disks selected by the disks list
5463 @type disks: list of L{objects.Disk} or None
5464 @param disks: selected disks
5465 @rtype: list of L{objects.Disk}
5466 @return: selected instance disks to act on
5470 return instance.disks
5472 if not set(disks).issubset(instance.disks):
5473 raise errors.ProgrammerError("Can only act on disks belonging to the"
5478 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5479 """Shutdown block devices of an instance.
5481 This does the shutdown on all nodes of the instance.
5483 If ignore_primary is false, errors on the primary node are not ignored.
5488 disks = _ExpandCheckDisks(instance, disks)
5491 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5492 lu.cfg.SetDiskID(top_disk, node)
5493 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5494 msg = result.fail_msg
5496 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5497 disk.iv_name, node, msg)
5498 if ((node == instance.primary_node and not ignore_primary) or
5499 (node != instance.primary_node and not result.offline)):
5504 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5505 """Checks if a node has enough free memory.
5507 This function checks if a given node has the needed amount of free
5508 memory. In case the node has less memory or we cannot get the
5509 information from the node, this function raises an OpPrereqError.
5512 @type lu: C{LogicalUnit}
5513 @param lu: a logical unit from which we get configuration data
5515 @param node: the node to check
5516 @type reason: C{str}
5517 @param reason: string to use in the error message
5518 @type requested: C{int}
5519 @param requested: the amount of memory in MiB to check for
5520 @type hypervisor_name: C{str}
5521 @param hypervisor_name: the hypervisor to ask for memory stats
5522 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5523 we cannot check the node
5526 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5527 nodeinfo[node].Raise("Can't get data from node %s" % node,
5528 prereq=True, ecode=errors.ECODE_ENVIRON)
5529 free_mem = nodeinfo[node].payload.get("memory_free", None)
5530 if not isinstance(free_mem, int):
5531 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5532 " was '%s'" % (node, free_mem),
5533 errors.ECODE_ENVIRON)
5534 if requested > free_mem:
5535 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5536 " needed %s MiB, available %s MiB" %
5537 (node, reason, requested, free_mem),
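# Typical usage of _CheckNodeFreeMemory, as done by
# LUInstanceStartup.CheckPrereq further below:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)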
5541 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5542 """Checks if nodes have enough free disk space in the all VGs.
5544 This function checks if all given nodes have the needed amount of
5545 free disk. In case any node has less disk or we cannot get the
5546 information from the node, this function raises an OpPrereqError.
5549 @type lu: C{LogicalUnit}
5550 @param lu: a logical unit from which we get configuration data
5551 @type nodenames: C{list}
5552 @param nodenames: the list of node names to check
5553 @type req_sizes: C{dict}
5554 @param req_sizes: the hash of vg and corresponding amount of disk in
5556 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5557 or we cannot check the node
5560 for vg, req_size in req_sizes.items():
5561 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
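# Illustrative req_sizes value (the VG names are hypothetical): requesting
# 10 GiB from "xenvg" and 2 GiB from "backupvg" on all of nodenames would be
#
#   _CheckNodesFreeDiskPerVG(lu, nodenames,
#                            {"xenvg": 10 * 1024, "backupvg": 2 * 1024})
#
# which checks every VG on every node via _CheckNodesFreeDiskOnVG.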
5564 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5565 """Checks if nodes have enough free disk space in the specified VG.
5567 This function checks if all given nodes have the needed amount of
5568 free disk. In case any node has less disk or we cannot get the
5569 information from the node, this function raises an OpPrereqError.
5572 @type lu: C{LogicalUnit}
5573 @param lu: a logical unit from which we get configuration data
5574 @type nodenames: C{list}
5575 @param nodenames: the list of node names to check
5577 @param vg: the volume group to check
5578 @type requested: C{int}
5579 @param requested: the amount of disk in MiB to check for
5580 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5581 or we cannot check the node
5584 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5585 for node in nodenames:
5586 info = nodeinfo[node]
5587 info.Raise("Cannot get current information from node %s" % node,
5588 prereq=True, ecode=errors.ECODE_ENVIRON)
5589 vg_free = info.payload.get("vg_free", None)
5590 if not isinstance(vg_free, int):
5591 raise errors.OpPrereqError("Can't compute free disk space on node"
5592 " %s for vg %s, result was '%s'" %
5593 (node, vg, vg_free), errors.ECODE_ENVIRON)
5594 if requested > vg_free:
5595 raise errors.OpPrereqError("Not enough disk space on target node %s"
5596 " vg %s: required %d MiB, available %d MiB" %
5597 (node, vg, requested, vg_free),
5601 class LUInstanceStartup(LogicalUnit):
5602 """Starts an instance.
5605 HPATH = "instance-start"
5606 HTYPE = constants.HTYPE_INSTANCE
5609 def CheckArguments(self):
5611 if self.op.beparams:
5612 # fill the beparams dict
5613 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5615 def ExpandNames(self):
5616 self._ExpandAndLockInstance()
5618 def BuildHooksEnv(self):
5621 This runs on master, primary and secondary nodes of the instance.
5625 "FORCE": self.op.force,
5628 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5632 def BuildHooksNodes(self):
5633 """Build hooks nodes.
5636 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5639 def CheckPrereq(self):
5640 """Check prerequisites.
5642 This checks that the instance is in the cluster.
5645 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5646 assert self.instance is not None, \
5647 "Cannot retrieve locked instance %s" % self.op.instance_name
5650 if self.op.hvparams:
5651 # check hypervisor parameter syntax (locally)
5652 cluster = self.cfg.GetClusterInfo()
5653 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5654 filled_hvp = cluster.FillHV(instance)
5655 filled_hvp.update(self.op.hvparams)
5656 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5657 hv_type.CheckParameterSyntax(filled_hvp)
5658 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5660 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5662 if self.primary_offline and self.op.ignore_offline_nodes:
5663 self.proc.LogWarning("Ignoring offline primary node")
5665 if self.op.hvparams or self.op.beparams:
5666 self.proc.LogWarning("Overridden parameters are ignored")
5668 _CheckNodeOnline(self, instance.primary_node)
5670 bep = self.cfg.GetClusterInfo().FillBE(instance)
5672 # check bridges existence
5673 _CheckInstanceBridgesExist(self, instance)
5675 remote_info = self.rpc.call_instance_info(instance.primary_node,
5677 instance.hypervisor)
5678 remote_info.Raise("Error checking node %s" % instance.primary_node,
5679 prereq=True, ecode=errors.ECODE_ENVIRON)
5680 if not remote_info.payload: # not running already
5681 _CheckNodeFreeMemory(self, instance.primary_node,
5682 "starting instance %s" % instance.name,
5683 bep[constants.BE_MEMORY], instance.hypervisor)
5685 def Exec(self, feedback_fn):
5686 """Start the instance.
5689 instance = self.instance
5690 force = self.op.force
5692 if not self.op.no_remember:
5693 self.cfg.MarkInstanceUp(instance.name)
5695 if self.primary_offline:
5696 assert self.op.ignore_offline_nodes
5697 self.proc.LogInfo("Primary node offline, marked instance as started")
5699 node_current = instance.primary_node
5701 _StartInstanceDisks(self, instance, force)
5703 result = self.rpc.call_instance_start(node_current, instance,
5704 self.op.hvparams, self.op.beparams,
5705 self.op.startup_paused)
5706 msg = result.fail_msg
5708 _ShutdownInstanceDisks(self, instance)
5709 raise errors.OpExecError("Could not start instance: %s" % msg)
5712 class LUInstanceReboot(LogicalUnit):
5713 """Reboot an instance.
5716 HPATH = "instance-reboot"
5717 HTYPE = constants.HTYPE_INSTANCE
5720 def ExpandNames(self):
5721 self._ExpandAndLockInstance()
5723 def BuildHooksEnv(self):
5726 This runs on master, primary and secondary nodes of the instance.
5730 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5731 "REBOOT_TYPE": self.op.reboot_type,
5732 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5735 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5739 def BuildHooksNodes(self):
5740 """Build hooks nodes.
5743 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5746 def CheckPrereq(self):
5747 """Check prerequisites.
5749 This checks that the instance is in the cluster.
5752 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5753 assert self.instance is not None, \
5754 "Cannot retrieve locked instance %s" % self.op.instance_name
5756 _CheckNodeOnline(self, instance.primary_node)
5758 # check bridges existence
5759 _CheckInstanceBridgesExist(self, instance)
5761 def Exec(self, feedback_fn):
5762 """Reboot the instance.
5765 instance = self.instance
5766 ignore_secondaries = self.op.ignore_secondaries
5767 reboot_type = self.op.reboot_type
5769 remote_info = self.rpc.call_instance_info(instance.primary_node,
5771 instance.hypervisor)
5772 remote_info.Raise("Error checking node %s" % instance.primary_node)
5773 instance_running = bool(remote_info.payload)
5775 node_current = instance.primary_node
5777 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5778 constants.INSTANCE_REBOOT_HARD]:
5779 for disk in instance.disks:
5780 self.cfg.SetDiskID(disk, node_current)
5781 result = self.rpc.call_instance_reboot(node_current, instance,
5783 self.op.shutdown_timeout)
5784 result.Raise("Could not reboot instance")
5786 if instance_running:
5787 result = self.rpc.call_instance_shutdown(node_current, instance,
5788 self.op.shutdown_timeout)
5789 result.Raise("Could not shutdown instance for full reboot")
5790 _ShutdownInstanceDisks(self, instance)
5792 self.LogInfo("Instance %s was already stopped, starting now",
5794 _StartInstanceDisks(self, instance, ignore_secondaries)
5795 result = self.rpc.call_instance_start(node_current, instance,
5797 msg = result.fail_msg
5799 _ShutdownInstanceDisks(self, instance)
5800 raise errors.OpExecError("Could not start instance for"
5801 " full reboot: %s" % msg)
5803 self.cfg.MarkInstanceUp(instance.name)
5806 class LUInstanceShutdown(LogicalUnit):
5807 """Shutdown an instance.
5810 HPATH = "instance-stop"
5811 HTYPE = constants.HTYPE_INSTANCE
5814 def ExpandNames(self):
5815 self._ExpandAndLockInstance()
5817 def BuildHooksEnv(self):
5820 This runs on master, primary and secondary nodes of the instance.
5823 env = _BuildInstanceHookEnvByObject(self, self.instance)
5824 env["TIMEOUT"] = self.op.timeout
5827 def BuildHooksNodes(self):
5828 """Build hooks nodes.
5831 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5834 def CheckPrereq(self):
5835 """Check prerequisites.
5837 This checks that the instance is in the cluster.
5840 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5841 assert self.instance is not None, \
5842 "Cannot retrieve locked instance %s" % self.op.instance_name
5844 self.primary_offline = \
5845 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5847 if self.primary_offline and self.op.ignore_offline_nodes:
5848 self.proc.LogWarning("Ignoring offline primary node")
5850 _CheckNodeOnline(self, self.instance.primary_node)
5852 def Exec(self, feedback_fn):
5853 """Shutdown the instance.
5856 instance = self.instance
5857 node_current = instance.primary_node
5858 timeout = self.op.timeout
5860 if not self.op.no_remember:
5861 self.cfg.MarkInstanceDown(instance.name)
5863 if self.primary_offline:
5864 assert self.op.ignore_offline_nodes
5865 self.proc.LogInfo("Primary node offline, marked instance as stopped")
5867 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5868 msg = result.fail_msg
5870 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5872 _ShutdownInstanceDisks(self, instance)
5875 class LUInstanceReinstall(LogicalUnit):
5876 """Reinstall an instance.
5879 HPATH = "instance-reinstall"
5880 HTYPE = constants.HTYPE_INSTANCE
5883 def ExpandNames(self):
5884 self._ExpandAndLockInstance()
5886 def BuildHooksEnv(self):
5889 This runs on master, primary and secondary nodes of the instance.
5892 return _BuildInstanceHookEnvByObject(self, self.instance)
5894 def BuildHooksNodes(self):
5895 """Build hooks nodes.
5898 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5901 def CheckPrereq(self):
5902 """Check prerequisites.
5904 This checks that the instance is in the cluster and is not running.
5907 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5908 assert instance is not None, \
5909 "Cannot retrieve locked instance %s" % self.op.instance_name
5910 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5911 " offline, cannot reinstall")
5912 for node in instance.secondary_nodes:
5913 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5914 " cannot reinstall")
5916 if instance.disk_template == constants.DT_DISKLESS:
5917 raise errors.OpPrereqError("Instance '%s' has no disks" %
5918 self.op.instance_name,
5920 _CheckInstanceDown(self, instance, "cannot reinstall")
5922 if self.op.os_type is not None:
5924 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5925 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5926 instance_os = self.op.os_type
5928 instance_os = instance.os
5930 nodelist = list(instance.all_nodes)
5932 if self.op.osparams:
5933 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5934 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5935 self.os_inst = i_osdict # the new dict (without defaults)
5939 self.instance = instance
5941 def Exec(self, feedback_fn):
5942 """Reinstall the instance.
5945 inst = self.instance
5947 if self.op.os_type is not None:
5948 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5949 inst.os = self.op.os_type
5950 # Write to configuration
5951 self.cfg.Update(inst, feedback_fn)
5953 _StartInstanceDisks(self, inst, None)
5955 feedback_fn("Running the instance OS create scripts...")
5956 # FIXME: pass debug option from opcode to backend
5957 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5958 self.op.debug_level,
5959 osparams=self.os_inst)
5960 result.Raise("Could not install OS for instance %s on node %s" %
5961 (inst.name, inst.primary_node))
5963 _ShutdownInstanceDisks(self, inst)
5966 class LUInstanceRecreateDisks(LogicalUnit):
5967 """Recreate an instance's missing disks.
5970 HPATH = "instance-recreate-disks"
5971 HTYPE = constants.HTYPE_INSTANCE
5974 def CheckArguments(self):
5975 # normalise the disk list
5976 self.op.disks = sorted(frozenset(self.op.disks))
5978 def ExpandNames(self):
5979 self._ExpandAndLockInstance()
5980 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5982 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
5983 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
5985 self.needed_locks[locking.LEVEL_NODE] = []
5987 def DeclareLocks(self, level):
5988 if level == locking.LEVEL_NODE:
5989 # if we replace the nodes, we only need to lock the old primary,
5990 # otherwise we need to lock all nodes for disk re-creation
5991 primary_only = bool(self.op.nodes)
5992 self._LockInstancesNodes(primary_only=primary_only)
5994 def BuildHooksEnv(self):
5997 This runs on master, primary and secondary nodes of the instance.
6000 return _BuildInstanceHookEnvByObject(self, self.instance)
6002 def BuildHooksNodes(self):
6003 """Build hooks nodes.
6006 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6009 def CheckPrereq(self):
6010 """Check prerequisites.
6012 This checks that the instance is in the cluster and is not running.
6015 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6016 assert instance is not None, \
6017 "Cannot retrieve locked instance %s" % self.op.instance_name
6019 if len(self.op.nodes) != len(instance.all_nodes):
6020 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6021 " %d replacement nodes were specified" %
6022 (instance.name, len(instance.all_nodes),
6023 len(self.op.nodes)),
6025 assert instance.disk_template != constants.DT_DRBD8 or \
6026 len(self.op.nodes) == 2
6027 assert instance.disk_template != constants.DT_PLAIN or \
6028 len(self.op.nodes) == 1
6029 primary_node = self.op.nodes[0]
6031 primary_node = instance.primary_node
6032 _CheckNodeOnline(self, primary_node)
6034 if instance.disk_template == constants.DT_DISKLESS:
6035 raise errors.OpPrereqError("Instance '%s' has no disks" %
6036 self.op.instance_name, errors.ECODE_INVAL)
6037 # if we replace nodes *and* the old primary is offline, we don't
6039 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6040 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6041 if not (self.op.nodes and old_pnode.offline):
6042 _CheckInstanceDown(self, instance, "cannot recreate disks")
6044 if not self.op.disks:
6045 self.op.disks = range(len(instance.disks))
6047 for idx in self.op.disks:
6048 if idx >= len(instance.disks):
6049 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6051 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6052 raise errors.OpPrereqError("Can't recreate disks partially and"
6053 " change the nodes at the same time",
6055 self.instance = instance
6057 def Exec(self, feedback_fn):
6058 """Recreate the disks.
6061 instance = self.instance
6064 mods = [] # keeps track of needed logical_id changes
6066 for idx, disk in enumerate(instance.disks):
6067 if idx not in self.op.disks: # disk idx has not been passed in
6070 # update secondaries for disks, if needed
6072 if disk.dev_type == constants.LD_DRBD8:
6073 # need to update the nodes and minors
6074 assert len(self.op.nodes) == 2
6075 assert len(disk.logical_id) == 6 # otherwise disk internals
6077 (_, _, old_port, _, _, old_secret) = disk.logical_id
6078 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6079 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6080 new_minors[0], new_minors[1], old_secret)
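# Reference sketch (layout inferred from the unpacking above and from
# _GenerateDRBD8Branch further down): a DRBD8 logical_id is the 6-tuple
#   (node_a, node_b, port, minor_a, minor_b, shared_secret)
# so new_id only swaps the node names and minors while keeping the
# already-allocated port and the shared secret.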
6081 assert len(disk.logical_id) == len(new_id)
6082 mods.append((idx, new_id))
6084 # now that we have passed all asserts above, we can apply the mods
6085 # in a single run (to avoid partial changes)
6086 for idx, new_id in mods:
6087 instance.disks[idx].logical_id = new_id
6089 # change primary node, if needed
6091 instance.primary_node = self.op.nodes[0]
6092 self.LogWarning("Changing the instance's nodes, you will have to"
6093 " remove any disks left on the older nodes manually")
6096 self.cfg.Update(instance, feedback_fn)
6098 _CreateDisks(self, instance, to_skip=to_skip)
6101 class LUInstanceRename(LogicalUnit):
6102 """Rename an instance.
6105 HPATH = "instance-rename"
6106 HTYPE = constants.HTYPE_INSTANCE
6108 def CheckArguments(self):
6112 if self.op.ip_check and not self.op.name_check:
6113 # TODO: make the ip check more flexible and not depend on the name check
6114 raise errors.OpPrereqError("IP address check requires a name check",
6117 def BuildHooksEnv(self):
6120 This runs on master, primary and secondary nodes of the instance.
6123 env = _BuildInstanceHookEnvByObject(self, self.instance)
6124 env["INSTANCE_NEW_NAME"] = self.op.new_name
6127 def BuildHooksNodes(self):
6128 """Build hooks nodes.
6131 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6134 def CheckPrereq(self):
6135 """Check prerequisites.
6137 This checks that the instance is in the cluster and is not running.
6140 self.op.instance_name = _ExpandInstanceName(self.cfg,
6141 self.op.instance_name)
6142 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6143 assert instance is not None
6144 _CheckNodeOnline(self, instance.primary_node)
6145 _CheckInstanceDown(self, instance, "cannot rename")
6146 self.instance = instance
6148 new_name = self.op.new_name
6149 if self.op.name_check:
6150 hostname = netutils.GetHostname(name=new_name)
6151 if hostname != new_name:
6152 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6154 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6155 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6156 " same as given hostname '%s'") %
6157 (hostname.name, self.op.new_name),
6159 new_name = self.op.new_name = hostname.name
6160 if (self.op.ip_check and
6161 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6162 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6163 (hostname.ip, new_name),
6164 errors.ECODE_NOTUNIQUE)
6166 instance_list = self.cfg.GetInstanceList()
6167 if new_name in instance_list and new_name != instance.name:
6168 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6169 new_name, errors.ECODE_EXISTS)
6171 def Exec(self, feedback_fn):
6172 """Rename the instance.
6175 inst = self.instance
6176 old_name = inst.name
6178 rename_file_storage = False
6179 if (inst.disk_template in constants.DTS_FILEBASED and
6180 self.op.new_name != inst.name):
6181 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6182 rename_file_storage = True
6184 self.cfg.RenameInstance(inst.name, self.op.new_name)
6185 # Change the instance lock. This is definitely safe while we hold the BGL.
6186 # Otherwise the new lock would have to be added in acquired mode.
6188 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6189 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6191 # re-read the instance from the configuration after rename
6192 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6194 if rename_file_storage:
6195 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6196 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6197 old_file_storage_dir,
6198 new_file_storage_dir)
6199 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6200 " (but the instance has been renamed in Ganeti)" %
6201 (inst.primary_node, old_file_storage_dir,
6202 new_file_storage_dir))
6204 _StartInstanceDisks(self, inst, None)
6206 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6207 old_name, self.op.debug_level)
6208 msg = result.fail_msg
6210 msg = ("Could not run OS rename script for instance %s on node %s"
6211 " (but the instance has been renamed in Ganeti): %s" %
6212 (inst.name, inst.primary_node, msg))
6213 self.proc.LogWarning(msg)
6215 _ShutdownInstanceDisks(self, inst)
6220 class LUInstanceRemove(LogicalUnit):
6221 """Remove an instance.
6224 HPATH = "instance-remove"
6225 HTYPE = constants.HTYPE_INSTANCE
6228 def ExpandNames(self):
6229 self._ExpandAndLockInstance()
6230 self.needed_locks[locking.LEVEL_NODE] = []
6231 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6233 def DeclareLocks(self, level):
6234 if level == locking.LEVEL_NODE:
6235 self._LockInstancesNodes()
6237 def BuildHooksEnv(self):
6240 This runs on master, primary and secondary nodes of the instance.
6243 env = _BuildInstanceHookEnvByObject(self, self.instance)
6244 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6247 def BuildHooksNodes(self):
6248 """Build hooks nodes.
6251 nl = [self.cfg.GetMasterNode()]
6252 nl_post = list(self.instance.all_nodes) + nl
6253 return (nl, nl_post)
6255 def CheckPrereq(self):
6256 """Check prerequisites.
6258 This checks that the instance is in the cluster.
6261 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6262 assert self.instance is not None, \
6263 "Cannot retrieve locked instance %s" % self.op.instance_name
6265 def Exec(self, feedback_fn):
6266 """Remove the instance.
6269 instance = self.instance
6270 logging.info("Shutting down instance %s on node %s",
6271 instance.name, instance.primary_node)
6273 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6274 self.op.shutdown_timeout)
6275 msg = result.fail_msg
6277 if self.op.ignore_failures:
6278 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6280 raise errors.OpExecError("Could not shutdown instance %s on"
6282 (instance.name, instance.primary_node, msg))
6284 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6287 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6288 """Utility function to remove an instance.
6291 logging.info("Removing block devices for instance %s", instance.name)
6293 if not _RemoveDisks(lu, instance):
6294 if not ignore_failures:
6295 raise errors.OpExecError("Can't remove instance's disks")
6296 feedback_fn("Warning: can't remove instance's disks")
6298 logging.info("Removing instance %s out of cluster config", instance.name)
6300 lu.cfg.RemoveInstance(instance.name)
6302 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6303 "Instance lock removal conflict"
6305 # Remove lock for the instance
6306 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6309 class LUInstanceQuery(NoHooksLU):
6310 """Logical unit for querying instances.
6313 # pylint: disable-msg=W0142
6316 def CheckArguments(self):
6317 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6318 self.op.output_fields, self.op.use_locking)
6320 def ExpandNames(self):
6321 self.iq.ExpandNames(self)
6323 def DeclareLocks(self, level):
6324 self.iq.DeclareLocks(self, level)
6326 def Exec(self, feedback_fn):
6327 return self.iq.OldStyleQuery(self)
6330 class LUInstanceFailover(LogicalUnit):
6331 """Failover an instance.
6334 HPATH = "instance-failover"
6335 HTYPE = constants.HTYPE_INSTANCE
6338 def CheckArguments(self):
6339 """Check the arguments.
6342 self.iallocator = getattr(self.op, "iallocator", None)
6343 self.target_node = getattr(self.op, "target_node", None)
6345 def ExpandNames(self):
6346 self._ExpandAndLockInstance()
6348 if self.op.target_node is not None:
6349 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6351 self.needed_locks[locking.LEVEL_NODE] = []
6352 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6354 ignore_consistency = self.op.ignore_consistency
6355 shutdown_timeout = self.op.shutdown_timeout
6356 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6359 ignore_consistency=ignore_consistency,
6360 shutdown_timeout=shutdown_timeout)
6361 self.tasklets = [self._migrater]
6363 def DeclareLocks(self, level):
6364 if level == locking.LEVEL_NODE:
6365 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6366 if instance.disk_template in constants.DTS_EXT_MIRROR:
6367 if self.op.target_node is None:
6368 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6370 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6371 self.op.target_node]
6372 del self.recalculate_locks[locking.LEVEL_NODE]
6374 self._LockInstancesNodes()
6376 def BuildHooksEnv(self):
6379 This runs on master, primary and secondary nodes of the instance.
6382 instance = self._migrater.instance
6383 source_node = instance.primary_node
6384 target_node = self.op.target_node
6386 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6387 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6388 "OLD_PRIMARY": source_node,
6389 "NEW_PRIMARY": target_node,
6392 if instance.disk_template in constants.DTS_INT_MIRROR:
6393 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6394 env["NEW_SECONDARY"] = source_node
6396 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6398 env.update(_BuildInstanceHookEnvByObject(self, instance))
6402 def BuildHooksNodes(self):
6403 """Build hooks nodes.
6406 instance = self._migrater.instance
6407 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6408 return (nl, nl + [instance.primary_node])
6411 class LUInstanceMigrate(LogicalUnit):
6412 """Migrate an instance.
6414 This is migration without shutting down, compared to the failover,
6415 which is done with shutdown.
6418 HPATH = "instance-migrate"
6419 HTYPE = constants.HTYPE_INSTANCE
6422 def ExpandNames(self):
6423 self._ExpandAndLockInstance()
6425 if self.op.target_node is not None:
6426 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6428 self.needed_locks[locking.LEVEL_NODE] = []
6429 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6431 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6432 cleanup=self.op.cleanup,
6434 fallback=self.op.allow_failover)
6435 self.tasklets = [self._migrater]
6437 def DeclareLocks(self, level):
6438 if level == locking.LEVEL_NODE:
6439 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6440 if instance.disk_template in constants.DTS_EXT_MIRROR:
6441 if self.op.target_node is None:
6442 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6444 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6445 self.op.target_node]
6446 del self.recalculate_locks[locking.LEVEL_NODE]
6448 self._LockInstancesNodes()
6450 def BuildHooksEnv(self):
6453 This runs on master, primary and secondary nodes of the instance.
6456 instance = self._migrater.instance
6457 source_node = instance.primary_node
6458 target_node = self.op.target_node
6459 env = _BuildInstanceHookEnvByObject(self, instance)
6461 "MIGRATE_LIVE": self._migrater.live,
6462 "MIGRATE_CLEANUP": self.op.cleanup,
6463 "OLD_PRIMARY": source_node,
6464 "NEW_PRIMARY": target_node,
6467 if instance.disk_template in constants.DTS_INT_MIRROR:
6468 env["OLD_SECONDARY"] = target_node
6469 env["NEW_SECONDARY"] = source_node
6471 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6475 def BuildHooksNodes(self):
6476 """Build hooks nodes.
6479 instance = self._migrater.instance
6480 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6481 return (nl, nl + [instance.primary_node])
6484 class LUInstanceMove(LogicalUnit):
6485 """Move an instance by data-copying.
6488 HPATH = "instance-move"
6489 HTYPE = constants.HTYPE_INSTANCE
6492 def ExpandNames(self):
6493 self._ExpandAndLockInstance()
6494 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6495 self.op.target_node = target_node
6496 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6497 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6499 def DeclareLocks(self, level):
6500 if level == locking.LEVEL_NODE:
6501 self._LockInstancesNodes(primary_only=True)
6503 def BuildHooksEnv(self):
6506 This runs on master, primary and secondary nodes of the instance.
6510 "TARGET_NODE": self.op.target_node,
6511 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6513 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6516 def BuildHooksNodes(self):
6517 """Build hooks nodes.
6521 self.cfg.GetMasterNode(),
6522 self.instance.primary_node,
6523 self.op.target_node,
6527 def CheckPrereq(self):
6528 """Check prerequisites.
6530 This checks that the instance is in the cluster.
6533 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6534 assert self.instance is not None, \
6535 "Cannot retrieve locked instance %s" % self.op.instance_name
6537 node = self.cfg.GetNodeInfo(self.op.target_node)
6538 assert node is not None, \
6539 "Cannot retrieve locked node %s" % self.op.target_node
6541 self.target_node = target_node = node.name
6543 if target_node == instance.primary_node:
6544 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6545 (instance.name, target_node),
6548 bep = self.cfg.GetClusterInfo().FillBE(instance)
6550 for idx, dsk in enumerate(instance.disks):
6551 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6552 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6553 " cannot copy" % idx, errors.ECODE_STATE)
6555 _CheckNodeOnline(self, target_node)
6556 _CheckNodeNotDrained(self, target_node)
6557 _CheckNodeVmCapable(self, target_node)
6559 if instance.admin_up:
6560 # check memory requirements on the secondary node
6561 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6562 instance.name, bep[constants.BE_MEMORY],
6563 instance.hypervisor)
6565 self.LogInfo("Not checking memory on the secondary node as"
6566 " instance will not be started")
6568 # check bridge existence
6569 _CheckInstanceBridgesExist(self, instance, node=target_node)
6571 def Exec(self, feedback_fn):
6572 """Move an instance.
6574 The move is done by shutting it down on its present node, copying
6575 the data over (slow) and starting it on the new node.
6578 instance = self.instance
6580 source_node = instance.primary_node
6581 target_node = self.target_node
6583 self.LogInfo("Shutting down instance %s on source node %s",
6584 instance.name, source_node)
6586 result = self.rpc.call_instance_shutdown(source_node, instance,
6587 self.op.shutdown_timeout)
6588 msg = result.fail_msg
6590 if self.op.ignore_consistency:
6591 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6592 " Proceeding anyway. Please make sure node"
6593 " %s is down. Error details: %s",
6594 instance.name, source_node, source_node, msg)
6596 raise errors.OpExecError("Could not shutdown instance %s on"
6598 (instance.name, source_node, msg))
6600 # create the target disks
6602 _CreateDisks(self, instance, target_node=target_node)
6603 except errors.OpExecError:
6604 self.LogWarning("Device creation failed, reverting...")
6606 _RemoveDisks(self, instance, target_node=target_node)
6608 self.cfg.ReleaseDRBDMinors(instance.name)
6611 cluster_name = self.cfg.GetClusterInfo().cluster_name
6614 # activate, get path, copy the data over
6615 for idx, disk in enumerate(instance.disks):
6616 self.LogInfo("Copying data for disk %d", idx)
6617 result = self.rpc.call_blockdev_assemble(target_node, disk,
6618 instance.name, True, idx)
6620 self.LogWarning("Can't assemble newly created disk %d: %s",
6621 idx, result.fail_msg)
6622 errs.append(result.fail_msg)
6624 dev_path = result.payload
6625 result = self.rpc.call_blockdev_export(source_node, disk,
6626 target_node, dev_path,
6629 self.LogWarning("Can't copy data over for disk %d: %s",
6630 idx, result.fail_msg)
6631 errs.append(result.fail_msg)
6635 self.LogWarning("Some disks failed to copy, aborting")
6637 _RemoveDisks(self, instance, target_node=target_node)
6639 self.cfg.ReleaseDRBDMinors(instance.name)
6640 raise errors.OpExecError("Errors during disk copy: %s" %
6643 instance.primary_node = target_node
6644 self.cfg.Update(instance, feedback_fn)
6646 self.LogInfo("Removing the disks on the original node")
6647 _RemoveDisks(self, instance, target_node=source_node)
6649 # Only start the instance if it's marked as up
6650 if instance.admin_up:
6651 self.LogInfo("Starting instance %s on node %s",
6652 instance.name, target_node)
6654 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6655 ignore_secondaries=True)
6657 _ShutdownInstanceDisks(self, instance)
6658 raise errors.OpExecError("Can't activate the instance's disks")
6660 result = self.rpc.call_instance_start(target_node, instance,
6662 msg = result.fail_msg
6664 _ShutdownInstanceDisks(self, instance)
6665 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6666 (instance.name, target_node, msg))
6669 class LUNodeMigrate(LogicalUnit):
6670 """Migrate all instances from a node.
6673 HPATH = "node-migrate"
6674 HTYPE = constants.HTYPE_NODE
6677 def CheckArguments(self):
6680 def ExpandNames(self):
6681 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6683 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
6684 self.needed_locks = {
6685 locking.LEVEL_NODE: [self.op.node_name],
6688 def BuildHooksEnv(self):
6691 This runs on the master, the primary and all the secondaries.
6695 "NODE_NAME": self.op.node_name,
6698 def BuildHooksNodes(self):
6699 """Build hooks nodes.
6702 nl = [self.cfg.GetMasterNode()]
6705 def CheckPrereq(self):
6708 def Exec(self, feedback_fn):
6709 # Prepare jobs for migrating instances
6711 [opcodes.OpInstanceMigrate(instance_name=inst.name,
6714 iallocator=self.op.iallocator,
6715 target_node=self.op.target_node)]
6716 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6719 # TODO: Run iallocator in this opcode and pass correct placement options to
6720 # OpInstanceMigrate. Since other jobs can modify the cluster between
6721 # running the iallocator and the actual migration, a good consistency model
6722 # will have to be found.
6724 assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
6725 frozenset([self.op.node_name]))
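# Shape of the result (per the ResultWithJobs contract): a list of
# single-opcode jobs, one per primary instance on the node, e.g.
#   [[OpInstanceMigrate(instance_name="inst1", ...)],
#    [OpInstanceMigrate(instance_name="inst2", ...)]]
# (hypothetical instance names); each inner list is submitted as a separate
# job and the resulting job IDs are reported back to the caller.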
6727 return ResultWithJobs(jobs)
6730 class TLMigrateInstance(Tasklet):
6731 """Tasklet class for instance migration.
6734 @ivar live: whether the migration will be done live or non-live;
6735 this variable is initialized only after CheckPrereq has run
6736 @type cleanup: boolean
6737 @ivar cleanup: Whether we clean up from a failed migration
6738 @type iallocator: string
6739 @ivar iallocator: The iallocator used to determine target_node
6740 @type target_node: string
6741 @ivar target_node: If given, the target_node to reallocate the instance to
6742 @type failover: boolean
6743 @ivar failover: Whether operation results in failover or migration
6744 @type fallback: boolean
6745 @ivar fallback: Whether fallback to failover is allowed if migration not
6747 @type ignore_consistency: boolean
6748 @ivar ignore_consistency: Whether we should ignore consistency between source
6750 @type shutdown_timeout: int
6751 @ivar shutdown_timeout: In case of failover, the timeout used for the shutdown
6754 def __init__(self, lu, instance_name, cleanup=False,
6755 failover=False, fallback=False,
6756 ignore_consistency=False,
6757 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6758 """Initializes this class.
6761 Tasklet.__init__(self, lu)
6764 self.instance_name = instance_name
6765 self.cleanup = cleanup
6766 self.live = False # will be overridden later
6767 self.failover = failover
6768 self.fallback = fallback
6769 self.ignore_consistency = ignore_consistency
6770 self.shutdown_timeout = shutdown_timeout
6772 def CheckPrereq(self):
6773 """Check prerequisites.
6775 This checks that the instance is in the cluster.
6778 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6779 instance = self.cfg.GetInstanceInfo(instance_name)
6780 assert instance is not None
6781 self.instance = instance
6783 if (not self.cleanup and not instance.admin_up and not self.failover and
6785 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6787 self.failover = True
6789 if instance.disk_template not in constants.DTS_MIRRORED:
6794 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6795 " %s" % (instance.disk_template, text),
6798 if instance.disk_template in constants.DTS_EXT_MIRROR:
6799 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6801 if self.lu.op.iallocator:
6802 self._RunAllocator()
6804 # We set self.target_node as it is required by
6806 self.target_node = self.lu.op.target_node
6808 # self.target_node is already populated, either directly or by the
6810 target_node = self.target_node
6811 if self.target_node == instance.primary_node:
6812 raise errors.OpPrereqError("Cannot migrate instance %s"
6813 " to its primary (%s)" %
6814 (instance.name, instance.primary_node))
6816 if len(self.lu.tasklets) == 1:
6817 # It is safe to release locks only when we're the only tasklet
6819 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6820 keep=[instance.primary_node, self.target_node])
6823 secondary_nodes = instance.secondary_nodes
6824 if not secondary_nodes:
6825 raise errors.ConfigurationError("No secondary node but using"
6826 " %s disk template" %
6827 instance.disk_template)
6828 target_node = secondary_nodes[0]
6829 if self.lu.op.iallocator or (self.lu.op.target_node and
6830 self.lu.op.target_node != target_node):
6832 text = "failed over"
6835 raise errors.OpPrereqError("Instances with disk template %s cannot"
6836 " be %s to arbitrary nodes"
6837 " (neither an iallocator nor a target"
6838 " node can be passed)" %
6839 (instance.disk_template, text),
6842 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6844 # check memory requirements on the secondary node
6845 if not self.failover or instance.admin_up:
6846 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6847 instance.name, i_be[constants.BE_MEMORY],
6848 instance.hypervisor)
6850 self.lu.LogInfo("Not checking memory on the secondary node as"
6851 " instance will not be started")
6853 # check bridge existence
6854 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6856 if not self.cleanup:
6857 _CheckNodeNotDrained(self.lu, target_node)
6858 if not self.failover:
6859 result = self.rpc.call_instance_migratable(instance.primary_node,
6861 if result.fail_msg and self.fallback:
6862 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6864 self.failover = True
6866 result.Raise("Can't migrate, please use failover",
6867 prereq=True, ecode=errors.ECODE_STATE)
6869 assert not (self.failover and self.cleanup)
6871 if not self.failover:
6872 if self.lu.op.live is not None and self.lu.op.mode is not None:
6873 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6874 " parameters are accepted",
6876 if self.lu.op.live is not None:
6878 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6880 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6881 # reset the 'live' parameter to None so that repeated
6882 # invocations of CheckPrereq do not raise an exception
6883 self.lu.op.live = None
6884 elif self.lu.op.mode is None:
6885 # read the default value from the hypervisor
6886 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6888 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6890 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6892 # Failover is never live
6895 def _RunAllocator(self):
6896 """Run the allocator based on input opcode.
6899 ial = IAllocator(self.cfg, self.rpc,
6900 mode=constants.IALLOCATOR_MODE_RELOC,
6901 name=self.instance_name,
6902 # TODO See why hail breaks with a single node below
6903 relocate_from=[self.instance.primary_node,
6904 self.instance.primary_node],
6907 ial.Run(self.lu.op.iallocator)
6910 raise errors.OpPrereqError("Can't compute nodes using"
6911 " iallocator '%s': %s" %
6912 (self.lu.op.iallocator, ial.info),
6914 if len(ial.result) != ial.required_nodes:
6915 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6916 " of nodes (%s), required %s" %
6917 (self.lu.op.iallocator, len(ial.result),
6918 ial.required_nodes), errors.ECODE_FAULT)
6919 self.target_node = ial.result[0]
6920 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6921 self.instance_name, self.lu.op.iallocator,
6922 utils.CommaJoin(ial.result))
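# Note on the expected result (hedged, based on the relocation mode used
# above): a relocation request asks the plugin for a single replacement node,
# so ial.result should be a one-element list such as ["node3.example.com"]
# (hypothetical name), which becomes self.target_node.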
6924 def _WaitUntilSync(self):
6925 """Poll with custom rpc for disk sync.
6927 This uses our own step-based rpc call.
6930 self.feedback_fn("* wait until resync is done")
6934 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6936 self.instance.disks)
6938 for node, nres in result.items():
6939 nres.Raise("Cannot resync disks on node %s" % node)
6940 node_done, node_percent = nres.payload
6941 all_done = all_done and node_done
6942 if node_percent is not None:
6943 min_percent = min(min_percent, node_percent)
6945 if min_percent < 100:
6946 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6949 def _EnsureSecondary(self, node):
6950 """Demote a node to secondary.
6953 self.feedback_fn("* switching node %s to secondary mode" % node)
6955 for dev in self.instance.disks:
6956 self.cfg.SetDiskID(dev, node)
6958 result = self.rpc.call_blockdev_close(node, self.instance.name,
6959 self.instance.disks)
6960 result.Raise("Cannot change disk to secondary on node %s" % node)
6962 def _GoStandalone(self):
6963 """Disconnect from the network.
6966 self.feedback_fn("* changing into standalone mode")
6967 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6968 self.instance.disks)
6969 for node, nres in result.items():
6970 nres.Raise("Cannot disconnect disks node %s" % node)
6972 def _GoReconnect(self, multimaster):
6973 """Reconnect to the network.
6979 msg = "single-master"
6980 self.feedback_fn("* changing disks into %s mode" % msg)
6981 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6982 self.instance.disks,
6983 self.instance.name, multimaster)
6984 for node, nres in result.items():
6985 nres.Raise("Cannot change disks config on node %s" % node)
6987 def _ExecCleanup(self):
6988 """Try to cleanup after a failed migration.
6990 The cleanup is done by:
6991 - check that the instance is running only on one node
6992 (and update the config if needed)
6993 - change disks on its secondary node to secondary
6994 - wait until disks are fully synchronized
6995 - disconnect from the network
6996 - change disks into single-master mode
6997 - wait again until disks are fully synchronized
7000 instance = self.instance
7001 target_node = self.target_node
7002 source_node = self.source_node
7004 # check running on only one node
7005 self.feedback_fn("* checking where the instance actually runs"
7006 " (if this hangs, the hypervisor might be in"
7008 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7009 for node, result in ins_l.items():
7010 result.Raise("Can't contact node %s" % node)
7012 runningon_source = instance.name in ins_l[source_node].payload
7013 runningon_target = instance.name in ins_l[target_node].payload
7015 if runningon_source and runningon_target:
7016 raise errors.OpExecError("Instance seems to be running on two nodes,"
7017 " or the hypervisor is confused; you will have"
7018 " to ensure manually that it runs only on one"
7019 " and restart this operation")
7021 if not (runningon_source or runningon_target):
7022 raise errors.OpExecError("Instance does not seem to be running at all;"
7023 " in this case it's safer to repair by"
7024 " running 'gnt-instance stop' to ensure disk"
7025 " shutdown, and then restarting it")
7027 if runningon_target:
7028 # the migration has actually succeeded, we need to update the config
7029 self.feedback_fn("* instance running on secondary node (%s),"
7030 " updating config" % target_node)
7031 instance.primary_node = target_node
7032 self.cfg.Update(instance, self.feedback_fn)
7033 demoted_node = source_node
7035 self.feedback_fn("* instance confirmed to be running on its"
7036 " primary node (%s)" % source_node)
7037 demoted_node = target_node
7039 if instance.disk_template in constants.DTS_INT_MIRROR:
7040 self._EnsureSecondary(demoted_node)
7042 self._WaitUntilSync()
7043 except errors.OpExecError:
7044 # we ignore errors here, since if the device is standalone, it
7045 # won't be able to sync
7047 self._GoStandalone()
7048 self._GoReconnect(False)
7049 self._WaitUntilSync()
7051 self.feedback_fn("* done")
7053 def _RevertDiskStatus(self):
7054 """Try to revert the disk status after a failed migration.
7057 target_node = self.target_node
7058 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7062 self._EnsureSecondary(target_node)
7063 self._GoStandalone()
7064 self._GoReconnect(False)
7065 self._WaitUntilSync()
7066 except errors.OpExecError, err:
7067 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7068 " please try to recover the instance manually;"
7069 " error '%s'" % str(err))
7071 def _AbortMigration(self):
7072 """Call the hypervisor code to abort a started migration.
7075 instance = self.instance
7076 target_node = self.target_node
7077 migration_info = self.migration_info
7079 abort_result = self.rpc.call_finalize_migration(target_node,
7083 abort_msg = abort_result.fail_msg
7085 logging.error("Aborting migration failed on target node %s: %s",
7086 target_node, abort_msg)
7087 # Don't raise an exception here, as we still have to try to revert the
7088 # disk status, even if this step failed.
7090 def _ExecMigration(self):
7091 """Migrate an instance.
7093 The migration is done by:
7094 - change the disks into dual-master mode
7095 - wait until disks are fully synchronized again
7096 - migrate the instance
7097 - change disks on the new secondary node (the old primary) to secondary
7098 - wait until disks are fully synchronized
7099 - change disks into single-master mode
7102 instance = self.instance
7103 target_node = self.target_node
7104 source_node = self.source_node
7106 self.feedback_fn("* checking disk consistency between source and target")
7107 for dev in instance.disks:
7108 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7109 raise errors.OpExecError("Disk %s is degraded or not fully"
7110 " synchronized on target node,"
7111 " aborting migration" % dev.iv_name)
7113 # First get the migration information from the remote node
7114 result = self.rpc.call_migration_info(source_node, instance)
7115 msg = result.fail_msg
7117 log_err = ("Failed fetching source migration information from %s: %s" %
7119 logging.error(log_err)
7120 raise errors.OpExecError(log_err)
7122 self.migration_info = migration_info = result.payload
7124 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7125 # Then switch the disks to master/master mode
7126 self._EnsureSecondary(target_node)
7127 self._GoStandalone()
7128 self._GoReconnect(True)
7129 self._WaitUntilSync()
7131 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7132 result = self.rpc.call_accept_instance(target_node,
7135 self.nodes_ip[target_node])
7137 msg = result.fail_msg
7139 logging.error("Instance pre-migration failed, trying to revert"
7140 " disk status: %s", msg)
7141 self.feedback_fn("Pre-migration failed, aborting")
7142 self._AbortMigration()
7143 self._RevertDiskStatus()
7144 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7145 (instance.name, msg))
7147 self.feedback_fn("* migrating instance to %s" % target_node)
7148 result = self.rpc.call_instance_migrate(source_node, instance,
7149 self.nodes_ip[target_node],
7151 msg = result.fail_msg
7153 logging.error("Instance migration failed, trying to revert"
7154 " disk status: %s", msg)
7155 self.feedback_fn("Migration failed, aborting")
7156 self._AbortMigration()
7157 self._RevertDiskStatus()
7158 raise errors.OpExecError("Could not migrate instance %s: %s" %
7159 (instance.name, msg))
7161 instance.primary_node = target_node
7162 # distribute new instance config to the other nodes
7163 self.cfg.Update(instance, self.feedback_fn)
7165 result = self.rpc.call_finalize_migration(target_node,
7169 msg = result.fail_msg
7171 logging.error("Instance migration succeeded, but finalization failed:"
7173 raise errors.OpExecError("Could not finalize instance migration: %s" %
7176 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7177 self._EnsureSecondary(source_node)
7178 self._WaitUntilSync()
7179 self._GoStandalone()
7180 self._GoReconnect(False)
7181 self._WaitUntilSync()
7183 self.feedback_fn("* done")
7185 def _ExecFailover(self):
7186 """Failover an instance.
7188 The failover is done by shutting it down on its present node and
7189 starting it on the secondary.
7192 instance = self.instance
7193 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7195 source_node = instance.primary_node
7196 target_node = self.target_node
7198 if instance.admin_up:
7199 self.feedback_fn("* checking disk consistency between source and target")
7200 for dev in instance.disks:
7201 # for drbd, these are drbd over lvm
7202 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7203 if primary_node.offline:
7204 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7206 (primary_node.name, dev.iv_name, target_node))
7207 elif not self.ignore_consistency:
7208 raise errors.OpExecError("Disk %s is degraded on target node,"
7209 " aborting failover" % dev.iv_name)
7211 self.feedback_fn("* not checking disk consistency as instance is not"
7214 self.feedback_fn("* shutting down instance on source node")
7215 logging.info("Shutting down instance %s on node %s",
7216 instance.name, source_node)
7218 result = self.rpc.call_instance_shutdown(source_node, instance,
7219 self.shutdown_timeout)
7220 msg = result.fail_msg
7222 if self.ignore_consistency or primary_node.offline:
7223 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7224 " proceeding anyway; please make sure node"
7225 " %s is down; error details: %s",
7226 instance.name, source_node, source_node, msg)
7228 raise errors.OpExecError("Could not shutdown instance %s on"
7230 (instance.name, source_node, msg))
7232 self.feedback_fn("* deactivating the instance's disks on source node")
7233 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7234 raise errors.OpExecError("Can't shut down the instance's disks")
7236 instance.primary_node = target_node
7237 # distribute new instance config to the other nodes
7238 self.cfg.Update(instance, self.feedback_fn)
7240 # Only start the instance if it's marked as up
7241 if instance.admin_up:
7242 self.feedback_fn("* activating the instance's disks on target node %s" %
7244 logging.info("Starting instance %s on node %s",
7245 instance.name, target_node)
7247 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7248 ignore_secondaries=True)
7250 _ShutdownInstanceDisks(self.lu, instance)
7251 raise errors.OpExecError("Can't activate the instance's disks")
7253 self.feedback_fn("* starting the instance on the target node %s" %
7255 result = self.rpc.call_instance_start(target_node, instance, None, None,
7257 msg = result.fail_msg
7259 _ShutdownInstanceDisks(self.lu, instance)
7260 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7261 (instance.name, target_node, msg))
7263 def Exec(self, feedback_fn):
7264 """Perform the migration.
7267 self.feedback_fn = feedback_fn
7268 self.source_node = self.instance.primary_node
7270 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7271 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7272 self.target_node = self.instance.secondary_nodes[0]
7273 # Otherwise self.target_node has been populated either
7274 # directly, or through an iallocator.
7276 self.all_nodes = [self.source_node, self.target_node]
7278 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
7279 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
7283 feedback_fn("Failover instance %s" % self.instance.name)
7284 self._ExecFailover()
7286 feedback_fn("Migrating instance %s" % self.instance.name)
7289 return self._ExecCleanup()
7291 return self._ExecMigration()
7294 def _CreateBlockDev(lu, node, instance, device, force_create,
7296 """Create a tree of block devices on a given node.
7298 If this device type has to be created on secondaries, create it and
7301 If not, just recurse to children keeping the same 'force' value.
7303 @param lu: the lu on whose behalf we execute
7304 @param node: the node on which to create the device
7305 @type instance: L{objects.Instance}
7306 @param instance: the instance which owns the device
7307 @type device: L{objects.Disk}
7308 @param device: the device to create
7309 @type force_create: boolean
7310 @param force_create: whether to force creation of this device; this
7311 will be changed to True whenever we find a device which has
7312 CreateOnSecondary() attribute
7313 @param info: the extra 'metadata' we should attach to the device
7314 (this will be represented as an LVM tag)
7315 @type force_open: boolean
7316 @param force_open: this parameter will be passed to the
7317 L{backend.BlockdevCreate} function where it specifies
7318 whether we run on primary or not, and it affects both
7319 the child assembly and the device's own Open() execution
7322 if device.CreateOnSecondary():
7326 for child in device.children:
7327 _CreateBlockDev(lu, node, instance, child, force_create,
7330 if not force_create:
7333 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7336 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7337 """Create a single block device on a given node.
7339 This will not recurse over children of the device, so they must be
7342 @param lu: the lu on whose behalf we execute
7343 @param node: the node on which to create the device
7344 @type instance: L{objects.Instance}
7345 @param instance: the instance which owns the device
7346 @type device: L{objects.Disk}
7347 @param device: the device to create
7348 @param info: the extra 'metadata' we should attach to the device
7349 (this will be represented as an LVM tag)
7350 @type force_open: boolean
7351 @param force_open: this parameter will be passed to the
7352 L{backend.BlockdevCreate} function where it specifies
7353 whether we run on primary or not, and it affects both
7354 the child assembly and the device's own Open() execution
7357 lu.cfg.SetDiskID(device, node)
7358 result = lu.rpc.call_blockdev_create(node, device, device.size,
7359 instance.name, force_open, info)
7360 result.Raise("Can't create block device %s on"
7361 " node %s for instance %s" % (device, node, instance.name))
7362 if device.physical_id is None:
7363 device.physical_id = result.payload
7366 def _GenerateUniqueNames(lu, exts):
7367 """Generate a suitable LV name.
7369 This will generate a logical volume name for the given instance.
7374 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7375 results.append("%s%s" % (new_id, val))
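# Illustrative behaviour: _GenerateUniqueNames(lu, [".disk0", ".disk1"]) returns
# one "<unique-id>.disk0" / "<unique-id>.disk1" string per suffix, each with its
# own freshly generated cluster-unique ID; the DRBD8 template below asks for one
# such prefix per disk and appends "_data" and "_meta" to build the LV names.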
7379 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7380 iv_name, p_minor, s_minor):
7381 """Generate a drbd8 device complete with its children.
7384 assert len(vgnames) == len(names) == 2
7385 port = lu.cfg.AllocatePort()
7386 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7387 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7388 logical_id=(vgnames[0], names[0]))
7389 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7390 logical_id=(vgnames[1], names[1]))
7391 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7392 logical_id=(primary, secondary, port,
7395 children=[dev_data, dev_meta],
7400 def _GenerateDiskTemplate(lu, template_name,
7401 instance_name, primary_node,
7402 secondary_nodes, disk_info,
7403 file_storage_dir, file_driver,
7404 base_index, feedback_fn):
7405 """Generate the entire disk layout for a given template type.
7408 #TODO: compute space requirements
7410 vgname = lu.cfg.GetVGName()
7411 disk_count = len(disk_info)
7413 if template_name == constants.DT_DISKLESS:
7415 elif template_name == constants.DT_PLAIN:
7416 if len(secondary_nodes) != 0:
7417 raise errors.ProgrammerError("Wrong template configuration")
7419 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7420 for i in range(disk_count)])
7421 for idx, disk in enumerate(disk_info):
7422 disk_index = idx + base_index
7423 vg = disk.get(constants.IDISK_VG, vgname)
7424 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7425 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7426 size=disk[constants.IDISK_SIZE],
7427 logical_id=(vg, names[idx]),
7428 iv_name="disk/%d" % disk_index,
7429 mode=disk[constants.IDISK_MODE])
7430 disks.append(disk_dev)
7431 elif template_name == constants.DT_DRBD8:
7432 if len(secondary_nodes) != 1:
7433 raise errors.ProgrammerError("Wrong template configuration")
7434 remote_node = secondary_nodes[0]
7435 minors = lu.cfg.AllocateDRBDMinor(
7436 [primary_node, remote_node] * len(disk_info), instance_name)
7439 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7440 for i in range(disk_count)]):
7441 names.append(lv_prefix + "_data")
7442 names.append(lv_prefix + "_meta")
7443 for idx, disk in enumerate(disk_info):
7444 disk_index = idx + base_index
7445 data_vg = disk.get(constants.IDISK_VG, vgname)
7446 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7447 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7448 disk[constants.IDISK_SIZE],
7450 names[idx * 2:idx * 2 + 2],
7451 "disk/%d" % disk_index,
7452 minors[idx * 2], minors[idx * 2 + 1])
7453 disk_dev.mode = disk[constants.IDISK_MODE]
7454 disks.append(disk_dev)
7455 elif template_name == constants.DT_FILE:
7456 if len(secondary_nodes) != 0:
7457 raise errors.ProgrammerError("Wrong template configuration")
7459 opcodes.RequireFileStorage()
7461 for idx, disk in enumerate(disk_info):
7462 disk_index = idx + base_index
7463 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7464 size=disk[constants.IDISK_SIZE],
7465 iv_name="disk/%d" % disk_index,
7466 logical_id=(file_driver,
7467 "%s/disk%d" % (file_storage_dir,
7469 mode=disk[constants.IDISK_MODE])
7470 disks.append(disk_dev)
7471 elif template_name == constants.DT_SHARED_FILE:
7472 if len(secondary_nodes) != 0:
7473 raise errors.ProgrammerError("Wrong template configuration")
7475 opcodes.RequireSharedFileStorage()
7477 for idx, disk in enumerate(disk_info):
7478 disk_index = idx + base_index
7479 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7480 size=disk[constants.IDISK_SIZE],
7481 iv_name="disk/%d" % disk_index,
7482 logical_id=(file_driver,
7483 "%s/disk%d" % (file_storage_dir,
7485 mode=disk[constants.IDISK_MODE])
7486 disks.append(disk_dev)
7487 elif template_name == constants.DT_BLOCK:
7488 if len(secondary_nodes) != 0:
7489 raise errors.ProgrammerError("Wrong template configuration")
7491 for idx, disk in enumerate(disk_info):
7492 disk_index = idx + base_index
7493 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7494 size=disk[constants.IDISK_SIZE],
7495 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7496 disk[constants.IDISK_ADOPT]),
7497 iv_name="disk/%d" % disk_index,
7498 mode=disk[constants.IDISK_MODE])
7499 disks.append(disk_dev)
7502 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7506 def _GetInstanceInfoText(instance):
7507 """Compute that text that should be added to the disk's metadata.
7510 return "originstname+%s" % instance.name
7513 def _CalcEta(time_taken, written, total_size):
7514 """Calculates the ETA based on size written and total size.
7516 @param time_taken: The time taken so far
7517 @param written: amount written so far
7518 @param total_size: The total size of data to be written
7519 @return: The remaining time in seconds
7522 avg_time = time_taken / float(written)
7523 return (total_size - written) * avg_time
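# Worked example: if 2048 MiB out of 10240 MiB were written in 60 seconds,
# avg_time is 60 / 2048 s/MiB and the remaining 8192 MiB give an ETA of about
# 240 seconds; _WipeDisks below uses this for its periodic progress messages.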
7526 def _WipeDisks(lu, instance):
7527 """Wipes instance disks.
7529 @type lu: L{LogicalUnit}
7530 @param lu: the logical unit on whose behalf we execute
7531 @type instance: L{objects.Instance}
7532 @param instance: the instance whose disks we should wipe
7533 @return: the success of the wipe
7536 node = instance.primary_node
7538 for device in instance.disks:
7539 lu.cfg.SetDiskID(device, node)
7541 logging.info("Pause sync of instance %s disks", instance.name)
7542 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7544 for idx, success in enumerate(result.payload):
if not success:
7546 logging.warn("pause-sync of instance %s for disk %d failed",
instance.name, idx)
7550 for idx, device in enumerate(instance.disks):
7551 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7552 # MAX_WIPE_CHUNK at max
7553 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7554 constants.MIN_WIPE_CHUNK_PERCENT)
7555 # we _must_ make this an int, otherwise rounding errors will occur
7557 wipe_chunk_size = int(wipe_chunk_size)
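# Worked example (assuming, hypothetically, MAX_WIPE_CHUNK = 1024 MiB and
# MIN_WIPE_CHUNK_PERCENT = 10): a 2048 MiB disk would be wiped in
# min(1024, 2048 / 100.0 * 10) = 204 MiB chunks, while a very large disk is
# always capped at 1024 MiB per wipe RPC.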
7559 lu.LogInfo("* Wiping disk %d", idx)
7560 logging.info("Wiping disk %d for instance %s, node %s using"
7561 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
offset = 0
size = device.size
last_output = 0
7566 start_time = time.time()
7568 while offset < size:
7569 wipe_size = min(wipe_chunk_size, size - offset)
7570 logging.debug("Wiping disk %d, offset %s, chunk %s",
7571 idx, offset, wipe_size)
7572 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7573 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7574 (idx, offset, wipe_size))
now = time.time()
offset += wipe_size
7577 if now - last_output >= 60:
7578 eta = _CalcEta(now - start_time, offset, size)
7579 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7580 (offset / float(size) * 100, utils.FormatSeconds(eta)))
last_output = now
7583 logging.info("Resume sync of instance %s disks", instance.name)
7585 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7587 for idx, success in enumerate(result.payload):
if not success:
7589 lu.LogWarning("Resume sync of disk %d failed, please have a"
7590 " look at the status and troubleshoot the issue", idx)
7591 logging.warn("resume-sync of instance %s for disk %d failed",
instance.name, idx)
7595 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7596 """Create all disks for an instance.
7598 This abstracts away some work from AddInstance.
7600 @type lu: L{LogicalUnit}
7601 @param lu: the logical unit on whose behalf we execute
7602 @type instance: L{objects.Instance}
7603 @param instance: the instance whose disks we should create
7605 @param to_skip: list of indices to skip
7606 @type target_node: string
7607 @param target_node: if passed, overrides the target node for creation
7609 @return: the success of the creation
7612 info = _GetInstanceInfoText(instance)
7613 if target_node is None:
7614 pnode = instance.primary_node
7615 all_nodes = instance.all_nodes
else:
pnode = target_node
all_nodes = [pnode]
7620 if instance.disk_template in constants.DTS_FILEBASED:
7621 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7622 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7624 result.Raise("Failed to create directory '%s' on"
7625 " node %s" % (file_storage_dir, pnode))
7627 # Note: this needs to be kept in sync with adding of disks in
7628 # LUInstanceSetParams
7629 for idx, device in enumerate(instance.disks):
7630 if to_skip and idx in to_skip:
continue
7632 logging.info("Creating volume %s for instance %s",
7633 device.iv_name, instance.name)
7635 for node in all_nodes:
7636 f_create = node == pnode
7637 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7640 def _RemoveDisks(lu, instance, target_node=None):
7641 """Remove all disks for an instance.
7643 This abstracts away some work from `AddInstance()` and
7644 `RemoveInstance()`. Note that in case some of the devices couldn't
7645 be removed, the removal will continue with the other ones (compare
7646 with `_CreateDisks()`).
7648 @type lu: L{LogicalUnit}
7649 @param lu: the logical unit on whose behalf we execute
7650 @type instance: L{objects.Instance}
7651 @param instance: the instance whose disks we should remove
7652 @type target_node: string
7653 @param target_node: used to override the node on which to remove the disks
7655 @return: the success of the removal
7658 logging.info("Removing block devices for instance %s", instance.name)
7661 for device in instance.disks:
if target_node:
7663 edata = [(target_node, device)]
else:
7665 edata = device.ComputeNodeTree(instance.primary_node)
7666 for node, disk in edata:
7667 lu.cfg.SetDiskID(disk, node)
7668 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7670 lu.LogWarning("Could not remove block device %s on node %s,"
7671 " continuing anyway: %s", device.iv_name, node, msg)
7674 if instance.disk_template == constants.DT_FILE:
7675 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7679 tgt = instance.primary_node
7680 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7682 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7683 file_storage_dir, instance.primary_node, result.fail_msg)
7689 def _ComputeDiskSizePerVG(disk_template, disks):
7690 """Compute disk size requirements in the volume group
7693 def _compute(disks, payload):
7694 """Universal algorithm.
7699 vgs[disk[constants.IDISK_VG]] = \
7700 vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
7704 # Required free disk space as a function of disk and swap space
7706 constants.DT_DISKLESS: {},
7707 constants.DT_PLAIN: _compute(disks, 0),
7708 # 128 MB are added for drbd metadata for each disk
7709 constants.DT_DRBD8: _compute(disks, 128),
7710 constants.DT_FILE: {},
7711 constants.DT_SHARED_FILE: {},
7714 if disk_template not in req_size_dict:
7715 raise errors.ProgrammerError("Disk template '%s' size requirement"
7716 " is unknown" % disk_template)
7718 return req_size_dict[disk_template]
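# Illustrative example (VG names are made up): two DRBD8 disks of 512 MiB in
# volume group "xenvg" plus one 1024 MiB disk in "ssdvg" would yield
# {"xenvg": 1280, "ssdvg": 1152}, i.e. size plus 128 MiB of DRBD metadata per
# disk, grouped by volume group for the per-node free-space checks.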
7721 def _ComputeDiskSize(disk_template, disks):
7722 """Compute disk size requirements in the volume group
7725 # Required free disk space as a function of disk and swap space
7727 constants.DT_DISKLESS: None,
7728 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7729 # 128 MB are added for drbd metadata for each disk
7730 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7731 constants.DT_FILE: None,
7732 constants.DT_SHARED_FILE: 0,
7733 constants.DT_BLOCK: 0,
7736 if disk_template not in req_size_dict:
7737 raise errors.ProgrammerError("Disk template '%s' size requirement"
7738 " is unknown" % disk_template)
7740 return req_size_dict[disk_template]
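# Illustrative example: for two 512 MiB disks, DT_PLAIN requires 1024 MiB of
# free space while DT_DRBD8 requires (512 + 128) * 2 = 1280 MiB because of the
# per-disk metadata; file-based and diskless templates need no LVM space here.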
7743 def _FilterVmNodes(lu, nodenames):
7744 """Filters out non-vm_capable nodes from a list.
7746 @type lu: L{LogicalUnit}
7747 @param lu: the logical unit for which we check
7748 @type nodenames: list
7749 @param nodenames: the list of nodes on which we should check
7751 @return: the list of vm-capable nodes
7754 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7755 return [name for name in nodenames if name not in non_vm_nodes]
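# Note (illustrative): validation helpers such as _CheckHVParams and
# _CheckOSParams below call this first, so non-vm_capable nodes are never asked
# to validate hypervisor or OS parameters they will never use.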
7758 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7759 """Hypervisor parameter validation.
7761 This function abstracts the hypervisor parameter validation to be
7762 used in both instance create and instance modify.
7764 @type lu: L{LogicalUnit}
7765 @param lu: the logical unit for which we check
7766 @type nodenames: list
7767 @param nodenames: the list of nodes on which we should check
7768 @type hvname: string
7769 @param hvname: the name of the hypervisor we should use
7770 @type hvparams: dict
7771 @param hvparams: the parameters which we need to check
7772 @raise errors.OpPrereqError: if the parameters are not valid
7775 nodenames = _FilterVmNodes(lu, nodenames)
7776 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
hvname,
hvparams)
7779 for node in nodenames:
info = hvinfo[node]
if info.offline:
continue
7783 info.Raise("Hypervisor parameter validation failed on node %s" % node)
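# Usage sketch (mirrors the call made further down in LUInstanceCreate.CheckPrereq):
#   _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
# so the requested hypervisor parameters are validated on every vm_capable node
# involved with the instance.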
7786 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7787 """OS parameters validation.
7789 @type lu: L{LogicalUnit}
7790 @param lu: the logical unit for which we check
7791 @type required: boolean
7792 @param required: whether the validation should fail if the OS is not found
7794 @type nodenames: list
7795 @param nodenames: the list of nodes on which we should check
7796 @type osname: string
7797 @param osname: the name of the OS we should use
7798 @type osparams: dict
7799 @param osparams: the parameters which we need to check
7800 @raise errors.OpPrereqError: if the parameters are not valid
7803 nodenames = _FilterVmNodes(lu, nodenames)
7804 result = lu.rpc.call_os_validate(required, nodenames, osname,
7805 [constants.OS_VALIDATE_PARAMETERS],
7807 for node, nres in result.items():
7808 # we don't check for offline cases since this should be run only
7809 # against the master node and/or an instance's nodes
7810 nres.Raise("OS Parameters validation failed on node %s" % node)
7811 if not nres.payload:
7812 lu.LogInfo("OS %s not found on node %s, validation skipped",
7816 class LUInstanceCreate(LogicalUnit):
7817 """Create an instance.
7820 HPATH = "instance-add"
7821 HTYPE = constants.HTYPE_INSTANCE
7824 def CheckArguments(self):
7828 # do not require name_check to ease forward/backward compatibility
7830 if self.op.no_install and self.op.start:
7831 self.LogInfo("No-installation mode selected, disabling startup")
7832 self.op.start = False
7833 # validate/normalize the instance name
7834 self.op.instance_name = \
7835 netutils.Hostname.GetNormalizedName(self.op.instance_name)
7837 if self.op.ip_check and not self.op.name_check:
7838 # TODO: make the ip check more flexible and not depend on the name check
7839 raise errors.OpPrereqError("Cannot do IP address check without a name"
7840 " check", errors.ECODE_INVAL)
7842 # check nics' parameter names
7843 for nic in self.op.nics:
7844 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7846 # check disks. parameter names and consistent adopt/no-adopt strategy
7847 has_adopt = has_no_adopt = False
7848 for disk in self.op.disks:
7849 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7850 if constants.IDISK_ADOPT in disk:
7854 if has_adopt and has_no_adopt:
7855 raise errors.OpPrereqError("Either all disks are adopted or none is",
7858 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7859 raise errors.OpPrereqError("Disk adoption is not supported for the"
7860 " '%s' disk template" %
7861 self.op.disk_template,
7863 if self.op.iallocator is not None:
7864 raise errors.OpPrereqError("Disk adoption not allowed with an"
7865 " iallocator script", errors.ECODE_INVAL)
7866 if self.op.mode == constants.INSTANCE_IMPORT:
7867 raise errors.OpPrereqError("Disk adoption not allowed for"
7868 " instance import", errors.ECODE_INVAL)
7870 if self.op.disk_template in constants.DTS_MUST_ADOPT:
7871 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7872 " but no 'adopt' parameter given" %
7873 self.op.disk_template,
7876 self.adopt_disks = has_adopt
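# Illustrative example (hypothetical values): adopting an existing logical
# volume would be requested with disk_template=plain and a disk entry such as
#   {constants.IDISK_SIZE: 1024, constants.IDISK_ADOPT: "some-existing-lv"}
# and, as checked above, adopting and non-adopting entries cannot be mixed in
# the same request.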
7878 # instance name verification
7879 if self.op.name_check:
7880 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7881 self.op.instance_name = self.hostname1.name
7882 # used in CheckPrereq for ip ping check
7883 self.check_ip = self.hostname1.ip
7885 self.check_ip = None
7887 # file storage checks
7888 if (self.op.file_driver and
7889 not self.op.file_driver in constants.FILE_DRIVER):
7890 raise errors.OpPrereqError("Invalid file driver name '%s'" %
7891 self.op.file_driver, errors.ECODE_INVAL)
7893 if self.op.disk_template == constants.DT_FILE:
7894 opcodes.RequireFileStorage()
7895 elif self.op.disk_template == constants.DT_SHARED_FILE:
7896 opcodes.RequireSharedFileStorage()
7898 ### Node/iallocator related checks
7899 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7901 if self.op.pnode is not None:
7902 if self.op.disk_template in constants.DTS_INT_MIRROR:
7903 if self.op.snode is None:
7904 raise errors.OpPrereqError("The networked disk templates need"
7905 " a mirror node", errors.ECODE_INVAL)
7907 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7909 self.op.snode = None
7911 self._cds = _GetClusterDomainSecret()
7913 if self.op.mode == constants.INSTANCE_IMPORT:
7914 # On import force_variant must be True, because if we forced it at
7915 # initial install, our only chance when importing it back is that it works again!
7917 self.op.force_variant = True
7919 if self.op.no_install:
7920 self.LogInfo("No-installation mode has no effect during import")
7922 elif self.op.mode == constants.INSTANCE_CREATE:
7923 if self.op.os_type is None:
7924 raise errors.OpPrereqError("No guest OS specified",
7926 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7927 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7928 " installation" % self.op.os_type,
7930 if self.op.disk_template is None:
7931 raise errors.OpPrereqError("No disk template specified",
7934 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7935 # Check handshake to ensure both clusters have the same domain secret
7936 src_handshake = self.op.source_handshake
7937 if not src_handshake:
7938 raise errors.OpPrereqError("Missing source handshake",
7941 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7944 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7947 # Load and check source CA
7948 self.source_x509_ca_pem = self.op.source_x509_ca
7949 if not self.source_x509_ca_pem:
7950 raise errors.OpPrereqError("Missing source X509 CA",
7954 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7956 except OpenSSL.crypto.Error, err:
7957 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7958 (err, ), errors.ECODE_INVAL)
7960 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7961 if errcode is not None:
7962 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7965 self.source_x509_ca = cert
7967 src_instance_name = self.op.source_instance_name
7968 if not src_instance_name:
7969 raise errors.OpPrereqError("Missing source instance name",
7972 self.source_instance_name = \
7973 netutils.GetHostname(name=src_instance_name).name
7976 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7977 self.op.mode, errors.ECODE_INVAL)
7979 def ExpandNames(self):
7980 """ExpandNames for CreateInstance.
7982 Figure out the right locks for instance creation.
7985 self.needed_locks = {}
7987 instance_name = self.op.instance_name
7988 # this is just a preventive check, but someone might still add this
7989 # instance in the meantime, and creation will fail at lock-add time
7990 if instance_name in self.cfg.GetInstanceList():
7991 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7992 instance_name, errors.ECODE_EXISTS)
7994 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7996 if self.op.iallocator:
7997 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7999 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8000 nodelist = [self.op.pnode]
8001 if self.op.snode is not None:
8002 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8003 nodelist.append(self.op.snode)
8004 self.needed_locks[locking.LEVEL_NODE] = nodelist
8006 # in case of import lock the source node too
8007 if self.op.mode == constants.INSTANCE_IMPORT:
8008 src_node = self.op.src_node
8009 src_path = self.op.src_path
8011 if src_path is None:
8012 self.op.src_path = src_path = self.op.instance_name
8014 if src_node is None:
8015 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8016 self.op.src_node = None
8017 if os.path.isabs(src_path):
8018 raise errors.OpPrereqError("Importing an instance from an absolute"
8019 " path requires a source node option",
8022 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8023 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8024 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8025 if not os.path.isabs(src_path):
8026 self.op.src_path = src_path = \
8027 utils.PathJoin(constants.EXPORT_DIR, src_path)
8029 def _RunAllocator(self):
8030 """Run the allocator based on input opcode.
8033 nics = [n.ToDict() for n in self.nics]
8034 ial = IAllocator(self.cfg, self.rpc,
8035 mode=constants.IALLOCATOR_MODE_ALLOC,
8036 name=self.op.instance_name,
8037 disk_template=self.op.disk_template,
8040 vcpus=self.be_full[constants.BE_VCPUS],
8041 memory=self.be_full[constants.BE_MEMORY],
8044 hypervisor=self.op.hypervisor,
8047 ial.Run(self.op.iallocator)
8050 raise errors.OpPrereqError("Can't compute nodes using"
8051 " iallocator '%s': %s" %
8052 (self.op.iallocator, ial.info),
8054 if len(ial.result) != ial.required_nodes:
8055 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8056 " of nodes (%s), required %s" %
8057 (self.op.iallocator, len(ial.result),
8058 ial.required_nodes), errors.ECODE_FAULT)
8059 self.op.pnode = ial.result[0]
8060 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8061 self.op.instance_name, self.op.iallocator,
8062 utils.CommaJoin(ial.result))
8063 if ial.required_nodes == 2:
8064 self.op.snode = ial.result[1]
8066 def BuildHooksEnv(self):
8069 This runs on master, primary and secondary nodes of the instance.
8073 "ADD_MODE": self.op.mode,
8075 if self.op.mode == constants.INSTANCE_IMPORT:
8076 env["SRC_NODE"] = self.op.src_node
8077 env["SRC_PATH"] = self.op.src_path
8078 env["SRC_IMAGES"] = self.src_images
8080 env.update(_BuildInstanceHookEnv(
8081 name=self.op.instance_name,
8082 primary_node=self.op.pnode,
8083 secondary_nodes=self.secondaries,
8084 status=self.op.start,
8085 os_type=self.op.os_type,
8086 memory=self.be_full[constants.BE_MEMORY],
8087 vcpus=self.be_full[constants.BE_VCPUS],
8088 nics=_NICListToTuple(self, self.nics),
8089 disk_template=self.op.disk_template,
8090 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8091 for d in self.disks],
8094 hypervisor_name=self.op.hypervisor,
8100 def BuildHooksNodes(self):
8101 """Build hooks nodes.
8104 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8107 def _ReadExportInfo(self):
8108 """Reads the export information from disk.
8110 It will override the opcode source node and path with the actual
8111 information, if these two were not specified before.
8113 @return: the export information
8116 assert self.op.mode == constants.INSTANCE_IMPORT
8118 src_node = self.op.src_node
8119 src_path = self.op.src_path
8121 if src_node is None:
8122 locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
8123 exp_list = self.rpc.call_export_list(locked_nodes)
8125 for node in exp_list:
8126 if exp_list[node].fail_msg:
8128 if src_path in exp_list[node].payload:
8130 self.op.src_node = src_node = node
8131 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8135 raise errors.OpPrereqError("No export found for relative path %s" %
8136 src_path, errors.ECODE_INVAL)
8138 _CheckNodeOnline(self, src_node)
8139 result = self.rpc.call_export_info(src_node, src_path)
8140 result.Raise("No export or invalid export found in dir %s" % src_path)
8142 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8143 if not export_info.has_section(constants.INISECT_EXP):
8144 raise errors.ProgrammerError("Corrupted export config",
8145 errors.ECODE_ENVIRON)
8147 ei_version = export_info.get(constants.INISECT_EXP, "version")
8148 if (int(ei_version) != constants.EXPORT_VERSION):
8149 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8150 (ei_version, constants.EXPORT_VERSION),
8151 errors.ECODE_ENVIRON)
8154 def _ReadExportParams(self, einfo):
8155 """Use export parameters as defaults.
8157 In case the opcode doesn't specify (as in override) some instance
8158 parameters, then try to use them from the export information, if that declares them.
8162 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8164 if self.op.disk_template is None:
8165 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8166 self.op.disk_template = einfo.get(constants.INISECT_INS,
8169 raise errors.OpPrereqError("No disk template specified and the export"
8170 " is missing the disk_template information",
8173 if not self.op.disks:
8174 if einfo.has_option(constants.INISECT_INS, "disk_count"):
8176 # TODO: import the disk iv_name too
8177 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8178 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8179 disks.append({constants.IDISK_SIZE: disk_sz})
8180 self.op.disks = disks
8182 raise errors.OpPrereqError("No disk info specified and the export"
8183 " is missing the disk information",
8186 if (not self.op.nics and
8187 einfo.has_option(constants.INISECT_INS, "nic_count")):
8189 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8191 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8192 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8197 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8198 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8200 if (self.op.hypervisor is None and
8201 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8202 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8204 if einfo.has_section(constants.INISECT_HYP):
8205 # use the export parameters but do not override the ones
8206 # specified by the user
8207 for name, value in einfo.items(constants.INISECT_HYP):
8208 if name not in self.op.hvparams:
8209 self.op.hvparams[name] = value
8211 if einfo.has_section(constants.INISECT_BEP):
8212 # use the parameters, without overriding
8213 for name, value in einfo.items(constants.INISECT_BEP):
8214 if name not in self.op.beparams:
8215 self.op.beparams[name] = value
8217 # try to read the parameters old style, from the main section
8218 for name in constants.BES_PARAMETERS:
8219 if (name not in self.op.beparams and
8220 einfo.has_option(constants.INISECT_INS, name)):
8221 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8223 if einfo.has_section(constants.INISECT_OSP):
8224 # use the parameters, without overriding
8225 for name, value in einfo.items(constants.INISECT_OSP):
8226 if name not in self.op.osparams:
8227 self.op.osparams[name] = value
8229 def _RevertToDefaults(self, cluster):
8230 """Revert the instance parameters to the default values.
8234 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8235 for name in self.op.hvparams.keys():
8236 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8237 del self.op.hvparams[name]
8239 be_defs = cluster.SimpleFillBE({})
8240 for name in self.op.beparams.keys():
8241 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8242 del self.op.beparams[name]
8244 nic_defs = cluster.SimpleFillNIC({})
8245 for nic in self.op.nics:
8246 for name in constants.NICS_PARAMETERS:
8247 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8250 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8251 for name in self.op.osparams.keys():
8252 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8253 del self.op.osparams[name]
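# Illustrative example (hypothetical parameter): if the opcode passed a
# hypervisor parameter whose value happens to equal the cluster default for the
# chosen hypervisor/OS, the entry is dropped here so the instance keeps
# tracking the cluster default instead of pinning the current value.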
8255 def _CalculateFileStorageDir(self):
8256 """Calculate final instance file storage dir.
8259 # file storage dir calculation/check
8260 self.instance_file_storage_dir = None
8261 if self.op.disk_template in constants.DTS_FILEBASED:
8262 # build the full file storage dir path
8265 if self.op.disk_template == constants.DT_SHARED_FILE:
8266 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8268 get_fsd_fn = self.cfg.GetFileStorageDir
8270 cfg_storagedir = get_fsd_fn()
8271 if not cfg_storagedir:
8272 raise errors.OpPrereqError("Cluster file storage dir not defined")
8273 joinargs.append(cfg_storagedir)
8275 if self.op.file_storage_dir is not None:
8276 joinargs.append(self.op.file_storage_dir)
8278 joinargs.append(self.op.instance_name)
8280 # pylint: disable-msg=W0142
8281 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
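# Illustrative example (paths are made up): with a cluster file storage dir of
# "/srv/ganeti/file-storage", an opcode file_storage_dir of "web" and instance
# name "inst1.example.com", the resulting directory would be
# "/srv/ganeti/file-storage/web/inst1.example.com".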
8283 def CheckPrereq(self):
8284 """Check prerequisites.
8287 self._CalculateFileStorageDir()
8289 if self.op.mode == constants.INSTANCE_IMPORT:
8290 export_info = self._ReadExportInfo()
8291 self._ReadExportParams(export_info)
8293 if (not self.cfg.GetVGName() and
8294 self.op.disk_template not in constants.DTS_NOT_LVM):
8295 raise errors.OpPrereqError("Cluster does not support lvm-based"
8296 " instances", errors.ECODE_STATE)
8298 if self.op.hypervisor is None:
8299 self.op.hypervisor = self.cfg.GetHypervisorType()
8301 cluster = self.cfg.GetClusterInfo()
8302 enabled_hvs = cluster.enabled_hypervisors
8303 if self.op.hypervisor not in enabled_hvs:
8304 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8305 " cluster (%s)" % (self.op.hypervisor,
8306 ",".join(enabled_hvs)),
8309 # Check tag validity
8310 for tag in self.op.tags:
8311 objects.TaggableObject.ValidateTag(tag)
8313 # check hypervisor parameter syntax (locally)
8314 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8315 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8317 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8318 hv_type.CheckParameterSyntax(filled_hvp)
8319 self.hv_full = filled_hvp
8320 # check that we don't specify global parameters on an instance
8321 _CheckGlobalHvParams(self.op.hvparams)
8323 # fill and remember the beparams dict
8324 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8325 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8327 # build os parameters
8328 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8330 # now that hvp/bep are in final format, let's reset to defaults, if told to do so
8332 if self.op.identify_defaults:
8333 self._RevertToDefaults(cluster)
8337 for idx, nic in enumerate(self.op.nics):
8338 nic_mode_req = nic.get(constants.INIC_MODE, None)
8339 nic_mode = nic_mode_req
8340 if nic_mode is None:
8341 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8343 # in routed mode, for the first nic, the default ip is 'auto'
8344 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8345 default_ip_mode = constants.VALUE_AUTO
8347 default_ip_mode = constants.VALUE_NONE
8349 # ip validity checks
8350 ip = nic.get(constants.INIC_IP, default_ip_mode)
8351 if ip is None or ip.lower() == constants.VALUE_NONE:
8353 elif ip.lower() == constants.VALUE_AUTO:
8354 if not self.op.name_check:
8355 raise errors.OpPrereqError("IP address set to auto but name checks"
8356 " have been skipped",
8358 nic_ip = self.hostname1.ip
8360 if not netutils.IPAddress.IsValid(ip):
8361 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8365 # TODO: check the ip address for uniqueness
8366 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8367 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8370 # MAC address verification
8371 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8372 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8373 mac = utils.NormalizeAndValidateMac(mac)
8376 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8377 except errors.ReservationError:
8378 raise errors.OpPrereqError("MAC address %s already in use"
8379 " in cluster" % mac,
8380 errors.ECODE_NOTUNIQUE)
8382 # Build nic parameters
8383 link = nic.get(constants.INIC_LINK, None)
8386 nicparams[constants.NIC_MODE] = nic_mode_req
8388 nicparams[constants.NIC_LINK] = link
8390 check_params = cluster.SimpleFillNIC(nicparams)
8391 objects.NIC.CheckParameterSyntax(check_params)
8392 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8394 # disk checks/pre-build
8395 default_vg = self.cfg.GetVGName()
8397 for disk in self.op.disks:
8398 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8399 if mode not in constants.DISK_ACCESS_SET:
8400 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8401 mode, errors.ECODE_INVAL)
8402 size = disk.get(constants.IDISK_SIZE, None)
8404 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8407 except (TypeError, ValueError):
8408 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8411 data_vg = disk.get(constants.IDISK_VG, default_vg)
8413 constants.IDISK_SIZE: size,
8414 constants.IDISK_MODE: mode,
8415 constants.IDISK_VG: data_vg,
8416 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8418 if constants.IDISK_ADOPT in disk:
8419 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8420 self.disks.append(new_disk)
8422 if self.op.mode == constants.INSTANCE_IMPORT:
8424 # Check that the new instance doesn't have fewer disks than the export
8425 instance_disks = len(self.disks)
8426 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8427 if instance_disks < export_disks:
8428 raise errors.OpPrereqError("Not enough disks to import."
8429 " (instance: %d, export: %d)" %
8430 (instance_disks, export_disks),
8434 for idx in range(export_disks):
8435 option = "disk%d_dump" % idx
8436 if export_info.has_option(constants.INISECT_INS, option):
8437 # FIXME: are the old os-es, disk sizes, etc. useful?
8438 export_name = export_info.get(constants.INISECT_INS, option)
8439 image = utils.PathJoin(self.op.src_path, export_name)
8440 disk_images.append(image)
8442 disk_images.append(False)
8444 self.src_images = disk_images
8446 old_name = export_info.get(constants.INISECT_INS, "name")
8448 exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8449 except (TypeError, ValueError), err:
8450 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8451 " an integer: %s" % str(err),
8453 if self.op.instance_name == old_name:
8454 for idx, nic in enumerate(self.nics):
8455 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8456 nic_mac_ini = "nic%d_mac" % idx
8457 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8459 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8461 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8462 if self.op.ip_check:
8463 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8464 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8465 (self.check_ip, self.op.instance_name),
8466 errors.ECODE_NOTUNIQUE)
8468 #### mac address generation
8469 # By generating here the mac address both the allocator and the hooks get
8470 # the real final mac address rather than the 'auto' or 'generate' value.
8471 # There is a race condition between the generation and the instance object
8472 # creation, which means that we know the mac is valid now, but we're not
8473 # sure it will be when we actually add the instance. If things go bad
8474 # adding the instance will abort because of a duplicate mac, and the
8475 # creation job will fail.
8476 for nic in self.nics:
8477 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8478 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8482 if self.op.iallocator is not None:
8483 self._RunAllocator()
8485 #### node related checks
8487 # check primary node
8488 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8489 assert self.pnode is not None, \
8490 "Cannot retrieve locked node %s" % self.op.pnode
8492 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8493 pnode.name, errors.ECODE_STATE)
8495 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8496 pnode.name, errors.ECODE_STATE)
8497 if not pnode.vm_capable:
8498 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8499 " '%s'" % pnode.name, errors.ECODE_STATE)
8501 self.secondaries = []
8503 # mirror node verification
8504 if self.op.disk_template in constants.DTS_INT_MIRROR:
8505 if self.op.snode == pnode.name:
8506 raise errors.OpPrereqError("The secondary node cannot be the"
8507 " primary node", errors.ECODE_INVAL)
8508 _CheckNodeOnline(self, self.op.snode)
8509 _CheckNodeNotDrained(self, self.op.snode)
8510 _CheckNodeVmCapable(self, self.op.snode)
8511 self.secondaries.append(self.op.snode)
8513 nodenames = [pnode.name] + self.secondaries
8515 if not self.adopt_disks:
8516 # Check lv size requirements, if not adopting
8517 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8518 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8520 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8521 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8522 disk[constants.IDISK_ADOPT])
8523 for disk in self.disks])
8524 if len(all_lvs) != len(self.disks):
8525 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8527 for lv_name in all_lvs:
8529 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
8530 # to ReserveLV use the same syntax
8531 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8532 except errors.ReservationError:
8533 raise errors.OpPrereqError("LV named %s used by another instance" %
8534 lv_name, errors.ECODE_NOTUNIQUE)
8536 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8537 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8539 node_lvs = self.rpc.call_lv_list([pnode.name],
8540 vg_names.payload.keys())[pnode.name]
8541 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8542 node_lvs = node_lvs.payload
8544 delta = all_lvs.difference(node_lvs.keys())
8546 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8547 utils.CommaJoin(delta),
8549 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8551 raise errors.OpPrereqError("Online logical volumes found, cannot"
8552 " adopt: %s" % utils.CommaJoin(online_lvs),
8554 # update the size of disk based on what is found
8555 for dsk in self.disks:
8556 dsk[constants.IDISK_SIZE] = \
8557 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8558 dsk[constants.IDISK_ADOPT])][0]))
8560 elif self.op.disk_template == constants.DT_BLOCK:
8561 # Normalize and de-duplicate device paths
8562 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8563 for disk in self.disks])
8564 if len(all_disks) != len(self.disks):
8565 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8567 baddisks = [d for d in all_disks
8568 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8570 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8571 " cannot be adopted" %
8572 (", ".join(baddisks),
8573 constants.ADOPTABLE_BLOCKDEV_ROOT),
8576 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8577 list(all_disks))[pnode.name]
8578 node_disks.Raise("Cannot get block device information from node %s" %
8580 node_disks = node_disks.payload
8581 delta = all_disks.difference(node_disks.keys())
8583 raise errors.OpPrereqError("Missing block device(s): %s" %
8584 utils.CommaJoin(delta),
8586 for dsk in self.disks:
8587 dsk[constants.IDISK_SIZE] = \
8588 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8590 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8592 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8593 # check OS parameters (remotely)
8594 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8596 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8598 # memory check on primary node
8600 _CheckNodeFreeMemory(self, self.pnode.name,
8601 "creating instance %s" % self.op.instance_name,
8602 self.be_full[constants.BE_MEMORY],
8605 self.dry_run_result = list(nodenames)
8607 def Exec(self, feedback_fn):
8608 """Create and add the instance to the cluster.
8611 instance = self.op.instance_name
8612 pnode_name = self.pnode.name
8614 ht_kind = self.op.hypervisor
8615 if ht_kind in constants.HTS_REQ_PORT:
8616 network_port = self.cfg.AllocatePort()
8620 disks = _GenerateDiskTemplate(self,
8621 self.op.disk_template,
8622 instance, pnode_name,
8625 self.instance_file_storage_dir,
8626 self.op.file_driver,
8630 iobj = objects.Instance(name=instance, os=self.op.os_type,
8631 primary_node=pnode_name,
8632 nics=self.nics, disks=disks,
8633 disk_template=self.op.disk_template,
8635 network_port=network_port,
8636 beparams=self.op.beparams,
8637 hvparams=self.op.hvparams,
8638 hypervisor=self.op.hypervisor,
8639 osparams=self.op.osparams,
8643 for tag in self.op.tags:
8646 if self.adopt_disks:
8647 if self.op.disk_template == constants.DT_PLAIN:
8648 # rename LVs to the newly-generated names; we need to construct
8649 # 'fake' LV disks with the old data, plus the new unique_id
8650 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8652 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8653 rename_to.append(t_dsk.logical_id)
8654 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8655 self.cfg.SetDiskID(t_dsk, pnode_name)
8656 result = self.rpc.call_blockdev_rename(pnode_name,
8657 zip(tmp_disks, rename_to))
8658 result.Raise("Failed to rename adoped LVs")
8660 feedback_fn("* creating instance disks...")
8662 _CreateDisks(self, iobj)
8663 except errors.OpExecError:
8664 self.LogWarning("Device creation failed, reverting...")
8666 _RemoveDisks(self, iobj)
8668 self.cfg.ReleaseDRBDMinors(instance)
8671 feedback_fn("adding instance %s to cluster config" % instance)
8673 self.cfg.AddInstance(iobj, self.proc.GetECId())
8675 # Declare that we don't want to remove the instance lock anymore, as we've
8676 # added the instance to the config
8677 del self.remove_locks[locking.LEVEL_INSTANCE]
8679 if self.op.mode == constants.INSTANCE_IMPORT:
8680 # Release unused nodes
8681 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8684 _ReleaseLocks(self, locking.LEVEL_NODE)
8687 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8688 feedback_fn("* wiping instance disks...")
8690 _WipeDisks(self, iobj)
8691 except errors.OpExecError, err:
8692 logging.exception("Wiping disks failed")
8693 self.LogWarning("Wiping instance disks failed (%s)", err)
8697 # Something is already wrong with the disks, don't do anything else
8699 elif self.op.wait_for_sync:
8700 disk_abort = not _WaitForSync(self, iobj)
8701 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8702 # make sure the disks are not degraded (still sync-ing is ok)
8704 feedback_fn("* checking mirrors status")
8705 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8710 _RemoveDisks(self, iobj)
8711 self.cfg.RemoveInstance(iobj.name)
8712 # Make sure the instance lock gets removed
8713 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8714 raise errors.OpExecError("There are some degraded disks for"
8717 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8718 if self.op.mode == constants.INSTANCE_CREATE:
8719 if not self.op.no_install:
8720 feedback_fn("* running the instance OS create scripts...")
8721 # FIXME: pass debug option from opcode to backend
8722 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8723 self.op.debug_level)
8724 result.Raise("Could not add os for instance %s"
8725 " on node %s" % (instance, pnode_name))
8727 elif self.op.mode == constants.INSTANCE_IMPORT:
8728 feedback_fn("* running the instance OS import scripts...")
8732 for idx, image in enumerate(self.src_images):
8736 # FIXME: pass debug option from opcode to backend
8737 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8738 constants.IEIO_FILE, (image, ),
8739 constants.IEIO_SCRIPT,
8740 (iobj.disks[idx], idx),
8742 transfers.append(dt)
8745 masterd.instance.TransferInstanceData(self, feedback_fn,
8746 self.op.src_node, pnode_name,
8747 self.pnode.secondary_ip,
8749 if not compat.all(import_result):
8750 self.LogWarning("Some disks for instance %s on node %s were not"
8751 " imported successfully" % (instance, pnode_name))
8753 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8754 feedback_fn("* preparing remote import...")
8755 # The source cluster will stop the instance before attempting to make a
8756 # connection. In some cases stopping an instance can take a long time,
8757 # hence the shutdown timeout is added to the connection timeout.
8758 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8759 self.op.source_shutdown_timeout)
8760 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8762 assert iobj.primary_node == self.pnode.name
8764 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8765 self.source_x509_ca,
8766 self._cds, timeouts)
8767 if not compat.all(disk_results):
8768 # TODO: Should the instance still be started, even if some disks
8769 # failed to import (valid for local imports, too)?
8770 self.LogWarning("Some disks for instance %s on node %s were not"
8771 " imported successfully" % (instance, pnode_name))
8773 # Run rename script on newly imported instance
8774 assert iobj.name == instance
8775 feedback_fn("Running rename script for %s" % instance)
8776 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8777 self.source_instance_name,
8778 self.op.debug_level)
8780 self.LogWarning("Failed to run rename script for %s on node"
8781 " %s: %s" % (instance, pnode_name, result.fail_msg))
8784 # also checked in the prereq part
8785 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8789 iobj.admin_up = True
8790 self.cfg.Update(iobj, feedback_fn)
8791 logging.info("Starting instance %s on node %s", instance, pnode_name)
8792 feedback_fn("* starting instance...")
8793 result = self.rpc.call_instance_start(pnode_name, iobj,
8795 result.Raise("Could not start instance")
8797 return list(iobj.all_nodes)
8800 class LUInstanceConsole(NoHooksLU):
8801 """Connect to an instance's console.
8803 This is somewhat special in that it returns the command line that
8804 you need to run on the master node in order to connect to the console.
8810 def ExpandNames(self):
8811 self._ExpandAndLockInstance()
8813 def CheckPrereq(self):
8814 """Check prerequisites.
8816 This checks that the instance is in the cluster.
8819 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8820 assert self.instance is not None, \
8821 "Cannot retrieve locked instance %s" % self.op.instance_name
8822 _CheckNodeOnline(self, self.instance.primary_node)
8824 def Exec(self, feedback_fn):
8825 """Connect to the console of an instance
8828 instance = self.instance
8829 node = instance.primary_node
8831 node_insts = self.rpc.call_instance_list([node],
8832 [instance.hypervisor])[node]
8833 node_insts.Raise("Can't get node information from %s" % node)
8835 if instance.name not in node_insts.payload:
8836 if instance.admin_up:
8837 state = constants.INSTST_ERRORDOWN
8839 state = constants.INSTST_ADMINDOWN
8840 raise errors.OpExecError("Instance %s is not running (state %s)" %
8841 (instance.name, state))
8843 logging.debug("Connecting to console of %s on %s", instance.name, node)
8845 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8848 def _GetInstanceConsole(cluster, instance):
8849 """Returns console information for an instance.
8851 @type cluster: L{objects.Cluster}
8852 @type instance: L{objects.Instance}
8856 hyper = hypervisor.GetHypervisor(instance.hypervisor)
8857 # beparams and hvparams are passed separately, to avoid editing the
8858 # instance and then saving the defaults in the instance itself.
8859 hvparams = cluster.FillHV(instance)
8860 beparams = cluster.FillBE(instance)
8861 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8863 assert console.instance == instance.name
8864 assert console.Validate()
8866 return console.ToDict()
8869 class LUInstanceReplaceDisks(LogicalUnit):
8870 """Replace the disks of an instance.
8873 HPATH = "mirrors-replace"
8874 HTYPE = constants.HTYPE_INSTANCE
8877 def CheckArguments(self):
8878 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8881 def ExpandNames(self):
8882 self._ExpandAndLockInstance()
8884 assert locking.LEVEL_NODE not in self.needed_locks
8885 assert locking.LEVEL_NODEGROUP not in self.needed_locks
8887 assert self.op.iallocator is None or self.op.remote_node is None, \
8888 "Conflicting options"
8890 if self.op.remote_node is not None:
8891 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8893 # Warning: do not remove the locking of the new secondary here
8894 # unless DRBD8.AddChildren is changed to work in parallel;
8895 # currently it doesn't since parallel invocations of
8896 # FindUnusedMinor will conflict
8897 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
8898 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8900 self.needed_locks[locking.LEVEL_NODE] = []
8901 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8903 if self.op.iallocator is not None:
8904 # iallocator will select a new node in the same group
8905 self.needed_locks[locking.LEVEL_NODEGROUP] = []
8907 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8908 self.op.iallocator, self.op.remote_node,
8909 self.op.disks, False, self.op.early_release)
8911 self.tasklets = [self.replacer]
8913 def DeclareLocks(self, level):
8914 if level == locking.LEVEL_NODEGROUP:
8915 assert self.op.remote_node is None
8916 assert self.op.iallocator is not None
8917 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
8919 self.share_locks[locking.LEVEL_NODEGROUP] = 1
8920 self.needed_locks[locking.LEVEL_NODEGROUP] = \
8921 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8923 elif level == locking.LEVEL_NODE:
8924 if self.op.iallocator is not None:
8925 assert self.op.remote_node is None
8926 assert not self.needed_locks[locking.LEVEL_NODE]
8928 # Lock member nodes of all locked groups
8929 self.needed_locks[locking.LEVEL_NODE] = [node_name
8930 for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
8931 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
8933 self._LockInstancesNodes()
8935 def BuildHooksEnv(self):
8938 This runs on the master, the primary and all the secondaries.
8941 instance = self.replacer.instance
8943 "MODE": self.op.mode,
8944 "NEW_SECONDARY": self.op.remote_node,
8945 "OLD_SECONDARY": instance.secondary_nodes[0],
8947 env.update(_BuildInstanceHookEnvByObject(self, instance))
8950 def BuildHooksNodes(self):
8951 """Build hooks nodes.
8954 instance = self.replacer.instance
8956 self.cfg.GetMasterNode(),
8957 instance.primary_node,
8959 if self.op.remote_node is not None:
8960 nl.append(self.op.remote_node)
8963 def CheckPrereq(self):
8964 """Check prerequisites.
8967 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
8968 self.op.iallocator is None)
8970 owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
8972 groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8973 if owned_groups != groups:
8974 raise errors.OpExecError("Node groups used by instance '%s' changed"
8975 " since lock was acquired, current list is %r,"
8976 " used to be '%s'" %
8977 (self.op.instance_name,
8978 utils.CommaJoin(groups),
8979 utils.CommaJoin(owned_groups)))
8981 return LogicalUnit.CheckPrereq(self)
8984 class TLReplaceDisks(Tasklet):
8985 """Replaces disks for an instance.
8987 Note: Locking is not within the scope of this class.
8990 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8991 disks, delay_iallocator, early_release):
8992 """Initializes this class.
8995 Tasklet.__init__(self, lu)
8998 self.instance_name = instance_name
9000 self.iallocator_name = iallocator_name
9001 self.remote_node = remote_node
9003 self.delay_iallocator = delay_iallocator
9004 self.early_release = early_release
9007 self.instance = None
9008 self.new_node = None
9009 self.target_node = None
9010 self.other_node = None
9011 self.remote_node_info = None
9012 self.node_secondary_ip = None
9015 def CheckArguments(mode, remote_node, iallocator):
9016 """Helper function for users of this class.
9019 # check for valid parameter combination
9020 if mode == constants.REPLACE_DISK_CHG:
9021 if remote_node is None and iallocator is None:
9022 raise errors.OpPrereqError("When changing the secondary either an"
9023 " iallocator script must be used or the"
9024 " new node given", errors.ECODE_INVAL)
9026 if remote_node is not None and iallocator is not None:
9027 raise errors.OpPrereqError("Give either the iallocator or the new"
9028 " secondary, not both", errors.ECODE_INVAL)
9030 elif remote_node is not None or iallocator is not None:
9031 # Not replacing the secondary
9032 raise errors.OpPrereqError("The iallocator and new node options can"
9033 " only be used when changing the"
9034 " secondary node", errors.ECODE_INVAL)
9037 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9038 """Compute a new secondary node using an IAllocator.
9041 ial = IAllocator(lu.cfg, lu.rpc,
9042 mode=constants.IALLOCATOR_MODE_RELOC,
9044 relocate_from=relocate_from)
9046 ial.Run(iallocator_name)
9049 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9050 " %s" % (iallocator_name, ial.info),
9053 if len(ial.result) != ial.required_nodes:
9054 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9055 " of nodes (%s), required %s" %
9057 len(ial.result), ial.required_nodes),
9060 remote_node_name = ial.result[0]
9062 lu.LogInfo("Selected new secondary for instance '%s': %s",
9063 instance_name, remote_node_name)
9065 return remote_node_name
9067 def _FindFaultyDisks(self, node_name):
9068 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9071 def _CheckDisksActivated(self, instance):
9072 """Checks if the instance disks are activated.
9074 @param instance: The instance to check disks
9075 @return: True if they are activated, False otherwise
9078 nodes = instance.all_nodes
9080 for idx, dev in enumerate(instance.disks):
9082 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9083 self.cfg.SetDiskID(dev, node)
9085 result = self.rpc.call_blockdev_find(node, dev)
9089 elif result.fail_msg or not result.payload:
9094 def CheckPrereq(self):
9095 """Check prerequisites.
9097 This checks that the instance is in the cluster.
9100 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9101 assert instance is not None, \
9102 "Cannot retrieve locked instance %s" % self.instance_name
9104 if instance.disk_template != constants.DT_DRBD8:
9105 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9106 " instances", errors.ECODE_INVAL)
9108 if len(instance.secondary_nodes) != 1:
9109 raise errors.OpPrereqError("The instance has a strange layout,"
9110 " expected one secondary but found %d" %
9111 len(instance.secondary_nodes),
9114 if not self.delay_iallocator:
9115 self._CheckPrereq2()
9117 def _CheckPrereq2(self):
9118 """Check prerequisites, second part.
9120 This function should always be part of CheckPrereq. It was separated and is
9121 now called from Exec because during node evacuation iallocator was only
9122 called with an unmodified cluster model, not taking planned changes into account.
9126 instance = self.instance
9127 secondary_node = instance.secondary_nodes[0]
9129 if self.iallocator_name is None:
9130 remote_node = self.remote_node
9132 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9133 instance.name, instance.secondary_nodes)
9135 if remote_node is None:
9136 self.remote_node_info = None
9138 assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
9139 "Remote node '%s' is not locked" % remote_node
9141 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9142 assert self.remote_node_info is not None, \
9143 "Cannot retrieve locked node %s" % remote_node
9145 if remote_node == self.instance.primary_node:
9146 raise errors.OpPrereqError("The specified node is the primary node of"
9147 " the instance", errors.ECODE_INVAL)
9149 if remote_node == secondary_node:
9150 raise errors.OpPrereqError("The specified node is already the"
9151 " secondary node of the instance",
9154 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9155 constants.REPLACE_DISK_CHG):
9156 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9159 if self.mode == constants.REPLACE_DISK_AUTO:
9160 if not self._CheckDisksActivated(instance):
9161 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9162 " first" % self.instance_name,
9164 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9165 faulty_secondary = self._FindFaultyDisks(secondary_node)
9167 if faulty_primary and faulty_secondary:
9168 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9169 " one node and can not be repaired"
9170 " automatically" % self.instance_name,
9174 self.disks = faulty_primary
9175 self.target_node = instance.primary_node
9176 self.other_node = secondary_node
9177 check_nodes = [self.target_node, self.other_node]
9178 elif faulty_secondary:
9179 self.disks = faulty_secondary
9180 self.target_node = secondary_node
9181 self.other_node = instance.primary_node
9182 check_nodes = [self.target_node, self.other_node]
9188 # Non-automatic modes
9189 if self.mode == constants.REPLACE_DISK_PRI:
9190 self.target_node = instance.primary_node
9191 self.other_node = secondary_node
9192 check_nodes = [self.target_node, self.other_node]
9194 elif self.mode == constants.REPLACE_DISK_SEC:
9195 self.target_node = secondary_node
9196 self.other_node = instance.primary_node
9197 check_nodes = [self.target_node, self.other_node]
9199 elif self.mode == constants.REPLACE_DISK_CHG:
9200 self.new_node = remote_node
9201 self.other_node = instance.primary_node
9202 self.target_node = secondary_node
9203 check_nodes = [self.new_node, self.other_node]
9205 _CheckNodeNotDrained(self.lu, remote_node)
9206 _CheckNodeVmCapable(self.lu, remote_node)
9208 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9209 assert old_node_info is not None
9210 if old_node_info.offline and not self.early_release:
9211 # doesn't make sense to delay the release
9212 self.early_release = True
9213 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9214 " early-release mode", secondary_node)
9217 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9220 # If not specified all disks should be replaced
9222 self.disks = range(len(self.instance.disks))
9224 for node in check_nodes:
9225 _CheckNodeOnline(self.lu, node)
9227 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9230 if node_name is not None)
9232 # Release unneeded node locks
9233 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9235 # Release any owned node group
9236 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9237 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9239 # Check whether disks are valid
9240 for disk_idx in self.disks:
9241 instance.FindDisk(disk_idx)
9243 # Get secondary node IP addresses
9244 self.node_secondary_ip = \
9245 dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
9246 for node_name in touched_nodes)
9248 def Exec(self, feedback_fn):
9249 """Execute disk replacement.
9251 This dispatches the disk replacement to the appropriate handler.
9254 if self.delay_iallocator:
9255 self._CheckPrereq2()
9258 # Verify owned locks before starting operation
9259 owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9260 assert set(owned_locks) == set(self.node_secondary_ip), \
9261 ("Incorrect node locks, owning %s, expected %s" %
9262 (owned_locks, self.node_secondary_ip.keys()))
9264 owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
9265 assert list(owned_locks) == [self.instance_name], \
9266 "Instance '%s' not locked" % self.instance_name
9268 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9269 "Should not own any node group lock at this point"
9272 feedback_fn("No disks need replacement")
9275 feedback_fn("Replacing disk(s) %s for %s" %
9276 (utils.CommaJoin(self.disks), self.instance.name))
9278 activate_disks = (not self.instance.admin_up)
9280 # Activate the instance disks if we're replacing them on a down instance
9282 _StartInstanceDisks(self.lu, self.instance, True)
9285 # Should we replace the secondary node?
9286 if self.new_node is not None:
9287 fn = self._ExecDrbd8Secondary
9289 fn = self._ExecDrbd8DiskOnly
9291 result = fn(feedback_fn)
9293 # Deactivate the instance disks if we're replacing them on a
9296 _SafeShutdownInstanceDisks(self.lu, self.instance)
9299 # Verify owned locks
9300 owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9301 nodes = frozenset(self.node_secondary_ip)
9302 assert ((self.early_release and not owned_locks) or
9303 (not self.early_release and not (set(owned_locks) - nodes))), \
9304 ("Not owning the correct locks, early_release=%s, owned=%r,"
9305 " nodes=%r" % (self.early_release, owned_locks, nodes))
9309 def _CheckVolumeGroup(self, nodes):
9310 self.lu.LogInfo("Checking volume groups")
9312 vgname = self.cfg.GetVGName()
9314 # Make sure volume group exists on all involved nodes
9315 results = self.rpc.call_vg_list(nodes)
9317 raise errors.OpExecError("Can't list volume groups on the nodes")
9321 res.Raise("Error checking node %s" % node)
9322 if vgname not in res.payload:
9323 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9326 def _CheckDisksExistence(self, nodes):
9327 # Check disk existence
9328 for idx, dev in enumerate(self.instance.disks):
9329 if idx not in self.disks:
9333 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9334 self.cfg.SetDiskID(dev, node)
9336 result = self.rpc.call_blockdev_find(node, dev)
9338 msg = result.fail_msg
9339 if msg or not result.payload:
9341 msg = "disk not found"
9342 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9345 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9346 for idx, dev in enumerate(self.instance.disks):
9347 if idx not in self.disks:
9350 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9353 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9355 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9356 " replace disks for instance %s" %
9357 (node_name, self.instance.name))
9359 def _CreateNewStorage(self, node_name):
9360 """Create new storage on the primary or secondary node.
9362 This is only used for same-node replaces, not for changing the
9363 secondary node, hence we don't want to modify the existing disk.
9368 for idx, dev in enumerate(self.instance.disks):
9369 if idx not in self.disks:
9372 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9374 self.cfg.SetDiskID(dev, node_name)
9376 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9377 names = _GenerateUniqueNames(self.lu, lv_names)
9379 vg_data = dev.children[0].logical_id[0]
9380 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9381 logical_id=(vg_data, names[0]))
9382 vg_meta = dev.children[1].logical_id[0]
9383 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9384 logical_id=(vg_meta, names[1]))
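# The replacement pair mirrors the old layout: a data LV of the same size and a
# 128 MiB metadata LV, each created in the same volume group as the old child.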
9386 new_lvs = [lv_data, lv_meta]
9387 old_lvs = [child.Copy() for child in dev.children]
9388 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9390 # we pass force_create=True to force the LVM creation
9391 for new_lv in new_lvs:
9392 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9393 _GetInstanceInfoText(self.instance), False)
9397 def _CheckDevices(self, node_name, iv_names):
9398 for name, (dev, _, _) in iv_names.iteritems():
9399 self.cfg.SetDiskID(dev, node_name)
9401 result = self.rpc.call_blockdev_find(node_name, dev)
9403 msg = result.fail_msg
9404 if msg or not result.payload:
9406 msg = "disk not found"
9407 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9410 if result.payload.is_degraded:
9411 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9413 def _RemoveOldStorage(self, node_name, iv_names):
9414 for name, (_, old_lvs, _) in iv_names.iteritems():
9415 self.lu.LogInfo("Remove logical volumes for %s" % name)
9418 self.cfg.SetDiskID(lv, node_name)
9420 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9422 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9423 hint="remove unused LVs manually")
9425 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
9426 """Replace a disk on the primary or secondary for DRBD 8.
9428 The algorithm for replace is quite complicated:
9430 1. for each disk to be replaced:
9432 1. create new LVs on the target node with unique names
9433 1. detach old LVs from the drbd device
9434 1. rename old LVs to name_replaced.<time_t>
9435 1. rename new LVs to old LVs
9436 1. attach the new LVs (with the old names now) to the drbd device
9438 1. wait for sync across all devices
9440 1. for each modified disk:
9442 1. remove old LVs (which have the name name_replaced.<time_t>)
9444 Failures are not very well handled.
9449 # Step: check device activation
9450 self.lu.LogStep(1, steps_total, "Check device existence")
9451 self._CheckDisksExistence([self.other_node, self.target_node])
9452 self._CheckVolumeGroup([self.target_node, self.other_node])
9454 # Step: check other node consistency
9455 self.lu.LogStep(2, steps_total, "Check peer consistency")
9456 self._CheckDisksConsistency(self.other_node,
9457 self.other_node == self.instance.primary_node,
9460 # Step: create new storage
9461 self.lu.LogStep(3, steps_total, "Allocate new storage")
9462 iv_names = self._CreateNewStorage(self.target_node)
9464 # Step: for each lv, detach+rename*2+attach
9465 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9466 for dev, old_lvs, new_lvs in iv_names.itervalues():
9467 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9469 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9471 result.Raise("Can't detach drbd from local storage on node"
9472 " %s for device %s" % (self.target_node, dev.iv_name))
9474 #cfg.Update(instance)
9476 # ok, we created the new LVs, so now we know we have the needed
9477 # storage; as such, we proceed on the target node to rename
9478 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9479 # using the assumption that logical_id == physical_id (which in
9480 # turn is the unique_id on that node)
9482 # FIXME(iustin): use a better name for the replaced LVs
9483 temp_suffix = int(time.time())
9484 ren_fn = lambda d, suff: (d.physical_id[0],
9485 d.physical_id[1] + "_replaced-%s" % suff)
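# Hypothetical example: an old LV ("xenvg", ".disk0_data") would be renamed to
# ("xenvg", ".disk0_data_replaced-1400000000"), the suffix being the timestamp above.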
9487 # Build the rename list based on what LVs exist on the node
9488 rename_old_to_new = []
9489 for to_ren in old_lvs:
9490 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9491 if not result.fail_msg and result.payload:
9493 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9495 self.lu.LogInfo("Renaming the old LVs on the target node")
9496 result = self.rpc.call_blockdev_rename(self.target_node,
9498 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9500 # Now we rename the new LVs to the old LVs
9501 self.lu.LogInfo("Renaming the new LVs on the target node")
9502 rename_new_to_old = [(new, old.physical_id)
9503 for old, new in zip(old_lvs, new_lvs)]
9504 result = self.rpc.call_blockdev_rename(self.target_node,
9506 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9508 # Intermediate steps of in memory modifications
9509 for old, new in zip(old_lvs, new_lvs):
9510 new.logical_id = old.logical_id
9511 self.cfg.SetDiskID(new, self.target_node)
9513 # We need to modify old_lvs so that removal later removes the
9514 # right LVs, not the newly added ones; note that old_lvs is a copy here
9516 for disk in old_lvs:
9517 disk.logical_id = ren_fn(disk, temp_suffix)
9518 self.cfg.SetDiskID(disk, self.target_node)
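# From here on the entries in old_lvs (shared with iv_names) carry the renamed
# "_replaced-<time_t>" IDs, which is exactly what _RemoveOldStorage deletes later.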
9520 # Now that the new lvs have the old name, we can add them to the device
9521 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9522 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9524 msg = result.fail_msg
9526 for new_lv in new_lvs:
9527 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9530 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9531 hint=("cleanup manually the unused logical"
9533 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9536 if self.early_release:
9537 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9539 self._RemoveOldStorage(self.target_node, iv_names)
9540 # WARNING: we release both node locks here, do not do other RPCs
9541 # than WaitForSync to the primary node
9542 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9543 names=[self.target_node, self.other_node])
9546 # This can fail as the old devices are degraded and _WaitForSync
9547 # does a combined result over all disks, so we don't check its return value
9548 self.lu.LogStep(cstep, steps_total, "Sync devices")
9550 _WaitForSync(self.lu, self.instance)
9552 # Check all devices manually
9553 self._CheckDevices(self.instance.primary_node, iv_names)
9555 # Step: remove old storage
9556 if not self.early_release:
9557 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9559 self._RemoveOldStorage(self.target_node, iv_names)
9561 def _ExecDrbd8Secondary(self, feedback_fn):
9562 """Replace the secondary node for DRBD 8.
9564 The algorithm for replace is quite complicated:
9565 - for all disks of the instance:
9566 - create new LVs on the new node with the same names
9567 - shutdown the drbd device on the old secondary
9568 - disconnect the drbd network on the primary
9569 - create the drbd device on the new secondary
9570 - network attach the drbd on the primary, using an artifice:
9571 the drbd code for Attach() will connect to the network if it
9572 finds a device which is connected to the good local disks but not network enabled
9574 - wait for sync across all devices
9575 - remove all disks from the old secondary
9577 Failures are not very well handled.
9582 # Step: check device activation
9583 self.lu.LogStep(1, steps_total, "Check device existence")
9584 self._CheckDisksExistence([self.instance.primary_node])
9585 self._CheckVolumeGroup([self.instance.primary_node])
9587 # Step: check other node consistency
9588 self.lu.LogStep(2, steps_total, "Check peer consistency")
9589 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9591 # Step: create new storage
9592 self.lu.LogStep(3, steps_total, "Allocate new storage")
9593 for idx, dev in enumerate(self.instance.disks):
9594 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9595 (self.new_node, idx))
9596 # we pass force_create=True to force LVM creation
9597 for new_lv in dev.children:
9598 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9599 _GetInstanceInfoText(self.instance), False)
9601 # Step 4: drbd minors and drbd setup changes
9602 # after this, we must manually remove the drbd minors on both the
9603 # error and the success paths
9604 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9605 minors = self.cfg.AllocateDRBDMinor([self.new_node
9606 for dev in self.instance.disks],
9608 logging.debug("Allocated minors %r", minors)
9611 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9612 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9613 (self.new_node, idx))
9614 # create new devices on new_node; note that we create two IDs:
9615 # one without port, so the drbd will be activated without
9616 # networking information on the new node at this stage, and one
9617 # with network, for the later activation in step 4
9618 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9619 if self.instance.primary_node == o_node1:
9622 assert self.instance.primary_node == o_node2, "Three-node instance?"
9625 new_alone_id = (self.instance.primary_node, self.new_node, None,
9626 p_minor, new_minor, o_secret)
9627 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9628 p_minor, new_minor, o_secret)
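# A drbd8 logical_id is (node_a, node_b, port, minor_a, minor_b, secret);
# new_alone_id omits the port so the device comes up without networking, while
# new_net_id is the fully connected ID written to the configuration further down.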
9630 iv_names[idx] = (dev, dev.children, new_net_id)
9631 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9633 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9634 logical_id=new_alone_id,
9635 children=dev.children,
9638 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9639 _GetInstanceInfoText(self.instance), False)
9640 except errors.GenericError:
9641 self.cfg.ReleaseDRBDMinors(self.instance.name)
9644 # We have new devices, shutdown the drbd on the old secondary
9645 for idx, dev in enumerate(self.instance.disks):
9646 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9647 self.cfg.SetDiskID(dev, self.target_node)
9648 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9650 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9651 "node: %s" % (idx, msg),
9652 hint=("Please cleanup this device manually as"
9653 " soon as possible"))
9655 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9656 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9657 self.node_secondary_ip,
9658 self.instance.disks)\
9659 [self.instance.primary_node]
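# call_drbd_disconnect_net is a multi-node RPC; it was only sent to the primary,
# so the per-node result is extracted from the returned dict right away.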
9661 msg = result.fail_msg
9663 # detaches didn't succeed (unlikely)
9664 self.cfg.ReleaseDRBDMinors(self.instance.name)
9665 raise errors.OpExecError("Can't detach the disks from the network on"
9666 " old node: %s" % (msg,))
9668 # if we managed to detach at least one, we update all the disks of
9669 # the instance to point to the new secondary
9670 self.lu.LogInfo("Updating instance configuration")
9671 for dev, _, new_logical_id in iv_names.itervalues():
9672 dev.logical_id = new_logical_id
9673 self.cfg.SetDiskID(dev, self.instance.primary_node)
9675 self.cfg.Update(self.instance, feedback_fn)
9677 # and now perform the drbd attach
9678 self.lu.LogInfo("Attaching primary drbds to new secondary"
9679 " (standalone => connected)")
9680 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9682 self.node_secondary_ip,
9683 self.instance.disks,
9686 for to_node, to_result in result.items():
9687 msg = to_result.fail_msg
9689 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9691 hint=("please do a gnt-instance info to see the"
9692 " status of disks"))
9694 if self.early_release:
9695 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9697 self._RemoveOldStorage(self.target_node, iv_names)
9698 # WARNING: we release all node locks here, do not do other RPCs
9699 # than WaitForSync to the primary node
9700 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9701 names=[self.instance.primary_node,
9706 # This can fail as the old devices are degraded and _WaitForSync
9707 # does a combined result over all disks, so we don't check its return value
9708 self.lu.LogStep(cstep, steps_total, "Sync devices")
9710 _WaitForSync(self.lu, self.instance)
9712 # Check all devices manually
9713 self._CheckDevices(self.instance.primary_node, iv_names)
9715 # Step: remove old storage
9716 if not self.early_release:
9717 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9718 self._RemoveOldStorage(self.target_node, iv_names)
9721 class LURepairNodeStorage(NoHooksLU):
9722 """Repairs the volume group on a node.
9727 def CheckArguments(self):
9728 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9730 storage_type = self.op.storage_type
9732 if (constants.SO_FIX_CONSISTENCY not in
9733 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9734 raise errors.OpPrereqError("Storage units of type '%s' can not be"
9735 " repaired" % storage_type,
9738 def ExpandNames(self):
9739 self.needed_locks = {
9740 locking.LEVEL_NODE: [self.op.node_name],
9743 def _CheckFaultyDisks(self, instance, node_name):
9744 """Ensure faulty disks abort the opcode or at least warn."""
9746 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9748 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9749 " node '%s'" % (instance.name, node_name),
9751 except errors.OpPrereqError, err:
9752 if self.op.ignore_consistency:
9753 self.proc.LogWarning(str(err.args[0]))
9757 def CheckPrereq(self):
9758 """Check prerequisites.
9761 # Check whether any instance on this node has faulty disks
9762 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9763 if not inst.admin_up:
9765 check_nodes = set(inst.all_nodes)
9766 check_nodes.discard(self.op.node_name)
9767 for inst_node_name in check_nodes:
9768 self._CheckFaultyDisks(inst, inst_node_name)
9770 def Exec(self, feedback_fn):
9771 feedback_fn("Repairing storage unit '%s' on %s ..." %
9772 (self.op.name, self.op.node_name))
9774 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
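# Hand the repair off to the node daemon; CheckArguments already ensured that
# SO_FIX_CONSISTENCY is a valid operation for this storage type.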
9775 result = self.rpc.call_storage_execute(self.op.node_name,
9776 self.op.storage_type, st_args,
9778 constants.SO_FIX_CONSISTENCY)
9779 result.Raise("Failed to repair storage unit '%s' on %s" %
9780 (self.op.name, self.op.node_name))
9783 class LUNodeEvacuate(NoHooksLU):
9784 """Evacuates instances off a list of nodes.
9789 def CheckArguments(self):
9790 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9792 def ExpandNames(self):
9793 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9795 if self.op.remote_node is not None:
9796 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9797 assert self.op.remote_node
9799 if self.op.remote_node == self.op.node_name:
9800 raise errors.OpPrereqError("Can not use evacuated node as a new"
9801 " secondary node", errors.ECODE_INVAL)
9803 if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
9804 raise errors.OpPrereqError("Without the use of an iallocator only"
9805 " secondary instances can be evacuated",
9809 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9810 self.needed_locks = {
9811 locking.LEVEL_INSTANCE: [],
9812 locking.LEVEL_NODEGROUP: [],
9813 locking.LEVEL_NODE: [],
9816 if self.op.remote_node is None:
9817 # Iallocator will choose any node(s) in the same group
9818 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
9820 group_nodes = frozenset([self.op.remote_node])
9822 # Determine nodes to be locked
9823 self.lock_nodes = set([self.op.node_name]) | group_nodes
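# The evacuated node is always locked; with an iallocator the rest of its node
# group is locked too, otherwise only the explicitly requested remote node.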
9825 def _DetermineInstances(self):
9826 """Builds list of instances to operate on.
9829 assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
9831 if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
9832 # Primary instances only
9833 inst_fn = _GetNodePrimaryInstances
9834 assert self.op.remote_node is None, \
9835 "Evacuating primary instances requires iallocator"
9836 elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
9837 # Secondary instances only
9838 inst_fn = _GetNodeSecondaryInstances
9841 assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
9842 inst_fn = _GetNodeInstances
9844 return inst_fn(self.cfg, self.op.node_name)
9846 def DeclareLocks(self, level):
9847 if level == locking.LEVEL_INSTANCE:
9848 # Lock instances optimistically, needs verification once node and group
9849 # locks have been acquired
9850 self.needed_locks[locking.LEVEL_INSTANCE] = \
9851 set(i.name for i in self._DetermineInstances())
9853 elif level == locking.LEVEL_NODEGROUP:
9854 # Lock node groups optimistically, needs verification once nodes have been acquired
9856 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9857 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
9859 elif level == locking.LEVEL_NODE:
9860 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
9862 def CheckPrereq(self):
9864 owned_instances = self.glm.list_owned(locking.LEVEL_INSTANCE)
9865 owned_nodes = self.glm.list_owned(locking.LEVEL_NODE)
9866 owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
9868 assert owned_nodes == self.lock_nodes
9870 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
9871 if owned_groups != wanted_groups:
9872 raise errors.OpExecError("Node groups changed since locks were acquired,"
9873 " current groups are '%s', used to be '%s'" %
9874 (utils.CommaJoin(wanted_groups),
9875 utils.CommaJoin(owned_groups)))
9877 # Determine affected instances
9878 self.instances = self._DetermineInstances()
9879 self.instance_names = [i.name for i in self.instances]
9881 if set(self.instance_names) != owned_instances:
9882 raise errors.OpExecError("Instances on node '%s' changed since locks"
9883 " were acquired, current instances are '%s',"
9884 " used to be '%s'" %
9886 utils.CommaJoin(self.instance_names),
9887 utils.CommaJoin(owned_instances)))
9889 if self.instance_names:
9890 self.LogInfo("Evacuating instances from node '%s': %s",
9892 utils.CommaJoin(utils.NiceSort(self.instance_names)))
9894 self.LogInfo("No instances to evacuate from node '%s'",
9897 if self.op.remote_node is not None:
9898 for i in self.instances:
9899 if i.primary_node == self.op.remote_node:
9900 raise errors.OpPrereqError("Node %s is the primary node of"
9901 " instance %s, cannot use it as"
9903 (self.op.remote_node, i.name),
9906 def Exec(self, feedback_fn):
9907 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
9909 if not self.instance_names:
9910 # No instances to evacuate
9913 elif self.op.iallocator is not None:
9914 # TODO: Implement relocation to other group
9915 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
9916 evac_mode=self.op.mode,
9917 instances=list(self.instance_names))
9919 ial.Run(self.op.iallocator)
9922 raise errors.OpPrereqError("Can't compute node evacuation using"
9923 " iallocator '%s': %s" %
9924 (self.op.iallocator, ial.info),
9927 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
9929 elif self.op.remote_node is not None:
9930 assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
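# Build one single-opcode job per instance: each secondary replacement is
# submitted as its own job, so one failure does not abort the others.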
9932 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
9933 remote_node=self.op.remote_node,
9935 mode=constants.REPLACE_DISK_CHG,
9936 early_release=self.op.early_release)]
9937 for instance_name in self.instance_names
9941 raise errors.ProgrammerError("No iallocator or remote node")
9943 return ResultWithJobs(jobs)
9946 def _SetOpEarlyRelease(early_release, op):
9947 """Sets C{early_release} flag on opcodes if available.
9951 op.early_release = early_release
9952 except AttributeError:
9953 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
9958 def _NodeEvacDest(use_nodes, group, nodes):
9959 """Returns group or nodes depending on caller's choice.
9963 return utils.CommaJoin(nodes)
9968 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
9969 """Unpacks the result of change-group and node-evacuate iallocator requests.
9971 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
9972 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
9974 @type lu: L{LogicalUnit}
9975 @param lu: Logical unit instance
9976 @type alloc_result: tuple/list
9977 @param alloc_result: Result from iallocator
9978 @type early_release: bool
9979 @param early_release: Whether to release locks early if possible
9980 @type use_nodes: bool
9981 @param use_nodes: Whether to display node names instead of groups
9984 (moved, failed, jobs) = alloc_result
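# The iallocator result is a triple: instances assigned a new location, instances
# that could not be placed (each with a reason), and the serialized opcode lists
# for the follow-up jobs.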
9987 lu.LogWarning("Unable to evacuate instances %s",
9988 utils.CommaJoin("%s (%s)" % (name, reason)
9989 for (name, reason) in failed))
9992 lu.LogInfo("Instances to be moved: %s",
9993 utils.CommaJoin("%s (to %s)" %
9994 (name, _NodeEvacDest(use_nodes, group, nodes))
9995 for (name, group, nodes) in moved))
9997 return [map(compat.partial(_SetOpEarlyRelease, early_release),
9998 map(opcodes.OpCode.LoadOpCode, ops))
10002 class LUInstanceGrowDisk(LogicalUnit):
10003 """Grow a disk of an instance.
10006 HPATH = "disk-grow"
10007 HTYPE = constants.HTYPE_INSTANCE
10010 def ExpandNames(self):
10011 self._ExpandAndLockInstance()
10012 self.needed_locks[locking.LEVEL_NODE] = []
10013 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10015 def DeclareLocks(self, level):
10016 if level == locking.LEVEL_NODE:
10017 self._LockInstancesNodes()
10019 def BuildHooksEnv(self):
10020 """Build hooks env.
10022 This runs on the master, the primary and all the secondaries.
10026 "DISK": self.op.disk,
10027 "AMOUNT": self.op.amount,
10029 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10032 def BuildHooksNodes(self):
10033 """Build hooks nodes.
10036 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10039 def CheckPrereq(self):
10040 """Check prerequisites.
10042 This checks that the instance is in the cluster.
10045 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10046 assert instance is not None, \
10047 "Cannot retrieve locked instance %s" % self.op.instance_name
10048 nodenames = list(instance.all_nodes)
10049 for node in nodenames:
10050 _CheckNodeOnline(self, node)
10052 self.instance = instance
10054 if instance.disk_template not in constants.DTS_GROWABLE:
10055 raise errors.OpPrereqError("Instance's disk layout does not support"
10056 " growing", errors.ECODE_INVAL)
10058 self.disk = instance.FindDisk(self.op.disk)
10060 if instance.disk_template not in (constants.DT_FILE,
10061 constants.DT_SHARED_FILE):
10062 # TODO: check the free disk space for file, when that feature will be implemented
10064 _CheckNodesFreeDiskPerVG(self, nodenames,
10065 self.disk.ComputeGrowth(self.op.amount))
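# ComputeGrowth reports the extra space (per volume group, in MiB) the grow would
# consume, which is the per-VG format _CheckNodesFreeDiskPerVG expects.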
10067 def Exec(self, feedback_fn):
10068 """Execute disk grow.
10071 instance = self.instance
10074 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10076 raise errors.OpExecError("Cannot activate block device to grow")
10078 # First run all grow ops in dry-run mode
10079 for node in instance.all_nodes:
10080 self.cfg.SetDiskID(disk, node)
10081 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10082 result.Raise("Grow request failed to node %s" % node)
10084 # We know that (as far as we can test) operations across different
10085 # nodes will succeed, time to run it for real
10086 for node in instance.all_nodes:
10087 self.cfg.SetDiskID(disk, node)
10088 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10089 result.Raise("Grow request failed to node %s" % node)
10091 # TODO: Rewrite code to work properly
10092 # DRBD goes into sync mode for a short amount of time after executing the
10093 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10094 # calling "resize" in sync mode fails. Sleeping for a short amount of
10095 # time is a work-around.
10098 disk.RecordGrow(self.op.amount)
10099 self.cfg.Update(instance, feedback_fn)
10100 if self.op.wait_for_sync:
10101 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10103 self.proc.LogWarning("Disk sync-ing has not returned a good"
10104 " status; please check the instance")
10105 if not instance.admin_up:
10106 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10107 elif not instance.admin_up:
10108 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10109 " not supposed to be running because no wait for"
10110 " sync mode was requested")
10113 class LUInstanceQueryData(NoHooksLU):
10114 """Query runtime instance data.
10119 def ExpandNames(self):
10120 self.needed_locks = {}
10122 # Use locking if requested or when non-static information is wanted
10123 if not (self.op.static or self.op.use_locking):
10124 self.LogWarning("Non-static data requested, locks need to be acquired")
10125 self.op.use_locking = True
10127 if self.op.instances or not self.op.use_locking:
10128 # Expand instance names right here
10129 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10131 # Will use acquired locks
10132 self.wanted_names = None
10134 if self.op.use_locking:
10135 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10137 if self.wanted_names is None:
10138 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10140 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10142 self.needed_locks[locking.LEVEL_NODE] = []
10143 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10144 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10146 def DeclareLocks(self, level):
10147 if self.op.use_locking and level == locking.LEVEL_NODE:
10148 self._LockInstancesNodes()
10150 def CheckPrereq(self):
10151 """Check prerequisites.
10153 This only checks the optional instance list against the existing names.
10156 if self.wanted_names is None:
10157 assert self.op.use_locking, "Locking was not used"
10158 self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
10160 self.wanted_instances = [self.cfg.GetInstanceInfo(name)
10161 for name in self.wanted_names]
10163 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10164 """Returns the status of a block device
10167 if self.op.static or not node:
10170 self.cfg.SetDiskID(dev, node)
10172 result = self.rpc.call_blockdev_find(node, dev)
10176 result.Raise("Can't compute disk status for %s" % instance_name)
10178 status = result.payload
10182 return (status.dev_path, status.major, status.minor,
10183 status.sync_percent, status.estimated_time,
10184 status.is_degraded, status.ldisk_status)
10186 def _ComputeDiskStatus(self, instance, snode, dev):
10187 """Compute block device status.
10190 if dev.dev_type in constants.LDS_DRBD:
10191 # we change the snode then (otherwise we use the one passed in)
10192 if dev.logical_id[0] == instance.primary_node:
10193 snode = dev.logical_id[1]
10195 snode = dev.logical_id[0]
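# For DRBD the two attachment nodes are logical_id[0] and [1]; whichever is not
# the primary is the secondary whose status is reported below.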
10197 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10198 instance.name, dev)
10199 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10202 dev_children = map(compat.partial(self._ComputeDiskStatus,
10209 "iv_name": dev.iv_name,
10210 "dev_type": dev.dev_type,
10211 "logical_id": dev.logical_id,
10212 "physical_id": dev.physical_id,
10213 "pstatus": dev_pstatus,
10214 "sstatus": dev_sstatus,
10215 "children": dev_children,
10220 def Exec(self, feedback_fn):
10221 """Gather and return data"""
10224 cluster = self.cfg.GetClusterInfo()
10226 for instance in self.wanted_instances:
10227 pnode = self.cfg.GetNodeInfo(instance.primary_node)
10229 if self.op.static or pnode.offline:
10230 remote_state = None
10232 self.LogWarning("Primary node %s is marked offline, returning static"
10233 " information only for instance %s" %
10234 (pnode.name, instance.name))
10236 remote_info = self.rpc.call_instance_info(instance.primary_node,
10238 instance.hypervisor)
10239 remote_info.Raise("Error checking node %s" % instance.primary_node)
10240 remote_info = remote_info.payload
10241 if remote_info and "state" in remote_info:
10242 remote_state = "up"
10244 remote_state = "down"
10246 if instance.admin_up:
10247 config_state = "up"
10249 config_state = "down"
10251 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10254 result[instance.name] = {
10255 "name": instance.name,
10256 "config_state": config_state,
10257 "run_state": remote_state,
10258 "pnode": instance.primary_node,
10259 "snodes": instance.secondary_nodes,
10261 # this happens to be the same format used for hooks
10262 "nics": _NICListToTuple(self, instance.nics),
10263 "disk_template": instance.disk_template,
10265 "hypervisor": instance.hypervisor,
10266 "network_port": instance.network_port,
10267 "hv_instance": instance.hvparams,
10268 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10269 "be_instance": instance.beparams,
10270 "be_actual": cluster.FillBE(instance),
10271 "os_instance": instance.osparams,
10272 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10273 "serial_no": instance.serial_no,
10274 "mtime": instance.mtime,
10275 "ctime": instance.ctime,
10276 "uuid": instance.uuid,
10282 class LUInstanceSetParams(LogicalUnit):
10283 """Modifies an instances's parameters.
10286 HPATH = "instance-modify"
10287 HTYPE = constants.HTYPE_INSTANCE
10290 def CheckArguments(self):
10291 if not (self.op.nics or self.op.disks or self.op.disk_template or
10292 self.op.hvparams or self.op.beparams or self.op.os_name):
10293 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10295 if self.op.hvparams:
10296 _CheckGlobalHvParams(self.op.hvparams)
10300 for disk_op, disk_dict in self.op.disks:
10301 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10302 if disk_op == constants.DDM_REMOVE:
10303 disk_addremove += 1
10305 elif disk_op == constants.DDM_ADD:
10306 disk_addremove += 1
10308 if not isinstance(disk_op, int):
10309 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10310 if not isinstance(disk_dict, dict):
10311 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10312 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10314 if disk_op == constants.DDM_ADD:
10315 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10316 if mode not in constants.DISK_ACCESS_SET:
10317 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10318 errors.ECODE_INVAL)
10319 size = disk_dict.get(constants.IDISK_SIZE, None)
10321 raise errors.OpPrereqError("Required disk parameter size missing",
10322 errors.ECODE_INVAL)
10325 except (TypeError, ValueError), err:
10326 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10327 str(err), errors.ECODE_INVAL)
10328 disk_dict[constants.IDISK_SIZE] = size
10330 # modification of disk
10331 if constants.IDISK_SIZE in disk_dict:
10332 raise errors.OpPrereqError("Disk size change not possible, use"
10333 " grow-disk", errors.ECODE_INVAL)
10335 if disk_addremove > 1:
10336 raise errors.OpPrereqError("Only one disk add or remove operation"
10337 " supported at a time", errors.ECODE_INVAL)
10339 if self.op.disks and self.op.disk_template is not None:
10340 raise errors.OpPrereqError("Disk template conversion and other disk"
10341 " changes not supported at the same time",
10342 errors.ECODE_INVAL)
10344 if (self.op.disk_template and
10345 self.op.disk_template in constants.DTS_INT_MIRROR and
10346 self.op.remote_node is None):
10347 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10348 " one requires specifying a secondary node",
10349 errors.ECODE_INVAL)
10353 for nic_op, nic_dict in self.op.nics:
10354 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10355 if nic_op == constants.DDM_REMOVE:
10358 elif nic_op == constants.DDM_ADD:
10361 if not isinstance(nic_op, int):
10362 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10363 if not isinstance(nic_dict, dict):
10364 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10365 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10367 # nic_dict should be a dict
10368 nic_ip = nic_dict.get(constants.INIC_IP, None)
10369 if nic_ip is not None:
10370 if nic_ip.lower() == constants.VALUE_NONE:
10371 nic_dict[constants.INIC_IP] = None
10373 if not netutils.IPAddress.IsValid(nic_ip):
10374 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10375 errors.ECODE_INVAL)
10377 nic_bridge = nic_dict.get("bridge", None)
10378 nic_link = nic_dict.get(constants.INIC_LINK, None)
10379 if nic_bridge and nic_link:
10380 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10381 " at the same time", errors.ECODE_INVAL)
10382 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10383 nic_dict["bridge"] = None
10384 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10385 nic_dict[constants.INIC_LINK] = None
10387 if nic_op == constants.DDM_ADD:
10388 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10389 if nic_mac is None:
10390 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10392 if constants.INIC_MAC in nic_dict:
10393 nic_mac = nic_dict[constants.INIC_MAC]
10394 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10395 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10397 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10398 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10399 " modifying an existing nic",
10400 errors.ECODE_INVAL)
10402 if nic_addremove > 1:
10403 raise errors.OpPrereqError("Only one NIC add or remove operation"
10404 " supported at a time", errors.ECODE_INVAL)
10406 def ExpandNames(self):
10407 self._ExpandAndLockInstance()
10408 self.needed_locks[locking.LEVEL_NODE] = []
10409 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10411 def DeclareLocks(self, level):
10412 if level == locking.LEVEL_NODE:
10413 self._LockInstancesNodes()
10414 if self.op.disk_template and self.op.remote_node:
10415 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10416 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10418 def BuildHooksEnv(self):
10419 """Build hooks env.
10421 This runs on the master, primary and secondaries.
10425 if constants.BE_MEMORY in self.be_new:
10426 args["memory"] = self.be_new[constants.BE_MEMORY]
10427 if constants.BE_VCPUS in self.be_new:
10428 args["vcpus"] = self.be_new[constants.BE_VCPUS]
10429 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10430 # information at all.
10433 nic_override = dict(self.op.nics)
10434 for idx, nic in enumerate(self.instance.nics):
10435 if idx in nic_override:
10436 this_nic_override = nic_override[idx]
10438 this_nic_override = {}
10439 if constants.INIC_IP in this_nic_override:
10440 ip = this_nic_override[constants.INIC_IP]
10443 if constants.INIC_MAC in this_nic_override:
10444 mac = this_nic_override[constants.INIC_MAC]
10447 if idx in self.nic_pnew:
10448 nicparams = self.nic_pnew[idx]
10450 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10451 mode = nicparams[constants.NIC_MODE]
10452 link = nicparams[constants.NIC_LINK]
10453 args["nics"].append((ip, mac, mode, link))
10454 if constants.DDM_ADD in nic_override:
10455 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10456 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10457 nicparams = self.nic_pnew[constants.DDM_ADD]
10458 mode = nicparams[constants.NIC_MODE]
10459 link = nicparams[constants.NIC_LINK]
10460 args["nics"].append((ip, mac, mode, link))
10461 elif constants.DDM_REMOVE in nic_override:
10462 del args["nics"][-1]
10464 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10465 if self.op.disk_template:
10466 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10470 def BuildHooksNodes(self):
10471 """Build hooks nodes.
10474 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10477 def CheckPrereq(self):
10478 """Check prerequisites.
10480 This only checks the instance list against the existing names.
10483 # checking the new params on the primary/secondary nodes
10485 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10486 cluster = self.cluster = self.cfg.GetClusterInfo()
10487 assert self.instance is not None, \
10488 "Cannot retrieve locked instance %s" % self.op.instance_name
10489 pnode = instance.primary_node
10490 nodelist = list(instance.all_nodes)
10493 if self.op.os_name and not self.op.force:
10494 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10495 self.op.force_variant)
10496 instance_os = self.op.os_name
10498 instance_os = instance.os
10500 if self.op.disk_template:
10501 if instance.disk_template == self.op.disk_template:
10502 raise errors.OpPrereqError("Instance already has disk template %s" %
10503 instance.disk_template, errors.ECODE_INVAL)
10505 if (instance.disk_template,
10506 self.op.disk_template) not in self._DISK_CONVERSIONS:
10507 raise errors.OpPrereqError("Unsupported disk template conversion from"
10508 " %s to %s" % (instance.disk_template,
10509 self.op.disk_template),
10510 errors.ECODE_INVAL)
10511 _CheckInstanceDown(self, instance, "cannot change disk template")
10512 if self.op.disk_template in constants.DTS_INT_MIRROR:
10513 if self.op.remote_node == pnode:
10514 raise errors.OpPrereqError("Given new secondary node %s is the same"
10515 " as the primary node of the instance" %
10516 self.op.remote_node, errors.ECODE_STATE)
10517 _CheckNodeOnline(self, self.op.remote_node)
10518 _CheckNodeNotDrained(self, self.op.remote_node)
10519 # FIXME: here we assume that the old instance type is DT_PLAIN
10520 assert instance.disk_template == constants.DT_PLAIN
10521 disks = [{constants.IDISK_SIZE: d.size,
10522 constants.IDISK_VG: d.logical_id[0]}
10523 for d in instance.disks]
10524 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10525 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10527 # hvparams processing
10528 if self.op.hvparams:
10529 hv_type = instance.hypervisor
10530 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10531 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10532 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10535 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10536 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10537 self.hv_new = hv_new # the new actual values
10538 self.hv_inst = i_hvdict # the new dict (without defaults)
10540 self.hv_new = self.hv_inst = {}
10542 # beparams processing
10543 if self.op.beparams:
10544 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10546 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10547 be_new = cluster.SimpleFillBE(i_bedict)
10548 self.be_new = be_new # the new actual values
10549 self.be_inst = i_bedict # the new dict (without defaults)
10551 self.be_new = self.be_inst = {}
10552 be_old = cluster.FillBE(instance)
10554 # osparams processing
10555 if self.op.osparams:
10556 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10557 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10558 self.os_inst = i_osdict # the new dict (without defaults)
10564 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10565 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10566 mem_check_list = [pnode]
10567 if be_new[constants.BE_AUTO_BALANCE]:
10568 # either we changed auto_balance to yes or it was from before
10569 mem_check_list.extend(instance.secondary_nodes)
10570 instance_info = self.rpc.call_instance_info(pnode, instance.name,
10571 instance.hypervisor)
10572 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10573 instance.hypervisor)
10574 pninfo = nodeinfo[pnode]
10575 msg = pninfo.fail_msg
10577 # Assume the primary node is unreachable and go ahead
10578 self.warn.append("Can't get info from primary node %s: %s" %
10580 elif not isinstance(pninfo.payload.get("memory_free", None), int):
10581 self.warn.append("Node data from primary node %s doesn't contain"
10582 " free memory information" % pnode)
10583 elif instance_info.fail_msg:
10584 self.warn.append("Can't get instance runtime information: %s" %
10585 instance_info.fail_msg)
10587 if instance_info.payload:
10588 current_mem = int(instance_info.payload["memory"])
10590 # Assume instance not running
10591 # (there is a slight race condition here, but it's not very probable,
10592 # and we have no other way to check)
10594 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10595 pninfo.payload["memory_free"])
10597 raise errors.OpPrereqError("This change will prevent the instance"
10598 " from starting, due to %d MB of memory"
10599 " missing on its primary node" % miss_mem,
10600 errors.ECODE_NORES)
10602 if be_new[constants.BE_AUTO_BALANCE]:
10603 for node, nres in nodeinfo.items():
10604 if node not in instance.secondary_nodes:
10606 nres.Raise("Can't get info from secondary node %s" % node,
10607 prereq=True, ecode=errors.ECODE_STATE)
10608 if not isinstance(nres.payload.get("memory_free", None), int):
10609 raise errors.OpPrereqError("Secondary node %s didn't return free"
10610 " memory information" % node,
10611 errors.ECODE_STATE)
10612 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10613 raise errors.OpPrereqError("This change will prevent the instance"
10614 " from failover to its secondary node"
10615 " %s, due to not enough memory" % node,
10616 errors.ECODE_STATE)
10620 self.nic_pinst = {}
10621 for nic_op, nic_dict in self.op.nics:
10622 if nic_op == constants.DDM_REMOVE:
10623 if not instance.nics:
10624 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10625 errors.ECODE_INVAL)
10627 if nic_op != constants.DDM_ADD:
10629 if not instance.nics:
10630 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10631 " no NICs" % nic_op,
10632 errors.ECODE_INVAL)
10633 if nic_op < 0 or nic_op >= len(instance.nics):
10634 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10636 (nic_op, len(instance.nics) - 1),
10637 errors.ECODE_INVAL)
10638 old_nic_params = instance.nics[nic_op].nicparams
10639 old_nic_ip = instance.nics[nic_op].ip
10641 old_nic_params = {}
10644 update_params_dict = dict([(key, nic_dict[key])
10645 for key in constants.NICS_PARAMETERS
10646 if key in nic_dict])
10648 if "bridge" in nic_dict:
10649 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10651 new_nic_params = _GetUpdatedParams(old_nic_params,
10652 update_params_dict)
10653 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10654 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10655 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10656 self.nic_pinst[nic_op] = new_nic_params
10657 self.nic_pnew[nic_op] = new_filled_nic_params
10658 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10660 if new_nic_mode == constants.NIC_MODE_BRIDGED:
10661 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10662 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10664 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10666 self.warn.append(msg)
10668 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10669 if new_nic_mode == constants.NIC_MODE_ROUTED:
10670 if constants.INIC_IP in nic_dict:
10671 nic_ip = nic_dict[constants.INIC_IP]
10673 nic_ip = old_nic_ip
10675 raise errors.OpPrereqError("Cannot set the nic ip to None"
10676 " on a routed nic", errors.ECODE_INVAL)
10677 if constants.INIC_MAC in nic_dict:
10678 nic_mac = nic_dict[constants.INIC_MAC]
10679 if nic_mac is None:
10680 raise errors.OpPrereqError("Cannot set the nic mac to None",
10681 errors.ECODE_INVAL)
10682 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10683 # otherwise generate the mac
10684 nic_dict[constants.INIC_MAC] = \
10685 self.cfg.GenerateMAC(self.proc.GetECId())
10687 # or validate/reserve the current one
10689 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10690 except errors.ReservationError:
10691 raise errors.OpPrereqError("MAC address %s already in use"
10692 " in cluster" % nic_mac,
10693 errors.ECODE_NOTUNIQUE)
10696 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10697 raise errors.OpPrereqError("Disk operations not supported for"
10698 " diskless instances",
10699 errors.ECODE_INVAL)
10700 for disk_op, _ in self.op.disks:
10701 if disk_op == constants.DDM_REMOVE:
10702 if len(instance.disks) == 1:
10703 raise errors.OpPrereqError("Cannot remove the last disk of"
10704 " an instance", errors.ECODE_INVAL)
10705 _CheckInstanceDown(self, instance, "cannot remove disks")
10707 if (disk_op == constants.DDM_ADD and
10708 len(instance.disks) >= constants.MAX_DISKS):
10709 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10710 " add more" % constants.MAX_DISKS,
10711 errors.ECODE_STATE)
10712 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10714 if disk_op < 0 or disk_op >= len(instance.disks):
10715 raise errors.OpPrereqError("Invalid disk index %s, valid values"
10717 (disk_op, len(instance.disks)),
10718 errors.ECODE_INVAL)
10722 def _ConvertPlainToDrbd(self, feedback_fn):
10723 """Converts an instance from plain to drbd.
10726 feedback_fn("Converting template to drbd")
10727 instance = self.instance
10728 pnode = instance.primary_node
10729 snode = self.op.remote_node
10731 # create a fake disk info for _GenerateDiskTemplate
10732 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10733 constants.IDISK_VG: d.logical_id[0]}
10734 for d in instance.disks]
10735 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10736 instance.name, pnode, [snode],
10737 disk_info, None, None, 0, feedback_fn)
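# _GenerateDiskTemplate returns drbd8 disks layered on new data/meta LV children;
# below, the missing LVs are created, the existing plain LVs are renamed into place
# as the primary data LVs, and the DRBD devices are then assembled on top.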
10738 info = _GetInstanceInfoText(instance)
10739 feedback_fn("Creating aditional volumes...")
10740 # first, create the missing data and meta devices
10741 for disk in new_disks:
10742 # unfortunately this is... not too nice
10743 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10745 for child in disk.children:
10746 _CreateSingleBlockDev(self, snode, instance, child, info, True)
10747 # at this stage, all new LVs have been created, we can rename the old ones
10749 feedback_fn("Renaming original volumes...")
10750 rename_list = [(o, n.children[0].logical_id)
10751 for (o, n) in zip(instance.disks, new_disks)]
10752 result = self.rpc.call_blockdev_rename(pnode, rename_list)
10753 result.Raise("Failed to rename original LVs")
10755 feedback_fn("Initializing DRBD devices...")
10756 # all child devices are in place, we can now create the DRBD devices
10757 for disk in new_disks:
10758 for node in [pnode, snode]:
10759 f_create = node == pnode
10760 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10762 # at this point, the instance has been modified
10763 instance.disk_template = constants.DT_DRBD8
10764 instance.disks = new_disks
10765 self.cfg.Update(instance, feedback_fn)
10767 # disks are created, waiting for sync
10768 disk_abort = not _WaitForSync(self, instance,
10769 oneshot=not self.op.wait_for_sync)
10771 raise errors.OpExecError("There are some degraded disks for"
10772 " this instance, please cleanup manually")
10774 def _ConvertDrbdToPlain(self, feedback_fn):
10775 """Converts an instance from drbd to plain.
10778 instance = self.instance
10779 assert len(instance.secondary_nodes) == 1
10780 pnode = instance.primary_node
10781 snode = instance.secondary_nodes[0]
10782 feedback_fn("Converting template to plain")
10784 old_disks = instance.disks
10785 new_disks = [d.children[0] for d in old_disks]
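# For a drbd8 disk the first child is the data LV; keeping just that child (and
# dropping the meta LV) is all the plain template needs; the leftover volumes are
# removed below.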
10787 # copy over size and mode
10788 for parent, child in zip(old_disks, new_disks):
10789 child.size = parent.size
10790 child.mode = parent.mode
10792 # update instance structure
10793 instance.disks = new_disks
10794 instance.disk_template = constants.DT_PLAIN
10795 self.cfg.Update(instance, feedback_fn)
10797 feedback_fn("Removing volumes on the secondary node...")
10798 for disk in old_disks:
10799 self.cfg.SetDiskID(disk, snode)
10800 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10802 self.LogWarning("Could not remove block device %s on node %s,"
10803 " continuing anyway: %s", disk.iv_name, snode, msg)
10805 feedback_fn("Removing unneeded volumes on the primary node...")
10806 for idx, disk in enumerate(old_disks):
10807 meta = disk.children[1]
10808 self.cfg.SetDiskID(meta, pnode)
10809 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10811 self.LogWarning("Could not remove metadata for disk %d on node %s,"
10812 " continuing anyway: %s", idx, pnode, msg)
10814 def Exec(self, feedback_fn):
10815 """Modifies an instance.
10817 All parameters take effect only at the next restart of the instance.
10820 # Process here the warnings from CheckPrereq, as we don't have a
10821 # feedback_fn there.
10822 for warn in self.warn:
10823 feedback_fn("WARNING: %s" % warn)
10826 instance = self.instance
10828 for disk_op, disk_dict in self.op.disks:
10829 if disk_op == constants.DDM_REMOVE:
10830 # remove the last disk
10831 device = instance.disks.pop()
10832 device_idx = len(instance.disks)
10833 for node, disk in device.ComputeNodeTree(instance.primary_node):
10834 self.cfg.SetDiskID(disk, node)
10835 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10837 self.LogWarning("Could not remove disk/%d on node %s: %s,"
10838 " continuing anyway", device_idx, node, msg)
10839 result.append(("disk/%d" % device_idx, "remove"))
10840 elif disk_op == constants.DDM_ADD:
10842 if instance.disk_template in (constants.DT_FILE,
10843 constants.DT_SHARED_FILE):
10844 file_driver, file_path = instance.disks[0].logical_id
10845 file_path = os.path.dirname(file_path)
10847 file_driver = file_path = None
10848 disk_idx_base = len(instance.disks)
10849 new_disk = _GenerateDiskTemplate(self,
10850 instance.disk_template,
10851 instance.name, instance.primary_node,
10852 instance.secondary_nodes,
10856 disk_idx_base, feedback_fn)[0]
10857 instance.disks.append(new_disk)
10858 info = _GetInstanceInfoText(instance)
10860 logging.info("Creating volume %s for instance %s",
10861 new_disk.iv_name, instance.name)
10862 # Note: this needs to be kept in sync with _CreateDisks
10864 for node in instance.all_nodes:
10865 f_create = node == instance.primary_node
10867 _CreateBlockDev(self, node, instance, new_disk,
10868 f_create, info, f_create)
10869 except errors.OpExecError, err:
10870 self.LogWarning("Failed to create volume %s (%s) on"
10872 new_disk.iv_name, new_disk, node, err)
10873 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10874 (new_disk.size, new_disk.mode)))
10876 # change a given disk
10877 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10878 result.append(("disk.mode/%d" % disk_op,
10879 disk_dict[constants.IDISK_MODE]))
10881 if self.op.disk_template:
10882 r_shut = _ShutdownInstanceDisks(self, instance)
10884 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10885 " proceed with disk template conversion")
10886 mode = (instance.disk_template, self.op.disk_template)
10888 self._DISK_CONVERSIONS[mode](self, feedback_fn)
10890 self.cfg.ReleaseDRBDMinors(instance.name)
10892 result.append(("disk_template", self.op.disk_template))
10895 for nic_op, nic_dict in self.op.nics:
10896 if nic_op == constants.DDM_REMOVE:
10897 # remove the last nic
10898 del instance.nics[-1]
10899 result.append(("nic.%d" % len(instance.nics), "remove"))
10900 elif nic_op == constants.DDM_ADD:
10901 # mac and bridge should be set, by now
10902 mac = nic_dict[constants.INIC_MAC]
10903 ip = nic_dict.get(constants.INIC_IP, None)
10904 nicparams = self.nic_pinst[constants.DDM_ADD]
10905 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10906 instance.nics.append(new_nic)
10907 result.append(("nic.%d" % (len(instance.nics) - 1),
10908 "add:mac=%s,ip=%s,mode=%s,link=%s" %
10909 (new_nic.mac, new_nic.ip,
10910 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10911 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10914 for key in (constants.INIC_MAC, constants.INIC_IP):
10915 if key in nic_dict:
10916 setattr(instance.nics[nic_op], key, nic_dict[key])
10917 if nic_op in self.nic_pinst:
10918 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10919 for key, val in nic_dict.iteritems():
10920 result.append(("nic.%s/%d" % (key, nic_op), val))
10923 if self.op.hvparams:
10924 instance.hvparams = self.hv_inst
10925 for key, val in self.op.hvparams.iteritems():
10926 result.append(("hv/%s" % key, val))
10929 if self.op.beparams:
10930 instance.beparams = self.be_inst
10931 for key, val in self.op.beparams.iteritems():
10932 result.append(("be/%s" % key, val))
10935 if self.op.os_name:
10936 instance.os = self.op.os_name
10939 if self.op.osparams:
10940 instance.osparams = self.os_inst
10941 for key, val in self.op.osparams.iteritems():
10942 result.append(("os/%s" % key, val))
10944 self.cfg.Update(instance, feedback_fn)
10948 _DISK_CONVERSIONS = {
10949 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10950 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10954 class LUBackupQuery(NoHooksLU):
10955 """Query the exports list
10960 def ExpandNames(self):
10961 self.needed_locks = {}
10962 self.share_locks[locking.LEVEL_NODE] = 1
10963 if not self.op.nodes:
10964 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10966 self.needed_locks[locking.LEVEL_NODE] = \
10967 _GetWantedNodes(self, self.op.nodes)
10969 def Exec(self, feedback_fn):
10970 """Compute the list of all the exported system images.
10973 @return: a dictionary with the structure node->(export-list)
10974 where export-list is a list of the instances exported on
10978 self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
10979 rpcresult = self.rpc.call_export_list(self.nodes)
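# A node whose export-list RPC failed is reported as False rather than aborting
# the whole query.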
10981 for node in rpcresult:
10982 if rpcresult[node].fail_msg:
10983 result[node] = False
10985 result[node] = rpcresult[node].payload
10990 class LUBackupPrepare(NoHooksLU):
10991 """Prepares an instance for an export and returns useful information.
10996 def ExpandNames(self):
10997 self._ExpandAndLockInstance()
10999 def CheckPrereq(self):
11000 """Check prerequisites.
11003 instance_name = self.op.instance_name
11005 self.instance = self.cfg.GetInstanceInfo(instance_name)
11006 assert self.instance is not None, \
11007 "Cannot retrieve locked instance %s" % self.op.instance_name
11008 _CheckNodeOnline(self, self.instance.primary_node)
11010 self._cds = _GetClusterDomainSecret()
11012 def Exec(self, feedback_fn):
11013 """Prepares an instance for an export.
11016 instance = self.instance
11018 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11019 salt = utils.GenerateSecret(8)
11021 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11022 result = self.rpc.call_x509_cert_create(instance.primary_node,
11023 constants.RIE_CERT_VALIDITY)
11024 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11026 (name, cert_pem) = result.payload
11028 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11032 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11033 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11035 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11041 class LUBackupExport(LogicalUnit):
11042 """Export an instance to an image in the cluster.
11045 HPATH = "instance-export"
11046 HTYPE = constants.HTYPE_INSTANCE
11049 def CheckArguments(self):
11050 """Check the arguments.
11053 self.x509_key_name = self.op.x509_key_name
11054 self.dest_x509_ca_pem = self.op.destination_x509_ca
11056 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11057 if not self.x509_key_name:
11058 raise errors.OpPrereqError("Missing X509 key name for encryption",
11059 errors.ECODE_INVAL)
11061 if not self.dest_x509_ca_pem:
11062 raise errors.OpPrereqError("Missing destination X509 CA",
11063 errors.ECODE_INVAL)
11065 def ExpandNames(self):
11066 self._ExpandAndLockInstance()
11068 # Lock all nodes for local exports
11069 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11070 # FIXME: lock only instance primary and destination node
11072 # Sad but true, for now we have to lock all nodes, as we don't know where
11073 # the previous export might be, and in this LU we search for it and
11074 # remove it from its current node. In the future we could fix this by:
11075 # - making a tasklet to search (share-lock all), then create the
11076 # new one, then another tasklet to remove the old export afterwards
11077 # - removing the removal operation altogether
11078 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11080 def DeclareLocks(self, level):
11081 """Last minute lock declaration."""
11082 # All nodes are locked anyway, so nothing to do here.
11084 def BuildHooksEnv(self):
11085 """Build hooks env.
11087 This will run on the master, primary node and target node.
11091 "EXPORT_MODE": self.op.mode,
11092 "EXPORT_NODE": self.op.target_node,
11093 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11094 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11095 # TODO: Generic function for boolean env variables
11096 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11099 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11103 def BuildHooksNodes(self):
11104 """Build hooks nodes.
11107 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11109 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11110 nl.append(self.op.target_node)
11114 def CheckPrereq(self):
11115 """Check prerequisites.
11117 This checks that the instance and node names are valid.
11120 instance_name = self.op.instance_name
11122 self.instance = self.cfg.GetInstanceInfo(instance_name)
11123 assert self.instance is not None, \
11124 "Cannot retrieve locked instance %s" % self.op.instance_name
11125 _CheckNodeOnline(self, self.instance.primary_node)
11127 if (self.op.remove_instance and self.instance.admin_up and
11128 not self.op.shutdown):
11129 raise errors.OpPrereqError("Can not remove instance without shutting it"
11132 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11133 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11134 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11135 assert self.dst_node is not None
11137 _CheckNodeOnline(self, self.dst_node.name)
11138 _CheckNodeNotDrained(self, self.dst_node.name)
11141 self.dest_disk_info = None
11142 self.dest_x509_ca = None
11144 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11145 self.dst_node = None
11147 if len(self.op.target_node) != len(self.instance.disks):
11148 raise errors.OpPrereqError(("Received destination information for %s"
11149 " disks, but instance %s has %s disks") %
11150 (len(self.op.target_node), instance_name,
11151 len(self.instance.disks)),
11152 errors.ECODE_INVAL)
11154 cds = _GetClusterDomainSecret()
11156 # Check X509 key name
11158 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11159 except (TypeError, ValueError), err:
11160 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11162 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11163 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11164 errors.ECODE_INVAL)
11166 # Load and verify CA
11168 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11169 except OpenSSL.crypto.Error, err:
11170 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11171 (err, ), errors.ECODE_INVAL)
11173 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11174 if errcode is not None:
11175 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11176 (msg, ), errors.ECODE_INVAL)
11178 self.dest_x509_ca = cert
11180 # Verify target information
11182 for idx, disk_data in enumerate(self.op.target_node):
11184 (host, port, magic) = \
11185 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11186 except errors.GenericError, err:
11187 raise errors.OpPrereqError("Target info for disk %s: %s" %
11188 (idx, err), errors.ECODE_INVAL)
11190 disk_info.append((host, port, magic))
11192 assert len(disk_info) == len(self.op.target_node)
11193 self.dest_disk_info = disk_info
11196 raise errors.ProgrammerError("Unhandled export mode %r" %
11199 # instance disk type verification
11200 # TODO: Implement export support for file-based disks
11201 for disk in self.instance.disks:
11202 if disk.dev_type == constants.LD_FILE:
11203 raise errors.OpPrereqError("Export not supported for instances with"
11204 " file-based disks", errors.ECODE_INVAL)
11206 def _CleanupExports(self, feedback_fn):
11207 """Removes exports of current instance from all other nodes.
11209 If an instance in a cluster with nodes A..D was exported to node C, its
11210 exports will be removed from the nodes A, B and D.
11213 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11215 nodelist = self.cfg.GetNodeList()
11216 nodelist.remove(self.dst_node.name)
11218 # on one-node clusters nodelist will be empty after the removal
11219 # if we proceed, the backup would be removed because OpBackupQuery
11220 # substitutes an empty list with the full cluster node list.
11221 iname = self.instance.name
11223 feedback_fn("Removing old exports for instance %s" % iname)
11224 exportlist = self.rpc.call_export_list(nodelist)
11225 for node in exportlist:
11226 if exportlist[node].fail_msg:
11228 if iname in exportlist[node].payload:
11229 msg = self.rpc.call_export_remove(node, iname).fail_msg
11231 self.LogWarning("Could not remove older export for instance %s"
11232 " on node %s: %s", iname, node, msg)
11234 def Exec(self, feedback_fn):
11235 """Export an instance to an image in the cluster.
11238 assert self.op.mode in constants.EXPORT_MODES
11240 instance = self.instance
11241 src_node = instance.primary_node
11243 if self.op.shutdown:
11244 # shutdown the instance, but not the disks
11245 feedback_fn("Shutting down instance %s" % instance.name)
11246 result = self.rpc.call_instance_shutdown(src_node, instance,
11247 self.op.shutdown_timeout)
11248 # TODO: Maybe ignore failures if ignore_remove_failures is set
11249 result.Raise("Could not shutdown instance %s on"
11250 " node %s" % (instance.name, src_node))
11252 # set the disks ID correctly since call_instance_start needs the
11253 # correct drbd minor to create the symlinks
11254 for disk in instance.disks:
11255 self.cfg.SetDiskID(disk, src_node)
11257 activate_disks = (not instance.admin_up)
11260 # Activate the instance disks if we're exporting a stopped instance
11261 feedback_fn("Activating disks for %s" % instance.name)
11262 _StartInstanceDisks(self, instance, None)
11265 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11268 helper.CreateSnapshots()
11270 if (self.op.shutdown and instance.admin_up and
11271 not self.op.remove_instance):
11272 assert not activate_disks
11273 feedback_fn("Starting instance %s" % instance.name)
11274 result = self.rpc.call_instance_start(src_node, instance,
11276 msg = result.fail_msg
11278 feedback_fn("Failed to start instance: %s" % msg)
11279 _ShutdownInstanceDisks(self, instance)
11280 raise errors.OpExecError("Could not start instance: %s" % msg)
11282 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11283 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11284 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11285 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11286 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11288 (key_name, _, _) = self.x509_key_name
11291 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11294 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11295 key_name, dest_ca_pem,
11300 # Check for backwards compatibility
11301 assert len(dresults) == len(instance.disks)
11302 assert compat.all(isinstance(i, bool) for i in dresults), \
11303 "Not all results are boolean: %r" % dresults
11307 feedback_fn("Deactivating disks for %s" % instance.name)
11308 _ShutdownInstanceDisks(self, instance)
11310 if not (compat.all(dresults) and fin_resu):
11313 failures.append("export finalization")
11314 if not compat.all(dresults):
11315 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11317 failures.append("disk export: disk(s) %s" % fdsk)
11319 raise errors.OpExecError("Export failed, errors in %s" %
11320 utils.CommaJoin(failures))
11322 # At this point the export was successful; we can clean up and finish
11324 # Remove instance if requested
11325 if self.op.remove_instance:
11326 feedback_fn("Removing instance %s" % instance.name)
11327 _RemoveInstance(self, feedback_fn, instance,
11328 self.op.ignore_remove_failures)
11330 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11331 self._CleanupExports(feedback_fn)
11333 return fin_resu, dresults
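# Result shape sketch (hypothetical values): fin_resu reports the overall
# finalization status and dresults holds one boolean per instance disk, so a
# fully successful export of a two-disk instance returns roughly
#   (True, [True, True])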
11336 class LUBackupRemove(NoHooksLU):
11337 """Remove exports related to the named instance.
11342 def ExpandNames(self):
11343 self.needed_locks = {}
11344 # We need all nodes to be locked in order for RemoveExport to work, but we
11345 # don't need to lock the instance itself, as nothing will happen to it (and
11346 # we can remove exports also for a removed instance)
11347 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11349 def Exec(self, feedback_fn):
11350 """Remove any export.
11353 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11354 # If the instance was not found we'll try with the name that was passed in.
11355 # This will only work if it was an FQDN, though.
11357 if not instance_name:
11359 instance_name = self.op.instance_name
11361 locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
11362 exportlist = self.rpc.call_export_list(locked_nodes)
11364 for node in exportlist:
11365 msg = exportlist[node].fail_msg
11367 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11369 if instance_name in exportlist[node].payload:
11371 result = self.rpc.call_export_remove(node, instance_name)
11372 msg = result.fail_msg
11374 logging.error("Could not remove export for instance %s"
11375 " on node %s: %s", instance_name, node, msg)
11377 if fqdn_warn and not found:
11378 feedback_fn("Export not found. If trying to remove an export belonging"
11379 " to a deleted instance please use its Fully Qualified"
11383 class LUGroupAdd(LogicalUnit):
11384 """Logical unit for creating node groups.
11387 HPATH = "group-add"
11388 HTYPE = constants.HTYPE_GROUP
11391 def ExpandNames(self):
11392 # We need the new group's UUID here so that we can create and acquire the
11393 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11394 # that it should not check whether the UUID exists in the configuration.
11395 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11396 self.needed_locks = {}
11397 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11399 def CheckPrereq(self):
11400 """Check prerequisites.
11402 This checks that the given group name is not an existing node group
11407 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11408 except errors.OpPrereqError:
11411 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11412 " node group (UUID: %s)" %
11413 (self.op.group_name, existing_uuid),
11414 errors.ECODE_EXISTS)
11416 if self.op.ndparams:
11417 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11419 def BuildHooksEnv(self):
11420 """Build hooks env.
11424 "GROUP_NAME": self.op.group_name,
11427 def BuildHooksNodes(self):
11428 """Build hooks nodes.
11431 mn = self.cfg.GetMasterNode()
11432 return ([mn], [mn])
11434 def Exec(self, feedback_fn):
11435 """Add the node group to the cluster.
11438 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11439 uuid=self.group_uuid,
11440 alloc_policy=self.op.alloc_policy,
11441 ndparams=self.op.ndparams)
11443 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11444 del self.remove_locks[locking.LEVEL_NODEGROUP]
11447 class LUGroupAssignNodes(NoHooksLU):
11448 """Logical unit for assigning nodes to groups.
11453 def ExpandNames(self):
11454 # These raise errors.OpPrereqError on their own:
11455 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11456 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11458 # We want to lock all the affected nodes and groups. We have readily
11459 # available the list of nodes, and the *destination* group. To gather the
11460 # list of "source" groups, we need to fetch node information later on.
11461 self.needed_locks = {
11462 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11463 locking.LEVEL_NODE: self.op.nodes,
11466 def DeclareLocks(self, level):
11467 if level == locking.LEVEL_NODEGROUP:
11468 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11470 # Try to get all affected nodes' groups without having the group or node
11471 # lock yet. Needs verification later in the code flow.
11472 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11474 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11476 def CheckPrereq(self):
11477 """Check prerequisites.
11480 assert self.needed_locks[locking.LEVEL_NODEGROUP]
11481 assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
11482 frozenset(self.op.nodes))
11484 expected_locks = (set([self.group_uuid]) |
11485 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11486 actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
11487 if actual_locks != expected_locks:
11488 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11489 " current groups are '%s', used to be '%s'" %
11490 (utils.CommaJoin(expected_locks),
11491 utils.CommaJoin(actual_locks)))
11493 self.node_data = self.cfg.GetAllNodesInfo()
11494 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11495 instance_data = self.cfg.GetAllInstancesInfo()
11497 if self.group is None:
11498 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11499 (self.op.group_name, self.group_uuid))
11501 (new_splits, previous_splits) = \
11502 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11503 for node in self.op.nodes],
11504 self.node_data, instance_data)
11507 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11509 if not self.op.force:
11510 raise errors.OpExecError("The following instances get split by this"
11511 " change and --force was not given: %s" %
11514 self.LogWarning("This operation will split the following instances: %s",
11517 if previous_splits:
11518 self.LogWarning("In addition, these already-split instances continue"
11519 " to be split across groups: %s",
11520 utils.CommaJoin(utils.NiceSort(previous_splits)))
11522 def Exec(self, feedback_fn):
11523 """Assign nodes to a new group.
11526 for node in self.op.nodes:
11527 self.node_data[node].group = self.group_uuid
11529 # FIXME: Depends on side-effects of modifying the result of
11530 # C{cfg.GetAllNodesInfo}
11532 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11535 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11536 """Check for split instances after a node assignment.
11538 This method considers a series of node assignments as an atomic operation,
11539 and returns information about split instances after applying the set of
11542 In particular, it returns information about newly split instances, and
11543 instances that were already split, and remain so after the change.
11545 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11548 @type changes: list of (node_name, new_group_uuid) pairs.
11549 @param changes: list of node assignments to consider.
11550 @param node_data: a dict with data for all nodes
11551 @param instance_data: a dict with all instances to consider
11552 @rtype: a two-tuple
11553 @return: a list of instances that were previously okay and become split as a
11554 consequence of this change, and a list of instances that were previously
11555 split and this change does not fix.
11558 changed_nodes = dict((node, group) for node, group in changes
11559 if node_data[node].group != group)
11561 all_split_instances = set()
11562 previously_split_instances = set()
11564 def InstanceNodes(instance):
11565 return [instance.primary_node] + list(instance.secondary_nodes)
11567 for inst in instance_data.values():
11568 if inst.disk_template not in constants.DTS_INT_MIRROR:
11571 instance_nodes = InstanceNodes(inst)
11573 if len(set(node_data[node].group for node in instance_nodes)) > 1:
11574 previously_split_instances.add(inst.name)
11576 if len(set(changed_nodes.get(node, node_data[node].group)
11577 for node in instance_nodes)) > 1:
11578 all_split_instances.add(inst.name)
11580 return (list(all_split_instances - previously_split_instances),
11581 list(previously_split_instances & all_split_instances))
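# Minimal example (hypothetical names), assuming groups g1/g2 and a DRBD
# instance with primary "n1" and secondary "n2", both currently in g1:
# changes = [("n2", "g2")] makes the instance span two groups, so it lands in
# the first (newly split) list; an instance already spanning g1 and g2 before
# the change would instead appear in the second (previously split) list.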
11584 class _GroupQuery(_QueryBase):
11585 FIELDS = query.GROUP_FIELDS
11587 def ExpandNames(self, lu):
11588 lu.needed_locks = {}
11590 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11591 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11594 self.wanted = [name_to_uuid[name]
11595 for name in utils.NiceSort(name_to_uuid.keys())]
11597 # Accept names as either group names or UUIDs.
11600 all_uuid = frozenset(self._all_groups.keys())
11602 for name in self.names:
11603 if name in all_uuid:
11604 self.wanted.append(name)
11605 elif name in name_to_uuid:
11606 self.wanted.append(name_to_uuid[name])
11608 missing.append(name)
11611 raise errors.OpPrereqError("Some groups do not exist: %s" %
11612 utils.CommaJoin(missing),
11613 errors.ECODE_NOENT)
11615 def DeclareLocks(self, lu, level):
11618 def _GetQueryData(self, lu):
11619 """Computes the list of node groups and their attributes.
11622 do_nodes = query.GQ_NODE in self.requested_data
11623 do_instances = query.GQ_INST in self.requested_data
11625 group_to_nodes = None
11626 group_to_instances = None
11628 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11629 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11630 # latter GetAllInstancesInfo() is not enough, for we have to go through
11631 # instance->node. Hence, we will need to process nodes even if we only need
11632 # instance information.
11633 if do_nodes or do_instances:
11634 all_nodes = lu.cfg.GetAllNodesInfo()
11635 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11638 for node in all_nodes.values():
11639 if node.group in group_to_nodes:
11640 group_to_nodes[node.group].append(node.name)
11641 node_to_group[node.name] = node.group
11644 all_instances = lu.cfg.GetAllInstancesInfo()
11645 group_to_instances = dict((uuid, []) for uuid in self.wanted)
11647 for instance in all_instances.values():
11648 node = instance.primary_node
11649 if node in node_to_group:
11650 group_to_instances[node_to_group[node]].append(instance.name)
11653 # Do not pass on node information if it was not requested.
11654 group_to_nodes = None
11656 return query.GroupQueryData([self._all_groups[uuid]
11657 for uuid in self.wanted],
11658 group_to_nodes, group_to_instances)
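# Data shape sketch (hypothetical UUIDs): when GQ_NODE/GQ_INST were requested,
# group_to_nodes and group_to_instances are plain dicts keyed by group UUID,
# e.g. {"uuid-1": ["node1", "node2"]} and {"uuid-1": ["inst1"]}; either one is
# None when the corresponding data was not asked for.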
11661 class LUGroupQuery(NoHooksLU):
11662 """Logical unit for querying node groups.
11667 def CheckArguments(self):
11668 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
11669 self.op.output_fields, False)
11671 def ExpandNames(self):
11672 self.gq.ExpandNames(self)
11674 def Exec(self, feedback_fn):
11675 return self.gq.OldStyleQuery(self)
11678 class LUGroupSetParams(LogicalUnit):
11679 """Modifies the parameters of a node group.
11682 HPATH = "group-modify"
11683 HTYPE = constants.HTYPE_GROUP
11686 def CheckArguments(self):
11689 self.op.alloc_policy,
11692 if all_changes.count(None) == len(all_changes):
11693 raise errors.OpPrereqError("Please pass at least one modification",
11694 errors.ECODE_INVAL)
11696 def ExpandNames(self):
11697 # This raises errors.OpPrereqError on its own:
11698 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11700 self.needed_locks = {
11701 locking.LEVEL_NODEGROUP: [self.group_uuid],
11704 def CheckPrereq(self):
11705 """Check prerequisites.
11708 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11710 if self.group is None:
11711 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11712 (self.op.group_name, self.group_uuid))
11714 if self.op.ndparams:
11715 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11716 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11717 self.new_ndparams = new_ndparams
11719 def BuildHooksEnv(self):
11720 """Build hooks env.
11724 "GROUP_NAME": self.op.group_name,
11725 "NEW_ALLOC_POLICY": self.op.alloc_policy,
11728 def BuildHooksNodes(self):
11729 """Build hooks nodes.
11732 mn = self.cfg.GetMasterNode()
11733 return ([mn], [mn])
11735 def Exec(self, feedback_fn):
11736 """Modifies the node group.
11741 if self.op.ndparams:
11742 self.group.ndparams = self.new_ndparams
11743 result.append(("ndparams", str(self.group.ndparams)))
11745 if self.op.alloc_policy:
11746 self.group.alloc_policy = self.op.alloc_policy
11748 self.cfg.Update(self.group, feedback_fn)
11753 class LUGroupRemove(LogicalUnit):
11754 HPATH = "group-remove"
11755 HTYPE = constants.HTYPE_GROUP
11758 def ExpandNames(self):
11759 # This will raise errors.OpPrereqError on its own:
11760 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11761 self.needed_locks = {
11762 locking.LEVEL_NODEGROUP: [self.group_uuid],
11765 def CheckPrereq(self):
11766 """Check prerequisites.
11768 This checks that the given group name exists as a node group, that it is
11769 empty (i.e., contains no nodes), and that it is not the last group of the
11773 # Verify that the group is empty.
11774 group_nodes = [node.name
11775 for node in self.cfg.GetAllNodesInfo().values()
11776 if node.group == self.group_uuid]
11779 raise errors.OpPrereqError("Group '%s' not empty, has the following"
11781 (self.op.group_name,
11782 utils.CommaJoin(utils.NiceSort(group_nodes))),
11783 errors.ECODE_STATE)
11785 # Verify the cluster would not be left group-less.
11786 if len(self.cfg.GetNodeGroupList()) == 1:
11787 raise errors.OpPrereqError("Group '%s' is the only group,"
11788 " cannot be removed" %
11789 self.op.group_name,
11790 errors.ECODE_STATE)
11792 def BuildHooksEnv(self):
11793 """Build hooks env.
11797 "GROUP_NAME": self.op.group_name,
11800 def BuildHooksNodes(self):
11801 """Build hooks nodes.
11804 mn = self.cfg.GetMasterNode()
11805 return ([mn], [mn])
11807 def Exec(self, feedback_fn):
11808 """Remove the node group.
11812 self.cfg.RemoveNodeGroup(self.group_uuid)
11813 except errors.ConfigurationError:
11814 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11815 (self.op.group_name, self.group_uuid))
11817 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11820 class LUGroupRename(LogicalUnit):
11821 HPATH = "group-rename"
11822 HTYPE = constants.HTYPE_GROUP
11825 def ExpandNames(self):
11826 # This raises errors.OpPrereqError on its own:
11827 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11829 self.needed_locks = {
11830 locking.LEVEL_NODEGROUP: [self.group_uuid],
11833 def CheckPrereq(self):
11834 """Check prerequisites.
11836 Ensures requested new name is not yet used.
11840 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11841 except errors.OpPrereqError:
11844 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11845 " node group (UUID: %s)" %
11846 (self.op.new_name, new_name_uuid),
11847 errors.ECODE_EXISTS)
11849 def BuildHooksEnv(self):
11850 """Build hooks env.
11854 "OLD_NAME": self.op.group_name,
11855 "NEW_NAME": self.op.new_name,
11858 def BuildHooksNodes(self):
11859 """Build hooks nodes.
11862 mn = self.cfg.GetMasterNode()
11864 all_nodes = self.cfg.GetAllNodesInfo()
11865 all_nodes.pop(mn, None)
11868 run_nodes.extend(node.name for node in all_nodes.values()
11869 if node.group == self.group_uuid)
11871 return (run_nodes, run_nodes)
11873 def Exec(self, feedback_fn):
11874 """Rename the node group.
11877 group = self.cfg.GetNodeGroup(self.group_uuid)
11880 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11881 (self.op.group_name, self.group_uuid))
11883 group.name = self.op.new_name
11884 self.cfg.Update(group, feedback_fn)
11886 return self.op.new_name
11889 class LUGroupEvacuate(LogicalUnit):
11890 HPATH = "group-evacuate"
11891 HTYPE = constants.HTYPE_GROUP
11894 def ExpandNames(self):
11895 # This raises errors.OpPrereqError on its own:
11896 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11898 if self.op.target_groups:
11899 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11900 self.op.target_groups)
11902 self.req_target_uuids = []
11904 if self.group_uuid in self.req_target_uuids:
11905 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
11906 " as a target group (targets are %s)" %
11908 utils.CommaJoin(self.req_target_uuids)),
11909 errors.ECODE_INVAL)
11911 if not self.op.iallocator:
11912 # Use default iallocator
11913 self.op.iallocator = self.cfg.GetDefaultIAllocator()
11915 if not self.op.iallocator:
11916 raise errors.OpPrereqError("No iallocator was specified, neither in the"
11917 " opcode nor as a cluster-wide default",
11918 errors.ECODE_INVAL)
11920 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
11921 self.needed_locks = {
11922 locking.LEVEL_INSTANCE: [],
11923 locking.LEVEL_NODEGROUP: [],
11924 locking.LEVEL_NODE: [],
11927 def DeclareLocks(self, level):
11928 if level == locking.LEVEL_INSTANCE:
11929 assert not self.needed_locks[locking.LEVEL_INSTANCE]
11931 # Lock instances optimistically, needs verification once node and group
11932 # locks have been acquired
11933 self.needed_locks[locking.LEVEL_INSTANCE] = \
11934 self.cfg.GetNodeGroupInstances(self.group_uuid)
11936 elif level == locking.LEVEL_NODEGROUP:
11937 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11939 if self.req_target_uuids:
11940 lock_groups = set([self.group_uuid] + self.req_target_uuids)
11942 # Lock all groups used by instances optimistically; this requires going
11943 # via the node before it's locked, requiring verification later on
11944 lock_groups.update(group_uuid
11945 for instance_name in
11946 self.glm.list_owned(locking.LEVEL_INSTANCE)
11948 self.cfg.GetInstanceNodeGroups(instance_name))
11950 # No target groups, need to lock all of them
11951 lock_groups = locking.ALL_SET
11953 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11955 elif level == locking.LEVEL_NODE:
11956 # This will only lock the nodes in the group to be evacuated which
11957 # contain actual instances
11958 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11959 self._LockInstancesNodes()
11961 # Lock all nodes in group to be evacuated
11962 assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
11963 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
11964 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11966 def CheckPrereq(self):
11967 owned_instances = frozenset(self.glm.list_owned(locking.LEVEL_INSTANCE))
11968 owned_groups = frozenset(self.glm.list_owned(locking.LEVEL_NODEGROUP))
11969 owned_nodes = frozenset(self.glm.list_owned(locking.LEVEL_NODE))
11971 assert owned_groups.issuperset(self.req_target_uuids)
11972 assert self.group_uuid in owned_groups
11974 # Check if locked instances are still correct
11975 wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
11976 if owned_instances != wanted_instances:
11977 raise errors.OpPrereqError("Instances in node group to be evacuated (%s)"
11978 " changed since locks were acquired, wanted"
11979 " %s, have %s; retry the operation" %
11981 utils.CommaJoin(wanted_instances),
11982 utils.CommaJoin(owned_instances)),
11983 errors.ECODE_STATE)
11985 # Get instance information
11986 self.instances = dict((name, self.cfg.GetInstanceInfo(name))
11987 for name in owned_instances)
11989 # Check if node groups for locked instances are still correct
11990 for instance_name in owned_instances:
11991 inst = self.instances[instance_name]
11992 assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
11993 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
11994 assert owned_nodes.issuperset(inst.all_nodes), \
11995 "Instance %s's nodes changed while we kept the lock" % instance_name
11997 inst_groups = self.cfg.GetInstanceNodeGroups(instance_name)
11998 if not owned_groups.issuperset(inst_groups):
11999 raise errors.OpPrereqError("Instance's node groups changed since locks"
12000 " were acquired, current groups are '%s',"
12001 " owning groups '%s'; retry the operation" %
12002 (utils.CommaJoin(inst_groups),
12003 utils.CommaJoin(owned_groups)),
12004 errors.ECODE_STATE)
12006 if self.req_target_uuids:
12007 # User requested specific target groups
12008 self.target_uuids = self.req_target_uuids
12010 # All groups except the one to be evacuated are potential targets
12011 self.target_uuids = [group_uuid for group_uuid in owned_groups
12012 if group_uuid != self.group_uuid]
12014 if not self.target_uuids:
12015 raise errors.OpExecError("There are no possible target groups")
12017 def BuildHooksEnv(self):
12018 """Build hooks env.
12022 "GROUP_NAME": self.op.group_name,
12023 "TARGET_GROUPS": " ".join(self.target_uuids),
12026 def BuildHooksNodes(self):
12027 """Build hooks nodes.
12030 mn = self.cfg.GetMasterNode()
12032 assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
12034 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12036 return (run_nodes, run_nodes)
12038 def Exec(self, feedback_fn):
12039 instances = list(self.glm.list_owned(locking.LEVEL_INSTANCE))
12041 assert self.group_uuid not in self.target_uuids
12043 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12044 instances=instances, target_groups=self.target_uuids)
12046 ial.Run(self.op.iallocator)
12048 if not ial.success:
12049 raise errors.OpPrereqError("Can't compute group evacuation using"
12050 " iallocator '%s': %s" %
12051 (self.op.iallocator, ial.info),
12052 errors.ECODE_NORES)
12054 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12056 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12057 len(jobs), self.op.group_name)
12059 return ResultWithJobs(jobs)
12062 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
12063 """Generic tags LU.
12065 This is an abstract class which is the parent of all the other tags LUs.
12068 def ExpandNames(self):
12069 self.group_uuid = None
12070 self.needed_locks = {}
12071 if self.op.kind == constants.TAG_NODE:
12072 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12073 self.needed_locks[locking.LEVEL_NODE] = self.op.name
12074 elif self.op.kind == constants.TAG_INSTANCE:
12075 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12076 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12077 elif self.op.kind == constants.TAG_NODEGROUP:
12078 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12080 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12081 # not possible to acquire the BGL based on opcode parameters)
12083 def CheckPrereq(self):
12084 """Check prerequisites.
12087 if self.op.kind == constants.TAG_CLUSTER:
12088 self.target = self.cfg.GetClusterInfo()
12089 elif self.op.kind == constants.TAG_NODE:
12090 self.target = self.cfg.GetNodeInfo(self.op.name)
12091 elif self.op.kind == constants.TAG_INSTANCE:
12092 self.target = self.cfg.GetInstanceInfo(self.op.name)
12093 elif self.op.kind == constants.TAG_NODEGROUP:
12094 self.target = self.cfg.GetNodeGroup(self.group_uuid)
12096 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12097 str(self.op.kind), errors.ECODE_INVAL)
12100 class LUTagsGet(TagsLU):
12101 """Returns the tags of a given object.
12106 def ExpandNames(self):
12107 TagsLU.ExpandNames(self)
12109 # Share locks as this is only a read operation
12110 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
12112 def Exec(self, feedback_fn):
12113 """Returns the tag list.
12116 return list(self.target.GetTags())
12119 class LUTagsSearch(NoHooksLU):
12120 """Searches the tags for a given pattern.
12125 def ExpandNames(self):
12126 self.needed_locks = {}
12128 def CheckPrereq(self):
12129 """Check prerequisites.
12131 This checks the pattern passed for validity by compiling it.
12135 self.re = re.compile(self.op.pattern)
12136 except re.error, err:
12137 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12138 (self.op.pattern, err), errors.ECODE_INVAL)
12140 def Exec(self, feedback_fn):
12141 """Returns the tag list.
12145 tgts = [("/cluster", cfg.GetClusterInfo())]
12146 ilist = cfg.GetAllInstancesInfo().values()
12147 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12148 nlist = cfg.GetAllNodesInfo().values()
12149 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12150 tgts.extend(("/nodegroup/%s" % n.name, n)
12151 for n in cfg.GetAllNodeGroupsInfo().values())
12153 for path, target in tgts:
12154 for tag in target.GetTags():
12155 if self.re.search(tag):
12156 results.append((path, tag))
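# Example of the accumulated results (hypothetical names): every match is a
# (path, tag) pair such as ("/instances/inst1.example.com", "prod") or
# ("/nodes/node1.example.com", "rack1"), with paths mirroring the tgts list
# built above.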
12160 class LUTagsSet(TagsLU):
12161 """Sets a tag on a given object.
12166 def CheckPrereq(self):
12167 """Check prerequisites.
12169 This checks the type and length of the tag name and value.
12172 TagsLU.CheckPrereq(self)
12173 for tag in self.op.tags:
12174 objects.TaggableObject.ValidateTag(tag)
12176 def Exec(self, feedback_fn):
12181 for tag in self.op.tags:
12182 self.target.AddTag(tag)
12183 except errors.TagError, err:
12184 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12185 self.cfg.Update(self.target, feedback_fn)
12188 class LUTagsDel(TagsLU):
12189 """Delete a list of tags from a given object.
12194 def CheckPrereq(self):
12195 """Check prerequisites.
12197 This checks that we have the given tag.
12200 TagsLU.CheckPrereq(self)
12201 for tag in self.op.tags:
12202 objects.TaggableObject.ValidateTag(tag)
12203 del_tags = frozenset(self.op.tags)
12204 cur_tags = self.target.GetTags()
12206 diff_tags = del_tags - cur_tags
12208 diff_names = ("'%s'" % i for i in sorted(diff_tags))
12209 raise errors.OpPrereqError("Tag(s) %s not found" %
12210 (utils.CommaJoin(diff_names), ),
12211 errors.ECODE_NOENT)
12213 def Exec(self, feedback_fn):
12214 """Remove the tag from the object.
12217 for tag in self.op.tags:
12218 self.target.RemoveTag(tag)
12219 self.cfg.Update(self.target, feedback_fn)
12222 class LUTestDelay(NoHooksLU):
12223 """Sleep for a specified amount of time.
12225 This LU sleeps on the master and/or nodes for a specified amount of
12231 def ExpandNames(self):
12232 """Expand names and set required locks.
12234 This expands the node list, if any.
12237 self.needed_locks = {}
12238 if self.op.on_nodes:
12239 # _GetWantedNodes can be used here, but is not always appropriate to use
12240 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12241 # more information.
12242 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12243 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12245 def _TestDelay(self):
12246 """Do the actual sleep.
12249 if self.op.on_master:
12250 if not utils.TestDelay(self.op.duration):
12251 raise errors.OpExecError("Error during master delay test")
12252 if self.op.on_nodes:
12253 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12254 for node, node_result in result.items():
12255 node_result.Raise("Failure during rpc call to node %s" % node)
12257 def Exec(self, feedback_fn):
12258 """Execute the test delay opcode, with the wanted repetitions.
12261 if self.op.repeat == 0:
12264 top_value = self.op.repeat - 1
12265 for i in range(self.op.repeat):
12266 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12270 class LUTestJqueue(NoHooksLU):
12271 """Utility LU to test some aspects of the job queue.
12276 # Must be lower than default timeout for WaitForJobChange to see whether it
12277 # notices changed jobs
12278 _CLIENT_CONNECT_TIMEOUT = 20.0
12279 _CLIENT_CONFIRM_TIMEOUT = 60.0
12282 def _NotifyUsingSocket(cls, cb, errcls):
12283 """Opens a Unix socket and waits for another program to connect.
12286 @param cb: Callback to send socket name to client
12287 @type errcls: class
12288 @param errcls: Exception class to use for errors
12291 # Using a temporary directory as there's no easy way to create temporary
12292 # sockets without writing a custom loop around tempfile.mktemp and
12294 tmpdir = tempfile.mkdtemp()
12296 tmpsock = utils.PathJoin(tmpdir, "sock")
12298 logging.debug("Creating temporary socket at %s", tmpsock)
12299 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12304 # Send details to client
12307 # Wait for client to connect before continuing
12308 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12310 (conn, _) = sock.accept()
12311 except socket.error, err:
12312 raise errcls("Client didn't connect in time (%s)" % err)
12316 # Remove as soon as client is connected
12317 shutil.rmtree(tmpdir)
12319 # Wait for client to close
12322 # pylint: disable-msg=E1101
12323 # Instance of '_socketobject' has no ... member
12324 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12326 except socket.error, err:
12327 raise errcls("Client failed to confirm notification (%s)" % err)
12331 def _SendNotification(self, test, arg, sockname):
12332 """Sends a notification to the client.
12335 @param test: Test name
12336 @param arg: Test argument (depends on test)
12337 @type sockname: string
12338 @param sockname: Socket path
12341 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12343 def _Notify(self, prereq, test, arg):
12344 """Notifies the client of a test.
12347 @param prereq: Whether this is a prereq-phase test
12349 @param test: Test name
12350 @param arg: Test argument (depends on test)
12354 errcls = errors.OpPrereqError
12356 errcls = errors.OpExecError
12358 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12362 def CheckArguments(self):
12363 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12364 self.expandnames_calls = 0
12366 def ExpandNames(self):
12367 checkargs_calls = getattr(self, "checkargs_calls", 0)
12368 if checkargs_calls < 1:
12369 raise errors.ProgrammerError("CheckArguments was not called")
12371 self.expandnames_calls += 1
12373 if self.op.notify_waitlock:
12374 self._Notify(True, constants.JQT_EXPANDNAMES, None)
12376 self.LogInfo("Expanding names")
12378 # Get lock on master node (just to get a lock, not for a particular reason)
12379 self.needed_locks = {
12380 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12383 def Exec(self, feedback_fn):
12384 if self.expandnames_calls < 1:
12385 raise errors.ProgrammerError("ExpandNames was not called")
12387 if self.op.notify_exec:
12388 self._Notify(False, constants.JQT_EXEC, None)
12390 self.LogInfo("Executing")
12392 if self.op.log_messages:
12393 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12394 for idx, msg in enumerate(self.op.log_messages):
12395 self.LogInfo("Sending log message %s", idx + 1)
12396 feedback_fn(constants.JQT_MSGPREFIX + msg)
12397 # Report how many test messages have been sent
12398 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12401 raise errors.OpExecError("Opcode failure was requested")
12406 class IAllocator(object):
12407 """IAllocator framework.
12409 An IAllocator instance has the following sets of attributes:
12410 - cfg that is needed to query the cluster
12411 - input data (all members of the _KEYS class attribute are required)
12412 - four buffer attributes (in|out_data|text), that represent the
12413 input (to the external script) in text and data structure format,
12414 and the output from it, again in two formats
12415 - the result variables from the script (success, info, nodes) for
12419 # pylint: disable-msg=R0902
12420 # lots of instance attributes
12422 def __init__(self, cfg, rpc, mode, **kwargs):
12425 # init buffer variables
12426 self.in_text = self.out_text = self.in_data = self.out_data = None
12427 # init all input fields so that pylint is happy
12429 self.memory = self.disks = self.disk_template = None
12430 self.os = self.tags = self.nics = self.vcpus = None
12431 self.hypervisor = None
12432 self.relocate_from = None
12434 self.evac_nodes = None
12435 self.instances = None
12436 self.evac_mode = None
12437 self.target_groups = []
12439 self.required_nodes = None
12440 # init result fields
12441 self.success = self.info = self.result = None
12444 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12446 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12447 " IAllocator" % self.mode)
12449 keyset = [n for (n, _) in keydata]
12452 if key not in keyset:
12453 raise errors.ProgrammerError("Invalid input parameter '%s' to"
12454 " IAllocator" % key)
12455 setattr(self, key, kwargs[key])
12458 if key not in kwargs:
12459 raise errors.ProgrammerError("Missing input parameter '%s' to"
12460 " IAllocator" % key)
12461 self._BuildInputData(compat.partial(fn, self), keydata)
12463 def _ComputeClusterData(self):
12464 """Compute the generic allocator input data.
12466 This is the data that is independent of the actual operation.
12470 cluster_info = cfg.GetClusterInfo()
12473 "version": constants.IALLOCATOR_VERSION,
12474 "cluster_name": cfg.GetClusterName(),
12475 "cluster_tags": list(cluster_info.GetTags()),
12476 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12477 # we don't have job IDs
12479 ninfo = cfg.GetAllNodesInfo()
12480 iinfo = cfg.GetAllInstancesInfo().values()
12481 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12484 node_list = [n.name for n in ninfo.values() if n.vm_capable]
12486 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12487 hypervisor_name = self.hypervisor
12488 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12489 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12491 hypervisor_name = cluster_info.enabled_hypervisors[0]
12493 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12496 self.rpc.call_all_instances_info(node_list,
12497 cluster_info.enabled_hypervisors)
12499 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12501 config_ndata = self._ComputeBasicNodeData(ninfo)
12502 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12503 i_list, config_ndata)
12504 assert len(data["nodes"]) == len(ninfo), \
12505 "Incomplete node data computed"
12507 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12509 self.in_data = data
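# At this point in_data is a plain dict holding the keys filled in above:
# "version", "cluster_name", "cluster_tags", "enabled_hypervisors",
# "nodegroups", "nodes" and "instances"; the request-specific part is added
# later by _BuildInputData before serialization.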
12512 def _ComputeNodeGroupData(cfg):
12513 """Compute node groups data.
12516 ng = dict((guuid, {
12517 "name": gdata.name,
12518 "alloc_policy": gdata.alloc_policy,
12520 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12525 def _ComputeBasicNodeData(node_cfg):
12526 """Compute global node data.
12529 @returns: a dict mapping node names to dicts of static node attributes
12532 # fill in static (config-based) values
12533 node_results = dict((ninfo.name, {
12534 "tags": list(ninfo.GetTags()),
12535 "primary_ip": ninfo.primary_ip,
12536 "secondary_ip": ninfo.secondary_ip,
12537 "offline": ninfo.offline,
12538 "drained": ninfo.drained,
12539 "master_candidate": ninfo.master_candidate,
12540 "group": ninfo.group,
12541 "master_capable": ninfo.master_capable,
12542 "vm_capable": ninfo.vm_capable,
12544 for ninfo in node_cfg.values())
12546 return node_results
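# Sketch of one entry (hypothetical node name and addresses):
#   node_results["node1.example.com"] == {
#     "tags": [], "primary_ip": "192.0.2.1", "secondary_ip": "198.51.100.1",
#     "offline": False, "drained": False, "master_candidate": True,
#     "group": "<group UUID>", "master_capable": True, "vm_capable": True}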
12549 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
12551 """Compute global node data.
12553 @param node_results: the basic node structures as filled from the config
12556 # make a copy of the current dict
12557 node_results = dict(node_results)
12558 for nname, nresult in node_data.items():
12559 assert nname in node_results, "Missing basic data for node %s" % nname
12560 ninfo = node_cfg[nname]
12562 if not (ninfo.offline or ninfo.drained):
12563 nresult.Raise("Can't get data for node %s" % nname)
12564 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
12566 remote_info = nresult.payload
12568 for attr in ["memory_total", "memory_free", "memory_dom0",
12569 "vg_size", "vg_free", "cpu_total"]:
12570 if attr not in remote_info:
12571 raise errors.OpExecError("Node '%s' didn't return attribute"
12572 " '%s'" % (nname, attr))
12573 if not isinstance(remote_info[attr], int):
12574 raise errors.OpExecError("Node '%s' returned invalid value"
12576 (nname, attr, remote_info[attr]))
12577 # compute memory used by primary instances
12578 i_p_mem = i_p_up_mem = 0
12579 for iinfo, beinfo in i_list:
12580 if iinfo.primary_node == nname:
12581 i_p_mem += beinfo[constants.BE_MEMORY]
12582 if iinfo.name not in node_iinfo[nname].payload:
12585 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
12586 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
12587 remote_info["memory_free"] -= max(0, i_mem_diff)
12590 i_p_up_mem += beinfo[constants.BE_MEMORY]
12592 # compute memory used by instances
12594 "total_memory": remote_info["memory_total"],
12595 "reserved_memory": remote_info["memory_dom0"],
12596 "free_memory": remote_info["memory_free"],
12597 "total_disk": remote_info["vg_size"],
12598 "free_disk": remote_info["vg_free"],
12599 "total_cpus": remote_info["cpu_total"],
12600 "i_pri_memory": i_p_mem,
12601 "i_pri_up_memory": i_p_up_mem,
12603 pnr_dyn.update(node_results[nname])
12604 node_results[nname] = pnr_dyn
12606 return node_results
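# For online, undrained nodes the static entry from _ComputeBasicNodeData is
# extended with the dynamic fields computed above ("total_memory",
# "reserved_memory", "free_memory", "total_disk", "free_disk", "total_cpus",
# "i_pri_memory" and "i_pri_up_memory"); offline or drained nodes keep only
# their static data.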
12609 def _ComputeInstanceData(cluster_info, i_list):
12610 """Compute global instance data.
12614 for iinfo, beinfo in i_list:
12616 for nic in iinfo.nics:
12617 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
12621 "mode": filled_params[constants.NIC_MODE],
12622 "link": filled_params[constants.NIC_LINK],
12624 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
12625 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
12626 nic_data.append(nic_dict)
12628 "tags": list(iinfo.GetTags()),
12629 "admin_up": iinfo.admin_up,
12630 "vcpus": beinfo[constants.BE_VCPUS],
12631 "memory": beinfo[constants.BE_MEMORY],
12633 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
12635 "disks": [{constants.IDISK_SIZE: dsk.size,
12636 constants.IDISK_MODE: dsk.mode}
12637 for dsk in iinfo.disks],
12638 "disk_template": iinfo.disk_template,
12639 "hypervisor": iinfo.hypervisor,
12641 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
12643 instance_data[iinfo.name] = pir
12645 return instance_data
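# Sketch of one entry (hypothetical instance name): instance_data["inst1"]
# carries the fields filled in above, e.g. "tags", "admin_up", "vcpus",
# "memory", "nodes", "disks", "disk_template", "hypervisor" and the derived
# "disk_space_total"; the whole dict ends up under in_data["instances"].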
12647 def _AddNewInstance(self):
12648 """Add new instance data to allocator structure.
12650 This in combination with _ComputeClusterData will create the
12651 correct structure needed as input for the allocator.
12653 The checks for the completeness of the opcode must have already been
12657 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
12659 if self.disk_template in constants.DTS_INT_MIRROR:
12660 self.required_nodes = 2
12662 self.required_nodes = 1
12666 "disk_template": self.disk_template,
12669 "vcpus": self.vcpus,
12670 "memory": self.memory,
12671 "disks": self.disks,
12672 "disk_space_total": disk_space,
12674 "required_nodes": self.required_nodes,
12675 "hypervisor": self.hypervisor,
12680 def _AddRelocateInstance(self):
12681 """Add relocate instance data to allocator structure.
12683 This in combination with _ComputeClusterData will create the
12684 correct structure needed as input for the allocator.
12686 The checks for the completeness of the opcode must have already been
12690 instance = self.cfg.GetInstanceInfo(self.name)
12691 if instance is None:
12692 raise errors.ProgrammerError("Unknown instance '%s' passed to"
12693 " IAllocator" % self.name)
12695 if instance.disk_template not in constants.DTS_MIRRORED:
12696 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
12697 errors.ECODE_INVAL)
12699 if instance.disk_template in constants.DTS_INT_MIRROR and \
12700 len(instance.secondary_nodes) != 1:
12701 raise errors.OpPrereqError("Instance does not have exactly one secondary node",
12702 errors.ECODE_STATE)
12704 self.required_nodes = 1
12705 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
12706 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
12710 "disk_space_total": disk_space,
12711 "required_nodes": self.required_nodes,
12712 "relocate_from": self.relocate_from,
12716 def _AddEvacuateNodes(self):
12717 """Add evacuate nodes data to allocator structure.
12721 "evac_nodes": self.evac_nodes
12725 def _AddNodeEvacuate(self):
12726 """Get data for node-evacuate requests.
12730 "instances": self.instances,
12731 "evac_mode": self.evac_mode,
12734 def _AddChangeGroup(self):
12735 """Get data for node-evacuate requests.
12739 "instances": self.instances,
12740 "target_groups": self.target_groups,
12743 def _BuildInputData(self, fn, keydata):
12744 """Build input data structures.
12747 self._ComputeClusterData()
12750 request["type"] = self.mode
12751 for keyname, keytype in keydata:
12752 if keyname not in request:
12753 raise errors.ProgrammerError("Request parameter %s is missing" %
12755 val = request[keyname]
12756 if not keytype(val):
12757 raise errors.ProgrammerError("Request parameter %s doesn't pass"
12758 " validation, value %s, expected"
12759 " type %s" % (keyname, val, keytype))
12760 self.in_data["request"] = request
12762 self.in_text = serializer.Dump(self.in_data)
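# Validation sketch: every keydata entry is a (name, checker) pair where the
# checker is an ht.* predicate (see _MODE_DATA below); e.g. ("memory", ht.TInt)
# requires request["memory"] to satisfy ht.TInt before the request is
# serialized into in_text.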
12764 _STRING_LIST = ht.TListOf(ht.TString)
12765 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
12766 # pylint: disable-msg=E1101
12767 # Class '...' has no 'OP_ID' member
12768 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
12769 opcodes.OpInstanceMigrate.OP_ID,
12770 opcodes.OpInstanceReplaceDisks.OP_ID])
12774 ht.TListOf(ht.TAnd(ht.TIsLength(3),
12775 ht.TItems([ht.TNonEmptyString,
12776 ht.TNonEmptyString,
12777 ht.TListOf(ht.TNonEmptyString),
12780 ht.TListOf(ht.TAnd(ht.TIsLength(2),
12781 ht.TItems([ht.TNonEmptyString,
12784 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
12785 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
12788 constants.IALLOCATOR_MODE_ALLOC:
12791 ("name", ht.TString),
12792 ("memory", ht.TInt),
12793 ("disks", ht.TListOf(ht.TDict)),
12794 ("disk_template", ht.TString),
12795 ("os", ht.TString),
12796 ("tags", _STRING_LIST),
12797 ("nics", ht.TListOf(ht.TDict)),
12798 ("vcpus", ht.TInt),
12799 ("hypervisor", ht.TString),
12801 constants.IALLOCATOR_MODE_RELOC:
12802 (_AddRelocateInstance,
12803 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
12805 constants.IALLOCATOR_MODE_MEVAC:
12806 (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
12807 ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
12808 constants.IALLOCATOR_MODE_NODE_EVAC:
12809 (_AddNodeEvacuate, [
12810 ("instances", _STRING_LIST),
12811 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
12813 constants.IALLOCATOR_MODE_CHG_GROUP:
12814 (_AddChangeGroup, [
12815 ("instances", _STRING_LIST),
12816 ("target_groups", _STRING_LIST),
12820 def Run(self, name, validate=True, call_fn=None):
12821 """Run an instance allocator and return the results.
12824 if call_fn is None:
12825 call_fn = self.rpc.call_iallocator_runner
12827 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
12828 result.Raise("Failure while running the iallocator script")
12830 self.out_text = result.payload
12832 self._ValidateResult()
12834 def _ValidateResult(self):
12835 """Process the allocator results.
12837 This will process the result and, if successful, save it in
12838 self.out_data and the other result attributes.
12842 rdict = serializer.Load(self.out_text)
12843 except Exception, err:
12844 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
12846 if not isinstance(rdict, dict):
12847 raise errors.OpExecError("Can't parse iallocator results: not a dict")
12849 # TODO: remove backwards compatibility in later versions
12850 if "nodes" in rdict and "result" not in rdict:
12851 rdict["result"] = rdict["nodes"]
12854 for key in "success", "info", "result":
12855 if key not in rdict:
12856 raise errors.OpExecError("Can't parse iallocator results:"
12857 " missing key '%s'" % key)
12858 setattr(self, key, rdict[key])
12860 if not self._result_check(self.result):
12861 raise errors.OpExecError("Iallocator returned invalid result,"
12862 " expected %s, got %s" %
12863 (self._result_check, self.result),
12864 errors.ECODE_INVAL)
12866 if self.mode in (constants.IALLOCATOR_MODE_RELOC,
12867 constants.IALLOCATOR_MODE_MEVAC):
12868 node2group = dict((name, ndata["group"])
12869 for (name, ndata) in self.in_data["nodes"].items())
12871 fn = compat.partial(self._NodesToGroups, node2group,
12872 self.in_data["nodegroups"])
12874 if self.mode == constants.IALLOCATOR_MODE_RELOC:
12875 assert self.relocate_from is not None
12876 assert self.required_nodes == 1
12878 request_groups = fn(self.relocate_from)
12879 result_groups = fn(rdict["result"])
12881 if result_groups != request_groups:
12882 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
12883 " differ from original groups (%s)" %
12884 (utils.CommaJoin(result_groups),
12885 utils.CommaJoin(request_groups)))
12886 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
12887 request_groups = fn(self.evac_nodes)
12888 for (instance_name, secnode) in self.result:
12889 result_groups = fn([secnode])
12890 if result_groups != request_groups:
12891 raise errors.OpExecError("Iallocator returned new secondary node"
12892 " '%s' (group '%s') for instance '%s'"
12893 " which is not in original group '%s'" %
12894 (secnode, utils.CommaJoin(result_groups),
12896 utils.CommaJoin(request_groups)))
12898 raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)
12900 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
12901 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
12903 self.out_data = rdict
12906 def _NodesToGroups(node2group, groups, nodes):
12907 """Returns a list of unique group names for a list of nodes.
12909 @type node2group: dict
12910 @param node2group: Map from node name to group UUID
12912 @param groups: Group information
12914 @param nodes: Node names
12921 group_uuid = node2group[node]
12923 # Ignore unknown node
12927 group = groups[group_uuid]
12929 # Can't find group, let's use UUID
12930 group_name = group_uuid
12932 group_name = group["name"]
12934 result.add(group_name)
12936 return sorted(result)
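# Usage sketch (hypothetical data): with node2group = {"n1": "u1", "n2": "u2"}
# and groups = {"u1": {"name": "g1"}, "u2": {"name": "g2"}},
# _NodesToGroups(node2group, groups, ["n1", "n2", "unknown"]) returns
# ["g1", "g2"]: unknown nodes are skipped and a missing group entry makes the
# UUID itself be used as the name.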
12939 class LUTestAllocator(NoHooksLU):
12940 """Run allocator tests.
12942 This LU runs the allocator tests
12945 def CheckPrereq(self):
12946 """Check prerequisites.
12948 This checks the opcode parameters depending on the direction and mode of the test.
12951 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
12952 for attr in ["memory", "disks", "disk_template",
12953 "os", "tags", "nics", "vcpus"]:
12954 if not hasattr(self.op, attr):
12955 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
12956 attr, errors.ECODE_INVAL)
12957 iname = self.cfg.ExpandInstanceName(self.op.name)
12958 if iname is not None:
12959 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
12960 iname, errors.ECODE_EXISTS)
12961 if not isinstance(self.op.nics, list):
12962 raise errors.OpPrereqError("Invalid parameter 'nics'",
12963 errors.ECODE_INVAL)
12964 if not isinstance(self.op.disks, list):
12965 raise errors.OpPrereqError("Invalid parameter 'disks'",
12966 errors.ECODE_INVAL)
12967 for row in self.op.disks:
12968 if (not isinstance(row, dict) or
12969 constants.IDISK_SIZE not in row or
12970 not isinstance(row[constants.IDISK_SIZE], int) or
12971 constants.IDISK_MODE not in row or
12972 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
12973 raise errors.OpPrereqError("Invalid contents of the 'disks'"
12974 " parameter", errors.ECODE_INVAL)
12975 if self.op.hypervisor is None:
12976 self.op.hypervisor = self.cfg.GetHypervisorType()
12977 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12978 fname = _ExpandInstanceName(self.cfg, self.op.name)
12979 self.op.name = fname
12980 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
12981 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12982 if not hasattr(self.op, "evac_nodes"):
12983 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
12984 " opcode input", errors.ECODE_INVAL)
12985 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
12986 constants.IALLOCATOR_MODE_NODE_EVAC):
12987 if not self.op.instances:
12988 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
12989 self.op.instances = _GetWantedInstances(self, self.op.instances)
12991 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
12992 self.op.mode, errors.ECODE_INVAL)
12994 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
12995 if self.op.allocator is None:
12996 raise errors.OpPrereqError("Missing allocator name",
12997 errors.ECODE_INVAL)
12998 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
12999 raise errors.OpPrereqError("Wrong allocator test '%s'" %
13000 self.op.direction, errors.ECODE_INVAL)
13002 def Exec(self, feedback_fn):
13003 """Run the allocator test.
13006 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13007 ial = IAllocator(self.cfg, self.rpc,
13010 memory=self.op.memory,
13011 disks=self.op.disks,
13012 disk_template=self.op.disk_template,
13016 vcpus=self.op.vcpus,
13017 hypervisor=self.op.hypervisor,
13019 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13020 ial = IAllocator(self.cfg, self.rpc,
13023 relocate_from=list(self.relocate_from),
13025 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
13026 ial = IAllocator(self.cfg, self.rpc,
13028 evac_nodes=self.op.evac_nodes)
13029 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13030 ial = IAllocator(self.cfg, self.rpc,
13032 instances=self.op.instances,
13033 target_groups=self.op.target_groups)
13034 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13035 ial = IAllocator(self.cfg, self.rpc,
13037 instances=self.op.instances,
13038 evac_mode=self.op.evac_mode)
13040 raise errors.ProgrammerError("Unhandled mode %s in"
13041 " LUTestAllocator.Exec", self.op.mode)
13043 if self.op.direction == constants.IALLOCATOR_DIR_IN:
13044 result = ial.in_text
13046 ial.Run(self.op.allocator, validate=False)
13047 result = ial.out_text
13051 #: Query type implementations
13053 constants.QR_INSTANCE: _InstanceQuery,
13054 constants.QR_NODE: _NodeQuery,
13055 constants.QR_GROUP: _GroupQuery,
13056 constants.QR_OS: _OsQuery,
13059 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13062 def _GetQueryImplementation(name):
13063 """Returns the implemtnation for a query type.
13065 @param name: Query type, must be one of L{constants.QR_VIA_OP}
13069 return _QUERY_IMPL[name]
13071 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13072 errors.ECODE_INVAL)