# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay to many lines in this module
import copy
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

import ganeti.masterd.instance # pylint: disable-msg=W0611


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node to check
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
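
# Usage sketch (hypothetical caller, not part of this module): the empty
# string returned for unsupported nodes is what makes plain truth checks
# work.
#
#   oob_program = _SupportsOob(self.cfg, node)
#   if not oob_program:
#     raise errors.OpPrereqError("OOB is not supported for node %s" %
#                                node.name, errors.ECODE_STATE)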


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()
143 """Returns the SshRunner object
147 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
150 ssh = property(fget=__GetSSH)
  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods need not worry about missing parameters.

    """
  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    (or locking.ALL_SET) as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError
  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """
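  # A minimal sketch of a typical override (hypothetical LU, not part of the
  # base class): once the instance locks have been acquired at the lower
  # level, the node locks are computed from them via the _LockInstancesNodes
  # helper below.
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()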
  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError
  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
        running the hooks for this LU. The keys of the dict must not be
        prefixed with "GANETI_"; that prefix is added by the hooks runner.
        The hooks runner will extend the environment with additional
        variables. If no environment should be defined, an empty dictionary
        should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
        will not be called.

    """
    raise NotImplementedError
  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
        should run before the execution and a list of node names on which the
        hook should run after the execution. "No nodes" should be represented
        as an empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
        will not be called.

    """
    raise NotImplementedError
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None

    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the "unused argument" and "could
    # be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result
  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
418 """Tasklet base class.
420 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
421 they can mix legacy code with tasklets. Locking needs to be done in the LU,
422 tasklets know nothing about locks.
424 Subclasses must follow these rules:
425 - Implement CheckPrereq
429 def __init__(self, lu):
436 def CheckPrereq(self):
437 """Check prerequisites for this tasklets.
439 This method should check whether the prerequisites for the execution of
440 this tasklet are fulfilled. It can do internode communication, but it
441 should be idempotent - no cluster or system changes are allowed.
443 The method should raise errors.OpPrereqError in case something is not
444 fulfilled. Its return value is ignored.
446 This method should also update all parameters to their canonical form if it
447 hasn't been done before.
452 def Exec(self, feedback_fn):
453 """Execute the tasklet.
455 This method should implement the actual work. It should raise
456 errors.OpExecError for failures that are somewhat dealt with in code, or
460 raise NotImplementedError
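
# A minimal sketch of a concrete tasklet (hypothetical example, not part of
# this module): the owning LU passes itself in, CheckPrereq/Exec do the
# actual work, and all locking stays in the LU.
#
#   class _DemoTasklet(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       self.instance = self.lu.cfg.GetInstanceInfo(self.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Working on %s" % self.instance.name)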
464 """Base for query utility classes.
467 #: Attribute holding field definitions
470 def __init__(self, filter_, fields, use_locking):
471 """Initializes this class.
474 self.use_locking = use_locking
476 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
478 self.requested_data = self.query.RequestedData()
479 self.names = self.query.RequestedNames()
481 # Sort only if no names were requested
482 self.sort_by_name = not self.names
484 self.do_locking = None
  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.acquired_locks[lock_level]
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.acquired_locks[lock_level]

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted
  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
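
# A sketch of how a concrete query class is wired up (hypothetical example;
# the FIELDS value and lock handling shown here are assumptions): ExpandNames
# and DeclareLocks mirror the LU methods of the same name, and _GetQueryData
# feeds the generic machinery used by NewStyleQuery/OldStyleQuery.
#
#   class _DemoQuery(_QueryBase):
#     FIELDS = query.NODE_FIELDS
#
#     def ExpandNames(self, lu):
#       lu.needed_locks = {}
#       self.do_locking = self.use_locking
#       self.wanted = self.names or locking.ALL_SET
#
#     def DeclareLocks(self, lu, level):
#       pass
#
#     def _GetQueryData(self, lu):
#       # ... collect and return the query data object ...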


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
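
# Example of the merge semantics above (a sketch with hypothetical values):
#
#   old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda1"}
#   upd = {"root_path": constants.VALUE_DEFAULT, "serial_console": True}
#   _GetUpdatedParams(old, upd)
#   => {"kernel_path": "/boot/vmlinuz", "serial_console": True}
#
# "root_path" is deleted so the cluster-level default applies again, while
# new keys are simply added; old_params itself is never modified.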


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable-msg=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = static
  f.Extend(dynamic)
  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global parameters.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @param memory: the memory size of the instance
  @param vcpus: the count of VCPUs the instance has
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @param disks: the list of (size, mode) pairs
  @param bep: the backend parameters for the instance
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"

  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both an iallocator and a node.",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.", errors.ECODE_INVAL)


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)
  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUClusterVerify.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUClusterVerify.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerify.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUClusterVerify(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  _HOOKS_INDENT_RE = re.compile("^", re.M)
  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call failed (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}
  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg)
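  # The two output formats produced above, as a sketch (hypothetical node
  # name and message; the error tuples and ETYPE_* values are defined in
  # this class):
  #
  #   with op.error_codes:  "ERROR:ENODELVM:node:node1:LVM problem on node"
  #   without:              "ERROR: node node1: LVM problem on node"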
  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
        reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True
  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)
  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)
  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                 volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)
  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail", prinode)
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test
    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))
  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1895 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1896 """Verifies and updates the node volume data.
1898 This function will update a L{NodeImage}'s internal structures
1899 with data from the remote call.
1901 @type ninfo: L{objects.Node}
1902 @param ninfo: the node to check
1903 @param nresult: the remote results for the node
1904 @param nimg: the node image object
1905 @param vg_name: the configured VG name
1907 """
1908 node = ninfo.name
1909 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1911 nimg.lvm_fail = True
1912 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1913 if vg_name is None:
1914 pass
1915 elif isinstance(lvdata, basestring):
1916 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1917 utils.SafeEncode(lvdata))
1918 elif not isinstance(lvdata, dict):
1919 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1920 else:
1921 nimg.volumes = lvdata
1922 nimg.lvm_fail = False
1924 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1925 """Verifies and updates the node instance list.
1927 If the listing was successful, then updates this node's instance
1928 list. Otherwise, it marks the RPC call as failed for the instance
1929 list.
1931 @type ninfo: L{objects.Node}
1932 @param ninfo: the node to check
1933 @param nresult: the remote results for the node
1934 @param nimg: the node image object
1936 """
1937 idata = nresult.get(constants.NV_INSTANCELIST, None)
1938 test = not isinstance(idata, list)
1939 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1940 " (instancelist): %s", utils.SafeEncode(str(idata)))
1941 if test:
1942 nimg.hyp_fail = True
1943 else:
1944 nimg.instances = idata
1946 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1947 """Verifies and computes a node information map
1949 @type ninfo: L{objects.Node}
1950 @param ninfo: the node to check
1951 @param nresult: the remote results for the node
1952 @param nimg: the node image object
1953 @param vg_name: the configured VG name
1955 """
1956 node = ninfo.name
1957 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1959 # try to read free memory (from the hypervisor)
1960 hv_info = nresult.get(constants.NV_HVINFO, None)
1961 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1962 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1963 if not test:
1964 try:
1965 nimg.mfree = int(hv_info["memory_free"])
1966 except (ValueError, TypeError):
1967 _ErrorIf(True, self.ENODERPC, node,
1968 "node returned invalid nodeinfo, check hypervisor")
1970 # FIXME: devise a free space model for file based instances as well
1971 if vg_name is not None:
1972 test = (constants.NV_VGLIST not in nresult or
1973 vg_name not in nresult[constants.NV_VGLIST])
1974 _ErrorIf(test, self.ENODELVM, node,
1975 "node didn't return data for the volume group '%s'"
1976 " - it is either missing or broken", vg_name)
1977 if not test:
1978 try:
1979 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1980 except (ValueError, TypeError):
1981 _ErrorIf(True, self.ENODERPC, node,
1982 "node returned invalid LVM info, check LVM status")
1984 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1985 """Gets per-disk status information for all instances.
1987 @type nodelist: list of strings
1988 @param nodelist: Node names
1989 @type node_image: dict of (name, L{objects.Node})
1990 @param node_image: Node objects
1991 @type instanceinfo: dict of (name, L{objects.Instance})
1992 @param instanceinfo: Instance objects
1993 @rtype: {instance: {node: [(success, payload)]}}
1994 @return: a dictionary of per-instance dictionaries with nodes as
1995 keys and disk information as values; the disk information is a
1996 list of tuples (success, payload)
1998 """
1999 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2001 node_disks = {}
2002 node_disks_devonly = {}
2003 diskless_instances = set()
2004 diskless = constants.DT_DISKLESS
2006 for nname in nodelist:
2007 node_instances = list(itertools.chain(node_image[nname].pinst,
2008 node_image[nname].sinst))
2009 diskless_instances.update(inst for inst in node_instances
2010 if instanceinfo[inst].disk_template == diskless)
2011 disks = [(inst, disk)
2012 for inst in node_instances
2013 for disk in instanceinfo[inst].disks]
2015 if not disks:
2016 # No need to collect data
2017 continue
2019 node_disks[nname] = disks
2021 # Creating copies as SetDiskID below will modify the objects and that can
2022 # lead to incorrect data returned from nodes
2023 devonly = [dev.Copy() for (_, dev) in disks]
2025 for dev in devonly:
2026 self.cfg.SetDiskID(dev, nname)
2028 node_disks_devonly[nname] = devonly
2030 assert len(node_disks) == len(node_disks_devonly)
2032 # Collect data from all nodes with disks
2033 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2034 node_disks_devonly)
2036 assert len(result) == len(node_disks)
2038 instdisk = {}
2040 for (nname, nres) in result.items():
2041 disks = node_disks[nname]
2043 if nres.offline:
2044 # No data from this node
2045 data = len(disks) * [(False, "node offline")]
2046 else:
2047 msg = nres.fail_msg
2048 _ErrorIf(msg, self.ENODERPC, nname,
2049 "while getting disk information: %s", msg)
2050 if msg:
2051 # No data from this node
2052 data = len(disks) * [(False, msg)]
2053 else:
2054 data = []
2055 for idx, i in enumerate(nres.payload):
2056 if isinstance(i, (tuple, list)) and len(i) == 2:
2057 data.append(i)
2058 else:
2059 logging.warning("Invalid result from node %s, entry %d: %s",
2060 nname, idx, i)
2061 data.append((False, "Invalid result from the remote node"))
2063 for ((inst, _), status) in zip(disks, data):
2064 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2066 # Add empty entries for diskless instances.
2067 for inst in diskless_instances:
2068 assert inst not in instdisk
2069 instdisk[inst] = {}
2071 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2072 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2073 compat.all(isinstance(s, (tuple, list)) and
2074 len(s) == 2 for s in statuses)
2075 for inst, nnames in instdisk.items()
2076 for nname, statuses in nnames.items())
2077 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2079 return instdisk
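# Hedged example of the structure returned above (instance and node names
# are hypothetical):
#   instdisk = {
#     "inst1.example.com": {
#       "node1.example.com": [(True, status0), (True, status1)],
#       },
#     }
# Diskless instances are present with an empty inner dictionary.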
2081 def _VerifyHVP(self, hvp_data):
2082 """Verifies locally the syntax of the hypervisor parameters.
2084 """
2085 for item, hv_name, hv_params in hvp_data:
2086 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2089 hv_class = hypervisor.GetHypervisor(hv_name)
2090 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2091 hv_class.CheckParameterSyntax(hv_params)
2092 except errors.GenericError, err:
2093 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
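# Illustrative call (the hypervisor name and parameter constant are real,
# the value is hypothetical); any syntax problem is routed through
# self._ErrorIf instead of being raised, so all sources are checked in
# a single pass:
#   self._VerifyHVP([("cluster", constants.HT_XEN_PVM,
#                     {constants.HV_KERNEL_PATH: "/boot/vmlinuz"})])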
2095 def BuildHooksEnv(self):
2096 """Build hooks env.
2098 Cluster-Verify hooks just ran in the post phase and their failure makes
2099 the output be logged in the verify output and the verification to fail.
2105 "CLUSTER_TAGS": " ".join(cfg.GetClusterInfo().GetTags())
2108 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2109 for node in cfg.GetAllNodesInfo().values())
2111 return env
2113 def BuildHooksNodes(self):
2114 """Build hooks nodes.
2116 """
2117 return ([], self.cfg.GetNodeList())
2119 def Exec(self, feedback_fn):
2120 """Verify integrity of cluster, performing various test on nodes.
2123 # This method has too many local variables. pylint: disable-msg=R0914
2125 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2126 verbose = self.op.verbose
2127 self._feedback_fn = feedback_fn
2128 feedback_fn("* Verifying global settings")
2129 for msg in self.cfg.VerifyConfig():
2130 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2132 # Check the cluster certificates
2133 for cert_filename in constants.ALL_CERT_FILES:
2134 (errcode, msg) = _VerifyCertificate(cert_filename)
2135 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2137 vg_name = self.cfg.GetVGName()
2138 drbd_helper = self.cfg.GetDRBDHelper()
2139 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2140 cluster = self.cfg.GetClusterInfo()
2141 nodelist = utils.NiceSort(self.cfg.GetNodeList())
2142 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2143 nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2144 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2145 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2146 for iname in instancelist)
2147 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2148 i_non_redundant = [] # Non redundant instances
2149 i_non_a_balanced = [] # Non auto-balanced instances
2150 n_offline = 0 # Count of offline nodes
2151 n_drained = 0 # Count of nodes being drained
2152 node_vol_should = {}
2154 # FIXME: verify OS list
2155 # do local checksums
2156 master_files = [constants.CLUSTER_CONF_FILE]
2157 master_node = self.master_node = self.cfg.GetMasterNode()
2158 master_ip = self.cfg.GetMasterIP()
2160 file_names = ssconf.SimpleStore().GetFileList()
2161 file_names.extend(constants.ALL_CERT_FILES)
2162 file_names.extend(master_files)
2163 if cluster.modify_etc_hosts:
2164 file_names.append(constants.ETC_HOSTS)
2166 local_checksums = utils.FingerprintFiles(file_names)
2168 # Compute the set of hypervisor parameters
2169 hvp_data = []
2170 for hv_name in hypervisors:
2171 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2172 for os_name, os_hvp in cluster.os_hvp.items():
2173 for hv_name, hv_params in os_hvp.items():
2174 if not hv_params:
2175 continue
2176 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2177 hvp_data.append(("os %s" % os_name, hv_name, full_params))
2178 # TODO: collapse identical parameter values in a single one
2179 for instance in instanceinfo.values():
2180 if not instance.hvparams:
2181 continue
2182 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2183 cluster.FillHV(instance)))
2184 # and verify them locally
2185 self._VerifyHVP(hvp_data)
2187 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2188 node_verify_param = {
2189 constants.NV_FILELIST: file_names,
2190 constants.NV_NODELIST: [node.name for node in nodeinfo
2191 if not node.offline],
2192 constants.NV_HYPERVISOR: hypervisors,
2193 constants.NV_HVPARAMS: hvp_data,
2194 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2195 node.secondary_ip) for node in nodeinfo
2196 if not node.offline],
2197 constants.NV_INSTANCELIST: hypervisors,
2198 constants.NV_VERSION: None,
2199 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2200 constants.NV_NODESETUP: None,
2201 constants.NV_TIME: None,
2202 constants.NV_MASTERIP: (master_node, master_ip),
2203 constants.NV_OSLIST: None,
2204 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2205 }
2207 if vg_name is not None:
2208 node_verify_param[constants.NV_VGLIST] = None
2209 node_verify_param[constants.NV_LVLIST] = vg_name
2210 node_verify_param[constants.NV_PVLIST] = [vg_name]
2211 node_verify_param[constants.NV_DRBDLIST] = None
2213 if drbd_helper:
2214 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2216 # Build our expected cluster state
2217 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2218 name=node.name,
2219 vm_capable=node.vm_capable))
2220 for node in nodeinfo)
2222 oob_paths = []
2224 for node in nodeinfo:
2225 path = _SupportsOob(self.cfg, node)
2226 if path and path not in oob_paths:
2227 oob_paths.append(path)
2229 if oob_paths:
2230 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2232 for instance in instancelist:
2233 inst_config = instanceinfo[instance]
2235 for nname in inst_config.all_nodes:
2236 if nname not in node_image:
2237 # ghost node
2238 gnode = self.NodeImage(name=nname)
2239 gnode.ghost = True
2240 node_image[nname] = gnode
2242 inst_config.MapLVsByNode(node_vol_should)
2244 pnode = inst_config.primary_node
2245 node_image[pnode].pinst.append(instance)
2247 for snode in inst_config.secondary_nodes:
2248 nimg = node_image[snode]
2249 nimg.sinst.append(instance)
2250 if pnode not in nimg.sbp:
2251 nimg.sbp[pnode] = []
2252 nimg.sbp[pnode].append(instance)
2254 # At this point, we have the in-memory data structures complete,
2255 # except for the runtime information, which we'll gather next
2257 # Due to the way our RPC system works, exact response times cannot be
2258 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2259 # time before and after executing the request, we can at least have a time
2260 # window.
2261 nvinfo_starttime = time.time()
2262 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2263 self.cfg.GetClusterName())
2264 nvinfo_endtime = time.time()
2266 all_drbd_map = self.cfg.ComputeDRBDMap()
2268 feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2269 instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2271 feedback_fn("* Verifying node status")
2273 refos_img = None
2275 for node_i in nodeinfo:
2276 node = node_i.name
2277 nimg = node_image[node]
2279 if node_i.offline:
2280 if verbose:
2281 feedback_fn("* Skipping offline node %s" % (node,))
2282 n_offline += 1
2283 continue
2285 if node == master_node:
2286 ntype = "master"
2287 elif node_i.master_candidate:
2288 ntype = "master candidate"
2289 elif node_i.drained:
2290 ntype = "drained"
2291 n_drained += 1
2292 else:
2293 ntype = "regular"
2294 if verbose:
2295 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2297 msg = all_nvinfo[node].fail_msg
2298 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2299 if msg:
2300 nimg.rpc_fail = True
2301 continue
2303 nresult = all_nvinfo[node].payload
2305 nimg.call_ok = self._VerifyNode(node_i, nresult)
2306 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2307 self._VerifyNodeNetwork(node_i, nresult)
2308 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2309 master_files)
2311 self._VerifyOob(node_i, nresult)
2313 if nimg.vm_capable:
2314 self._VerifyNodeLVM(node_i, nresult, vg_name)
2315 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2316 all_drbd_map)
2318 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2319 self._UpdateNodeInstances(node_i, nresult, nimg)
2320 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2321 self._UpdateNodeOS(node_i, nresult, nimg)
2322 if not nimg.os_fail:
2323 if refos_img is None:
2324 refos_img = nimg
2325 self._VerifyNodeOS(node_i, nimg, refos_img)
2327 feedback_fn("* Verifying instance status")
2328 for instance in instancelist:
2329 if verbose:
2330 feedback_fn("* Verifying instance %s" % instance)
2331 inst_config = instanceinfo[instance]
2332 self._VerifyInstance(instance, inst_config, node_image,
2333 instdisk[instance])
2334 inst_nodes_offline = []
2336 pnode = inst_config.primary_node
2337 pnode_img = node_image[pnode]
2338 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2339 self.ENODERPC, pnode, "instance %s, connection to"
2340 " primary node failed", instance)
2342 _ErrorIf(pnode_img.offline, self.EINSTANCEBADNODE, instance,
2343 "instance lives on offline node %s", inst_config.primary_node)
2345 # If the instance is non-redundant we cannot survive losing its primary
2346 # node, so we are not N+1 compliant. On the other hand we have no disk
2347 # templates with more than one secondary so that situation is not well
2348 # supported either.
2349 # FIXME: does not support file-backed instances
2350 if not inst_config.secondary_nodes:
2351 i_non_redundant.append(instance)
2353 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2354 instance, "instance has multiple secondary nodes: %s",
2355 utils.CommaJoin(inst_config.secondary_nodes),
2356 code=self.ETYPE_WARNING)
2358 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2359 pnode = inst_config.primary_node
2360 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2361 instance_groups = {}
2363 for node in instance_nodes:
2364 instance_groups.setdefault(nodeinfo_byname[node].group,
2365 []).append(node)
2367 pretty_list = [
2368 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2369 # Sort so that we always list the primary node first.
2370 for group, nodes in sorted(instance_groups.items(),
2371 key=lambda (_, nodes): pnode in nodes,
2372 reverse=True)]
2374 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2375 instance, "instance has primary and secondary nodes in"
2376 " different groups: %s", utils.CommaJoin(pretty_list),
2377 code=self.ETYPE_WARNING)
2379 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2380 i_non_a_balanced.append(instance)
2382 for snode in inst_config.secondary_nodes:
2383 s_img = node_image[snode]
2384 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2385 "instance %s, connection to secondary node failed", instance)
2387 if s_img.offline:
2388 inst_nodes_offline.append(snode)
2390 # warn that the instance lives on offline nodes
2391 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2392 "instance has offline secondary node(s) %s",
2393 utils.CommaJoin(inst_nodes_offline))
2394 # ... or ghost/non-vm_capable nodes
2395 for node in inst_config.all_nodes:
2396 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2397 "instance lives on ghost node %s", node)
2398 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2399 instance, "instance lives on non-vm_capable node %s", node)
2401 feedback_fn("* Verifying orphan volumes")
2402 reserved = utils.FieldSet(*cluster.reserved_lvs)
2403 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2405 feedback_fn("* Verifying orphan instances")
2406 self._VerifyOrphanInstances(instancelist, node_image)
2408 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2409 feedback_fn("* Verifying N+1 Memory redundancy")
2410 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2412 feedback_fn("* Other Notes")
2413 if i_non_redundant:
2414 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2415 % len(i_non_redundant))
2417 if i_non_a_balanced:
2418 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2419 % len(i_non_a_balanced))
2421 if n_offline:
2422 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2424 if n_drained:
2425 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2427 return not self.bad
2429 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2430 """Analyze the post-hooks' result
2432 This method analyses the hook result, handles it, and sends some
2433 nicely-formatted feedback back to the user.
2435 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2436 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2437 @param hooks_results: the results of the multi-node hooks rpc call
2438 @param feedback_fn: function used send feedback back to the caller
2439 @param lu_result: previous Exec result
2440 @return: the new Exec result, based on the previous result
2442 """
2444 # We only really run POST phase hooks, and are only interested in
2445 # their results
2446 if phase == constants.HOOKS_PHASE_POST:
2447 # Used to change hooks' output to proper indentation
2448 feedback_fn("* Hooks Results")
2449 assert hooks_results, "invalid result from hooks"
2451 for node_name in hooks_results:
2452 res = hooks_results[node_name]
2453 msg = res.fail_msg
2454 test = msg and not res.offline
2455 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2456 "Communication failure in hooks execution: %s", msg)
2457 if res.offline or msg:
2458 # No need to investigate payload if node is offline or gave an error.
2459 # override manually lu_result here as _ErrorIf only
2460 # overrides self.bad
2461 lu_result = 1
2462 continue
2463 for script, hkr, output in res.payload:
2464 test = hkr == constants.HKR_FAIL
2465 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2466 "Script %s failed, output:", script)
2467 if test:
2468 output = self._HOOKS_INDENT_RE.sub(' ', output)
2469 feedback_fn("%s" % output)
2470 lu_result = 0
2472 return lu_result
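# Sketch of the per-node hooks payload consumed above (assumed shape,
# script names hypothetical): a list of (script, status, output) tuples,
#   [("50-check-disk", constants.HKR_SUCCESS, ""),
#    ("60-check-mem", constants.HKR_FAIL, "low memory")]
# Only constants.HKR_FAIL entries turn the overall verify result into a
# failure; their output is re-indented and echoed to the user.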
2475 class LUClusterVerifyDisks(NoHooksLU):
2476 """Verifies the cluster disks status.
2481 def ExpandNames(self):
2482 self.needed_locks = {
2483 locking.LEVEL_NODE: locking.ALL_SET,
2484 locking.LEVEL_INSTANCE: locking.ALL_SET,
2485 }
2486 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2488 def Exec(self, feedback_fn):
2489 """Verify integrity of cluster disks.
2491 @rtype: tuple of three items
2492 @return: a tuple of (dict of node-to-node_error, list of instances
2493 which need activate-disks, dict of instance: (node, volume) for
2494 missing volumes
2496 """
2497 result = res_nodes, res_instances, res_missing = {}, [], {}
2499 nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2500 instances = self.cfg.GetAllInstancesInfo().values()
2502 nv_dict = {}
2503 for inst in instances:
2504 inst_lvs = {}
2505 if not inst.admin_up:
2506 continue
2507 inst.MapLVsByNode(inst_lvs)
2508 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2509 for node, vol_list in inst_lvs.iteritems():
2510 for vol in vol_list:
2511 nv_dict[(node, vol)] = inst
2513 if not nv_dict:
2514 return result
2516 node_lvs = self.rpc.call_lv_list(nodes, [])
2517 for node, node_res in node_lvs.items():
2518 if node_res.offline:
2519 continue
2520 msg = node_res.fail_msg
2521 if msg:
2522 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2523 res_nodes[node] = msg
2524 continue
2526 lvs = node_res.payload
2527 for lv_name, (_, _, lv_online) in lvs.items():
2528 inst = nv_dict.pop((node, lv_name), None)
2529 if (not lv_online and inst is not None
2530 and inst.name not in res_instances):
2531 res_instances.append(inst.name)
2533 # any leftover items in nv_dict are missing LVs, let's arrange the
2534 # data better
2535 for key, inst in nv_dict.iteritems():
2536 if inst.name not in res_missing:
2537 res_missing[inst.name] = []
2538 res_missing[inst.name].append(key)
2540 return result
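# Illustrative return value (node, instance and volume names hypothetical):
#   ({"node3": "rpc error"},                 # per-node error messages
#    ["inst1"],                              # instances needing activate-disks
#    {"inst2": [("node1", "xenvg/disk0")]})  # instances with missing LVs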
2543 class LUClusterRepairDiskSizes(NoHooksLU):
2544 """Verifies the cluster disks sizes.
2549 def ExpandNames(self):
2550 if self.op.instances:
2551 self.wanted_names = []
2552 for name in self.op.instances:
2553 full_name = _ExpandInstanceName(self.cfg, name)
2554 self.wanted_names.append(full_name)
2555 self.needed_locks = {
2556 locking.LEVEL_NODE: [],
2557 locking.LEVEL_INSTANCE: self.wanted_names,
2558 }
2559 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2560 else:
2561 self.wanted_names = None
2562 self.needed_locks = {
2563 locking.LEVEL_NODE: locking.ALL_SET,
2564 locking.LEVEL_INSTANCE: locking.ALL_SET,
2565 }
2566 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2568 def DeclareLocks(self, level):
2569 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2570 self._LockInstancesNodes(primary_only=True)
2572 def CheckPrereq(self):
2573 """Check prerequisites.
2575 This only checks the optional instance list against the existing names.
2577 """
2578 if self.wanted_names is None:
2579 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2581 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2582 in self.wanted_names]
2584 def _EnsureChildSizes(self, disk):
2585 """Ensure children of the disk have the needed disk size.
2587 This is valid mainly for DRBD8 and fixes an issue where the
2588 children have smaller disk size.
2590 @param disk: an L{ganeti.objects.Disk} object
2592 """
2593 if disk.dev_type == constants.LD_DRBD8:
2594 assert disk.children, "Empty children for DRBD8?"
2595 fchild = disk.children[0]
2596 mismatch = fchild.size < disk.size
2597 if mismatch:
2598 self.LogInfo("Child disk has size %d, parent %d, fixing",
2599 fchild.size, disk.size)
2600 fchild.size = disk.size
2602 # and we recurse on this child only, not on the metadev
2603 return self._EnsureChildSizes(fchild) or mismatch
2604 else:
2605 return False
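# Example (illustrative): for a DRBD8 disk whose first child (the data
# device) was created smaller than the parent, the method above grows the
# child's recorded size and returns True, telling the caller that the
# instance configuration must be written back via self.cfg.Update().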
2607 def Exec(self, feedback_fn):
2608 """Verify the size of cluster disks.
2610 """
2611 # TODO: check child disks too
2612 # TODO: check differences in size between primary/secondary nodes
2613 per_node_disks = {}
2614 for instance in self.wanted_instances:
2615 pnode = instance.primary_node
2616 if pnode not in per_node_disks:
2617 per_node_disks[pnode] = []
2618 for idx, disk in enumerate(instance.disks):
2619 per_node_disks[pnode].append((instance, idx, disk))
2621 changed = []
2622 for node, dskl in per_node_disks.items():
2623 newl = [v[2].Copy() for v in dskl]
2624 for dsk in newl:
2625 self.cfg.SetDiskID(dsk, node)
2626 result = self.rpc.call_blockdev_getsize(node, newl)
2627 if result.fail_msg:
2628 self.LogWarning("Failure in blockdev_getsize call to node"
2629 " %s, ignoring", node)
2630 continue
2631 if len(result.payload) != len(dskl):
2632 logging.warning("Invalid result from node %s: len(dskl)=%d,"
2633 " result.payload=%s", node, len(dskl), result.payload)
2634 self.LogWarning("Invalid result from node %s, ignoring node results",
2635 node)
2636 continue
2637 for ((instance, idx, disk), size) in zip(dskl, result.payload):
2638 if size is None:
2639 self.LogWarning("Disk %d of instance %s did not return size"
2640 " information, ignoring", idx, instance.name)
2642 if not isinstance(size, (int, long)):
2643 self.LogWarning("Disk %d of instance %s did not return valid"
2644 " size information, ignoring", idx, instance.name)
2647 if size != disk.size:
2648 self.LogInfo("Disk %d of instance %s has mismatched size,"
2649 " correcting: recorded %d, actual %d", idx,
2650 instance.name, disk.size, size)
2651 disk.size = size
2652 self.cfg.Update(instance, feedback_fn)
2653 changed.append((instance.name, idx, size))
2654 if self._EnsureChildSizes(disk):
2655 self.cfg.Update(instance, feedback_fn)
2656 changed.append((instance.name, idx, disk.size))
2658 return changed
2660 class LUClusterRename(LogicalUnit):
2661 """Rename the cluster.
2663 """
2664 HPATH = "cluster-rename"
2665 HTYPE = constants.HTYPE_CLUSTER
2667 def BuildHooksEnv(self):
2668 """Build hooks env.
2670 """
2671 return {
2672 "OP_TARGET": self.cfg.GetClusterName(),
2673 "NEW_NAME": self.op.name,
2676 def BuildHooksNodes(self):
2677 """Build hooks nodes.
2679 """
2680 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
2682 def CheckPrereq(self):
2683 """Verify that the passed name is a valid one.
2685 """
2686 hostname = netutils.GetHostname(name=self.op.name,
2687 family=self.cfg.GetPrimaryIPFamily())
2689 new_name = hostname.name
2690 self.ip = new_ip = hostname.ip
2691 old_name = self.cfg.GetClusterName()
2692 old_ip = self.cfg.GetMasterIP()
2693 if new_name == old_name and new_ip == old_ip:
2694 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2695 " cluster has changed",
2697 if new_ip != old_ip:
2698 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2699 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2700 " reachable on the network" %
2701 new_ip, errors.ECODE_NOTUNIQUE)
2703 self.op.name = new_name
2705 def Exec(self, feedback_fn):
2706 """Rename the cluster.
2708 """
2709 clustername = self.op.name
2710 ip = self.ip
2712 # shutdown the master IP
2713 master = self.cfg.GetMasterNode()
2714 result = self.rpc.call_node_stop_master(master, False)
2715 result.Raise("Could not disable the master role")
2717 try:
2718 cluster = self.cfg.GetClusterInfo()
2719 cluster.cluster_name = clustername
2720 cluster.master_ip = ip
2721 self.cfg.Update(cluster, feedback_fn)
2723 # update the known hosts file
2724 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2725 node_list = self.cfg.GetOnlineNodeList()
2726 try:
2727 node_list.remove(master)
2728 except ValueError:
2729 pass
2730 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2731 finally:
2732 result = self.rpc.call_node_start_master(master, False, False)
2733 msg = result.fail_msg
2734 if msg:
2735 self.LogWarning("Could not re-enable the master role on"
2736 " the master, please restart manually: %s", msg)
2741 class LUClusterSetParams(LogicalUnit):
2742 """Change the parameters of the cluster.
2744 """
2745 HPATH = "cluster-modify"
2746 HTYPE = constants.HTYPE_CLUSTER
2747 REQ_BGL = False
2749 def CheckArguments(self):
2753 if self.op.uid_pool:
2754 uidpool.CheckUidPool(self.op.uid_pool)
2756 if self.op.add_uids:
2757 uidpool.CheckUidPool(self.op.add_uids)
2759 if self.op.remove_uids:
2760 uidpool.CheckUidPool(self.op.remove_uids)
2762 def ExpandNames(self):
2763 # FIXME: in the future maybe other cluster params won't require checking on
2764 # all nodes to be modified.
2765 self.needed_locks = {
2766 locking.LEVEL_NODE: locking.ALL_SET,
2767 }
2768 self.share_locks[locking.LEVEL_NODE] = 1
2770 def BuildHooksEnv(self):
2771 """Build hooks env.
2773 """
2774 return {
2775 "OP_TARGET": self.cfg.GetClusterName(),
2776 "NEW_VG_NAME": self.op.vg_name,
2779 def BuildHooksNodes(self):
2780 """Build hooks nodes.
2782 """
2783 mn = self.cfg.GetMasterNode()
2784 return ([mn], [mn])
2786 def CheckPrereq(self):
2787 """Check prerequisites.
2789 This checks whether the given params don't conflict and
2790 if the given volume group is valid.
2792 """
2793 if self.op.vg_name is not None and not self.op.vg_name:
2794 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2795 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2796 " instances exist", errors.ECODE_INVAL)
2798 if self.op.drbd_helper is not None and not self.op.drbd_helper:
2799 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2800 raise errors.OpPrereqError("Cannot disable drbd helper while"
2801 " drbd-based instances exist",
2804 node_list = self.acquired_locks[locking.LEVEL_NODE]
2806 # if vg_name is not None, check the given volume group on all nodes
2807 if self.op.vg_name:
2808 vglist = self.rpc.call_vg_list(node_list)
2809 for node in node_list:
2810 msg = vglist[node].fail_msg
2811 if msg:
2812 # ignoring down node
2813 self.LogWarning("Error while gathering data on node %s"
2814 " (ignoring node): %s", node, msg)
2816 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2818 constants.MIN_VG_SIZE)
2819 if vgstatus:
2820 raise errors.OpPrereqError("Error on node '%s': %s" %
2821 (node, vgstatus), errors.ECODE_ENVIRON)
2823 if self.op.drbd_helper:
2824 # checks given drbd helper on all nodes
2825 helpers = self.rpc.call_drbd_helper(node_list)
2826 for node in node_list:
2827 ninfo = self.cfg.GetNodeInfo(node)
2828 if ninfo.offline:
2829 self.LogInfo("Not checking drbd helper on offline node %s", node)
2830 continue
2831 msg = helpers[node].fail_msg
2832 if msg:
2833 raise errors.OpPrereqError("Error checking drbd helper on node"
2834 " '%s': %s" % (node, msg),
2835 errors.ECODE_ENVIRON)
2836 node_helper = helpers[node].payload
2837 if node_helper != self.op.drbd_helper:
2838 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2839 (node, node_helper), errors.ECODE_ENVIRON)
2841 self.cluster = cluster = self.cfg.GetClusterInfo()
2842 # validate params changes
2843 if self.op.beparams:
2844 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2845 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2847 if self.op.ndparams:
2848 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2849 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2851 if self.op.nicparams:
2852 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2853 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2854 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2855 nic_errors = []
2857 # check all instances for consistency
2858 for instance in self.cfg.GetAllInstancesInfo().values():
2859 for nic_idx, nic in enumerate(instance.nics):
2860 params_copy = copy.deepcopy(nic.nicparams)
2861 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2863 # check parameter syntax
2865 objects.NIC.CheckParameterSyntax(params_filled)
2866 except errors.ConfigurationError, err:
2867 nic_errors.append("Instance %s, nic/%d: %s" %
2868 (instance.name, nic_idx, err))
2870 # if we're moving instances to routed, check that they have an ip
2871 target_mode = params_filled[constants.NIC_MODE]
2872 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2873 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2874 (instance.name, nic_idx))
2875 if nic_errors:
2876 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2877 "\n".join(nic_errors))
2879 # hypervisor list/parameters
2880 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2881 if self.op.hvparams:
2882 for hv_name, hv_dict in self.op.hvparams.items():
2883 if hv_name not in self.new_hvparams:
2884 self.new_hvparams[hv_name] = hv_dict
2885 else:
2886 self.new_hvparams[hv_name].update(hv_dict)
2888 # os hypervisor parameters
2889 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2890 if self.op.os_hvp:
2891 for os_name, hvs in self.op.os_hvp.items():
2892 if os_name not in self.new_os_hvp:
2893 self.new_os_hvp[os_name] = hvs
2894 else:
2895 for hv_name, hv_dict in hvs.items():
2896 if hv_name not in self.new_os_hvp[os_name]:
2897 self.new_os_hvp[os_name][hv_name] = hv_dict
2898 else:
2899 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2902 self.new_osp = objects.FillDict(cluster.osparams, {})
2903 if self.op.osparams:
2904 for os_name, osp in self.op.osparams.items():
2905 if os_name not in self.new_osp:
2906 self.new_osp[os_name] = {}
2908 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2909 use_none=True)
2911 if not self.new_osp[os_name]:
2912 # we removed all parameters
2913 del self.new_osp[os_name]
2914 else:
2915 # check the parameter validity (remote check)
2916 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2917 os_name, self.new_osp[os_name])
2919 # changes to the hypervisor list
2920 if self.op.enabled_hypervisors is not None:
2921 self.hv_list = self.op.enabled_hypervisors
2922 for hv in self.hv_list:
2923 # if the hypervisor doesn't already exist in the cluster
2924 # hvparams, we initialize it to empty, and then (in both
2925 # cases) we make sure to fill the defaults, as we might not
2926 # have a complete defaults list if the hypervisor wasn't
2927 # enabled before
2928 if hv not in new_hvp:
2929 new_hvp[hv] = {}
2930 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2931 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2932 else:
2933 self.hv_list = cluster.enabled_hypervisors
2935 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2936 # either the enabled list has changed, or the parameters have, validate
2937 for hv_name, hv_params in self.new_hvparams.items():
2938 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2939 (self.op.enabled_hypervisors and
2940 hv_name in self.op.enabled_hypervisors)):
2941 # either this is a new hypervisor, or its parameters have changed
2942 hv_class = hypervisor.GetHypervisor(hv_name)
2943 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2944 hv_class.CheckParameterSyntax(hv_params)
2945 _CheckHVParams(self, node_list, hv_name, hv_params)
2947 if self.op.os_hvp:
2948 # no need to check any newly-enabled hypervisors, since the
2949 # defaults have already been checked in the above code-block
2950 for os_name, os_hvp in self.new_os_hvp.items():
2951 for hv_name, hv_params in os_hvp.items():
2952 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2953 # we need to fill in the new os_hvp on top of the actual hv_p
2954 cluster_defaults = self.new_hvparams.get(hv_name, {})
2955 new_osp = objects.FillDict(cluster_defaults, hv_params)
2956 hv_class = hypervisor.GetHypervisor(hv_name)
2957 hv_class.CheckParameterSyntax(new_osp)
2958 _CheckHVParams(self, node_list, hv_name, new_osp)
2960 if self.op.default_iallocator:
2961 alloc_script = utils.FindFile(self.op.default_iallocator,
2962 constants.IALLOCATOR_SEARCH_PATH,
2963 os.X_OK)
2964 if alloc_script is None:
2965 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2966 " specified" % self.op.default_iallocator,
2969 def Exec(self, feedback_fn):
2970 """Change the parameters of the cluster.
2972 """
2973 if self.op.vg_name is not None:
2974 new_volume = self.op.vg_name
2975 if not new_volume:
2976 new_volume = None
2977 if new_volume != self.cfg.GetVGName():
2978 self.cfg.SetVGName(new_volume)
2980 feedback_fn("Cluster LVM configuration already in desired"
2981 " state, not changing")
2982 if self.op.drbd_helper is not None:
2983 new_helper = self.op.drbd_helper
2984 if not new_helper:
2985 new_helper = None
2986 if new_helper != self.cfg.GetDRBDHelper():
2987 self.cfg.SetDRBDHelper(new_helper)
2989 feedback_fn("Cluster DRBD helper already in desired state,"
2991 if self.op.hvparams:
2992 self.cluster.hvparams = self.new_hvparams
2993 if self.op.os_hvp:
2994 self.cluster.os_hvp = self.new_os_hvp
2995 if self.op.enabled_hypervisors is not None:
2996 self.cluster.hvparams = self.new_hvparams
2997 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2998 if self.op.beparams:
2999 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3000 if self.op.nicparams:
3001 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3002 if self.op.osparams:
3003 self.cluster.osparams = self.new_osp
3004 if self.op.ndparams:
3005 self.cluster.ndparams = self.new_ndparams
3007 if self.op.candidate_pool_size is not None:
3008 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3009 # we need to update the pool size here, otherwise the save will fail
3010 _AdjustCandidatePool(self, [])
3012 if self.op.maintain_node_health is not None:
3013 self.cluster.maintain_node_health = self.op.maintain_node_health
3015 if self.op.prealloc_wipe_disks is not None:
3016 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3018 if self.op.add_uids is not None:
3019 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3021 if self.op.remove_uids is not None:
3022 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3024 if self.op.uid_pool is not None:
3025 self.cluster.uid_pool = self.op.uid_pool
3027 if self.op.default_iallocator is not None:
3028 self.cluster.default_iallocator = self.op.default_iallocator
3030 if self.op.reserved_lvs is not None:
3031 self.cluster.reserved_lvs = self.op.reserved_lvs
3033 def helper_os(aname, mods, desc):
3034 desc += " OS list"
3035 lst = getattr(self.cluster, aname)
3036 for key, val in mods:
3037 if key == constants.DDM_ADD:
3039 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3042 elif key == constants.DDM_REMOVE:
3046 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3048 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3050 if self.op.hidden_os:
3051 helper_os("hidden_os", self.op.hidden_os, "hidden")
3053 if self.op.blacklisted_os:
3054 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3056 if self.op.master_netdev:
3057 master = self.cfg.GetMasterNode()
3058 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3059 self.cluster.master_netdev)
3060 result = self.rpc.call_node_stop_master(master, False)
3061 result.Raise("Could not disable the master ip")
3062 feedback_fn("Changing master_netdev from %s to %s" %
3063 (self.cluster.master_netdev, self.op.master_netdev))
3064 self.cluster.master_netdev = self.op.master_netdev
3066 self.cfg.Update(self.cluster, feedback_fn)
3068 if self.op.master_netdev:
3069 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3070 self.op.master_netdev)
3071 result = self.rpc.call_node_start_master(master, False, False)
3073 self.LogWarning("Could not re-enable the master ip on"
3074 " the master, please restart manually: %s",
3078 def _UploadHelper(lu, nodes, fname):
3079 """Helper for uploading a file and showing warnings.
3082 if os.path.exists(fname):
3083 result = lu.rpc.call_upload_file(nodes, fname)
3084 for to_node, to_result in result.items():
3085 msg = to_result.fail_msg
3087 msg = ("Copy of file %s to node %s failed: %s" %
3088 (fname, to_node, msg))
3089 lu.proc.LogWarning(msg)
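# Usage sketch (illustrative; "lu" can be any logical unit with working
# rpc/proc attributes, the node names are hypothetical):
#   _UploadHelper(lu, ["node1.example.com", "node2.example.com"],
#                 constants.ETC_HOSTS)
# Copy failures are only warned about; they never abort the calling LU.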
3092 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3093 """Distribute additional files which are part of the cluster configuration.
3095 ConfigWriter takes care of distributing the config and ssconf files, but
3096 there are more files which should be distributed to all nodes. This function
3097 makes sure those are copied.
3099 @param lu: calling logical unit
3100 @param additional_nodes: list of nodes not in the config to distribute to
3101 @type additional_vm: boolean
3102 @param additional_vm: whether the additional nodes are vm-capable or not
3104 """
3105 # 1. Gather target nodes
3106 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3107 dist_nodes = lu.cfg.GetOnlineNodeList()
3108 nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3109 vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3110 if additional_nodes is not None:
3111 dist_nodes.extend(additional_nodes)
3112 if additional_vm:
3113 vm_nodes.extend(additional_nodes)
3114 if myself.name in dist_nodes:
3115 dist_nodes.remove(myself.name)
3116 if myself.name in vm_nodes:
3117 vm_nodes.remove(myself.name)
3119 # 2. Gather files to distribute
3120 dist_files = set([constants.ETC_HOSTS,
3121 constants.SSH_KNOWN_HOSTS_FILE,
3122 constants.RAPI_CERT_FILE,
3123 constants.RAPI_USERS_FILE,
3124 constants.CONFD_HMAC_KEY,
3125 constants.CLUSTER_DOMAIN_SECRET_FILE,
3126 ])
3128 vm_files = set()
3129 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3130 for hv_name in enabled_hypervisors:
3131 hv_class = hypervisor.GetHypervisor(hv_name)
3132 vm_files.update(hv_class.GetAncillaryFiles())
3134 # 3. Perform the files upload
3135 for fname in dist_files:
3136 _UploadHelper(lu, dist_nodes, fname)
3137 for fname in vm_files:
3138 _UploadHelper(lu, vm_nodes, fname)
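# Usage sketch (illustrative, hypothetical node name): when a new vm-capable
# node has been added but is not yet part of the configuration, it can be
# included explicitly:
#   _RedistributeAncillaryFiles(lu, additional_nodes=["node9.example.com"],
#                               additional_vm=True)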
3141 class LUClusterRedistConf(NoHooksLU):
3142 """Force the redistribution of cluster configuration.
3144 This is a very simple LU.
3146 """
3147 REQ_BGL = False
3149 def ExpandNames(self):
3150 self.needed_locks = {
3151 locking.LEVEL_NODE: locking.ALL_SET,
3152 }
3153 self.share_locks[locking.LEVEL_NODE] = 1
3155 def Exec(self, feedback_fn):
3156 """Redistribute the configuration.
3158 """
3159 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3160 _RedistributeAncillaryFiles(self)
3163 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3164 """Sleep and poll for an instance's disk to sync.
3166 """
3167 if not instance.disks or disks is not None and not disks:
3168 return True
3170 disks = _ExpandCheckDisks(instance, disks)
3172 if not oneshot:
3173 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3175 node = instance.primary_node
3177 for dev in disks:
3178 lu.cfg.SetDiskID(dev, node)
3180 # TODO: Convert to utils.Retry
3182 retries = 0
3183 degr_retries = 10 # in seconds, as we sleep 1 second each time
3184 while True:
3185 max_time = 0
3186 done = True
3187 cumul_degraded = False
3188 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3189 msg = rstats.fail_msg
3190 if msg:
3191 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3192 retries += 1
3193 if retries >= 10:
3194 raise errors.RemoteError("Can't contact node %s for mirror data,"
3195 " aborting." % node)
3196 time.sleep(6)
3197 continue
3198 rstats = rstats.payload
3199 retries = 0
3200 for i, mstat in enumerate(rstats):
3201 if mstat is None:
3202 lu.LogWarning("Can't compute data for node %s/%s",
3203 node, disks[i].iv_name)
3204 continue
3206 cumul_degraded = (cumul_degraded or
3207 (mstat.is_degraded and mstat.sync_percent is None))
3208 if mstat.sync_percent is not None:
3209 done = False
3210 if mstat.estimated_time is not None:
3211 rem_time = ("%s remaining (estimated)" %
3212 utils.FormatSeconds(mstat.estimated_time))
3213 max_time = mstat.estimated_time
3214 else:
3215 rem_time = "no time estimate"
3216 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3217 (disks[i].iv_name, mstat.sync_percent, rem_time))
3219 # if we're done but degraded, let's do a few small retries, to
3220 # make sure we see a stable and not transient situation; therefore
3221 # we force restart of the loop
3222 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3223 logging.info("Degraded disks found, %d retries left", degr_retries)
3224 degr_retries -= 1
3225 time.sleep(1)
3226 continue
3228 if done or oneshot:
3229 break
3231 time.sleep(min(60, max_time))
3233 if not oneshot:
3234 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3235 return not cumul_degraded
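# Usage sketch (illustrative): a typical caller blocks until the initial
# resync finishes and treats a degraded end state as fatal:
#   if not _WaitForSync(lu, instance):
#     raise errors.OpExecError("Some disks of the instance are degraded!")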
3238 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3239 """Check that mirrors are not degraded.
3241 The ldisk parameter, if True, will change the test from the
3242 is_degraded attribute (which represents overall non-ok status for
3243 the device(s)) to the ldisk (representing the local storage status).
3245 """
3246 lu.cfg.SetDiskID(dev, node)
3248 result = True
3250 if on_primary or dev.AssembleOnSecondary():
3251 rstats = lu.rpc.call_blockdev_find(node, dev)
3252 msg = rstats.fail_msg
3254 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3255 result = False
3256 elif not rstats.payload:
3257 lu.LogWarning("Can't find disk on node %s", node)
3258 result = False
3259 else:
3260 if ldisk:
3261 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3262 else:
3263 result = result and not rstats.payload.is_degraded
3265 if dev.children:
3266 for child in dev.children:
3267 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3269 return result
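# Usage sketch (illustrative): checking only the local storage status of a
# disk on the instance's primary node:
#   ok = _CheckDiskConsistency(lu, instance.disks[0],
#                              instance.primary_node, True, ldisk=True)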
3272 class LUOobCommand(NoHooksLU):
3273 """Logical unit for OOB handling.
3275 """
3276 REQ_BGL = False
3277 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3279 def CheckPrereq(self):
3280 """Check prerequisites.
3283 - the node exists in the configuration
3286 Any errors are signaled by raising errors.OpPrereqError.
3288 """
3289 self.nodes = []
3290 self.master_node = self.cfg.GetMasterNode()
3292 assert self.op.power_delay >= 0.0
3294 if self.op.node_names:
3295 if self.op.command in self._SKIP_MASTER:
3296 if self.master_node in self.op.node_names:
3297 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3298 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3300 if master_oob_handler:
3301 additional_text = ("Run '%s %s %s' if you want to operate on the"
3302 " master regardless") % (master_oob_handler,
3306 additional_text = "The master node does not support out-of-band"
3308 raise errors.OpPrereqError(("Operating on the master node %s is not"
3309 " allowed for %s\n%s") %
3310 (self.master_node, self.op.command,
3311 additional_text), errors.ECODE_INVAL)
3312 else:
3313 self.op.node_names = self.cfg.GetNodeList()
3314 if self.op.command in self._SKIP_MASTER:
3315 self.op.node_names.remove(self.master_node)
3317 if self.op.command in self._SKIP_MASTER:
3318 assert self.master_node not in self.op.node_names
3320 for node_name in self.op.node_names:
3321 node = self.cfg.GetNodeInfo(node_name)
3324 raise errors.OpPrereqError("Node %s not found" % node_name,
3327 self.nodes.append(node)
3329 if (not self.op.ignore_status and
3330 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3331 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3332 " not marked offline") % node_name,
3335 def ExpandNames(self):
3336 """Gather locks we need.
3338 """
3339 if self.op.node_names:
3340 self.op.node_names = [_ExpandNodeName(self.cfg, name)
3341 for name in self.op.node_names]
3342 lock_names = self.op.node_names
3344 lock_names = locking.ALL_SET
3346 self.needed_locks = {
3347 locking.LEVEL_NODE: lock_names,
3348 }
3350 def Exec(self, feedback_fn):
3351 """Execute OOB and return result if we expect any.
3353 """
3354 master_node = self.master_node
3355 ret = []
3357 for idx, node in enumerate(self.nodes):
3358 node_entry = [(constants.RS_NORMAL, node.name)]
3359 ret.append(node_entry)
3361 oob_program = _SupportsOob(self.cfg, node)
3363 if not oob_program:
3364 node_entry.append((constants.RS_UNAVAIL, None))
3365 continue
3367 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3368 self.op.command, oob_program, node.name)
3369 result = self.rpc.call_run_oob(master_node, oob_program,
3370 self.op.command, node.name,
3371 self.op.timeout)
3373 if result.fail_msg:
3374 self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3375 node.name, result.fail_msg)
3376 node_entry.append((constants.RS_NODATA, None))
3377 else:
3378 try:
3379 self._CheckPayload(result)
3380 except errors.OpExecError, err:
3381 self.LogWarning("The payload returned by '%s' is not valid: %s",
3383 node_entry.append((constants.RS_NODATA, None))
3384 else:
3385 if self.op.command == constants.OOB_HEALTH:
3386 # For health we should log important events
3387 for item, status in result.payload:
3388 if status in [constants.OOB_STATUS_WARNING,
3389 constants.OOB_STATUS_CRITICAL]:
3390 self.LogWarning("On node '%s' item '%s' has status '%s'",
3391 node.name, item, status)
3393 if self.op.command == constants.OOB_POWER_ON:
3394 node.powered = True
3395 elif self.op.command == constants.OOB_POWER_OFF:
3396 node.powered = False
3397 elif self.op.command == constants.OOB_POWER_STATUS:
3398 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3399 if powered != node.powered:
3400 logging.warning(("Recorded power state (%s) of node '%s' does not"
3401 " match actual power state (%s)"), node.powered,
3404 # For configuration changing commands we should update the node
3405 if self.op.command in (constants.OOB_POWER_ON,
3406 constants.OOB_POWER_OFF):
3407 self.cfg.Update(node, feedback_fn)
3409 node_entry.append((constants.RS_NORMAL, result.payload))
3411 if (self.op.command == constants.OOB_POWER_ON and
3412 idx < len(self.nodes) - 1):
3413 time.sleep(self.op.power_delay)
3415 return ret
3417 def _CheckPayload(self, result):
3418 """Checks if the payload is valid.
3420 @param result: RPC result
3421 @raises errors.OpExecError: If payload is not valid
3423 """
3424 errs = []
3425 if self.op.command == constants.OOB_HEALTH:
3426 if not isinstance(result.payload, list):
3427 errs.append("command 'health' is expected to return a list but got %s" %
3428 type(result.payload))
3429 else:
3430 for item, status in result.payload:
3431 if status not in constants.OOB_STATUSES:
3432 errs.append("health item '%s' has invalid status '%s'" %
3435 if self.op.command == constants.OOB_POWER_STATUS:
3436 if not isinstance(result.payload, dict):
3437 errs.append("power-status is expected to return a dict but got %s" %
3438 type(result.payload))
3440 if self.op.command in [
3441 constants.OOB_POWER_ON,
3442 constants.OOB_POWER_OFF,
3443 constants.OOB_POWER_CYCLE,
3445 if result.payload is not None:
3446 errs.append("%s is expected to not return payload but got '%s'" %
3447 (self.op.command, result.payload))
3449 if errs:
3450 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3451 utils.CommaJoin(errs))
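# Hedged examples of payloads that pass the checks above (values are
# hypothetical, the constants are real):
#   OOB_HEALTH:       [["disk0", constants.OOB_STATUS_OK]]
#   OOB_POWER_STATUS: {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/OOB_POWER_OFF/OOB_POWER_CYCLE: no payload (None)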
3453 class _OsQuery(_QueryBase):
3454 FIELDS = query.OS_FIELDS
3456 def ExpandNames(self, lu):
3457 # Lock all nodes in shared mode
3458 # Temporary removal of locks, should be reverted later
3459 # TODO: reintroduce locks when they are lighter-weight
3460 lu.needed_locks = {}
3461 #self.share_locks[locking.LEVEL_NODE] = 1
3462 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3464 # The following variables interact with _QueryBase._GetNames
3465 if self.names:
3466 self.wanted = self.names
3467 else:
3468 self.wanted = locking.ALL_SET
3470 self.do_locking = self.use_locking
3472 def DeclareLocks(self, lu, level):
3473 pass
3475 @staticmethod
3476 def _DiagnoseByOS(rlist):
3477 """Remaps a per-node return list into an a per-os per-node dictionary
3479 @param rlist: a map with node names as keys and OS objects as values
3482 @return: a dictionary with osnames as keys and as value another
3483 map, with nodes as keys and tuples of (path, status, diagnose,
3484 variants, parameters, api_versions) as values, eg::
3486 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3487 (/srv/..., False, "invalid api")],
3488 "node2": [(/srv/..., True, "", [], [])]}
3490 """
3491 all_os = {}
3493 # we build here the list of nodes that didn't fail the RPC (at RPC
3494 # level), so that nodes with a non-responding node daemon don't
3495 # make all OSes invalid
3496 good_nodes = [node_name for node_name in rlist
3497 if not rlist[node_name].fail_msg]
3498 for node_name, nr in rlist.items():
3499 if nr.fail_msg or not nr.payload:
3500 continue
3501 for (name, path, status, diagnose, variants,
3502 params, api_versions) in nr.payload:
3503 if name not in all_os:
3504 # build a list of nodes for this os containing empty lists
3505 # for each node in node_list
3506 all_os[name] = {}
3507 for nname in good_nodes:
3508 all_os[name][nname] = []
3509 # convert params from [name, help] to (name, help)
3510 params = [tuple(v) for v in params]
3511 all_os[name][node_name].append((path, status, diagnose,
3512 variants, params, api_versions))
3514 return all_os
3515 def _GetQueryData(self, lu):
3516 """Computes the list of nodes and their attributes.
3518 """
3519 # Locking is not used
3520 assert not (lu.acquired_locks or self.do_locking or self.use_locking)
3522 valid_nodes = [node.name
3523 for node in lu.cfg.GetAllNodesInfo().values()
3524 if not node.offline and node.vm_capable]
3525 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3526 cluster = lu.cfg.GetClusterInfo()
3528 data = {}
3530 for (os_name, os_data) in pol.items():
3531 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3532 hidden=(os_name in cluster.hidden_os),
3533 blacklisted=(os_name in cluster.blacklisted_os))
3535 variants = set()
3536 parameters = set()
3537 api_versions = set()
3539 for idx, osl in enumerate(os_data.values()):
3540 info.valid = bool(info.valid and osl and osl[0][1])
3541 if not info.valid:
3542 break
3544 (node_variants, node_params, node_api) = osl[0][3:6]
3545 if idx == 0:
3546 # First entry
3547 variants.update(node_variants)
3548 parameters.update(node_params)
3549 api_versions.update(node_api)
3550 else:
3551 # Filter out inconsistent values
3552 variants.intersection_update(node_variants)
3553 parameters.intersection_update(node_params)
3554 api_versions.intersection_update(node_api)
3556 info.variants = list(variants)
3557 info.parameters = list(parameters)
3558 info.api_versions = list(api_versions)
3560 data[os_name] = info
3562 # Prepare data in requested order
3563 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3564 if name in data]
3567 class LUOsDiagnose(NoHooksLU):
3568 """Logical unit for OS diagnose/query.
3574 def _BuildFilter(fields, names):
3575 """Builds a filter for querying OSes.
3577 """
3578 name_filter = qlang.MakeSimpleFilter("name", names)
3580 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
3581 # respective field is not requested
3582 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
3583 for fname in ["hidden", "blacklisted"]
3584 if fname not in fields]
3585 if "valid" not in fields:
3586 status_filter.append([qlang.OP_TRUE, "valid"])
3588 if status_filter:
3589 status_filter.insert(0, qlang.OP_AND)
3590 else:
3591 status_filter = None
3593 if name_filter and status_filter:
3594 return [qlang.OP_AND, name_filter, status_filter]
3595 elif name_filter:
3596 return name_filter
3597 else:
3598 return status_filter
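# Illustrative result (hypothetical OS name): requesting only the "name"
# field for ["debian-image"] should build a filter roughly like
#   [qlang.OP_AND,
#    qlang.MakeSimpleFilter("name", ["debian-image"]),
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]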
3600 def CheckArguments(self):
3601 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
3602 self.op.output_fields, False)
3604 def ExpandNames(self):
3605 self.oq.ExpandNames(self)
3607 def Exec(self, feedback_fn):
3608 return self.oq.OldStyleQuery(self)
3611 class LUNodeRemove(LogicalUnit):
3612 """Logical unit for removing a node.
3614 """
3615 HPATH = "node-remove"
3616 HTYPE = constants.HTYPE_NODE
3618 def BuildHooksEnv(self):
3619 """Build hooks env.
3621 This doesn't run on the target node in the pre phase as a failed
3622 node would then be impossible to remove.
3626 "OP_TARGET": self.op.node_name,
3627 "NODE_NAME": self.op.node_name,
3630 def BuildHooksNodes(self):
3631 """Build hooks nodes.
3633 """
3634 all_nodes = self.cfg.GetNodeList()
3635 try:
3636 all_nodes.remove(self.op.node_name)
3637 except ValueError:
3638 logging.warning("Node '%s', which is about to be removed, was not found"
3639 " in the list of all nodes", self.op.node_name)
3640 return (all_nodes, all_nodes)
3642 def CheckPrereq(self):
3643 """Check prerequisites.
3646 - the node exists in the configuration
3647 - it does not have primary or secondary instances
3648 - it's not the master
3650 Any errors are signaled by raising errors.OpPrereqError.
3652 """
3653 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3654 node = self.cfg.GetNodeInfo(self.op.node_name)
3655 assert node is not None
3657 instance_list = self.cfg.GetInstanceList()
3659 masternode = self.cfg.GetMasterNode()
3660 if node.name == masternode:
3661 raise errors.OpPrereqError("Node is the master node,"
3662 " you need to failover first.",
3665 for instance_name in instance_list:
3666 instance = self.cfg.GetInstanceInfo(instance_name)
3667 if node.name in instance.all_nodes:
3668 raise errors.OpPrereqError("Instance %s is still running on the node,"
3669 " please remove first." % instance_name,
3671 self.op.node_name = node.name
3672 self.node = node
3674 def Exec(self, feedback_fn):
3675 """Removes the node from the cluster.
3679 logging.info("Stopping the node daemon and removing configs from node %s",
3682 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3684 # Promote nodes to master candidate as needed
3685 _AdjustCandidatePool(self, exceptions=[node.name])
3686 self.context.RemoveNode(node.name)
3688 # Run post hooks on the node before it's removed
3689 _RunPostHook(self, node.name)
3691 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3692 msg = result.fail_msg
3694 self.LogWarning("Errors encountered on the remote node while leaving"
3695 " the cluster: %s", msg)
3697 # Remove node from our /etc/hosts
3698 if self.cfg.GetClusterInfo().modify_etc_hosts:
3699 master_node = self.cfg.GetMasterNode()
3700 result = self.rpc.call_etc_hosts_modify(master_node,
3701 constants.ETC_HOSTS_REMOVE,
3702 node.name, None)
3703 result.Raise("Can't update hosts file with new host data")
3704 _RedistributeAncillaryFiles(self)
3707 class _NodeQuery(_QueryBase):
3708 FIELDS = query.NODE_FIELDS
3710 def ExpandNames(self, lu):
3711 lu.needed_locks = {}
3712 lu.share_locks[locking.LEVEL_NODE] = 1
3714 if self.names:
3715 self.wanted = _GetWantedNodes(lu, self.names)
3716 else:
3717 self.wanted = locking.ALL_SET
3719 self.do_locking = (self.use_locking and
3720 query.NQ_LIVE in self.requested_data)
3722 if self.do_locking:
3723 # if we don't request only static fields, we need to lock the nodes
3724 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3726 def DeclareLocks(self, lu, level):
3727 pass
3729 def _GetQueryData(self, lu):
3730 """Computes the list of nodes and their attributes.
3732 """
3733 all_info = lu.cfg.GetAllNodesInfo()
3735 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3737 # Gather data as requested
3738 if query.NQ_LIVE in self.requested_data:
3739 # filter out non-vm_capable nodes
3740 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3742 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3743 lu.cfg.GetHypervisorType())
3744 live_data = dict((name, nresult.payload)
3745 for (name, nresult) in node_data.items()
3746 if not nresult.fail_msg and nresult.payload)
3747 else:
3748 live_data = None
3750 if query.NQ_INST in self.requested_data:
3751 node_to_primary = dict([(name, set()) for name in nodenames])
3752 node_to_secondary = dict([(name, set()) for name in nodenames])
3754 inst_data = lu.cfg.GetAllInstancesInfo()
3756 for inst in inst_data.values():
3757 if inst.primary_node in node_to_primary:
3758 node_to_primary[inst.primary_node].add(inst.name)
3759 for secnode in inst.secondary_nodes:
3760 if secnode in node_to_secondary:
3761 node_to_secondary[secnode].add(inst.name)
3762 else:
3763 node_to_primary = None
3764 node_to_secondary = None
3766 if query.NQ_OOB in self.requested_data:
3767 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3768 for name, node in all_info.iteritems())
3769 else:
3770 oob_support = None
3772 if query.NQ_GROUP in self.requested_data:
3773 groups = lu.cfg.GetAllNodeGroupsInfo()
3774 else:
3775 groups = {}
3777 return query.NodeQueryData([all_info[name] for name in nodenames],
3778 live_data, lu.cfg.GetMasterNode(),
3779 node_to_primary, node_to_secondary, groups,
3780 oob_support, lu.cfg.GetClusterInfo())
3783 class LUNodeQuery(NoHooksLU):
3784 """Logical unit for querying nodes.
3786 """
3787 # pylint: disable-msg=W0142
3788 REQ_BGL = False
3790 def CheckArguments(self):
3791 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
3792 self.op.output_fields, self.op.use_locking)
3794 def ExpandNames(self):
3795 self.nq.ExpandNames(self)
3797 def Exec(self, feedback_fn):
3798 return self.nq.OldStyleQuery(self)
3801 class LUNodeQueryvols(NoHooksLU):
3802 """Logical unit for getting volumes on node(s).
3806 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3807 _FIELDS_STATIC = utils.FieldSet("node")
3809 def CheckArguments(self):
3810 _CheckOutputFields(static=self._FIELDS_STATIC,
3811 dynamic=self._FIELDS_DYNAMIC,
3812 selected=self.op.output_fields)
3814 def ExpandNames(self):
3815 self.needed_locks = {}
3816 self.share_locks[locking.LEVEL_NODE] = 1
3817 if not self.op.nodes:
3818 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3820 self.needed_locks[locking.LEVEL_NODE] = \
3821 _GetWantedNodes(self, self.op.nodes)
3823 def Exec(self, feedback_fn):
3824 """Computes the list of nodes and their attributes.
3827 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3828 volumes = self.rpc.call_node_volumes(nodenames)
3830 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3831 in self.cfg.GetInstanceList()]
3833 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3836 for node in nodenames:
3837 nresult = volumes[node]
3840 msg = nresult.fail_msg
3842 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3845 node_vols = nresult.payload[:]
3846 node_vols.sort(key=lambda vol: vol['dev'])
3848 for vol in node_vols:
3850 for field in self.op.output_fields:
3853 elif field == "phys":
3857 elif field == "name":
3859 elif field == "size":
3860 val = int(float(vol['size']))
3861 elif field == "instance":
3863 if node not in lv_by_node[inst]:
3865 if vol['name'] in lv_by_node[inst][node]:
3871 raise errors.ParameterError(field)
3872 node_output.append(str(val))
3874 output.append(node_output)
3879 class LUNodeQueryStorage(NoHooksLU):
3880 """Logical unit for getting information on storage units on node(s).
3883 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3886 def CheckArguments(self):
3887 _CheckOutputFields(static=self._FIELDS_STATIC,
3888 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3889 selected=self.op.output_fields)
3891 def ExpandNames(self):
3892 self.needed_locks = {}
3893 self.share_locks[locking.LEVEL_NODE] = 1
3896 self.needed_locks[locking.LEVEL_NODE] = \
3897 _GetWantedNodes(self, self.op.nodes)
3899 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3901 def Exec(self, feedback_fn):
3902 """Computes the list of nodes and their attributes.
3905 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3907 # Always get name to sort by
3908 if constants.SF_NAME in self.op.output_fields:
3909 fields = self.op.output_fields[:]
3911 fields = [constants.SF_NAME] + self.op.output_fields
3913 # Never ask for node or type as it's only known to the LU
3914 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3915 while extra in fields:
3916 fields.remove(extra)
3918 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3919 name_idx = field_idx[constants.SF_NAME]
3921 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3922 data = self.rpc.call_storage_list(self.nodes,
3923 self.op.storage_type, st_args,
3924 self.op.name, fields)
3928 for node in utils.NiceSort(self.nodes):
3929 nresult = data[node]
3933 msg = nresult.fail_msg
3935 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3938 rows = dict([(row[name_idx], row) for row in nresult.payload])
3940 for name in utils.NiceSort(rows.keys()):
3945 for field in self.op.output_fields:
3946 if field == constants.SF_NODE:
3948 elif field == constants.SF_TYPE:
3949 val = self.op.storage_type
3950 elif field in field_idx:
3951 val = row[field_idx[field]]
3953 raise errors.ParameterError(field)
3962 class _InstanceQuery(_QueryBase):
3963 FIELDS = query.INSTANCE_FIELDS
3965 def ExpandNames(self, lu):
3966 lu.needed_locks = {}
3967 lu.share_locks[locking.LEVEL_INSTANCE] = 1
3968 lu.share_locks[locking.LEVEL_NODE] = 1
3971 self.wanted = _GetWantedInstances(lu, self.names)
3973 self.wanted = locking.ALL_SET
3975 self.do_locking = (self.use_locking and
3976 query.IQ_LIVE in self.requested_data)
3978 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3979 lu.needed_locks[locking.LEVEL_NODE] = []
3980 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3982 def DeclareLocks(self, lu, level):
3983 if level == locking.LEVEL_NODE and self.do_locking:
3984 lu._LockInstancesNodes() # pylint: disable-msg=W0212
3986 def _GetQueryData(self, lu):
3987 """Computes the list of instances and their attributes.
3990 cluster = lu.cfg.GetClusterInfo()
3991 all_info = lu.cfg.GetAllInstancesInfo()
3993 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
3995 instance_list = [all_info[name] for name in instance_names]
3996 nodes = frozenset(itertools.chain(*(inst.all_nodes
3997 for inst in instance_list)))
3998 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4001 wrongnode_inst = set()
4003 # Gather data as requested
4004 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4006 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4008 result = node_data[name]
4010 # offline nodes will be in both lists
4011 assert result.fail_msg
4012 offline_nodes.append(name)
4014 bad_nodes.append(name)
4015 elif result.payload:
4016 for inst in result.payload:
4017 if all_info[inst].primary_node == name:
4018 live_data.update(result.payload)
4020 wrongnode_inst.add(inst)
4021 # else no instance is alive
4025 if query.IQ_DISKUSAGE in self.requested_data:
4026 disk_usage = dict((inst.name,
4027 _ComputeDiskSize(inst.disk_template,
4028 [{"size": disk.size}
4029 for disk in inst.disks]))
4030 for inst in instance_list)
4034 if query.IQ_CONSOLE in self.requested_data:
4036 for inst in instance_list:
4037 if inst.name in live_data:
4038 # Instance is running
4039 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4041 consinfo[inst.name] = None
4042 assert set(consinfo.keys()) == set(instance_names)
4046 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4047 disk_usage, offline_nodes, bad_nodes,
4048 live_data, wrongnode_inst, consinfo)
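# Example shape of the console data gathered above (hypothetical instance
# names):
#   consinfo == {"inst1.example.com": <console info>, "inst2.example.com": None}
# Every requested instance gets an entry, None when it is not running, which
# is exactly what the assert on consinfo.keys() verifies.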
4051 class LUQuery(NoHooksLU):
4052 """Query for resources/items of a certain kind.
4055 # pylint: disable-msg=W0142
4058 def CheckArguments(self):
4059 qcls = _GetQueryImplementation(self.op.what)
4061 self.impl = qcls(self.op.filter, self.op.fields, False)
4063 def ExpandNames(self):
4064 self.impl.ExpandNames(self)
4066 def DeclareLocks(self, level):
4067 self.impl.DeclareLocks(self, level)
4069 def Exec(self, feedback_fn):
4070 return self.impl.NewStyleQuery(self)
4073 class LUQueryFields(NoHooksLU):
4074 """Query for resources/items of a certain kind.
4077 # pylint: disable-msg=W0142
4080 def CheckArguments(self):
4081 self.qcls = _GetQueryImplementation(self.op.what)
4083 def ExpandNames(self):
4084 self.needed_locks = {}
4086 def Exec(self, feedback_fn):
4087 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4090 class LUNodeModifyStorage(NoHooksLU):
4091 """Logical unit for modifying a storage volume on a node.
4096 def CheckArguments(self):
4097 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4099 storage_type = self.op.storage_type
4102 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4104 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4105 " modified" % storage_type,
4108 diff = set(self.op.changes.keys()) - modifiable
4110 raise errors.OpPrereqError("The following fields can not be modified for"
4111 " storage units of type '%s': %r" %
4112 (storage_type, list(diff)),
4115 def ExpandNames(self):
4116 self.needed_locks = {
4117 locking.LEVEL_NODE: self.op.node_name,
4120 def Exec(self, feedback_fn):
4121 """Computes the list of nodes and their attributes.
4124 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4125 result = self.rpc.call_storage_modify(self.op.node_name,
4126 self.op.storage_type, st_args,
4127 self.op.name, self.op.changes)
4128 result.Raise("Failed to modify storage unit '%s' on %s" %
4129 (self.op.name, self.op.node_name))
4132 class LUNodeAdd(LogicalUnit):
4133 """Logical unit for adding node to the cluster.
4137 HTYPE = constants.HTYPE_NODE
4138 _NFLAGS = ["master_capable", "vm_capable"]
4140 def CheckArguments(self):
4141 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4142 # validate/normalize the node name
4143 self.hostname = netutils.GetHostname(name=self.op.node_name,
4144 family=self.primary_ip_family)
4145 self.op.node_name = self.hostname.name
4146 if self.op.readd and self.op.group:
4147 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4148 " being readded", errors.ECODE_INVAL)
4150 def BuildHooksEnv(self):
4153 This will run on all nodes before, and on all nodes + the new node after.
4157 "OP_TARGET": self.op.node_name,
4158 "NODE_NAME": self.op.node_name,
4159 "NODE_PIP": self.op.primary_ip,
4160 "NODE_SIP": self.op.secondary_ip,
4161 "MASTER_CAPABLE": str(self.op.master_capable),
4162 "VM_CAPABLE": str(self.op.vm_capable),
4165 def BuildHooksNodes(self):
4166 """Build hooks nodes.
4169 # Exclude added node
4170 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4171 post_nodes = pre_nodes + [self.op.node_name, ]
4173 return (pre_nodes, post_nodes)
4175 def CheckPrereq(self):
4176 """Check prerequisites.
4179 - the new node is not already in the config
4181 - its parameters (single/dual homed) match the cluster
4183 Any errors are signaled by raising errors.OpPrereqError.
4187 hostname = self.hostname
4188 node = hostname.name
4189 primary_ip = self.op.primary_ip = hostname.ip
4190 if self.op.secondary_ip is None:
4191 if self.primary_ip_family == netutils.IP6Address.family:
4192 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4193 " IPv4 address must be given as secondary",
4195 self.op.secondary_ip = primary_ip
4197 secondary_ip = self.op.secondary_ip
4198 if not netutils.IP4Address.IsValid(secondary_ip):
4199 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4200 " address" % secondary_ip, errors.ECODE_INVAL)
4202 node_list = cfg.GetNodeList()
4203 if not self.op.readd and node in node_list:
4204 raise errors.OpPrereqError("Node %s is already in the configuration" %
4205 node, errors.ECODE_EXISTS)
4206 elif self.op.readd and node not in node_list:
4207 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4210 self.changed_primary_ip = False
4212 for existing_node_name in node_list:
4213 existing_node = cfg.GetNodeInfo(existing_node_name)
4215 if self.op.readd and node == existing_node_name:
4216 if existing_node.secondary_ip != secondary_ip:
4217 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4218 " address configuration as before",
4220 if existing_node.primary_ip != primary_ip:
4221 self.changed_primary_ip = True
4225 if (existing_node.primary_ip == primary_ip or
4226 existing_node.secondary_ip == primary_ip or
4227 existing_node.primary_ip == secondary_ip or
4228 existing_node.secondary_ip == secondary_ip):
4229 raise errors.OpPrereqError("New node ip address(es) conflict with"
4230 " existing node %s" % existing_node.name,
4231 errors.ECODE_NOTUNIQUE)
4233 # After this 'if' block, None is no longer a valid value for the
4234 # _capable op attributes
4236 old_node = self.cfg.GetNodeInfo(node)
4237 assert old_node is not None, "Can't retrieve locked node %s" % node
4238 for attr in self._NFLAGS:
4239 if getattr(self.op, attr) is None:
4240 setattr(self.op, attr, getattr(old_node, attr))
4242 for attr in self._NFLAGS:
4243 if getattr(self.op, attr) is None:
4244 setattr(self.op, attr, True)
4246 if self.op.readd and not self.op.vm_capable:
4247 pri, sec = cfg.GetNodeInstances(node)
4249 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4250 " flag set to false, but it already holds"
4251 " instances" % node,
4254 # check that the type of the node (single versus dual homed) is the
4255 # same as for the master
4256 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4257 master_singlehomed = myself.secondary_ip == myself.primary_ip
4258 newbie_singlehomed = secondary_ip == primary_ip
4259 if master_singlehomed != newbie_singlehomed:
4260 if master_singlehomed:
4261 raise errors.OpPrereqError("The master has no secondary ip but the"
4262 " new node has one",
4265 raise errors.OpPrereqError("The master has a secondary ip but the"
4266 " new node doesn't have one",
4269 # checks reachability
4270 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4271 raise errors.OpPrereqError("Node not reachable by ping",
4272 errors.ECODE_ENVIRON)
4274 if not newbie_singlehomed:
4275 # check reachability from my secondary ip to newbie's secondary ip
4276 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4277 source=myself.secondary_ip):
4278 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4279 " based ping to node daemon port",
4280 errors.ECODE_ENVIRON)
4287 if self.op.master_capable:
4288 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4290 self.master_candidate = False
4293 self.new_node = old_node
4295 node_group = cfg.LookupNodeGroup(self.op.group)
4296 self.new_node = objects.Node(name=node,
4297 primary_ip=primary_ip,
4298 secondary_ip=secondary_ip,
4299 master_candidate=self.master_candidate,
4300 offline=False, drained=False,
4303 if self.op.ndparams:
4304 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4306 def Exec(self, feedback_fn):
4307 """Adds the new node to the cluster.
4310 new_node = self.new_node
4311 node = new_node.name
4313 # We are adding a new node, so we assume it's powered
4314 new_node.powered = True
4316 # for re-adds, reset the offline/drained/master-candidate flags;
4317 # we need to reset here, otherwise offline would prevent RPC calls
4318 # later in the procedure; this also means that if the re-add
4319 # fails, we are left with a non-offlined, broken node
4321 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4322 self.LogInfo("Readding a node, the offline/drained flags were reset")
4323 # if we demote the node, we do cleanup later in the procedure
4324 new_node.master_candidate = self.master_candidate
4325 if self.changed_primary_ip:
4326 new_node.primary_ip = self.op.primary_ip
4328 # copy the master/vm_capable flags
4329 for attr in self._NFLAGS:
4330 setattr(new_node, attr, getattr(self.op, attr))
4332 # notify the user about any possible mc promotion
4333 if new_node.master_candidate:
4334 self.LogInfo("Node will be a master candidate")
4336 if self.op.ndparams:
4337 new_node.ndparams = self.op.ndparams
4339 new_node.ndparams = {}
4341 # check connectivity
4342 result = self.rpc.call_version([node])[node]
4343 result.Raise("Can't get version information from node %s" % node)
4344 if constants.PROTOCOL_VERSION == result.payload:
4345 logging.info("Communication to node %s fine, sw version %s match",
4346 node, result.payload)
4348 raise errors.OpExecError("Version mismatch master version %s,"
4349 " node version %s" %
4350 (constants.PROTOCOL_VERSION, result.payload))
4352 # Add node to our /etc/hosts, and add key to known_hosts
4353 if self.cfg.GetClusterInfo().modify_etc_hosts:
4354 master_node = self.cfg.GetMasterNode()
4355 result = self.rpc.call_etc_hosts_modify(master_node,
4356 constants.ETC_HOSTS_ADD,
4359 result.Raise("Can't update hosts file with new host data")
4361 if new_node.secondary_ip != new_node.primary_ip:
4362 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4365 node_verify_list = [self.cfg.GetMasterNode()]
4366 node_verify_param = {
4367 constants.NV_NODELIST: [node],
4368 # TODO: do a node-net-test as well?
4371 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4372 self.cfg.GetClusterName())
4373 for verifier in node_verify_list:
4374 result[verifier].Raise("Cannot communicate with node %s" % verifier)
4375 nl_payload = result[verifier].payload[constants.NV_NODELIST]
4377 for failed in nl_payload:
4378 feedback_fn("ssh/hostname verification failed"
4379 " (checking from %s): %s" %
4380 (verifier, nl_payload[failed]))
4381 raise errors.OpExecError("ssh/hostname verification failed.")
4384 _RedistributeAncillaryFiles(self)
4385 self.context.ReaddNode(new_node)
4386 # make sure we redistribute the config
4387 self.cfg.Update(new_node, feedback_fn)
4388 # and make sure the new node will not have old files around
4389 if not new_node.master_candidate:
4390 result = self.rpc.call_node_demote_from_mc(new_node.name)
4391 msg = result.fail_msg
4393 self.LogWarning("Node failed to demote itself from master"
4394 " candidate status: %s" % msg)
4396 _RedistributeAncillaryFiles(self, additional_nodes=[node],
4397 additional_vm=self.op.vm_capable)
4398 self.context.AddNode(new_node, self.proc.GetECId())
4401 class LUNodeSetParams(LogicalUnit):
4402 """Modifies the parameters of a node.
4404 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4405 to the node role (as _ROLE_*)
4406 @cvar _R2F: a dictionary from node role to tuples of flags
4407 @cvar _FLAGS: a list of attribute names corresponding to the flags
4410 HPATH = "node-modify"
4411 HTYPE = constants.HTYPE_NODE
4413 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4415 (True, False, False): _ROLE_CANDIDATE,
4416 (False, True, False): _ROLE_DRAINED,
4417 (False, False, True): _ROLE_OFFLINE,
4418 (False, False, False): _ROLE_REGULAR,
4420 _R2F = dict((v, k) for k, v in _F2R.items())
4421 _FLAGS = ["master_candidate", "drained", "offline"]
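  # Illustrative examples, not part of the original module, of the two
  # mappings defined above:
  #   _F2R[(True, False, False)] == _ROLE_CANDIDATE
  #   _R2F[_ROLE_OFFLINE] == (False, False, True)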
4423 def CheckArguments(self):
4424 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4425 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4426 self.op.master_capable, self.op.vm_capable,
4427 self.op.secondary_ip, self.op.ndparams]
4428 if all_mods.count(None) == len(all_mods):
4429 raise errors.OpPrereqError("Please pass at least one modification",
4431 if all_mods.count(True) > 1:
4432 raise errors.OpPrereqError("Can't set the node into more than one"
4433 " state at the same time",
4436 # Boolean value that tells us whether we might be demoting from MC
4437 self.might_demote = (self.op.master_candidate == False or
4438 self.op.offline == True or
4439 self.op.drained == True or
4440 self.op.master_capable == False)
4442 if self.op.secondary_ip:
4443 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4444 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4445 " address" % self.op.secondary_ip,
4448 self.lock_all = self.op.auto_promote and self.might_demote
4449 self.lock_instances = self.op.secondary_ip is not None
4451 def ExpandNames(self):
4453 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4455 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4457 if self.lock_instances:
4458 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4460 def DeclareLocks(self, level):
4461 # If we have locked all instances, before waiting to lock nodes, release
4462 # all the ones living on nodes unrelated to the current operation.
4463 if level == locking.LEVEL_NODE and self.lock_instances:
4464 instances_release = []
4466 self.affected_instances = []
4467 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4468 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4469 instance = self.context.cfg.GetInstanceInfo(instance_name)
4470 i_mirrored = instance.disk_template in constants.DTS_INT_MIRROR
4471 if i_mirrored and self.op.node_name in instance.all_nodes:
4472 instances_keep.append(instance_name)
4473 self.affected_instances.append(instance)
4475 instances_release.append(instance_name)
4476 if instances_release:
4477 self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4478 self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4480 def BuildHooksEnv(self):
4483 This runs on the master node.
4487 "OP_TARGET": self.op.node_name,
4488 "MASTER_CANDIDATE": str(self.op.master_candidate),
4489 "OFFLINE": str(self.op.offline),
4490 "DRAINED": str(self.op.drained),
4491 "MASTER_CAPABLE": str(self.op.master_capable),
4492 "VM_CAPABLE": str(self.op.vm_capable),
4495 def BuildHooksNodes(self):
4496 """Build hooks nodes.
4499 nl = [self.cfg.GetMasterNode(), self.op.node_name]
4502 def CheckPrereq(self):
4503 """Check prerequisites.
4505 This checks the requested changes against the node's current state.
4508 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4510 if (self.op.master_candidate is not None or
4511 self.op.drained is not None or
4512 self.op.offline is not None):
4513 # we can't change the master's node flags
4514 if self.op.node_name == self.cfg.GetMasterNode():
4515 raise errors.OpPrereqError("The master role can be changed"
4516 " only via master-failover",
4519 if self.op.master_candidate and not node.master_capable:
4520 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4521 " it a master candidate" % node.name,
4524 if self.op.vm_capable == False:
4525 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4527 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4528 " the vm_capable flag" % node.name,
4531 if node.master_candidate and self.might_demote and not self.lock_all:
4532 assert not self.op.auto_promote, "auto_promote set but lock_all not"
4533 # check if after removing the current node, we're missing master candidates
4535 (mc_remaining, mc_should, _) = \
4536 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4537 if mc_remaining < mc_should:
4538 raise errors.OpPrereqError("Not enough master candidates, please"
4539 " pass auto promote option to allow"
4540 " promotion", errors.ECODE_STATE)
4542 self.old_flags = old_flags = (node.master_candidate,
4543 node.drained, node.offline)
4544 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4545 self.old_role = old_role = self._F2R[old_flags]
4547 # Check for ineffective changes
4548 for attr in self._FLAGS:
4549 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4550 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4551 setattr(self.op, attr, None)
4553 # Past this point, any flag change to False means a transition
4554 # away from the respective state, as only real changes are kept
4556 # TODO: We might query the real power state if it supports OOB
4557 if _SupportsOob(self.cfg, node):
4558 if self.op.offline is False and not (node.powered or
4559 self.op.powered == True):
4560 raise errors.OpPrereqError(("Please power on node %s first before you"
4561 " can reset offline state") %
4563 elif self.op.powered is not None:
4564 raise errors.OpPrereqError(("Unable to change powered state for node %s"
4565 " which does not support out-of-band"
4566 " handling") % self.op.node_name)
4568 # If we're being deofflined/drained, we'll MC ourself if needed
4569 if (self.op.drained == False or self.op.offline == False or
4570 (self.op.master_capable and not node.master_capable)):
4571 if _DecideSelfPromotion(self):
4572 self.op.master_candidate = True
4573 self.LogInfo("Auto-promoting node to master candidate")
4575 # If we're no longer master capable, we'll demote ourselves from MC
4576 if self.op.master_capable == False and node.master_candidate:
4577 self.LogInfo("Demoting from master candidate")
4578 self.op.master_candidate = False
4581 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4582 if self.op.master_candidate:
4583 new_role = self._ROLE_CANDIDATE
4584 elif self.op.drained:
4585 new_role = self._ROLE_DRAINED
4586 elif self.op.offline:
4587 new_role = self._ROLE_OFFLINE
4588 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4589 # False is still in new flags, which means we're un-setting (the current) flags
4591 new_role = self._ROLE_REGULAR
4592 else: # no new flags, nothing, keep old role
4595 self.new_role = new_role
4597 if old_role == self._ROLE_OFFLINE and new_role != old_role:
4598 # Trying to transition out of offline status
4599 result = self.rpc.call_version([node.name])[node.name]
4601 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4602 " to report its version: %s" %
4603 (node.name, result.fail_msg),
4606 self.LogWarning("Transitioning node from offline to online state"
4607 " without using re-add. Please make sure the node"
4610 if self.op.secondary_ip:
4611 # Ok even without locking, because this can't be changed by any LU
4612 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4613 master_singlehomed = master.secondary_ip == master.primary_ip
4614 if master_singlehomed and self.op.secondary_ip:
4615 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4616 " homed cluster", errors.ECODE_INVAL)
4619 if self.affected_instances:
4620 raise errors.OpPrereqError("Cannot change secondary ip: offline"
4621 " node has instances (%s) configured"
4622 " to use it" % self.affected_instances)
4624 # On online nodes, check that no instances are running, and that
4625 # the node has the new ip and we can reach it.
4626 for instance in self.affected_instances:
4627 _CheckInstanceDown(self, instance, "cannot change secondary ip")
4629 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4630 if master.name != node.name:
4631 # check reachability from master secondary ip to new secondary ip
4632 if not netutils.TcpPing(self.op.secondary_ip,
4633 constants.DEFAULT_NODED_PORT,
4634 source=master.secondary_ip):
4635 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4636 " based ping to node daemon port",
4637 errors.ECODE_ENVIRON)
4639 if self.op.ndparams:
4640 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4641 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4642 self.new_ndparams = new_ndparams
4644 def Exec(self, feedback_fn):
4649 old_role = self.old_role
4650 new_role = self.new_role
4654 if self.op.ndparams:
4655 node.ndparams = self.new_ndparams
4657 if self.op.powered is not None:
4658 node.powered = self.op.powered
4660 for attr in ["master_capable", "vm_capable"]:
4661 val = getattr(self.op, attr)
4663 setattr(node, attr, val)
4664 result.append((attr, str(val)))
4666 if new_role != old_role:
4667 # Tell the node to demote itself, if no longer MC and not offline
4668 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4669 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4671 self.LogWarning("Node failed to demote itself: %s", msg)
4673 new_flags = self._R2F[new_role]
4674 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4676 result.append((desc, str(nf)))
4677 (node.master_candidate, node.drained, node.offline) = new_flags
4679 # we locked all nodes, so we adjust the candidate pool before updating this node
4681 _AdjustCandidatePool(self, [node.name])
4683 if self.op.secondary_ip:
4684 node.secondary_ip = self.op.secondary_ip
4685 result.append(("secondary_ip", self.op.secondary_ip))
4687 # this will trigger configuration file update, if needed
4688 self.cfg.Update(node, feedback_fn)
4690 # this will trigger job queue propagation or cleanup if the mc flag changed
4692 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4693 self.context.ReaddNode(node)
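# Illustrative note (an assumption about the elided return value): Exec
# collects its changes as (name, new value) pairs, so offlining a former
# master candidate would yield something like
#   [("master_candidate", "False"), ("offline", "True")]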
4698 class LUNodePowercycle(NoHooksLU):
4699 """Powercycles a node.
4704 def CheckArguments(self):
4705 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4706 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4707 raise errors.OpPrereqError("The node is the master and the force"
4708 " parameter was not set",
4711 def ExpandNames(self):
4712 """Locking for PowercycleNode.
4714 This is a last-resort option and shouldn't block on other
4715 jobs. Therefore, we grab no locks.
4718 self.needed_locks = {}
4720 def Exec(self, feedback_fn):
4724 result = self.rpc.call_node_powercycle(self.op.node_name,
4725 self.cfg.GetHypervisorType())
4726 result.Raise("Failed to schedule the reboot")
4727 return result.payload
4730 class LUClusterQuery(NoHooksLU):
4731 """Query cluster configuration.
4736 def ExpandNames(self):
4737 self.needed_locks = {}
4739 def Exec(self, feedback_fn):
4740 """Return cluster config.
4743 cluster = self.cfg.GetClusterInfo()
4746 # Filter just for enabled hypervisors
4747 for os_name, hv_dict in cluster.os_hvp.items():
4748 os_hvp[os_name] = {}
4749 for hv_name, hv_params in hv_dict.items():
4750 if hv_name in cluster.enabled_hypervisors:
4751 os_hvp[os_name][hv_name] = hv_params
4753 # Convert ip_family to ip_version
4754 primary_ip_version = constants.IP4_VERSION
4755 if cluster.primary_ip_family == netutils.IP6Address.family:
4756 primary_ip_version = constants.IP6_VERSION
4759 "software_version": constants.RELEASE_VERSION,
4760 "protocol_version": constants.PROTOCOL_VERSION,
4761 "config_version": constants.CONFIG_VERSION,
4762 "os_api_version": max(constants.OS_API_VERSIONS),
4763 "export_version": constants.EXPORT_VERSION,
4764 "architecture": (platform.architecture()[0], platform.machine()),
4765 "name": cluster.cluster_name,
4766 "master": cluster.master_node,
4767 "default_hypervisor": cluster.enabled_hypervisors[0],
4768 "enabled_hypervisors": cluster.enabled_hypervisors,
4769 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4770 for hypervisor_name in cluster.enabled_hypervisors]),
4772 "beparams": cluster.beparams,
4773 "osparams": cluster.osparams,
4774 "nicparams": cluster.nicparams,
4775 "ndparams": cluster.ndparams,
4776 "candidate_pool_size": cluster.candidate_pool_size,
4777 "master_netdev": cluster.master_netdev,
4778 "volume_group_name": cluster.volume_group_name,
4779 "drbd_usermode_helper": cluster.drbd_usermode_helper,
4780 "file_storage_dir": cluster.file_storage_dir,
4781 "shared_file_storage_dir": cluster.shared_file_storage_dir,
4782 "maintain_node_health": cluster.maintain_node_health,
4783 "ctime": cluster.ctime,
4784 "mtime": cluster.mtime,
4785 "uuid": cluster.uuid,
4786 "tags": list(cluster.GetTags()),
4787 "uid_pool": cluster.uid_pool,
4788 "default_iallocator": cluster.default_iallocator,
4789 "reserved_lvs": cluster.reserved_lvs,
4790 "primary_ip_version": primary_ip_version,
4791 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4792 "hidden_os": cluster.hidden_os,
4793 "blacklisted_os": cluster.blacklisted_os,
4799 class LUClusterConfigQuery(NoHooksLU):
4800 """Return configuration values.
4804 _FIELDS_DYNAMIC = utils.FieldSet()
4805 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4806 "watcher_pause", "volume_group_name")
4808 def CheckArguments(self):
4809 _CheckOutputFields(static=self._FIELDS_STATIC,
4810 dynamic=self._FIELDS_DYNAMIC,
4811 selected=self.op.output_fields)
4813 def ExpandNames(self):
4814 self.needed_locks = {}
4816 def Exec(self, feedback_fn):
4817 """Dump a representation of the cluster config to the standard output.
4821 for field in self.op.output_fields:
4822 if field == "cluster_name":
4823 entry = self.cfg.GetClusterName()
4824 elif field == "master_node":
4825 entry = self.cfg.GetMasterNode()
4826 elif field == "drain_flag":
4827 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4828 elif field == "watcher_pause":
4829 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4830 elif field == "volume_group_name":
4831 entry = self.cfg.GetVGName()
4833 raise errors.ParameterError(field)
4834 values.append(entry)
4838 class LUInstanceActivateDisks(NoHooksLU):
4839 """Bring up an instance's disks.
4844 def ExpandNames(self):
4845 self._ExpandAndLockInstance()
4846 self.needed_locks[locking.LEVEL_NODE] = []
4847 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4849 def DeclareLocks(self, level):
4850 if level == locking.LEVEL_NODE:
4851 self._LockInstancesNodes()
4853 def CheckPrereq(self):
4854 """Check prerequisites.
4856 This checks that the instance is in the cluster.
4859 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4860 assert self.instance is not None, \
4861 "Cannot retrieve locked instance %s" % self.op.instance_name
4862 _CheckNodeOnline(self, self.instance.primary_node)
4864 def Exec(self, feedback_fn):
4865 """Activate the disks.
4868 disks_ok, disks_info = \
4869 _AssembleInstanceDisks(self, self.instance,
4870 ignore_size=self.op.ignore_size)
4872 raise errors.OpExecError("Cannot activate block devices")
4877 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4879 """Prepare the block devices for an instance.
4881 This sets up the block devices on all nodes.
4883 @type lu: L{LogicalUnit}
4884 @param lu: the logical unit on whose behalf we execute
4885 @type instance: L{objects.Instance}
4886 @param instance: the instance for whose disks we assemble
4887 @type disks: list of L{objects.Disk} or None
4888 @param disks: which disks to assemble (or all, if None)
4889 @type ignore_secondaries: boolean
4890 @param ignore_secondaries: if true, errors on secondary nodes
4891 won't result in an error return from the function
4892 @type ignore_size: boolean
4893 @param ignore_size: if true, the current known size of the disk
4894 will not be used during the disk activation, useful for cases
4895 when the size is wrong
4896 @return: a (disks_ok, device_info) tuple; disks_ok is False if the
4897 operation failed, otherwise device_info is a list of
4898 (host, instance_visible_name, node_visible_name) tuples with the mapping from node devices to instance devices
4903 iname = instance.name
4904 disks = _ExpandCheckDisks(instance, disks)
4906 # With the two-pass mechanism we try to reduce the window of
4907 # opportunity for the race condition of switching DRBD to primary
4908 # before handshaking occurred, but we do not eliminate it
4910 # The proper fix would be to wait (with some limits) until the
4911 # connection has been made and drbd transitions from WFConnection
4912 # into any other network-connected state (Connected, SyncTarget,
4915 # 1st pass, assemble on all nodes in secondary mode
4916 for idx, inst_disk in enumerate(disks):
4917 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4919 node_disk = node_disk.Copy()
4920 node_disk.UnsetSize()
4921 lu.cfg.SetDiskID(node_disk, node)
4922 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
4923 msg = result.fail_msg
4925 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4926 " (is_primary=False, pass=1): %s",
4927 inst_disk.iv_name, node, msg)
4928 if not ignore_secondaries:
4931 # FIXME: race condition on drbd migration to primary
4933 # 2nd pass, do only the primary node
4934 for idx, inst_disk in enumerate(disks):
4937 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4938 if node != instance.primary_node:
4941 node_disk = node_disk.Copy()
4942 node_disk.UnsetSize()
4943 lu.cfg.SetDiskID(node_disk, node)
4944 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
4945 msg = result.fail_msg
4947 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4948 " (is_primary=True, pass=2): %s",
4949 inst_disk.iv_name, node, msg)
4952 dev_path = result.payload
4954 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4956 # leave the disks configured for the primary node
4957 # this is a workaround that would be fixed better by
4958 # improving the logical/physical id handling
4960 lu.cfg.SetDiskID(disk, instance.primary_node)
4962 return disks_ok, device_info
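# Example of a successful return value (hypothetical names and device): a
# one-disk DRBD instance would yield roughly
#   (True, [("node1.example.com", "disk/0", "/dev/drbd0")])
# i.e. disks_ok plus one (node, iv_name, device path) entry per primary disk.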
4965 def _StartInstanceDisks(lu, instance, force):
4966 """Start the disks of an instance.
4969 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4970 ignore_secondaries=force)
4972 _ShutdownInstanceDisks(lu, instance)
4973 if force is not None and not force:
4974 lu.proc.LogWarning("", hint="If the message above refers to a"
4976 " you can retry the operation using '--force'.")
4977 raise errors.OpExecError("Disk consistency error")
4980 class LUInstanceDeactivateDisks(NoHooksLU):
4981 """Shutdown an instance's disks.
4986 def ExpandNames(self):
4987 self._ExpandAndLockInstance()
4988 self.needed_locks[locking.LEVEL_NODE] = []
4989 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4991 def DeclareLocks(self, level):
4992 if level == locking.LEVEL_NODE:
4993 self._LockInstancesNodes()
4995 def CheckPrereq(self):
4996 """Check prerequisites.
4998 This checks that the instance is in the cluster.
5001 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5002 assert self.instance is not None, \
5003 "Cannot retrieve locked instance %s" % self.op.instance_name
5005 def Exec(self, feedback_fn):
5006 """Deactivate the disks
5009 instance = self.instance
5011 _ShutdownInstanceDisks(self, instance)
5013 _SafeShutdownInstanceDisks(self, instance)
5016 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5017 """Shutdown block devices of an instance.
5019 This function checks that an instance is not running before calling
5020 _ShutdownInstanceDisks.
5023 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5024 _ShutdownInstanceDisks(lu, instance, disks=disks)
5027 def _ExpandCheckDisks(instance, disks):
5028 """Return the instance disks selected by the disks list
5030 @type disks: list of L{objects.Disk} or None
5031 @param disks: selected disks
5032 @rtype: list of L{objects.Disk}
5033 @return: selected instance disks to act on
5037 return instance.disks
5039 if not set(disks).issubset(instance.disks):
5040 raise errors.ProgrammerError("Can only act on disks belonging to the"
5045 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5046 """Shutdown block devices of an instance.
5048 This does the shutdown on all nodes of the instance.
5050 If ignore_primary is false, errors on the primary node are reported as failures; otherwise they are ignored.
5055 disks = _ExpandCheckDisks(instance, disks)
5058 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5059 lu.cfg.SetDiskID(top_disk, node)
5060 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5061 msg = result.fail_msg
5063 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5064 disk.iv_name, node, msg)
5065 if ((node == instance.primary_node and not ignore_primary) or
5066 (node != instance.primary_node and not result.offline)):
5071 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5072 """Checks if a node has enough free memory.
5074 This function checks if a given node has the needed amount of free
5075 memory. In case the node has less memory or we cannot get the
5076 information from the node, this function raises an OpPrereqError exception.
5079 @type lu: C{LogicalUnit}
5080 @param lu: a logical unit from which we get configuration data
5082 @param node: the node to check
5083 @type reason: C{str}
5084 @param reason: string to use in the error message
5085 @type requested: C{int}
5086 @param requested: the amount of memory in MiB to check for
5087 @type hypervisor_name: C{str}
5088 @param hypervisor_name: the hypervisor to ask for memory stats
5089 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5090 we cannot check the node
5093 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5094 nodeinfo[node].Raise("Can't get data from node %s" % node,
5095 prereq=True, ecode=errors.ECODE_ENVIRON)
5096 free_mem = nodeinfo[node].payload.get('memory_free', None)
5097 if not isinstance(free_mem, int):
5098 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5099 " was '%s'" % (node, free_mem),
5100 errors.ECODE_ENVIRON)
5101 if requested > free_mem:
5102 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5103 " needed %s MiB, available %s MiB" %
5104 (node, reason, requested, free_mem),
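# Typical call, taken from LUInstanceStartup.CheckPrereq further below:
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)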
5108 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5109 """Checks if nodes have enough free disk space in the all VGs.
5111 This function check if all given nodes have the needed amount of
5112 free disk. In case any node has less disk or we cannot get the
5113 information from the node, this function raise an OpPrereqError
5116 @type lu: C{LogicalUnit}
5117 @param lu: a logical unit from which we get configuration data
5118 @type nodenames: C{list}
5119 @param nodenames: the list of node names to check
5120 @type req_sizes: C{dict}
5121 @param req_sizes: the hash of vg and corresponding amount of disk in MiB to check for
5123 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5124 or we cannot check the node
5127 for vg, req_size in req_sizes.items():
5128 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
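# Example (hypothetical values): a request spanning two volume groups would
# be expressed as req_sizes = {"xenvg": 10240, "datavg": 2048}, i.e. MiB per
# VG, and each entry is checked independently by _CheckNodesFreeDiskOnVG.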
5131 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5132 """Checks if nodes have enough free disk space in the specified VG.
5134 This function checks if all given nodes have the needed amount of
5135 free disk. In case any node has less disk or we cannot get the
5136 information from the node, this function raises an OpPrereqError exception.
5139 @type lu: C{LogicalUnit}
5140 @param lu: a logical unit from which we get configuration data
5141 @type nodenames: C{list}
5142 @param nodenames: the list of node names to check
5144 @param vg: the volume group to check
5145 @type requested: C{int}
5146 @param requested: the amount of disk in MiB to check for
5147 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5148 or we cannot check the node
5151 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5152 for node in nodenames:
5153 info = nodeinfo[node]
5154 info.Raise("Cannot get current information from node %s" % node,
5155 prereq=True, ecode=errors.ECODE_ENVIRON)
5156 vg_free = info.payload.get("vg_free", None)
5157 if not isinstance(vg_free, int):
5158 raise errors.OpPrereqError("Can't compute free disk space on node"
5159 " %s for vg %s, result was '%s'" %
5160 (node, vg, vg_free), errors.ECODE_ENVIRON)
5161 if requested > vg_free:
5162 raise errors.OpPrereqError("Not enough disk space on target node %s"
5163 " vg %s: required %d MiB, available %d MiB" %
5164 (node, vg, requested, vg_free),
5168 class LUInstanceStartup(LogicalUnit):
5169 """Starts an instance.
5172 HPATH = "instance-start"
5173 HTYPE = constants.HTYPE_INSTANCE
5176 def CheckArguments(self):
5178 if self.op.beparams:
5179 # fill the beparams dict
5180 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5182 def ExpandNames(self):
5183 self._ExpandAndLockInstance()
5185 def BuildHooksEnv(self):
5188 This runs on master, primary and secondary nodes of the instance.
5192 "FORCE": self.op.force,
5195 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5199 def BuildHooksNodes(self):
5200 """Build hooks nodes.
5203 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5206 def CheckPrereq(self):
5207 """Check prerequisites.
5209 This checks that the instance is in the cluster.
5212 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5213 assert self.instance is not None, \
5214 "Cannot retrieve locked instance %s" % self.op.instance_name
5217 if self.op.hvparams:
5218 # check hypervisor parameter syntax (locally)
5219 cluster = self.cfg.GetClusterInfo()
5220 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5221 filled_hvp = cluster.FillHV(instance)
5222 filled_hvp.update(self.op.hvparams)
5223 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5224 hv_type.CheckParameterSyntax(filled_hvp)
5225 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5227 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5229 if self.primary_offline and self.op.ignore_offline_nodes:
5230 self.proc.LogWarning("Ignoring offline primary node")
5232 if self.op.hvparams or self.op.beparams:
5233 self.proc.LogWarning("Overridden parameters are ignored")
5235 _CheckNodeOnline(self, instance.primary_node)
5237 bep = self.cfg.GetClusterInfo().FillBE(instance)
5239 # check bridges existence
5240 _CheckInstanceBridgesExist(self, instance)
5242 remote_info = self.rpc.call_instance_info(instance.primary_node,
5244 instance.hypervisor)
5245 remote_info.Raise("Error checking node %s" % instance.primary_node,
5246 prereq=True, ecode=errors.ECODE_ENVIRON)
5247 if not remote_info.payload: # not running already
5248 _CheckNodeFreeMemory(self, instance.primary_node,
5249 "starting instance %s" % instance.name,
5250 bep[constants.BE_MEMORY], instance.hypervisor)
5252 def Exec(self, feedback_fn):
5253 """Start the instance.
5256 instance = self.instance
5257 force = self.op.force
5259 self.cfg.MarkInstanceUp(instance.name)
5261 if self.primary_offline:
5262 assert self.op.ignore_offline_nodes
5263 self.proc.LogInfo("Primary node offline, marked instance as started")
5265 node_current = instance.primary_node
5267 _StartInstanceDisks(self, instance, force)
5269 result = self.rpc.call_instance_start(node_current, instance,
5270 self.op.hvparams, self.op.beparams)
5271 msg = result.fail_msg
5273 _ShutdownInstanceDisks(self, instance)
5274 raise errors.OpExecError("Could not start instance: %s" % msg)
5277 class LUInstanceReboot(LogicalUnit):
5278 """Reboot an instance.
5281 HPATH = "instance-reboot"
5282 HTYPE = constants.HTYPE_INSTANCE
5285 def ExpandNames(self):
5286 self._ExpandAndLockInstance()
5288 def BuildHooksEnv(self):
5291 This runs on master, primary and secondary nodes of the instance.
5295 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5296 "REBOOT_TYPE": self.op.reboot_type,
5297 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5300 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5304 def BuildHooksNodes(self):
5305 """Build hooks nodes.
5308 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5311 def CheckPrereq(self):
5312 """Check prerequisites.
5314 This checks that the instance is in the cluster.
5317 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5318 assert self.instance is not None, \
5319 "Cannot retrieve locked instance %s" % self.op.instance_name
5321 _CheckNodeOnline(self, instance.primary_node)
5323 # check bridges existence
5324 _CheckInstanceBridgesExist(self, instance)
5326 def Exec(self, feedback_fn):
5327 """Reboot the instance.
5330 instance = self.instance
5331 ignore_secondaries = self.op.ignore_secondaries
5332 reboot_type = self.op.reboot_type
5334 remote_info = self.rpc.call_instance_info(instance.primary_node,
5336 instance.hypervisor)
5337 remote_info.Raise("Error checking node %s" % instance.primary_node)
5338 instance_running = bool(remote_info.payload)
5340 node_current = instance.primary_node
5342 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5343 constants.INSTANCE_REBOOT_HARD]:
5344 for disk in instance.disks:
5345 self.cfg.SetDiskID(disk, node_current)
5346 result = self.rpc.call_instance_reboot(node_current, instance,
5348 self.op.shutdown_timeout)
5349 result.Raise("Could not reboot instance")
5351 if instance_running:
5352 result = self.rpc.call_instance_shutdown(node_current, instance,
5353 self.op.shutdown_timeout)
5354 result.Raise("Could not shutdown instance for full reboot")
5355 _ShutdownInstanceDisks(self, instance)
5357 self.LogInfo("Instance %s was already stopped, starting now",
5359 _StartInstanceDisks(self, instance, ignore_secondaries)
5360 result = self.rpc.call_instance_start(node_current, instance, None, None)
5361 msg = result.fail_msg
5363 _ShutdownInstanceDisks(self, instance)
5364 raise errors.OpExecError("Could not start instance for"
5365 " full reboot: %s" % msg)
5367 self.cfg.MarkInstanceUp(instance.name)
5370 class LUInstanceShutdown(LogicalUnit):
5371 """Shutdown an instance.
5374 HPATH = "instance-stop"
5375 HTYPE = constants.HTYPE_INSTANCE
5378 def ExpandNames(self):
5379 self._ExpandAndLockInstance()
5381 def BuildHooksEnv(self):
5384 This runs on master, primary and secondary nodes of the instance.
5387 env = _BuildInstanceHookEnvByObject(self, self.instance)
5388 env["TIMEOUT"] = self.op.timeout
5391 def BuildHooksNodes(self):
5392 """Build hooks nodes.
5395 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5398 def CheckPrereq(self):
5399 """Check prerequisites.
5401 This checks that the instance is in the cluster.
5404 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5405 assert self.instance is not None, \
5406 "Cannot retrieve locked instance %s" % self.op.instance_name
5408 self.primary_offline = \
5409 self.cfg.GetNodeInfo(self.instance.primary_node).offline
5411 if self.primary_offline and self.op.ignore_offline_nodes:
5412 self.proc.LogWarning("Ignoring offline primary node")
5414 _CheckNodeOnline(self, self.instance.primary_node)
5416 def Exec(self, feedback_fn):
5417 """Shutdown the instance.
5420 instance = self.instance
5421 node_current = instance.primary_node
5422 timeout = self.op.timeout
5424 self.cfg.MarkInstanceDown(instance.name)
5426 if self.primary_offline:
5427 assert self.op.ignore_offline_nodes
5428 self.proc.LogInfo("Primary node offline, marked instance as stopped")
5430 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5431 msg = result.fail_msg
5433 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5435 _ShutdownInstanceDisks(self, instance)
5438 class LUInstanceReinstall(LogicalUnit):
5439 """Reinstall an instance.
5442 HPATH = "instance-reinstall"
5443 HTYPE = constants.HTYPE_INSTANCE
5446 def ExpandNames(self):
5447 self._ExpandAndLockInstance()
5449 def BuildHooksEnv(self):
5452 This runs on master, primary and secondary nodes of the instance.
5455 return _BuildInstanceHookEnvByObject(self, self.instance)
5457 def BuildHooksNodes(self):
5458 """Build hooks nodes.
5461 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5464 def CheckPrereq(self):
5465 """Check prerequisites.
5467 This checks that the instance is in the cluster and is not running.
5470 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5471 assert instance is not None, \
5472 "Cannot retrieve locked instance %s" % self.op.instance_name
5473 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5474 " offline, cannot reinstall")
5475 for node in instance.secondary_nodes:
5476 _CheckNodeOnline(self, node, "Instance secondary node offline,"
5477 " cannot reinstall")
5479 if instance.disk_template == constants.DT_DISKLESS:
5480 raise errors.OpPrereqError("Instance '%s' has no disks" %
5481 self.op.instance_name,
5483 _CheckInstanceDown(self, instance, "cannot reinstall")
5485 if self.op.os_type is not None:
5487 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5488 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5489 instance_os = self.op.os_type
5491 instance_os = instance.os
5493 nodelist = list(instance.all_nodes)
5495 if self.op.osparams:
5496 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5497 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5498 self.os_inst = i_osdict # the new dict (without defaults)
5502 self.instance = instance
5504 def Exec(self, feedback_fn):
5505 """Reinstall the instance.
5508 inst = self.instance
5510 if self.op.os_type is not None:
5511 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5512 inst.os = self.op.os_type
5513 # Write to configuration
5514 self.cfg.Update(inst, feedback_fn)
5516 _StartInstanceDisks(self, inst, None)
5518 feedback_fn("Running the instance OS create scripts...")
5519 # FIXME: pass debug option from opcode to backend
5520 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5521 self.op.debug_level,
5522 osparams=self.os_inst)
5523 result.Raise("Could not install OS for instance %s on node %s" %
5524 (inst.name, inst.primary_node))
5526 _ShutdownInstanceDisks(self, inst)
5529 class LUInstanceRecreateDisks(LogicalUnit):
5530 """Recreate an instance's missing disks.
5533 HPATH = "instance-recreate-disks"
5534 HTYPE = constants.HTYPE_INSTANCE
5537 def ExpandNames(self):
5538 self._ExpandAndLockInstance()
5540 def BuildHooksEnv(self):
5543 This runs on master, primary and secondary nodes of the instance.
5546 return _BuildInstanceHookEnvByObject(self, self.instance)
5548 def BuildHooksNodes(self):
5549 """Build hooks nodes.
5552 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5555 def CheckPrereq(self):
5556 """Check prerequisites.
5558 This checks that the instance is in the cluster and is not running.
5561 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5562 assert instance is not None, \
5563 "Cannot retrieve locked instance %s" % self.op.instance_name
5564 _CheckNodeOnline(self, instance.primary_node)
5566 if instance.disk_template == constants.DT_DISKLESS:
5567 raise errors.OpPrereqError("Instance '%s' has no disks" %
5568 self.op.instance_name, errors.ECODE_INVAL)
5569 _CheckInstanceDown(self, instance, "cannot recreate disks")
5571 if not self.op.disks:
5572 self.op.disks = range(len(instance.disks))
5574 for idx in self.op.disks:
5575 if idx >= len(instance.disks):
5576 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5579 self.instance = instance
5581 def Exec(self, feedback_fn):
5582 """Recreate the disks.
5586 for idx, _ in enumerate(self.instance.disks):
5587 if idx not in self.op.disks: # disk idx has not been passed in
5591 _CreateDisks(self, self.instance, to_skip=to_skip)
5594 class LUInstanceRename(LogicalUnit):
5595 """Rename an instance.
5598 HPATH = "instance-rename"
5599 HTYPE = constants.HTYPE_INSTANCE
5601 def CheckArguments(self):
5605 if self.op.ip_check and not self.op.name_check:
5606 # TODO: make the ip check more flexible and not depend on the name check
5607 raise errors.OpPrereqError("Cannot do ip check without a name check",
5610 def BuildHooksEnv(self):
5613 This runs on master, primary and secondary nodes of the instance.
5616 env = _BuildInstanceHookEnvByObject(self, self.instance)
5617 env["INSTANCE_NEW_NAME"] = self.op.new_name
5620 def BuildHooksNodes(self):
5621 """Build hooks nodes.
5624 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5627 def CheckPrereq(self):
5628 """Check prerequisites.
5630 This checks that the instance is in the cluster and is not running.
5633 self.op.instance_name = _ExpandInstanceName(self.cfg,
5634 self.op.instance_name)
5635 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5636 assert instance is not None
5637 _CheckNodeOnline(self, instance.primary_node)
5638 _CheckInstanceDown(self, instance, "cannot rename")
5639 self.instance = instance
5641 new_name = self.op.new_name
5642 if self.op.name_check:
5643 hostname = netutils.GetHostname(name=new_name)
5644 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5646 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
5647 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
5648 " same as given hostname '%s'") %
5649 (hostname.name, self.op.new_name),
5651 new_name = self.op.new_name = hostname.name
5652 if (self.op.ip_check and
5653 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5654 raise errors.OpPrereqError("IP %s of instance %s already in use" %
5655 (hostname.ip, new_name),
5656 errors.ECODE_NOTUNIQUE)
5658 instance_list = self.cfg.GetInstanceList()
5659 if new_name in instance_list and new_name != instance.name:
5660 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5661 new_name, errors.ECODE_EXISTS)
5663 def Exec(self, feedback_fn):
5664 """Rename the instance.
5667 inst = self.instance
5668 old_name = inst.name
5670 rename_file_storage = False
5671 if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
5672 self.op.new_name != inst.name):
5673 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5674 rename_file_storage = True
5676 self.cfg.RenameInstance(inst.name, self.op.new_name)
5677 # Change the instance lock. This is definitely safe while we hold the BGL
5678 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5679 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5681 # re-read the instance from the configuration after rename
5682 inst = self.cfg.GetInstanceInfo(self.op.new_name)
5684 if rename_file_storage:
5685 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5686 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5687 old_file_storage_dir,
5688 new_file_storage_dir)
5689 result.Raise("Could not rename on node %s directory '%s' to '%s'"
5690 " (but the instance has been renamed in Ganeti)" %
5691 (inst.primary_node, old_file_storage_dir,
5692 new_file_storage_dir))
5694 _StartInstanceDisks(self, inst, None)
5695 try:
5696 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5697 old_name, self.op.debug_level)
5698 msg = result.fail_msg
5699 if msg:
5700 msg = ("Could not run OS rename script for instance %s on node %s"
5701 " (but the instance has been renamed in Ganeti): %s" %
5702 (inst.name, inst.primary_node, msg))
5703 self.proc.LogWarning(msg)
5704 finally:
5705 _ShutdownInstanceDisks(self, inst)
5707 return inst.name
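# A minimal usage sketch (hypothetical, for illustration only): submitting
# the rename handled by LUInstanceRename above through the in-tree luxi
# client. The host names are made up.
def _ExampleSubmitRename():
  """Sketch: submit an instance-rename job, assuming a running master."""
  from ganeti import luxi # deferred import; illustration only

  op = opcodes.OpInstanceRename(instance_name="old.example.com",
                                new_name="new.example.com",
                                name_check=True, ip_check=False)
  # SubmitJob returns a job id; the job's Exec result is the new name
  return luxi.Client().SubmitJob([op])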
5710 class LUInstanceRemove(LogicalUnit):
5711 """Remove an instance.
5714 HPATH = "instance-remove"
5715 HTYPE = constants.HTYPE_INSTANCE
5718 def ExpandNames(self):
5719 self._ExpandAndLockInstance()
5720 self.needed_locks[locking.LEVEL_NODE] = []
5721 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5723 def DeclareLocks(self, level):
5724 if level == locking.LEVEL_NODE:
5725 self._LockInstancesNodes()
5727 def BuildHooksEnv(self):
5730 This runs on master, primary and secondary nodes of the instance.
5733 env = _BuildInstanceHookEnvByObject(self, self.instance)
5734 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5737 def BuildHooksNodes(self):
5738 """Build hooks nodes.
5741 nl = [self.cfg.GetMasterNode()]
5742 nl_post = list(self.instance.all_nodes) + nl
5743 return (nl, nl_post)
5745 def CheckPrereq(self):
5746 """Check prerequisites.
5748 This checks that the instance is in the cluster.
5751 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5752 assert self.instance is not None, \
5753 "Cannot retrieve locked instance %s" % self.op.instance_name
5755 def Exec(self, feedback_fn):
5756 """Remove the instance.
5759 instance = self.instance
5760 logging.info("Shutting down instance %s on node %s",
5761 instance.name, instance.primary_node)
5763 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5764 self.op.shutdown_timeout)
5765 msg = result.fail_msg
5766 if msg:
5767 if self.op.ignore_failures:
5768 feedback_fn("Warning: can't shutdown instance: %s" % msg)
5769 else:
5770 raise errors.OpExecError("Could not shutdown instance %s on"
5771 " node %s: %s" %
5772 (instance.name, instance.primary_node, msg))
5774 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5777 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5778 """Utility function to remove an instance.
5781 logging.info("Removing block devices for instance %s", instance.name)
5783 if not _RemoveDisks(lu, instance):
5784 if not ignore_failures:
5785 raise errors.OpExecError("Can't remove instance's disks")
5786 feedback_fn("Warning: can't remove instance's disks")
5788 logging.info("Removing instance %s out of cluster config", instance.name)
5790 lu.cfg.RemoveInstance(instance.name)
5792 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5793 "Instance lock removal conflict"
5795 # Remove lock for the instance
5796 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5799 class LUInstanceQuery(NoHooksLU):
5800 """Logical unit for querying instances.
5803 # pylint: disable-msg=W0142
5806 def CheckArguments(self):
5807 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
5808 self.op.output_fields, self.op.use_locking)
5810 def ExpandNames(self):
5811 self.iq.ExpandNames(self)
5813 def DeclareLocks(self, level):
5814 self.iq.DeclareLocks(self, level)
5816 def Exec(self, feedback_fn):
5817 return self.iq.OldStyleQuery(self)
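# Worked example (hypothetical, for illustration only): the name filter
# built in CheckArguments above is a qlang expression; for two made-up
# instance names it should have the following shape.
def _ExampleNameFilter():
  """Sketch: the qlang filter used by LUInstanceQuery."""
  qfilter = qlang.MakeSimpleFilter("name", ["inst1", "inst2"])
  assert qfilter == [qlang.OP_OR,
                     [qlang.OP_EQUAL, "name", "inst1"],
                     [qlang.OP_EQUAL, "name", "inst2"]]
  return qfilter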
5820 class LUInstanceFailover(LogicalUnit):
5821 """Failover an instance.
5824 HPATH = "instance-failover"
5825 HTYPE = constants.HTYPE_INSTANCE
5828 def CheckArguments(self):
5829 """Check the arguments.
5832 self.iallocator = getattr(self.op, "iallocator", None)
5833 self.target_node = getattr(self.op, "target_node", None)
5835 def ExpandNames(self):
5836 self._ExpandAndLockInstance()
5838 if self.op.target_node is not None:
5839 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5841 self.needed_locks[locking.LEVEL_NODE] = []
5842 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5844 def DeclareLocks(self, level):
5845 if level == locking.LEVEL_NODE:
5846 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
5847 if instance.disk_template in constants.DTS_EXT_MIRROR:
5848 if self.op.target_node is None:
5849 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5850 else:
5851 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
5852 self.op.target_node]
5853 del self.recalculate_locks[locking.LEVEL_NODE]
5854 else:
5855 self._LockInstancesNodes()
5857 def BuildHooksEnv(self):
5860 This runs on master, primary and secondary nodes of the instance.
5863 instance = self.instance
5864 source_node = instance.primary_node
5865 env = {
5866 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5867 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5868 "OLD_PRIMARY": source_node,
5869 "NEW_PRIMARY": self.op.target_node,
5870 }
5872 if instance.disk_template in constants.DTS_INT_MIRROR:
5873 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
5874 env["NEW_SECONDARY"] = source_node
5875 else:
5876 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
5878 env.update(_BuildInstanceHookEnvByObject(self, instance))
5880 return env
5882 def BuildHooksNodes(self):
5883 """Build hooks nodes.
5886 nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
5887 return (nl, nl + [self.instance.primary_node])
5889 def CheckPrereq(self):
5890 """Check prerequisites.
5892 This checks that the instance is in the cluster.
5895 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5896 assert self.instance is not None, \
5897 "Cannot retrieve locked instance %s" % self.op.instance_name
5899 bep = self.cfg.GetClusterInfo().FillBE(instance)
5900 if instance.disk_template not in constants.DTS_MIRRORED:
5901 raise errors.OpPrereqError("Instance's disk layout is not"
5902 " mirrored, cannot failover.",
5905 if instance.disk_template in constants.DTS_EXT_MIRROR:
5906 _CheckIAllocatorOrNode(self, "iallocator", "target_node")
5907 if self.op.iallocator:
5908 self._RunAllocator()
5909 # Release all unnecessary node locks
5910 nodes_keep = [instance.primary_node, self.op.target_node]
5911 nodes_rel = [node for node in self.acquired_locks[locking.LEVEL_NODE]
5912 if node not in nodes_keep]
5913 self.context.glm.release(locking.LEVEL_NODE, nodes_rel)
5914 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
5916 # self.op.target_node is already populated, either directly or by the
5917 # iallocator run
5918 target_node = self.op.target_node
5920 else:
5921 secondary_nodes = instance.secondary_nodes
5922 if not secondary_nodes:
5923 raise errors.ConfigurationError("No secondary node but using"
5924 " %s disk template" %
5925 instance.disk_template)
5926 target_node = secondary_nodes[0]
5928 if self.op.iallocator or (self.op.target_node and
5929 self.op.target_node != target_node):
5930 raise errors.OpPrereqError("Instances with disk template %s cannot"
5931 " be failed over to arbitrary nodes"
5932 " (neither an iallocator nor a target"
5933 " node can be passed)" %
5934 instance.disk_template, errors.ECODE_INVAL)
5935 _CheckNodeOnline(self, target_node)
5936 _CheckNodeNotDrained(self, target_node)
5938 # Save target_node so that we can use it in BuildHooksEnv
5939 self.op.target_node = target_node
5941 if instance.admin_up:
5942 # check memory requirements on the secondary node
5943 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5944 instance.name, bep[constants.BE_MEMORY],
5945 instance.hypervisor)
5946 else:
5947 self.LogInfo("Not checking memory on the secondary node as"
5948 " instance will not be started")
5950 # check bridge existence
5951 _CheckInstanceBridgesExist(self, instance, node=target_node)
5953 def Exec(self, feedback_fn):
5954 """Failover an instance.
5956 The failover is done by shutting it down on its present node and
5957 starting it on the secondary.
5960 instance = self.instance
5961 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5963 source_node = instance.primary_node
5964 target_node = self.op.target_node
5966 if instance.admin_up:
5967 feedback_fn("* checking disk consistency between source and target")
5968 for dev in instance.disks:
5969 # for drbd, these are drbd over lvm
5970 if not _CheckDiskConsistency(self, dev, target_node, False):
5971 if not self.op.ignore_consistency:
5972 raise errors.OpExecError("Disk %s is degraded on target node,"
5973 " aborting failover." % dev.iv_name)
5975 feedback_fn("* not checking disk consistency as instance is not running")
5977 feedback_fn("* shutting down instance on source node")
5978 logging.info("Shutting down instance %s on node %s",
5979 instance.name, source_node)
5981 result = self.rpc.call_instance_shutdown(source_node, instance,
5982 self.op.shutdown_timeout)
5983 msg = result.fail_msg
5984 if msg:
5985 if self.op.ignore_consistency or primary_node.offline:
5986 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5987 " Proceeding anyway. Please make sure node"
5988 " %s is down. Error details: %s",
5989 instance.name, source_node, source_node, msg)
5990 else:
5991 raise errors.OpExecError("Could not shutdown instance %s on"
5992 " node %s: %s" %
5993 (instance.name, source_node, msg))
5995 feedback_fn("* deactivating the instance's disks on source node")
5996 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5997 raise errors.OpExecError("Can't shut down the instance's disks.")
5999 instance.primary_node = target_node
6000 # distribute new instance config to the other nodes
6001 self.cfg.Update(instance, feedback_fn)
6003 # Only start the instance if it's marked as up
6004 if instance.admin_up:
6005 feedback_fn("* activating the instance's disks on target node")
6006 logging.info("Starting instance %s on node %s",
6007 instance.name, target_node)
6009 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6010 ignore_secondaries=True)
6011 if not disks_ok:
6012 _ShutdownInstanceDisks(self, instance)
6013 raise errors.OpExecError("Can't activate the instance's disks")
6015 feedback_fn("* starting the instance on the target node")
6016 result = self.rpc.call_instance_start(target_node, instance, None, None)
6017 msg = result.fail_msg
6018 if msg:
6019 _ShutdownInstanceDisks(self, instance)
6020 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6021 (instance.name, target_node, msg))
6023 def _RunAllocator(self):
6024 """Run the allocator based on input opcode.
6027 ial = IAllocator(self.cfg, self.rpc,
6028 mode=constants.IALLOCATOR_MODE_RELOC,
6029 name=self.instance.name,
6030 # TODO See why hail breaks with a single node below
6031 relocate_from=[self.instance.primary_node,
6032 self.instance.primary_node],
6033 )
6035 ial.Run(self.op.iallocator)
6037 if not ial.success:
6038 raise errors.OpPrereqError("Can't compute nodes using"
6039 " iallocator '%s': %s" %
6040 (self.op.iallocator, ial.info),
6041 errors.ECODE_NORES)
6042 if len(ial.result) != ial.required_nodes:
6043 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6044 " of nodes (%s), required %s" %
6045 (self.op.iallocator, len(ial.result),
6046 ial.required_nodes), errors.ECODE_FAULT)
6047 self.op.target_node = ial.result[0]
6048 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6049 self.instance.name, self.op.iallocator,
6050 utils.CommaJoin(ial.result))
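# A minimal usage sketch (hypothetical, for illustration only): a DRBD
# instance fails over to its secondary without a target node, while
# shared-storage instances need target_node or an iallocator, per the
# CheckPrereq logic above. The instance name is made up.
def _ExampleSubmitFailover():
  """Sketch: submit an instance-failover job."""
  from ganeti import luxi # deferred import; illustration only

  op = opcodes.OpInstanceFailover(instance_name="inst1.example.com",
                                  ignore_consistency=False)
  return luxi.Client().SubmitJob([op])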
6053 class LUInstanceMigrate(LogicalUnit):
6054 """Migrate an instance.
6056 This is migration without shutting down, compared to the failover,
6057 which is done with shutdown.
6060 HPATH = "instance-migrate"
6061 HTYPE = constants.HTYPE_INSTANCE
6064 def ExpandNames(self):
6065 self._ExpandAndLockInstance()
6067 if self.op.target_node is not None:
6068 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6070 self.needed_locks[locking.LEVEL_NODE] = []
6071 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6073 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6074 self.op.cleanup, self.op.iallocator,
6075 self.op.target_node)
6076 self.tasklets = [self._migrater]
6078 def DeclareLocks(self, level):
6079 if level == locking.LEVEL_NODE:
6080 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6081 if instance.disk_template in constants.DTS_EXT_MIRROR:
6082 if self.op.target_node is None:
6083 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6084 else:
6085 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6086 self.op.target_node]
6087 del self.recalculate_locks[locking.LEVEL_NODE]
6088 else:
6089 self._LockInstancesNodes()
6091 def BuildHooksEnv(self):
6094 This runs on master, primary and secondary nodes of the instance.
6097 instance = self._migrater.instance
6098 source_node = instance.primary_node
6099 target_node = self._migrater.target_node
6100 env = _BuildInstanceHookEnvByObject(self, instance)
6101 env.update({
6102 "MIGRATE_LIVE": self._migrater.live,
6103 "MIGRATE_CLEANUP": self.op.cleanup,
6104 "OLD_PRIMARY": source_node,
6105 "NEW_PRIMARY": target_node,
6106 })
6108 if instance.disk_template in constants.DTS_INT_MIRROR:
6109 env["OLD_SECONDARY"] = target_node
6110 env["NEW_SECONDARY"] = source_node
6111 else:
6112 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6114 return env
6116 def BuildHooksNodes(self):
6117 """Build hooks nodes.
6120 instance = self._migrater.instance
6121 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6122 return (nl, nl + [instance.primary_node])
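# A minimal usage sketch (hypothetical, for illustration only): 'live' and
# 'mode' are mutually exclusive opcode parameters, as enforced by
# TLMigrateInstance below, so only one of them is passed here.
def _ExampleSubmitMigrate():
  """Sketch: submit a live instance-migrate job."""
  from ganeti import luxi # deferred import; illustration only

  op = opcodes.OpInstanceMigrate(instance_name="inst1.example.com",
                                 live=True, cleanup=False)
  return luxi.Client().SubmitJob([op])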
6125 class LUInstanceMove(LogicalUnit):
6126 """Move an instance by data-copying.
6129 HPATH = "instance-move"
6130 HTYPE = constants.HTYPE_INSTANCE
6133 def ExpandNames(self):
6134 self._ExpandAndLockInstance()
6135 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6136 self.op.target_node = target_node
6137 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6138 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6140 def DeclareLocks(self, level):
6141 if level == locking.LEVEL_NODE:
6142 self._LockInstancesNodes(primary_only=True)
6144 def BuildHooksEnv(self):
6147 This runs on master, primary and secondary nodes of the instance.
6151 "TARGET_NODE": self.op.target_node,
6152 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6154 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6157 def BuildHooksNodes(self):
6158 """Build hooks nodes.
6161 nl = [
6162 self.cfg.GetMasterNode(),
6163 self.instance.primary_node,
6164 self.op.target_node,
6165 ]
6166 return (nl, nl)
6168 def CheckPrereq(self):
6169 """Check prerequisites.
6171 This checks that the instance is in the cluster.
6174 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6175 assert self.instance is not None, \
6176 "Cannot retrieve locked instance %s" % self.op.instance_name
6178 node = self.cfg.GetNodeInfo(self.op.target_node)
6179 assert node is not None, \
6180 "Cannot retrieve locked node %s" % self.op.target_node
6182 self.target_node = target_node = node.name
6184 if target_node == instance.primary_node:
6185 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6186 (instance.name, target_node),
6187 errors.ECODE_STATE)
6189 bep = self.cfg.GetClusterInfo().FillBE(instance)
6191 for idx, dsk in enumerate(instance.disks):
6192 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6193 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6194 " cannot copy" % idx, errors.ECODE_STATE)
6196 _CheckNodeOnline(self, target_node)
6197 _CheckNodeNotDrained(self, target_node)
6198 _CheckNodeVmCapable(self, target_node)
6200 if instance.admin_up:
6201 # check memory requirements on the secondary node
6202 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6203 instance.name, bep[constants.BE_MEMORY],
6204 instance.hypervisor)
6205 else:
6206 self.LogInfo("Not checking memory on the secondary node as"
6207 " instance will not be started")
6209 # check bridge existence
6210 _CheckInstanceBridgesExist(self, instance, node=target_node)
6212 def Exec(self, feedback_fn):
6213 """Move an instance.
6215 The move is done by shutting it down on its present node, copying
6216 the data over (slow) and starting it on the new node.
6219 instance = self.instance
6221 source_node = instance.primary_node
6222 target_node = self.target_node
6224 self.LogInfo("Shutting down instance %s on source node %s",
6225 instance.name, source_node)
6227 result = self.rpc.call_instance_shutdown(source_node, instance,
6228 self.op.shutdown_timeout)
6229 msg = result.fail_msg
6230 if msg:
6231 if self.op.ignore_consistency:
6232 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6233 " Proceeding anyway. Please make sure node"
6234 " %s is down. Error details: %s",
6235 instance.name, source_node, source_node, msg)
6236 else:
6237 raise errors.OpExecError("Could not shutdown instance %s on"
6238 " node %s: %s" %
6239 (instance.name, source_node, msg))
6241 # create the target disks
6242 try:
6243 _CreateDisks(self, instance, target_node=target_node)
6244 except errors.OpExecError:
6245 self.LogWarning("Device creation failed, reverting...")
6246 try:
6247 _RemoveDisks(self, instance, target_node=target_node)
6248 finally:
6249 self.cfg.ReleaseDRBDMinors(instance.name)
6250 raise
6252 cluster_name = self.cfg.GetClusterInfo().cluster_name
6254 errs = []
6255 # activate, get path, copy the data over
6256 for idx, disk in enumerate(instance.disks):
6257 self.LogInfo("Copying data for disk %d", idx)
6258 result = self.rpc.call_blockdev_assemble(target_node, disk,
6259 instance.name, True, idx)
6260 if result.fail_msg:
6261 self.LogWarning("Can't assemble newly created disk %d: %s",
6262 idx, result.fail_msg)
6263 errs.append(result.fail_msg)
6264 break
6265 dev_path = result.payload
6266 result = self.rpc.call_blockdev_export(source_node, disk,
6267 target_node, dev_path,
6268 cluster_name)
6269 if result.fail_msg:
6270 self.LogWarning("Can't copy data over for disk %d: %s",
6271 idx, result.fail_msg)
6272 errs.append(result.fail_msg)
6273 break
6276 self.LogWarning("Some disks failed to copy, aborting")
6278 _RemoveDisks(self, instance, target_node=target_node)
6280 self.cfg.ReleaseDRBDMinors(instance.name)
6281 raise errors.OpExecError("Errors during disk copy: %s" %
6284 instance.primary_node = target_node
6285 self.cfg.Update(instance, feedback_fn)
6287 self.LogInfo("Removing the disks on the original node")
6288 _RemoveDisks(self, instance, target_node=source_node)
6290 # Only start the instance if it's marked as up
6291 if instance.admin_up:
6292 self.LogInfo("Starting instance %s on node %s",
6293 instance.name, target_node)
6295 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6296 ignore_secondaries=True)
6297 if not disks_ok:
6298 _ShutdownInstanceDisks(self, instance)
6299 raise errors.OpExecError("Can't activate the instance's disks")
6301 result = self.rpc.call_instance_start(target_node, instance, None, None)
6302 msg = result.fail_msg
6303 if msg:
6304 _ShutdownInstanceDisks(self, instance)
6305 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6306 (instance.name, target_node, msg))
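# A minimal usage sketch (hypothetical, for illustration only): a move
# copies disk data over the network, so unlike failover/migrate it works
# for non-mirrored templates but always needs an explicit target node.
def _ExampleSubmitMove():
  """Sketch: submit an instance-move job."""
  from ganeti import luxi # deferred import; illustration only

  op = opcodes.OpInstanceMove(instance_name="inst1.example.com",
                              target_node="node2.example.com")
  return luxi.Client().SubmitJob([op])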
6309 class LUNodeMigrate(LogicalUnit):
6310 """Migrate all instances from a node.
6313 HPATH = "node-migrate"
6314 HTYPE = constants.HTYPE_NODE
6317 def CheckArguments(self):
6318 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
6320 def ExpandNames(self):
6321 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6323 self.needed_locks = {}
6325 # Create tasklets for migrating instances for all instances on this node
6326 names = []
6327 tasklets = []
6329 self.lock_all_nodes = False
6331 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6332 logging.debug("Migrating instance %s", inst.name)
6333 names.append(inst.name)
6335 tasklets.append(TLMigrateInstance(self, inst.name, False,
6336 self.op.iallocator, None))
6338 if inst.disk_template in constants.DTS_EXT_MIRROR:
6339 # We need to lock all nodes, as the iallocator will choose the
6340 # destination nodes afterwards
6341 self.lock_all_nodes = True
6343 self.tasklets = tasklets
6345 # Declare node locks
6346 if self.lock_all_nodes:
6347 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6348 else:
6349 self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
6350 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6352 # Declare instance locks
6353 self.needed_locks[locking.LEVEL_INSTANCE] = names
6355 def DeclareLocks(self, level):
6356 if level == locking.LEVEL_NODE and not self.lock_all_nodes:
6357 self._LockInstancesNodes()
6359 def BuildHooksEnv(self):
6362 This runs on the master, the primary and all the secondaries.
6366 "NODE_NAME": self.op.node_name,
6369 def BuildHooksNodes(self):
6370 """Build hooks nodes.
6373 nl = [self.cfg.GetMasterNode()]
6374 return (nl, nl)
6377 class TLMigrateInstance(Tasklet):
6378 """Tasklet class for instance migration.
6381 @ivar live: whether the migration will be done live or non-live;
6382 this variable is initialized only after CheckPrereq has run
6385 def __init__(self, lu, instance_name, cleanup,
6386 iallocator=None, target_node=None):
6387 """Initializes this class.
6390 Tasklet.__init__(self, lu)
6393 self.instance_name = instance_name
6394 self.cleanup = cleanup
6395 self.live = False # will be overridden later
6396 self.iallocator = iallocator
6397 self.target_node = target_node
6399 def CheckPrereq(self):
6400 """Check prerequisites.
6402 This checks that the instance is in the cluster.
6405 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6406 instance = self.cfg.GetInstanceInfo(instance_name)
6407 assert instance is not None
6408 self.instance = instance
6410 if instance.disk_template not in constants.DTS_MIRRORED:
6411 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6412 " migrations" % instance.disk_template,
6415 if instance.disk_template in constants.DTS_EXT_MIRROR:
6416 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6419 self._RunAllocator()
6421 # self.target_node is already populated, either directly or by the
6422 # iallocator run
6423 target_node = self.target_node
6425 if len(self.lu.tasklets) == 1:
6426 # It is safe to remove locks only when we're the only tasklet in the LU
6427 nodes_keep = [instance.primary_node, self.target_node]
6428 nodes_rel = [node for node in self.lu.acquired_locks[locking.LEVEL_NODE]
6429 if node not in nodes_keep]
6430 self.lu.context.glm.release(locking.LEVEL_NODE, nodes_rel)
6431 self.lu.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6433 else:
6434 secondary_nodes = instance.secondary_nodes
6435 if not secondary_nodes:
6436 raise errors.ConfigurationError("No secondary node but using"
6437 " %s disk template" %
6438 instance.disk_template)
6439 target_node = secondary_nodes[0]
6440 if self.lu.op.iallocator or (self.lu.op.target_node and
6441 self.lu.op.target_node != target_node):
6442 raise errors.OpPrereqError("Instances with disk template %s cannot"
6443 " be migrated over to arbitrary nodes"
6444 " (neither an iallocator nor a target"
6445 " node can be passed)" %
6446 instance.disk_template, errors.ECODE_INVAL)
6448 i_be = self.cfg.GetClusterInfo().FillBE(instance)
6450 # check memory requirements on the secondary node
6451 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6452 instance.name, i_be[constants.BE_MEMORY],
6453 instance.hypervisor)
6455 # check bridge existance
6456 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6458 if not self.cleanup:
6459 _CheckNodeNotDrained(self.lu, target_node)
6460 result = self.rpc.call_instance_migratable(instance.primary_node,
6461 instance)
6462 result.Raise("Can't migrate, please use failover",
6463 prereq=True, ecode=errors.ECODE_STATE)
6466 def _RunAllocator(self):
6467 """Run the allocator based on input opcode.
6470 ial = IAllocator(self.cfg, self.rpc,
6471 mode=constants.IALLOCATOR_MODE_RELOC,
6472 name=self.instance_name,
6473 # TODO See why hail breaks with a single node below
6474 relocate_from=[self.instance.primary_node,
6475 self.instance.primary_node],
6476 )
6478 ial.Run(self.iallocator)
6480 if not ial.success:
6481 raise errors.OpPrereqError("Can't compute nodes using"
6482 " iallocator '%s': %s" %
6483 (self.iallocator, ial.info),
6484 errors.ECODE_NORES)
6485 if len(ial.result) != ial.required_nodes:
6486 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6487 " of nodes (%s), required %s" %
6488 (self.iallocator, len(ial.result),
6489 ial.required_nodes), errors.ECODE_FAULT)
6490 self.target_node = ial.result[0]
6491 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6492 self.instance_name, self.iallocator,
6493 utils.CommaJoin(ial.result))
6495 if self.lu.op.live is not None and self.lu.op.mode is not None:
6496 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6497 " parameters are accepted",
6499 if self.lu.op.live is not None:
6501 self.lu.op.mode = constants.HT_MIGRATION_LIVE
6503 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6504 # reset the 'live' parameter to None so that repeated
6505 # invocations of CheckPrereq do not raise an exception
6506 self.lu.op.live = None
6507 elif self.lu.op.mode is None:
6508 # read the default value from the hypervisor
6509 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance, skip_globals=False)
6510 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6512 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6514 def _WaitUntilSync(self):
6515 """Poll with custom rpc for disk sync.
6517 This uses our own step-based rpc call.
6520 self.feedback_fn("* wait until resync is done")
6524 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6526 self.instance.disks)
6528 for node, nres in result.items():
6529 nres.Raise("Cannot resync disks on node %s" % node)
6530 node_done, node_percent = nres.payload
6531 all_done = all_done and node_done
6532 if node_percent is not None:
6533 min_percent = min(min_percent, node_percent)
6535 if min_percent < 100:
6536 self.feedback_fn(" - progress: %.1f%%" % min_percent)
6539 def _EnsureSecondary(self, node):
6540 """Demote a node to secondary.
6543 self.feedback_fn("* switching node %s to secondary mode" % node)
6545 for dev in self.instance.disks:
6546 self.cfg.SetDiskID(dev, node)
6548 result = self.rpc.call_blockdev_close(node, self.instance.name,
6549 self.instance.disks)
6550 result.Raise("Cannot change disk to secondary on node %s" % node)
6552 def _GoStandalone(self):
6553 """Disconnect from the network.
6556 self.feedback_fn("* changing into standalone mode")
6557 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6558 self.instance.disks)
6559 for node, nres in result.items():
6560 nres.Raise("Cannot disconnect disks node %s" % node)
6562 def _GoReconnect(self, multimaster):
6563 """Reconnect to the network.
6569 msg = "single-master"
6570 self.feedback_fn("* changing disks into %s mode" % msg)
6571 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6572 self.instance.disks,
6573 self.instance.name, multimaster)
6574 for node, nres in result.items():
6575 nres.Raise("Cannot change disks config on node %s" % node)
6577 def _ExecCleanup(self):
6578 """Try to cleanup after a failed migration.
6580 The cleanup is done by:
6581 - check that the instance is running only on one node
6582 (and update the config if needed)
6583 - change disks on its secondary node to secondary
6584 - wait until disks are fully synchronized
6585 - disconnect from the network
6586 - change disks into single-master mode
6587 - wait again until disks are fully synchronized
6590 instance = self.instance
6591 target_node = self.target_node
6592 source_node = self.source_node
6594 # check running on only one node
6595 self.feedback_fn("* checking where the instance actually runs"
6596 " (if this hangs, the hypervisor might be in"
6598 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6599 for node, result in ins_l.items():
6600 result.Raise("Can't contact node %s" % node)
6602 runningon_source = instance.name in ins_l[source_node].payload
6603 runningon_target = instance.name in ins_l[target_node].payload
6605 if runningon_source and runningon_target:
6606 raise errors.OpExecError("Instance seems to be running on two nodes,"
6607 " or the hypervisor is confused. You will have"
6608 " to ensure manually that it runs only on one"
6609 " and restart this operation.")
6611 if not (runningon_source or runningon_target):
6612 raise errors.OpExecError("Instance does not seem to be running at all."
6613 " In this case, it's safer to repair by"
6614 " running 'gnt-instance stop' to ensure disk"
6615 " shutdown, and then restarting it.")
6617 if runningon_target:
6618 # the migration has actually succeeded, we need to update the config
6619 self.feedback_fn("* instance running on secondary node (%s),"
6620 " updating config" % target_node)
6621 instance.primary_node = target_node
6622 self.cfg.Update(instance, self.feedback_fn)
6623 demoted_node = source_node
6625 self.feedback_fn("* instance confirmed to be running on its"
6626 " primary node (%s)" % source_node)
6627 demoted_node = target_node
6629 if instance.disk_template in constants.DTS_INT_MIRROR:
6630 self._EnsureSecondary(demoted_node)
6631 try:
6632 self._WaitUntilSync()
6633 except errors.OpExecError:
6634 # we ignore errors here, since if the device is standalone, it
6635 # won't be able to sync
6636 pass
6637 self._GoStandalone()
6638 self._GoReconnect(False)
6639 self._WaitUntilSync()
6641 self.feedback_fn("* done")
6643 def _RevertDiskStatus(self):
6644 """Try to revert the disk status after a failed migration.
6647 target_node = self.target_node
6648 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
6649 return
6651 try:
6652 self._EnsureSecondary(target_node)
6653 self._GoStandalone()
6654 self._GoReconnect(False)
6655 self._WaitUntilSync()
6656 except errors.OpExecError, err:
6657 self.lu.LogWarning("Migration failed and I can't reconnect the"
6658 " drives: error '%s'\n"
6659 "Please look and recover the instance status" %
6660 str(err))
6662 def _AbortMigration(self):
6663 """Call the hypervisor code to abort a started migration.
6666 instance = self.instance
6667 target_node = self.target_node
6668 migration_info = self.migration_info
6670 abort_result = self.rpc.call_finalize_migration(target_node,
6671 instance,
6672 migration_info,
6673 False)
6674 abort_msg = abort_result.fail_msg
6675 if abort_msg:
6676 logging.error("Aborting migration failed on target node %s: %s",
6677 target_node, abort_msg)
6678 # Don't raise an exception here, as we still have to try to revert the
6679 # disk status, even if this step failed.
6681 def _ExecMigration(self):
6682 """Migrate an instance.
6684 The migrate is done by:
6685 - change the disks into dual-master mode
6686 - wait until disks are fully synchronized again
6687 - migrate the instance
6688 - change disks on the new secondary node (the old primary) to secondary
6689 - wait until disks are fully synchronized
6690 - change disks into single-master mode
6693 instance = self.instance
6694 target_node = self.target_node
6695 source_node = self.source_node
6697 self.feedback_fn("* checking disk consistency between source and target")
6698 for dev in instance.disks:
6699 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6700 raise errors.OpExecError("Disk %s is degraded or not fully"
6701 " synchronized on target node,"
6702 " aborting migrate." % dev.iv_name)
6704 # First get the migration information from the remote node
6705 result = self.rpc.call_migration_info(source_node, instance)
6706 msg = result.fail_msg
6707 if msg:
6708 log_err = ("Failed fetching source migration information from %s: %s" %
6709 (source_node, msg))
6710 logging.error(log_err)
6711 raise errors.OpExecError(log_err)
6713 self.migration_info = migration_info = result.payload
6715 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6716 # Then switch the disks to master/master mode
6717 self._EnsureSecondary(target_node)
6718 self._GoStandalone()
6719 self._GoReconnect(True)
6720 self._WaitUntilSync()
6722 self.feedback_fn("* preparing %s to accept the instance" % target_node)
6723 result = self.rpc.call_accept_instance(target_node,
6724 instance,
6725 migration_info,
6726 self.nodes_ip[target_node])
6728 msg = result.fail_msg
6729 if msg:
6730 logging.error("Instance pre-migration failed, trying to revert"
6731 " disk status: %s", msg)
6732 self.feedback_fn("Pre-migration failed, aborting")
6733 self._AbortMigration()
6734 self._RevertDiskStatus()
6735 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6736 (instance.name, msg))
6738 self.feedback_fn("* migrating instance to %s" % target_node)
6740 result = self.rpc.call_instance_migrate(source_node, instance,
6741 self.nodes_ip[target_node],
6742 self.live)
6743 msg = result.fail_msg
6744 if msg:
6745 logging.error("Instance migration failed, trying to revert"
6746 " disk status: %s", msg)
6747 self.feedback_fn("Migration failed, aborting")
6748 self._AbortMigration()
6749 self._RevertDiskStatus()
6750 raise errors.OpExecError("Could not migrate instance %s: %s" %
6751 (instance.name, msg))
6754 instance.primary_node = target_node
6755 # distribute new instance config to the other nodes
6756 self.cfg.Update(instance, self.feedback_fn)
6758 result = self.rpc.call_finalize_migration(target_node,
6759 instance,
6760 migration_info,
6761 True)
6762 msg = result.fail_msg
6763 if msg:
6764 logging.error("Instance migration succeeded, but finalization failed:"
6765 " %s", msg)
6766 raise errors.OpExecError("Could not finalize instance migration: %s" %
6767 msg)
6769 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6770 self._EnsureSecondary(source_node)
6771 self._WaitUntilSync()
6772 self._GoStandalone()
6773 self._GoReconnect(False)
6774 self._WaitUntilSync()
6776 self.feedback_fn("* done")
6778 def Exec(self, feedback_fn):
6779 """Perform the migration.
6782 feedback_fn("Migrating instance %s" % self.instance.name)
6784 self.feedback_fn = feedback_fn
6786 self.source_node = self.instance.primary_node
6788 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
6789 if self.instance.disk_template in constants.DTS_INT_MIRROR:
6790 self.target_node = self.instance.secondary_nodes[0]
6791 # Otherwise self.target_node has been populated either
6792 # directly, or through an iallocator.
6794 self.all_nodes = [self.source_node, self.target_node]
6795 self.nodes_ip = {
6796 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6797 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6798 }
6800 if self.cleanup:
6801 return self._ExecCleanup()
6802 else:
6803 return self._ExecMigration()
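# A minimal usage sketch (hypothetical, for illustration only): after an
# interrupted migration, resubmitting the opcode with cleanup=True makes
# Exec above take the _ExecCleanup path instead of _ExecMigration.
def _ExampleSubmitMigrateCleanup():
  """Sketch: clean up after a failed migration."""
  from ganeti import luxi # deferred import; illustration only

  op = opcodes.OpInstanceMigrate(instance_name="inst1.example.com",
                                 cleanup=True)
  return luxi.Client().SubmitJob([op])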
6806 def _CreateBlockDev(lu, node, instance, device, force_create,
6807 info, force_open):
6808 """Create a tree of block devices on a given node.
6810 If this device type has to be created on secondaries, create it and
6811 all its children.
6813 If not, just recurse to children keeping the same 'force' value.
6815 @param lu: the lu on whose behalf we execute
6816 @param node: the node on which to create the device
6817 @type instance: L{objects.Instance}
6818 @param instance: the instance which owns the device
6819 @type device: L{objects.Disk}
6820 @param device: the device to create
6821 @type force_create: boolean
6822 @param force_create: whether to force creation of this device; this
6823 will be changed to True whenever we find a device which has
6824 CreateOnSecondary() attribute
6825 @param info: the extra 'metadata' we should attach to the device
6826 (this will be represented as a LVM tag)
6827 @type force_open: boolean
6828 @param force_open: this parameter will be passed to the
6829 L{backend.BlockdevCreate} function where it specifies
6830 whether we run on primary or not, and it affects both
6831 the child assembly and the device's own Open() execution
6834 if device.CreateOnSecondary():
6835 force_create = True
6837 if device.children:
6838 for child in device.children:
6839 _CreateBlockDev(lu, node, instance, child, force_create,
6840 info, force_open)
6842 if not force_create:
6843 return
6845 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6848 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6849 """Create a single block device on a given node.
6851 This will not recurse over children of the device, so they must be
6852 created in advance.
6854 @param lu: the lu on whose behalf we execute
6855 @param node: the node on which to create the device
6856 @type instance: L{objects.Instance}
6857 @param instance: the instance which owns the device
6858 @type device: L{objects.Disk}
6859 @param device: the device to create
6860 @param info: the extra 'metadata' we should attach to the device
6861 (this will be represented as a LVM tag)
6862 @type force_open: boolean
6863 @param force_open: this parameter will be passed to the
6864 L{backend.BlockdevCreate} function where it specifies
6865 whether we run on primary or not, and it affects both
6866 the child assembly and the device's own Open() execution
6869 lu.cfg.SetDiskID(device, node)
6870 result = lu.rpc.call_blockdev_create(node, device, device.size,
6871 instance.name, force_open, info)
6872 result.Raise("Can't create block device %s on"
6873 " node %s for instance %s" % (device, node, instance.name))
6874 if device.physical_id is None:
6875 device.physical_id = result.payload
6878 def _GenerateUniqueNames(lu, exts):
6879 """Generate a suitable LV name.
6881 This will generate a logical volume name for the given instance.
6884 results = []
6885 for val in exts:
6886 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6887 results.append("%s%s" % (new_id, val))
6888 return results
6891 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6892 p_minor, s_minor):
6893 """Generate a drbd8 device complete with its children.
6896 port = lu.cfg.AllocatePort()
6897 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6898 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6899 logical_id=(vgname, names[0]))
6900 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6901 logical_id=(vgname, names[1]))
6902 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6903 logical_id=(primary, secondary, port,
6904 p_minor, s_minor,
6905 shared_secret),
6906 children=[dev_data, dev_meta],
6907 iv_name=iv_name)
6908 return drbd_dev
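# Shape sketch (hypothetical, for illustration only): the returned DRBD8
# device is a two-level tree with the data and metadata LVs as children.
# Node names, VG, sizes and minors below are made up; a real LU object is
# needed for port/secret allocation.
def _ExampleDrbd8Tree(lu, names):
  """Sketch: inspect the disk tree built by _GenerateDRBD8Branch."""
  disk = _GenerateDRBD8Branch(lu, "node1.example.com", "node2.example.com",
                              1024, "xenvg", names, "disk/0", 0, 1)
  assert disk.dev_type == constants.LD_DRBD8
  assert [c.dev_type for c in disk.children] == 2 * [constants.LD_LV]
  return disk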
6911 def _GenerateDiskTemplate(lu, template_name,
6912 instance_name, primary_node,
6913 secondary_nodes, disk_info,
6914 file_storage_dir, file_driver,
6915 base_index, feedback_fn):
6916 """Generate the entire disk layout for a given template type.
6919 #TODO: compute space requirements
6921 vgname = lu.cfg.GetVGName()
6922 disk_count = len(disk_info)
6923 disks = []
6924 if template_name == constants.DT_DISKLESS:
6925 pass
6926 elif template_name == constants.DT_PLAIN:
6927 if len(secondary_nodes) != 0:
6928 raise errors.ProgrammerError("Wrong template configuration")
6930 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6931 for i in range(disk_count)])
6932 for idx, disk in enumerate(disk_info):
6933 disk_index = idx + base_index
6934 vg = disk.get("vg", vgname)
6935 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6936 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6937 logical_id=(vg, names[idx]),
6938 iv_name="disk/%d" % disk_index,
6940 disks.append(disk_dev)
6941 elif template_name == constants.DT_DRBD8:
6942 if len(secondary_nodes) != 1:
6943 raise errors.ProgrammerError("Wrong template configuration")
6944 remote_node = secondary_nodes[0]
6945 minors = lu.cfg.AllocateDRBDMinor(
6946 [primary_node, remote_node] * len(disk_info), instance_name)
6949 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6950 for i in range(disk_count)]):
6951 names.append(lv_prefix + "_data")
6952 names.append(lv_prefix + "_meta")
6953 for idx, disk in enumerate(disk_info):
6954 disk_index = idx + base_index
6955 vg = disk.get("vg", vgname)
6956 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6957 disk["size"], vg, names[idx*2:idx*2+2],
6958 "disk/%d" % disk_index,
6959 minors[idx*2], minors[idx*2+1])
6960 disk_dev.mode = disk["mode"]
6961 disks.append(disk_dev)
6962 elif template_name == constants.DT_FILE:
6963 if len(secondary_nodes) != 0:
6964 raise errors.ProgrammerError("Wrong template configuration")
6966 opcodes.RequireFileStorage()
6968 for idx, disk in enumerate(disk_info):
6969 disk_index = idx + base_index
6970 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6971 iv_name="disk/%d" % disk_index,
6972 logical_id=(file_driver,
6973 "%s/disk%d" % (file_storage_dir,
6976 disks.append(disk_dev)
6977 elif template_name == constants.DT_SHARED_FILE:
6978 if len(secondary_nodes) != 0:
6979 raise errors.ProgrammerError("Wrong template configuration")
6981 opcodes.RequireSharedFileStorage()
6983 for idx, disk in enumerate(disk_info):
6984 disk_index = idx + base_index
6985 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6986 iv_name="disk/%d" % disk_index,
6987 logical_id=(file_driver,
6988 "%s/disk%d" % (file_storage_dir,
6991 disks.append(disk_dev)
6992 elif template_name == constants.DT_BLOCK:
6993 if len(secondary_nodes) != 0:
6994 raise errors.ProgrammerError("Wrong template configuration")
6996 for idx, disk in enumerate(disk_info):
6997 disk_index = idx + base_index
6998 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV, size=disk["size"],
6999 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7000 disk["adopt"]),
7001 iv_name="disk/%d" % disk_index,
7002 mode=disk["mode"])
7003 disks.append(disk_dev)
7006 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7010 def _GetInstanceInfoText(instance):
7011 """Compute that text that should be added to the disk's metadata.
7014 return "originstname+%s" % instance.name
7017 def _CalcEta(time_taken, written, total_size):
7018 """Calculates the ETA based on size written and total size.
7020 @param time_taken: The time taken so far
7021 @param written: amount written so far
7022 @param total_size: The total size of data to be written
7023 @return: The remaining time in seconds
7026 avg_time = time_taken / float(written)
7027 return (total_size - written) * avg_time
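# Worked example (illustration only): if 512 MB of a 2048 MB disk were
# written in 60 seconds, the average is 60.0/512 seconds per MB, so the
# remaining 1536 MB need (2048 - 512) * 60.0 / 512 = 180 seconds.
def _ExampleCalcEta():
  """Sketch: check of the _CalcEta arithmetic."""
  assert _CalcEta(60.0, 512, 2048) == 180.0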
7030 def _WipeDisks(lu, instance):
7031 """Wipes instance disks.
7033 @type lu: L{LogicalUnit}
7034 @param lu: the logical unit on whose behalf we execute
7035 @type instance: L{objects.Instance}
7036 @param instance: the instance whose disks we should wipe
7037 @return: the success of the wipe
7040 node = instance.primary_node
7042 for device in instance.disks:
7043 lu.cfg.SetDiskID(device, node)
7045 logging.info("Pause sync of instance %s disks", instance.name)
7046 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7048 for idx, success in enumerate(result.payload):
7049 if not success:
7050 logging.warn("pause-sync of instance %s for disk %d failed",
7051 instance.name, idx)
7053 try:
7054 for idx, device in enumerate(instance.disks):
7055 lu.LogInfo("* Wiping disk %d", idx)
7056 logging.info("Wiping disk %d for instance %s, node %s",
7057 idx, instance.name, node)
7059 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7060 # MAX_WIPE_CHUNK at max
7061 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7062 constants.MIN_WIPE_CHUNK_PERCENT)
7064 offset = 0
7065 size = device.size
7066 last_output = 0
7067 start_time = time.time()
7069 while offset < size:
7070 wipe_size = min(wipe_chunk_size, size - offset)
7071 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7072 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7073 (idx, offset, wipe_size))
7074 now = time.time()
7075 offset += wipe_size
7076 if now - last_output >= 60:
7077 eta = _CalcEta(now - start_time, offset, size)
7078 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7079 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7080 last_output = now
7081 finally:
7082 logging.info("Resume sync of instance %s disks", instance.name)
7084 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7086 for idx, success in enumerate(result.payload):
7087 if not success:
7088 lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
7089 " look at the status and troubleshoot the issue.", idx)
7090 logging.warn("resume-sync of instance %s for disk %d failed",
7091 instance.name, idx)
7094 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7095 """Create all disks for an instance.
7097 This abstracts away some work from AddInstance.
7099 @type lu: L{LogicalUnit}
7100 @param lu: the logical unit on whose behalf we execute
7101 @type instance: L{objects.Instance}
7102 @param instance: the instance whose disks we should create
7103 @type to_skip: list
7104 @param to_skip: list of indices to skip
7105 @type target_node: string
7106 @param target_node: if passed, overrides the target node for creation
7108 @return: the success of the creation
7111 info = _GetInstanceInfoText(instance)
7112 if target_node is None:
7113 pnode = instance.primary_node
7114 all_nodes = instance.all_nodes
7115 else:
7116 pnode = target_node
7117 all_nodes = [pnode]
7119 if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
7120 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7121 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7123 result.Raise("Failed to create directory '%s' on"
7124 " node %s" % (file_storage_dir, pnode))
7126 # Note: this needs to be kept in sync with adding of disks in
7127 # LUInstanceSetParams
7128 for idx, device in enumerate(instance.disks):
7129 if to_skip and idx in to_skip:
7130 continue
7131 logging.info("Creating volume %s for instance %s",
7132 device.iv_name, instance.name)
7134 for node in all_nodes:
7135 f_create = node == pnode
7136 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7139 def _RemoveDisks(lu, instance, target_node=None):
7140 """Remove all disks for an instance.
7142 This abstracts away some work from `AddInstance()` and
7143 `RemoveInstance()`. Note that in case some of the devices couldn't
7144 be removed, the removal will continue with the other ones (compare
7145 with `_CreateDisks()`).
7147 @type lu: L{LogicalUnit}
7148 @param lu: the logical unit on whose behalf we execute
7149 @type instance: L{objects.Instance}
7150 @param instance: the instance whose disks we should remove
7151 @type target_node: string
7152 @param target_node: used to override the node on which to remove the disks
7154 @return: the success of the removal
7157 logging.info("Removing block devices for instance %s", instance.name)
7159 all_result = True
7160 for device in instance.disks:
7161 if target_node:
7162 edata = [(target_node, device)]
7163 else:
7164 edata = device.ComputeNodeTree(instance.primary_node)
7165 for node, disk in edata:
7166 lu.cfg.SetDiskID(disk, node)
7167 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7168 if msg:
7169 lu.LogWarning("Could not remove block device %s on node %s,"
7170 " continuing anyway: %s", device.iv_name, node, msg)
7171 all_result = False
7173 if instance.disk_template == constants.DT_FILE:
7174 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7175 if target_node:
7176 tgt = target_node
7177 else:
7178 tgt = instance.primary_node
7179 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7180 if result.fail_msg:
7181 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7182 file_storage_dir, tgt, result.fail_msg)
7183 all_result = False
7185 return all_result
7188 def _ComputeDiskSizePerVG(disk_template, disks):
7189 """Compute disk size requirements in the volume group
7192 def _compute(disks, payload):
7193 """Universal algorithm
7198 vgs[disk["vg"]] = vgs.get("vg", 0) + disk["size"] + payload
7202 # Required free disk space as a function of disk and swap space
7203 req_size_dict = {
7204 constants.DT_DISKLESS: {},
7205 constants.DT_PLAIN: _compute(disks, 0),
7206 # 128 MB are added for drbd metadata for each disk
7207 constants.DT_DRBD8: _compute(disks, 128),
7208 constants.DT_FILE: {},
7209 constants.DT_SHARED_FILE: {},
7210 }
7212 if disk_template not in req_size_dict:
7213 raise errors.ProgrammerError("Disk template '%s' size requirement"
7214 " is unknown" % disk_template)
7216 return req_size_dict[disk_template]
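# Worked example (illustration only): two DRBD disks of 1024 MB and
# 2048 MB in volume group "xenvg" need 1024 + 2048 + 2 * 128 = 3328 MB
# in that VG, given the per-disk metadata payload of 128 MB.
def _ExampleDiskSizePerVG():
  """Sketch: check of _ComputeDiskSizePerVG for drbd8."""
  disks = [{"vg": "xenvg", "size": 1024}, {"vg": "xenvg", "size": 2048}]
  assert _ComputeDiskSizePerVG(constants.DT_DRBD8, disks) == {"xenvg": 3328}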
7219 def _ComputeDiskSize(disk_template, disks):
7220 """Compute disk size requirements in the volume group
7223 # Required free disk space as a function of disk and swap space
7224 req_size_dict = {
7225 constants.DT_DISKLESS: None,
7226 constants.DT_PLAIN: sum(d["size"] for d in disks),
7227 # 128 MB are added for drbd metadata for each disk
7228 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
7229 constants.DT_FILE: None,
7230 constants.DT_SHARED_FILE: 0,
7231 constants.DT_BLOCK: 0,
7232 }
7234 if disk_template not in req_size_dict:
7235 raise errors.ProgrammerError("Disk template '%s' size requirement"
7236 " is unknown" % disk_template)
7238 return req_size_dict[disk_template]
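# Worked example (illustration only): for the same two disk sizes, the
# plain template needs the raw sum while drbd8 adds 128 MB of metadata
# per disk.
def _ExampleDiskSize():
  """Sketch: check of _ComputeDiskSize for plain and drbd8."""
  disks = [{"size": 1024}, {"size": 2048}]
  assert _ComputeDiskSize(constants.DT_PLAIN, disks) == 3072
  assert _ComputeDiskSize(constants.DT_DRBD8, disks) == 3328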
7241 def _FilterVmNodes(lu, nodenames):
7242 """Filters out non-vm_capable nodes from a list.
7244 @type lu: L{LogicalUnit}
7245 @param lu: the logical unit for which we check
7246 @type nodenames: list
7247 @param nodenames: the list of nodes on which we should check
7249 @return: the list of vm-capable nodes
7252 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7253 return [name for name in nodenames if name not in non_vm_nodes]
7256 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7257 """Hypervisor parameter validation.
7259 This function abstracts the hypervisor parameter validation to be
7260 used in both instance create and instance modify.
7262 @type lu: L{LogicalUnit}
7263 @param lu: the logical unit for which we check
7264 @type nodenames: list
7265 @param nodenames: the list of nodes on which we should check
7266 @type hvname: string
7267 @param hvname: the name of the hypervisor we should use
7268 @type hvparams: dict
7269 @param hvparams: the parameters which we need to check
7270 @raise errors.OpPrereqError: if the parameters are not valid
7273 nodenames = _FilterVmNodes(lu, nodenames)
7274 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7275 hvname,
7276 hvparams)
7277 for node in nodenames:
7278 info = hvinfo[node]
7279 if info.offline:
7280 continue
7281 info.Raise("Hypervisor parameter validation failed on node %s" % node)
7284 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7285 """OS parameters validation.
7287 @type lu: L{LogicalUnit}
7288 @param lu: the logical unit for which we check
7289 @type required: boolean
7290 @param required: whether the validation should fail if the OS is not
7291 found
7292 @type nodenames: list
7293 @param nodenames: the list of nodes on which we should check
7294 @type osname: string
7295 @param osname: the name of the OS we should use
7296 @type osparams: dict
7297 @param osparams: the parameters which we need to check
7298 @raise errors.OpPrereqError: if the parameters are not valid
7301 nodenames = _FilterVmNodes(lu, nodenames)
7302 result = lu.rpc.call_os_validate(required, nodenames, osname,
7303 [constants.OS_VALIDATE_PARAMETERS],
7304 osparams)
7305 for node, nres in result.items():
7306 # we don't check for offline cases since this should be run only
7307 # against the master node and/or an instance's nodes
7308 nres.Raise("OS Parameters validation failed on node %s" % node)
7309 if not nres.payload:
7310 lu.LogInfo("OS %s not found on node %s, validation skipped",
7314 class LUInstanceCreate(LogicalUnit):
7315 """Create an instance.
7318 HPATH = "instance-add"
7319 HTYPE = constants.HTYPE_INSTANCE
7322 def CheckArguments(self):
7326 # do not require name_check to ease forward/backward compatibility
7328 if self.op.no_install and self.op.start:
7329 self.LogInfo("No-installation mode selected, disabling startup")
7330 self.op.start = False
7331 # validate/normalize the instance name
7332 self.op.instance_name = \
7333 netutils.Hostname.GetNormalizedName(self.op.instance_name)
7335 if self.op.ip_check and not self.op.name_check:
7336 # TODO: make the ip check more flexible and not depend on the name check
7337 raise errors.OpPrereqError("Cannot do ip check without a name check",
7340 # check nics' parameter names
7341 for nic in self.op.nics:
7342 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7344 # check disks. parameter names and consistent adopt/no-adopt strategy
7345 has_adopt = has_no_adopt = False
7346 for disk in self.op.disks:
7347 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7348 if "adopt" in disk:
7349 has_adopt = True
7350 else:
7351 has_no_adopt = True
7352 if has_adopt and has_no_adopt:
7353 raise errors.OpPrereqError("Either all disks are adopted or none is",
7356 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7357 raise errors.OpPrereqError("Disk adoption is not supported for the"
7358 " '%s' disk template" %
7359 self.op.disk_template,
7360 errors.ECODE_INVAL)
7361 if self.op.iallocator is not None:
7362 raise errors.OpPrereqError("Disk adoption not allowed with an"
7363 " iallocator script", errors.ECODE_INVAL)
7364 if self.op.mode == constants.INSTANCE_IMPORT:
7365 raise errors.OpPrereqError("Disk adoption not allowed for"
7366 " instance import", errors.ECODE_INVAL)
7367 else:
7368 if self.op.disk_template in constants.DTS_MUST_ADOPT:
7369 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7370 " but no 'adopt' parameter given" %
7371 self.op.disk_template,
7372 errors.ECODE_INVAL)
7374 self.adopt_disks = has_adopt
7376 # instance name verification
7377 if self.op.name_check:
7378 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7379 self.op.instance_name = self.hostname1.name
7380 # used in CheckPrereq for ip ping check
7381 self.check_ip = self.hostname1.ip
7382 else:
7383 self.check_ip = None
7385 # file storage checks
7386 if (self.op.file_driver and
7387 not self.op.file_driver in constants.FILE_DRIVER):
7388 raise errors.OpPrereqError("Invalid file driver name '%s'" %
7389 self.op.file_driver, errors.ECODE_INVAL)
7391 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7392 raise errors.OpPrereqError("File storage directory path not absolute",
7395 ### Node/iallocator related checks
7396 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7398 if self.op.pnode is not None:
7399 if self.op.disk_template in constants.DTS_INT_MIRROR:
7400 if self.op.snode is None:
7401 raise errors.OpPrereqError("The networked disk templates need"
7402 " a mirror node", errors.ECODE_INVAL)
7404 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7406 self.op.snode = None
7408 self._cds = _GetClusterDomainSecret()
7410 if self.op.mode == constants.INSTANCE_IMPORT:
7411 # On import force_variant must be True, because if we forced it at
7412 # initial install, our only chance when importing it back is that it
7413 # works again!
7414 self.op.force_variant = True
7416 if self.op.no_install:
7417 self.LogInfo("No-installation mode has no effect during import")
7419 elif self.op.mode == constants.INSTANCE_CREATE:
7420 if self.op.os_type is None:
7421 raise errors.OpPrereqError("No guest OS specified",
7423 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7424 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7425 " installation" % self.op.os_type,
7427 if self.op.disk_template is None:
7428 raise errors.OpPrereqError("No disk template specified",
7431 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7432 # Check handshake to ensure both clusters have the same domain secret
7433 src_handshake = self.op.source_handshake
7434 if not src_handshake:
7435 raise errors.OpPrereqError("Missing source handshake",
7438 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7441 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7444 # Load and check source CA
7445 self.source_x509_ca_pem = self.op.source_x509_ca
7446 if not self.source_x509_ca_pem:
7447 raise errors.OpPrereqError("Missing source X509 CA",
7451 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7453 except OpenSSL.crypto.Error, err:
7454 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7455 (err, ), errors.ECODE_INVAL)
7457 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7458 if errcode is not None:
7459 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7462 self.source_x509_ca = cert
7464 src_instance_name = self.op.source_instance_name
7465 if not src_instance_name:
7466 raise errors.OpPrereqError("Missing source instance name",
7469 self.source_instance_name = \
7470 netutils.GetHostname(name=src_instance_name).name
7473 raise errors.OpPrereqError("Invalid instance creation mode %r" %
7474 self.op.mode, errors.ECODE_INVAL)
7476 def ExpandNames(self):
7477 """ExpandNames for CreateInstance.
7479 Figure out the right locks for instance creation.
7482 self.needed_locks = {}
7484 instance_name = self.op.instance_name
7485 # this is just a preventive check, but someone might still add this
7486 # instance in the meantime, and creation will fail at lock-add time
7487 if instance_name in self.cfg.GetInstanceList():
7488 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7489 instance_name, errors.ECODE_EXISTS)
7491 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7493 if self.op.iallocator:
7494 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7495 else:
7496 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7497 nodelist = [self.op.pnode]
7498 if self.op.snode is not None:
7499 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7500 nodelist.append(self.op.snode)
7501 self.needed_locks[locking.LEVEL_NODE] = nodelist
7503 # in case of import lock the source node too
7504 if self.op.mode == constants.INSTANCE_IMPORT:
7505 src_node = self.op.src_node
7506 src_path = self.op.src_path
7508 if src_path is None:
7509 self.op.src_path = src_path = self.op.instance_name
7511 if src_node is None:
7512 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7513 self.op.src_node = None
7514 if os.path.isabs(src_path):
7515 raise errors.OpPrereqError("Importing an instance from an absolute"
7516 " path requires a source node option.",
7519 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7520 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7521 self.needed_locks[locking.LEVEL_NODE].append(src_node)
7522 if not os.path.isabs(src_path):
7523 self.op.src_path = src_path = \
7524 utils.PathJoin(constants.EXPORT_DIR, src_path)
7526 def _RunAllocator(self):
7527 """Run the allocator based on input opcode.
7530 nics = [n.ToDict() for n in self.nics]
7531 ial = IAllocator(self.cfg, self.rpc,
7532 mode=constants.IALLOCATOR_MODE_ALLOC,
7533 name=self.op.instance_name,
7534 disk_template=self.op.disk_template,
7535 tags=[],
7536 os=self.op.os_type,
7537 vcpus=self.be_full[constants.BE_VCPUS],
7538 mem_size=self.be_full[constants.BE_MEMORY],
7539 disks=self.disks,
7540 nics=nics,
7541 hypervisor=self.op.hypervisor,
7542 )
7544 ial.Run(self.op.iallocator)
7547 raise errors.OpPrereqError("Can't compute nodes using"
7548 " iallocator '%s': %s" %
7549 (self.op.iallocator, ial.info),
7551 if len(ial.result) != ial.required_nodes:
7552 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7553 " of nodes (%s), required %s" %
7554 (self.op.iallocator, len(ial.result),
7555 ial.required_nodes), errors.ECODE_FAULT)
7556 self.op.pnode = ial.result[0]
7557 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7558 self.op.instance_name, self.op.iallocator,
7559 utils.CommaJoin(ial.result))
7560 if ial.required_nodes == 2:
7561 self.op.snode = ial.result[1]
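# Note: for mirrored disk templates the allocator has to return two nodes
# (ial.required_nodes == 2); the first becomes the primary and the second
# the secondary of the new instance.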
7563 def BuildHooksEnv(self):
7566 This runs on master, primary and secondary nodes of the instance.
7570 "ADD_MODE": self.op.mode,
7572 if self.op.mode == constants.INSTANCE_IMPORT:
7573 env["SRC_NODE"] = self.op.src_node
7574 env["SRC_PATH"] = self.op.src_path
7575 env["SRC_IMAGES"] = self.src_images
7577 env.update(_BuildInstanceHookEnv(
7578 name=self.op.instance_name,
7579 primary_node=self.op.pnode,
7580 secondary_nodes=self.secondaries,
7581 status=self.op.start,
7582 os_type=self.op.os_type,
7583 memory=self.be_full[constants.BE_MEMORY],
7584 vcpus=self.be_full[constants.BE_VCPUS],
7585 nics=_NICListToTuple(self, self.nics),
7586 disk_template=self.op.disk_template,
7587 disks=[(d["size"], d["mode"]) for d in self.disks],
7590 hypervisor_name=self.op.hypervisor,
7595 def BuildHooksNodes(self):
7596 """Build hooks nodes.
7599 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
7602 def _ReadExportInfo(self):
7603 """Reads the export information from disk.
7605 It will override the opcode source node and path with the actual
7606 information, if these two were not specified before.
7608 @return: the export information
7611 assert self.op.mode == constants.INSTANCE_IMPORT
7613 src_node = self.op.src_node
7614 src_path = self.op.src_path
7616 if src_node is None:
7617 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7618 exp_list = self.rpc.call_export_list(locked_nodes)
7620 for node in exp_list:
7621 if exp_list[node].fail_msg:
7623 if src_path in exp_list[node].payload:
7625 self.op.src_node = src_node = node
7626 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7630 raise errors.OpPrereqError("No export found for relative path %s" %
7631 src_path, errors.ECODE_INVAL)
7633 _CheckNodeOnline(self, src_node)
7634 result = self.rpc.call_export_info(src_node, src_path)
7635 result.Raise("No export or invalid export found in dir %s" % src_path)
7637 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7638 if not export_info.has_section(constants.INISECT_EXP):
7639 raise errors.ProgrammerError("Corrupted export config",
7640 errors.ECODE_ENVIRON)
7642 ei_version = export_info.get(constants.INISECT_EXP, "version")
7643 if (int(ei_version) != constants.EXPORT_VERSION):
7644 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7645 (ei_version, constants.EXPORT_VERSION),
7646 errors.ECODE_ENVIRON)
7649 def _ReadExportParams(self, einfo):
7650 """Use export parameters as defaults.
If the opcode doesn't specify (i.e. override) some instance
parameters, try to take them from the export information, if available.
7657 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7659 if self.op.disk_template is None:
7660 if einfo.has_option(constants.INISECT_INS, "disk_template"):
7661 self.op.disk_template = einfo.get(constants.INISECT_INS,
7664 raise errors.OpPrereqError("No disk template specified and the export"
7665 " is missing the disk_template information",
7668 if not self.op.disks:
7669 if einfo.has_option(constants.INISECT_INS, "disk_count"):
7671 # TODO: import the disk iv_name too
7672 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7673 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7674 disks.append({"size": disk_sz})
7675 self.op.disks = disks
7677 raise errors.OpPrereqError("No disk info specified and the export"
7678 " is missing the disk information",
7681 if (not self.op.nics and
7682 einfo.has_option(constants.INISECT_INS, "nic_count")):
7684 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7686 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7687 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7692 if (self.op.hypervisor is None and
7693 einfo.has_option(constants.INISECT_INS, "hypervisor")):
7694 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7695 if einfo.has_section(constants.INISECT_HYP):
7696 # use the export parameters but do not override the ones
7697 # specified by the user
7698 for name, value in einfo.items(constants.INISECT_HYP):
7699 if name not in self.op.hvparams:
7700 self.op.hvparams[name] = value
7702 if einfo.has_section(constants.INISECT_BEP):
7703 # use the parameters, without overriding
7704 for name, value in einfo.items(constants.INISECT_BEP):
7705 if name not in self.op.beparams:
7706 self.op.beparams[name] = value
7708 # try to read the parameters old style, from the main section
7709 for name in constants.BES_PARAMETERS:
7710 if (name not in self.op.beparams and
7711 einfo.has_option(constants.INISECT_INS, name)):
7712 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7714 if einfo.has_section(constants.INISECT_OSP):
7715 # use the parameters, without overriding
7716 for name, value in einfo.items(constants.INISECT_OSP):
7717 if name not in self.op.osparams:
7718 self.op.osparams[name] = value
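# To summarize the precedence implemented above: values given in the opcode
# always win, then values stored in the export; anything still unset is
# filled from the cluster defaults later (via the SimpleFill* helpers).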
7720 def _RevertToDefaults(self, cluster):
7721 """Revert the instance parameters to the default values.
7725 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7726 for name in self.op.hvparams.keys():
7727 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7728 del self.op.hvparams[name]
7730 be_defs = cluster.SimpleFillBE({})
7731 for name in self.op.beparams.keys():
7732 if name in be_defs and be_defs[name] == self.op.beparams[name]:
7733 del self.op.beparams[name]
7735 nic_defs = cluster.SimpleFillNIC({})
7736 for nic in self.op.nics:
7737 for name in constants.NICS_PARAMETERS:
7738 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7741 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7742 for name in self.op.osparams.keys():
7743 if name in os_defs and os_defs[name] == self.op.osparams[name]:
7744 del self.op.osparams[name]
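# Rationale: with identify_defaults, parameters that merely repeat the
# current cluster defaults are dropped again, so the instance keeps
# following the cluster defaults if those are changed later; e.g. an
# imported beparams memory value equal to the cluster default is removed
# rather than frozen into the instance.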
7746 def CheckPrereq(self):
7747 """Check prerequisites.
7750 if self.op.mode == constants.INSTANCE_IMPORT:
7751 export_info = self._ReadExportInfo()
7752 self._ReadExportParams(export_info)
7754 if (not self.cfg.GetVGName() and
7755 self.op.disk_template not in constants.DTS_NOT_LVM):
7756 raise errors.OpPrereqError("Cluster does not support lvm-based"
7757 " instances", errors.ECODE_STATE)
7759 if self.op.hypervisor is None:
7760 self.op.hypervisor = self.cfg.GetHypervisorType()
7762 cluster = self.cfg.GetClusterInfo()
7763 enabled_hvs = cluster.enabled_hypervisors
7764 if self.op.hypervisor not in enabled_hvs:
7765 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7766 " cluster (%s)" % (self.op.hypervisor,
7767 ",".join(enabled_hvs)),
7770 # check hypervisor parameter syntax (locally)
7771 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7772 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7774 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7775 hv_type.CheckParameterSyntax(filled_hvp)
7776 self.hv_full = filled_hvp
7777 # check that we don't specify global parameters on an instance
7778 _CheckGlobalHvParams(self.op.hvparams)
7780 # fill and remember the beparams dict
7781 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7782 self.be_full = cluster.SimpleFillBE(self.op.beparams)
7784 # build os parameters
7785 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7787 # now that hvp/bep are in final format, let's reset to defaults,
7789 if self.op.identify_defaults:
7790 self._RevertToDefaults(cluster)
7794 for idx, nic in enumerate(self.op.nics):
7795 nic_mode_req = nic.get("mode", None)
7796 nic_mode = nic_mode_req
7797 if nic_mode is None:
7798 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7800 # in routed mode, for the first nic, the default ip is 'auto'
7801 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7802 default_ip_mode = constants.VALUE_AUTO
7804 default_ip_mode = constants.VALUE_NONE
7806 # ip validity checks
7807 ip = nic.get("ip", default_ip_mode)
7808 if ip is None or ip.lower() == constants.VALUE_NONE:
7810 elif ip.lower() == constants.VALUE_AUTO:
7811 if not self.op.name_check:
7812 raise errors.OpPrereqError("IP address set to auto but name checks"
7813 " have been skipped",
7815 nic_ip = self.hostname1.ip
7817 if not netutils.IPAddress.IsValid(ip):
7818 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7822 # TODO: check the ip address for uniqueness
7823 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7824 raise errors.OpPrereqError("Routed nic mode requires an ip address",
7827 # MAC address verification
7828 mac = nic.get("mac", constants.VALUE_AUTO)
7829 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7830 mac = utils.NormalizeAndValidateMac(mac)
7833 self.cfg.ReserveMAC(mac, self.proc.GetECId())
7834 except errors.ReservationError:
7835 raise errors.OpPrereqError("MAC address %s already in use"
7836 " in cluster" % mac,
7837 errors.ECODE_NOTUNIQUE)
7839 # Build nic parameters
7840 link = nic.get(constants.INIC_LINK, None)
7843 nicparams[constants.NIC_MODE] = nic_mode_req
7845 nicparams[constants.NIC_LINK] = link
7847 check_params = cluster.SimpleFillNIC(nicparams)
7848 objects.NIC.CheckParameterSyntax(check_params)
7849 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7851 # disk checks/pre-build
7853 for disk in self.op.disks:
7854 mode = disk.get("mode", constants.DISK_RDWR)
7855 if mode not in constants.DISK_ACCESS_SET:
7856 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7857 mode, errors.ECODE_INVAL)
7858 size = disk.get("size", None)
7860 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7863 except (TypeError, ValueError):
7864 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7866 vg = disk.get("vg", self.cfg.GetVGName())
7867 new_disk = {"size": size, "mode": mode, "vg": vg}
7869 new_disk["adopt"] = disk["adopt"]
7870 self.disks.append(new_disk)
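# self.disks now holds dicts of the form {"size": ..., "mode": ..., "vg": ...}
# (plus "adopt" when disk adoption was requested); sizes are in mebibytes,
# as everywhere else in Ganeti.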
7872 if self.op.mode == constants.INSTANCE_IMPORT:
# Check that the new instance doesn't have fewer disks than the export
7875 instance_disks = len(self.disks)
7876 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7877 if instance_disks < export_disks:
7878 raise errors.OpPrereqError("Not enough disks to import."
7879 " (instance: %d, export: %d)" %
7880 (instance_disks, export_disks),
7884 for idx in range(export_disks):
7885 option = 'disk%d_dump' % idx
7886 if export_info.has_option(constants.INISECT_INS, option):
# FIXME: are the old OSes, disk sizes, etc. useful?
7888 export_name = export_info.get(constants.INISECT_INS, option)
7889 image = utils.PathJoin(self.op.src_path, export_name)
7890 disk_images.append(image)
7892 disk_images.append(False)
7894 self.src_images = disk_images
7896 old_name = export_info.get(constants.INISECT_INS, 'name')
7898 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7899 except (TypeError, ValueError), err:
7900 raise errors.OpPrereqError("Invalid export file, nic_count is not"
7901 " an integer: %s" % str(err),
7903 if self.op.instance_name == old_name:
7904 for idx, nic in enumerate(self.nics):
7905 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7906 nic_mac_ini = 'nic%d_mac' % idx
7907 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7909 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7911 # ip ping checks (we use the same ip that was resolved in ExpandNames)
7912 if self.op.ip_check:
7913 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7914 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7915 (self.check_ip, self.op.instance_name),
7916 errors.ECODE_NOTUNIQUE)
7918 #### mac address generation
# By generating the MAC address here, both the allocator and the hooks get
# the real final MAC address rather than the 'auto' or 'generate' value.
7921 # There is a race condition between the generation and the instance object
7922 # creation, which means that we know the mac is valid now, but we're not
7923 # sure it will be when we actually add the instance. If things go bad
7924 # adding the instance will abort because of a duplicate mac, and the
7925 # creation job will fail.
7926 for nic in self.nics:
7927 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7928 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
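# From this point on every NIC carries a concrete, reserved MAC, so both
# the allocator run below and the hooks see the final address.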
7932 if self.op.iallocator is not None:
7933 self._RunAllocator()
7935 #### node related checks
7937 # check primary node
7938 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7939 assert self.pnode is not None, \
7940 "Cannot retrieve locked node %s" % self.op.pnode
7942 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7943 pnode.name, errors.ECODE_STATE)
7945 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7946 pnode.name, errors.ECODE_STATE)
7947 if not pnode.vm_capable:
7948 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7949 " '%s'" % pnode.name, errors.ECODE_STATE)
7951 self.secondaries = []
7953 # mirror node verification
7954 if self.op.disk_template in constants.DTS_INT_MIRROR:
7955 if self.op.snode == pnode.name:
7956 raise errors.OpPrereqError("The secondary node cannot be the"
7957 " primary node.", errors.ECODE_INVAL)
7958 _CheckNodeOnline(self, self.op.snode)
7959 _CheckNodeNotDrained(self, self.op.snode)
7960 _CheckNodeVmCapable(self, self.op.snode)
7961 self.secondaries.append(self.op.snode)
7963 nodenames = [pnode.name] + self.secondaries
7965 if not self.adopt_disks:
7966 # Check lv size requirements, if not adopting
7967 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7968 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7970 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
7971 all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7972 if len(all_lvs) != len(self.disks):
7973 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7975 for lv_name in all_lvs:
# FIXME: lv_name here is "vg/lv"; we need to ensure that other
# calls to ReserveLV use the same syntax
7979 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7980 except errors.ReservationError:
7981 raise errors.OpPrereqError("LV named %s used by another instance" %
7982 lv_name, errors.ECODE_NOTUNIQUE)
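# The reservation is tied to this job's execution id, so it only protects
# against concurrent Ganeti jobs adopting the same LV, not against
# out-of-band changes on the node itself.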
7984 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
7985 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7987 node_lvs = self.rpc.call_lv_list([pnode.name],
7988 vg_names.payload.keys())[pnode.name]
7989 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7990 node_lvs = node_lvs.payload
7992 delta = all_lvs.difference(node_lvs.keys())
7994 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7995 utils.CommaJoin(delta),
7997 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7999 raise errors.OpPrereqError("Online logical volumes found, cannot"
8000 " adopt: %s" % utils.CommaJoin(online_lvs),
8002 # update the size of disk based on what is found
8003 for dsk in self.disks:
8004 dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
8006 elif self.op.disk_template == constants.DT_BLOCK:
8007 # Normalize and de-duplicate device paths
8008 all_disks = set([os.path.abspath(i["adopt"]) for i in self.disks])
8009 if len(all_disks) != len(self.disks):
8010 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8012 baddisks = [d for d in all_disks
8013 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8015 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8016 " cannot be adopted" %
8017 (", ".join(baddisks),
8018 constants.ADOPTABLE_BLOCKDEV_ROOT),
8021 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8022 list(all_disks))[pnode.name]
8023 node_disks.Raise("Cannot get block device information from node %s" %
8025 node_disks = node_disks.payload
8026 delta = all_disks.difference(node_disks.keys())
8028 raise errors.OpPrereqError("Missing block device(s): %s" %
8029 utils.CommaJoin(delta),
8031 for dsk in self.disks:
8032 dsk["size"] = int(float(node_disks[dsk["adopt"]]))
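# As with LV adoption above, any user-supplied sizes are overridden by the
# sizes actually reported by the node.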
8034 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8036 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8037 # check OS parameters (remotely)
8038 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8040 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8042 # memory check on primary node
8044 _CheckNodeFreeMemory(self, self.pnode.name,
8045 "creating instance %s" % self.op.instance_name,
8046 self.be_full[constants.BE_MEMORY],
8049 self.dry_run_result = list(nodenames)
8051 def Exec(self, feedback_fn):
8052 """Create and add the instance to the cluster.
8055 instance = self.op.instance_name
8056 pnode_name = self.pnode.name
8058 ht_kind = self.op.hypervisor
8059 if ht_kind in constants.HTS_REQ_PORT:
8060 network_port = self.cfg.AllocatePort()
8064 if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8065 # this is needed because os.path.join does not accept None arguments
8066 if self.op.file_storage_dir is None:
8067 string_file_storage_dir = ""
8069 string_file_storage_dir = self.op.file_storage_dir
8071 # build the full file storage dir path
8072 if self.op.disk_template == constants.DT_SHARED_FILE:
8073 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8075 get_fsd_fn = self.cfg.GetFileStorageDir
8077 file_storage_dir = utils.PathJoin(get_fsd_fn(),
8078 string_file_storage_dir, instance)
8080 file_storage_dir = ""
8082 disks = _GenerateDiskTemplate(self,
8083 self.op.disk_template,
8084 instance, pnode_name,
8088 self.op.file_driver,
8092 iobj = objects.Instance(name=instance, os=self.op.os_type,
8093 primary_node=pnode_name,
8094 nics=self.nics, disks=disks,
8095 disk_template=self.op.disk_template,
8097 network_port=network_port,
8098 beparams=self.op.beparams,
8099 hvparams=self.op.hvparams,
8100 hypervisor=self.op.hypervisor,
8101 osparams=self.op.osparams,
8104 if self.adopt_disks:
8105 if self.op.disk_template == constants.DT_PLAIN:
8106 # rename LVs to the newly-generated names; we need to construct
8107 # 'fake' LV disks with the old data, plus the new unique_id
8108 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8111 rename_to.append(t_dsk.logical_id)
8112 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
8113 self.cfg.SetDiskID(t_dsk, pnode_name)
8114 result = self.rpc.call_blockdev_rename(pnode_name,
8115 zip(tmp_disks, rename_to))
result.Raise("Failed to rename adopted LVs")
8118 feedback_fn("* creating instance disks...")
8120 _CreateDisks(self, iobj)
8121 except errors.OpExecError:
8122 self.LogWarning("Device creation failed, reverting...")
8124 _RemoveDisks(self, iobj)
8126 self.cfg.ReleaseDRBDMinors(instance)
8129 if self.cfg.GetClusterInfo().prealloc_wipe_disks:
8130 feedback_fn("* wiping instance disks...")
8132 _WipeDisks(self, iobj)
8133 except errors.OpExecError:
8134 self.LogWarning("Device wiping failed, reverting...")
8136 _RemoveDisks(self, iobj)
8138 self.cfg.ReleaseDRBDMinors(instance)
8141 feedback_fn("adding instance %s to cluster config" % instance)
8143 self.cfg.AddInstance(iobj, self.proc.GetECId())
8145 # Declare that we don't want to remove the instance lock anymore, as we've
8146 # added the instance to the config
8147 del self.remove_locks[locking.LEVEL_INSTANCE]
8148 # Unlock all the nodes
8149 if self.op.mode == constants.INSTANCE_IMPORT:
8150 nodes_keep = [self.op.src_node]
8151 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
8152 if node != self.op.src_node]
8153 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
8154 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
8156 self.context.glm.release(locking.LEVEL_NODE)
8157 del self.acquired_locks[locking.LEVEL_NODE]
8159 if self.op.wait_for_sync:
8160 disk_abort = not _WaitForSync(self, iobj)
8161 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8162 # make sure the disks are not degraded (still sync-ing is ok)
8164 feedback_fn("* checking mirrors status")
8165 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8170 _RemoveDisks(self, iobj)
8171 self.cfg.RemoveInstance(iobj.name)
8172 # Make sure the instance lock gets removed
8173 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8174 raise errors.OpExecError("There are some degraded disks for"
8177 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8178 if self.op.mode == constants.INSTANCE_CREATE:
8179 if not self.op.no_install:
8180 feedback_fn("* running the instance OS create scripts...")
8181 # FIXME: pass debug option from opcode to backend
8182 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8183 self.op.debug_level)
8184 result.Raise("Could not add os for instance %s"
8185 " on node %s" % (instance, pnode_name))
8187 elif self.op.mode == constants.INSTANCE_IMPORT:
8188 feedback_fn("* running the instance OS import scripts...")
8192 for idx, image in enumerate(self.src_images):
8196 # FIXME: pass debug option from opcode to backend
8197 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8198 constants.IEIO_FILE, (image, ),
8199 constants.IEIO_SCRIPT,
8200 (iobj.disks[idx], idx),
8202 transfers.append(dt)
8205 masterd.instance.TransferInstanceData(self, feedback_fn,
8206 self.op.src_node, pnode_name,
8207 self.pnode.secondary_ip,
8209 if not compat.all(import_result):
8210 self.LogWarning("Some disks for instance %s on node %s were not"
8211 " imported successfully" % (instance, pnode_name))
8213 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8214 feedback_fn("* preparing remote import...")
8215 # The source cluster will stop the instance before attempting to make a
8216 # connection. In some cases stopping an instance can take a long time,
8217 # hence the shutdown timeout is added to the connection timeout.
8218 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8219 self.op.source_shutdown_timeout)
8220 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8222 assert iobj.primary_node == self.pnode.name
8224 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8225 self.source_x509_ca,
8226 self._cds, timeouts)
8227 if not compat.all(disk_results):
8228 # TODO: Should the instance still be started, even if some disks
8229 # failed to import (valid for local imports, too)?
8230 self.LogWarning("Some disks for instance %s on node %s were not"
8231 " imported successfully" % (instance, pnode_name))
8233 # Run rename script on newly imported instance
8234 assert iobj.name == instance
8235 feedback_fn("Running rename script for %s" % instance)
8236 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8237 self.source_instance_name,
8238 self.op.debug_level)
8240 self.LogWarning("Failed to run rename script for %s on node"
8241 " %s: %s" % (instance, pnode_name, result.fail_msg))
8244 # also checked in the prereq part
8245 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8249 iobj.admin_up = True
8250 self.cfg.Update(iobj, feedback_fn)
8251 logging.info("Starting instance %s on node %s", instance, pnode_name)
8252 feedback_fn("* starting instance...")
8253 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8254 result.Raise("Could not start instance")
8256 return list(iobj.all_nodes)
8259 class LUInstanceConsole(NoHooksLU):
8260 """Connect to an instance's console.
8262 This is somewhat special in that it returns the command line that
you need to run on the master node in order to connect to the console.
8269 def ExpandNames(self):
8270 self._ExpandAndLockInstance()
8272 def CheckPrereq(self):
8273 """Check prerequisites.
8275 This checks that the instance is in the cluster.
8278 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8279 assert self.instance is not None, \
8280 "Cannot retrieve locked instance %s" % self.op.instance_name
8281 _CheckNodeOnline(self, self.instance.primary_node)
8283 def Exec(self, feedback_fn):
8284 """Connect to the console of an instance
8287 instance = self.instance
8288 node = instance.primary_node
8290 node_insts = self.rpc.call_instance_list([node],
8291 [instance.hypervisor])[node]
8292 node_insts.Raise("Can't get node information from %s" % node)
8294 if instance.name not in node_insts.payload:
8295 if instance.admin_up:
8296 state = constants.INSTST_ERRORDOWN
8298 state = constants.INSTST_ADMINDOWN
8299 raise errors.OpExecError("Instance %s is not running (state %s)" %
8300 (instance.name, state))
8302 logging.debug("Connecting to console of %s on %s", instance.name, node)
8304 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8307 def _GetInstanceConsole(cluster, instance):
8308 """Returns console information for an instance.
8310 @type cluster: L{objects.Cluster}
8311 @type instance: L{objects.Instance}
8315 hyper = hypervisor.GetHypervisor(instance.hypervisor)
8316 # beparams and hvparams are passed separately, to avoid editing the
8317 # instance and then saving the defaults in the instance itself.
8318 hvparams = cluster.FillHV(instance)
8319 beparams = cluster.FillBE(instance)
8320 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8322 assert console.instance == instance.name
8323 assert console.Validate()
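# The dict form is what is shipped back to clients (e.g. "gnt-instance
# console"); it carries at least the console kind and the instance name,
# plus kind-specific connection details.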
8325 return console.ToDict()
8328 class LUInstanceReplaceDisks(LogicalUnit):
8329 """Replace the disks of an instance.
8332 HPATH = "mirrors-replace"
8333 HTYPE = constants.HTYPE_INSTANCE
8336 def CheckArguments(self):
8337 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8340 def ExpandNames(self):
8341 self._ExpandAndLockInstance()
8343 if self.op.iallocator is not None:
8344 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8346 elif self.op.remote_node is not None:
8347 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8348 self.op.remote_node = remote_node
8350 # Warning: do not remove the locking of the new secondary here
8351 # unless DRBD8.AddChildren is changed to work in parallel;
8352 # currently it doesn't since parallel invocations of
8353 # FindUnusedMinor will conflict
8354 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
8355 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8358 self.needed_locks[locking.LEVEL_NODE] = []
8359 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8361 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8362 self.op.iallocator, self.op.remote_node,
8363 self.op.disks, False, self.op.early_release)
8365 self.tasklets = [self.replacer]
8367 def DeclareLocks(self, level):
8368 # If we're not already locking all nodes in the set we have to declare the
8369 # instance's primary/secondary nodes.
8370 if (level == locking.LEVEL_NODE and
8371 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8372 self._LockInstancesNodes()
8374 def BuildHooksEnv(self):
8377 This runs on the master, the primary and all the secondaries.
8380 instance = self.replacer.instance
8382 "MODE": self.op.mode,
8383 "NEW_SECONDARY": self.op.remote_node,
8384 "OLD_SECONDARY": instance.secondary_nodes[0],
8386 env.update(_BuildInstanceHookEnvByObject(self, instance))
8389 def BuildHooksNodes(self):
8390 """Build hooks nodes.
8393 instance = self.replacer.instance
8395 self.cfg.GetMasterNode(),
8396 instance.primary_node,
8398 if self.op.remote_node is not None:
8399 nl.append(self.op.remote_node)
8403 class TLReplaceDisks(Tasklet):
8404 """Replaces disks for an instance.
8406 Note: Locking is not within the scope of this class.
8409 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8410 disks, delay_iallocator, early_release):
8411 """Initializes this class.
8414 Tasklet.__init__(self, lu)
8417 self.instance_name = instance_name
8419 self.iallocator_name = iallocator_name
8420 self.remote_node = remote_node
8422 self.delay_iallocator = delay_iallocator
8423 self.early_release = early_release
8426 self.instance = None
8427 self.new_node = None
8428 self.target_node = None
8429 self.other_node = None
8430 self.remote_node_info = None
8431 self.node_secondary_ip = None
8434 def CheckArguments(mode, remote_node, iallocator):
8435 """Helper function for users of this class.
8438 # check for valid parameter combination
8439 if mode == constants.REPLACE_DISK_CHG:
8440 if remote_node is None and iallocator is None:
8441 raise errors.OpPrereqError("When changing the secondary either an"
8442 " iallocator script must be used or the"
8443 " new node given", errors.ECODE_INVAL)
8445 if remote_node is not None and iallocator is not None:
8446 raise errors.OpPrereqError("Give either the iallocator or the new"
8447 " secondary, not both", errors.ECODE_INVAL)
8449 elif remote_node is not None or iallocator is not None:
8450 # Not replacing the secondary
8451 raise errors.OpPrereqError("The iallocator and new node options can"
8452 " only be used when changing the"
8453 " secondary node", errors.ECODE_INVAL)
8456 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8457 """Compute a new secondary node using an IAllocator.
8460 ial = IAllocator(lu.cfg, lu.rpc,
8461 mode=constants.IALLOCATOR_MODE_RELOC,
8463 relocate_from=relocate_from)
8465 ial.Run(iallocator_name)
8468 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8469 " %s" % (iallocator_name, ial.info),
8472 if len(ial.result) != ial.required_nodes:
8473 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8474 " of nodes (%s), required %s" %
8476 len(ial.result), ial.required_nodes),
8479 remote_node_name = ial.result[0]
8481 lu.LogInfo("Selected new secondary for instance '%s': %s",
8482 instance_name, remote_node_name)
8484 return remote_node_name
8486 def _FindFaultyDisks(self, node_name):
8487 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8490 def _CheckDisksActivated(self, instance):
8491 """Checks if the instance disks are activated.
8493 @param instance: The instance to check disks
8494 @return: True if they are activated, False otherwise
8497 nodes = instance.all_nodes
8499 for idx, dev in enumerate(instance.disks):
8501 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8502 self.cfg.SetDiskID(dev, node)
8504 result = self.rpc.call_blockdev_find(node, dev)
8508 elif result.fail_msg or not result.payload:
8514 def CheckPrereq(self):
8515 """Check prerequisites.
8517 This checks that the instance is in the cluster.
8520 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8521 assert instance is not None, \
8522 "Cannot retrieve locked instance %s" % self.instance_name
8524 if instance.disk_template != constants.DT_DRBD8:
8525 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8526 " instances", errors.ECODE_INVAL)
8528 if len(instance.secondary_nodes) != 1:
8529 raise errors.OpPrereqError("The instance has a strange layout,"
8530 " expected one secondary but found %d" %
8531 len(instance.secondary_nodes),
8534 if not self.delay_iallocator:
8535 self._CheckPrereq2()
8537 def _CheckPrereq2(self):
8538 """Check prerequisites, second part.
This function should always be part of CheckPrereq. It was separated and is
now called from Exec because during node evacuation the iallocator was only
called with an unmodified cluster model, not taking planned changes into
account.
8546 instance = self.instance
8547 secondary_node = instance.secondary_nodes[0]
8549 if self.iallocator_name is None:
8550 remote_node = self.remote_node
8552 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8553 instance.name, instance.secondary_nodes)
8555 if remote_node is not None:
8556 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8557 assert self.remote_node_info is not None, \
8558 "Cannot retrieve locked node %s" % remote_node
8560 self.remote_node_info = None
8562 if remote_node == self.instance.primary_node:
8563 raise errors.OpPrereqError("The specified node is the primary node of"
8564 " the instance.", errors.ECODE_INVAL)
8566 if remote_node == secondary_node:
8567 raise errors.OpPrereqError("The specified node is already the"
8568 " secondary node of the instance.",
8571 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8572 constants.REPLACE_DISK_CHG):
8573 raise errors.OpPrereqError("Cannot specify disks to be replaced",
8576 if self.mode == constants.REPLACE_DISK_AUTO:
8577 if not self._CheckDisksActivated(instance):
8578 raise errors.OpPrereqError("Please run activate-disks on instance %s"
8579 " first" % self.instance_name,
8581 faulty_primary = self._FindFaultyDisks(instance.primary_node)
8582 faulty_secondary = self._FindFaultyDisks(secondary_node)
8584 if faulty_primary and faulty_secondary:
8585 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8586 " one node and can not be repaired"
8587 " automatically" % self.instance_name,
8591 self.disks = faulty_primary
8592 self.target_node = instance.primary_node
8593 self.other_node = secondary_node
8594 check_nodes = [self.target_node, self.other_node]
8595 elif faulty_secondary:
8596 self.disks = faulty_secondary
8597 self.target_node = secondary_node
8598 self.other_node = instance.primary_node
8599 check_nodes = [self.target_node, self.other_node]
8605 # Non-automatic modes
8606 if self.mode == constants.REPLACE_DISK_PRI:
8607 self.target_node = instance.primary_node
8608 self.other_node = secondary_node
8609 check_nodes = [self.target_node, self.other_node]
8611 elif self.mode == constants.REPLACE_DISK_SEC:
8612 self.target_node = secondary_node
8613 self.other_node = instance.primary_node
8614 check_nodes = [self.target_node, self.other_node]
8616 elif self.mode == constants.REPLACE_DISK_CHG:
8617 self.new_node = remote_node
8618 self.other_node = instance.primary_node
8619 self.target_node = secondary_node
8620 check_nodes = [self.new_node, self.other_node]
8622 _CheckNodeNotDrained(self.lu, remote_node)
8623 _CheckNodeVmCapable(self.lu, remote_node)
8625 old_node_info = self.cfg.GetNodeInfo(secondary_node)
8626 assert old_node_info is not None
8627 if old_node_info.offline and not self.early_release:
8628 # doesn't make sense to delay the release
8629 self.early_release = True
8630 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8631 " early-release mode", secondary_node)
8634 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
# If not specified, all disks should be replaced
8639 self.disks = range(len(self.instance.disks))
8641 for node in check_nodes:
8642 _CheckNodeOnline(self.lu, node)
8644 # Check whether disks are valid
8645 for disk_idx in self.disks:
8646 instance.FindDisk(disk_idx)
8648 # Get secondary node IP addresses
8651 for node_name in [self.target_node, self.other_node, self.new_node]:
8652 if node_name is not None:
8653 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8655 self.node_secondary_ip = node_2nd_ip
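# Recap of the roles computed above: target_node is where storage gets
# recreated, other_node is the surviving peer, and new_node (secondary
# change only) is the node that will become the new secondary;
# node_secondary_ip maps each of them to its replication IP address.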
8657 def Exec(self, feedback_fn):
8658 """Execute disk replacement.
8660 This dispatches the disk replacement to the appropriate handler.
8663 if self.delay_iallocator:
8664 self._CheckPrereq2()
8667 feedback_fn("No disks need replacement")
8670 feedback_fn("Replacing disk(s) %s for %s" %
8671 (utils.CommaJoin(self.disks), self.instance.name))
8673 activate_disks = (not self.instance.admin_up)
8675 # Activate the instance disks if we're replacing them on a down instance
8677 _StartInstanceDisks(self.lu, self.instance, True)
8680 # Should we replace the secondary node?
8681 if self.new_node is not None:
8682 fn = self._ExecDrbd8Secondary
8684 fn = self._ExecDrbd8DiskOnly
8686 return fn(feedback_fn)
8689 # Deactivate the instance disks if we're replacing them on a
8692 _SafeShutdownInstanceDisks(self.lu, self.instance)
8694 def _CheckVolumeGroup(self, nodes):
8695 self.lu.LogInfo("Checking volume groups")
8697 vgname = self.cfg.GetVGName()
8699 # Make sure volume group exists on all involved nodes
8700 results = self.rpc.call_vg_list(nodes)
8702 raise errors.OpExecError("Can't list volume groups on the nodes")
8706 res.Raise("Error checking node %s" % node)
8707 if vgname not in res.payload:
8708 raise errors.OpExecError("Volume group '%s' not found on node %s" %
8711 def _CheckDisksExistence(self, nodes):
8712 # Check disk existence
8713 for idx, dev in enumerate(self.instance.disks):
8714 if idx not in self.disks:
8718 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8719 self.cfg.SetDiskID(dev, node)
8721 result = self.rpc.call_blockdev_find(node, dev)
8723 msg = result.fail_msg
8724 if msg or not result.payload:
8726 msg = "disk not found"
8727 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8730 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8731 for idx, dev in enumerate(self.instance.disks):
8732 if idx not in self.disks:
8735 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8738 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8740 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8741 " replace disks for instance %s" %
8742 (node_name, self.instance.name))
8744 def _CreateNewStorage(self, node_name):
8745 vgname = self.cfg.GetVGName()
8748 for idx, dev in enumerate(self.instance.disks):
8749 if idx not in self.disks:
8752 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8754 self.cfg.SetDiskID(dev, node_name)
8756 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8757 names = _GenerateUniqueNames(self.lu, lv_names)
8759 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8760 logical_id=(vgname, names[0]))
8761 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8762 logical_id=(vgname, names[1]))
8764 new_lvs = [lv_data, lv_meta]
8765 old_lvs = dev.children
8766 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8768 # we pass force_create=True to force the LVM creation
8769 for new_lv in new_lvs:
8770 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8771 _GetInstanceInfoText(self.instance), False)
8775 def _CheckDevices(self, node_name, iv_names):
8776 for name, (dev, _, _) in iv_names.iteritems():
8777 self.cfg.SetDiskID(dev, node_name)
8779 result = self.rpc.call_blockdev_find(node_name, dev)
8781 msg = result.fail_msg
8782 if msg or not result.payload:
8784 msg = "disk not found"
8785 raise errors.OpExecError("Can't find DRBD device %s: %s" %
8788 if result.payload.is_degraded:
8789 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8791 def _RemoveOldStorage(self, node_name, iv_names):
8792 for name, (_, old_lvs, _) in iv_names.iteritems():
8793 self.lu.LogInfo("Remove logical volumes for %s" % name)
8796 self.cfg.SetDiskID(lv, node_name)
8798 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8800 self.lu.LogWarning("Can't remove old LV: %s" % msg,
8801 hint="remove unused LVs manually")
8803 def _ReleaseNodeLock(self, node_name):
8804 """Releases the lock for a given node."""
8805 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8807 def _ExecDrbd8DiskOnly(self, feedback_fn):
8808 """Replace a disk on the primary or secondary for DRBD 8.
8810 The algorithm for replace is quite complicated:
8812 1. for each disk to be replaced:
8814 1. create new LVs on the target node with unique names
8815 1. detach old LVs from the drbd device
8816 1. rename old LVs to name_replaced.<time_t>
8817 1. rename new LVs to old LVs
8818 1. attach the new LVs (with the old names now) to the drbd device
8820 1. wait for sync across all devices
8822 1. for each modified disk:
1. remove old LVs (which have the name name_replaced.<time_t>)
8826 Failures are not very well handled.
# Step: check device existence
8832 self.lu.LogStep(1, steps_total, "Check device existence")
8833 self._CheckDisksExistence([self.other_node, self.target_node])
8834 self._CheckVolumeGroup([self.target_node, self.other_node])
8836 # Step: check other node consistency
8837 self.lu.LogStep(2, steps_total, "Check peer consistency")
8838 self._CheckDisksConsistency(self.other_node,
8839 self.other_node == self.instance.primary_node,
8842 # Step: create new storage
8843 self.lu.LogStep(3, steps_total, "Allocate new storage")
8844 iv_names = self._CreateNewStorage(self.target_node)
8846 # Step: for each lv, detach+rename*2+attach
8847 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8848 for dev, old_lvs, new_lvs in iv_names.itervalues():
8849 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8851 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8853 result.Raise("Can't detach drbd from local storage on node"
8854 " %s for device %s" % (self.target_node, dev.iv_name))
8856 #cfg.Update(instance)
8858 # ok, we created the new LVs, so now we know we have the needed
8859 # storage; as such, we proceed on the target node to rename
8860 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8861 # using the assumption that logical_id == physical_id (which in
8862 # turn is the unique_id on that node)
8864 # FIXME(iustin): use a better name for the replaced LVs
8865 temp_suffix = int(time.time())
8866 ren_fn = lambda d, suff: (d.physical_id[0],
8867 d.physical_id[1] + "_replaced-%s" % suff)
8869 # Build the rename list based on what LVs exist on the node
8870 rename_old_to_new = []
8871 for to_ren in old_lvs:
8872 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8873 if not result.fail_msg and result.payload:
8875 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8877 self.lu.LogInfo("Renaming the old LVs on the target node")
8878 result = self.rpc.call_blockdev_rename(self.target_node,
8880 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8882 # Now we rename the new LVs to the old LVs
8883 self.lu.LogInfo("Renaming the new LVs on the target node")
8884 rename_new_to_old = [(new, old.physical_id)
8885 for old, new in zip(old_lvs, new_lvs)]
8886 result = self.rpc.call_blockdev_rename(self.target_node,
8888 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8890 for old, new in zip(old_lvs, new_lvs):
8891 new.logical_id = old.logical_id
8892 self.cfg.SetDiskID(new, self.target_node)
8894 for disk in old_lvs:
8895 disk.logical_id = ren_fn(disk, temp_suffix)
8896 self.cfg.SetDiskID(disk, self.target_node)
8898 # Now that the new lvs have the old name, we can add them to the device
8899 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8900 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8902 msg = result.fail_msg
8904 for new_lv in new_lvs:
8905 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8908 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8909 hint=("cleanup manually the unused logical"
8911 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8913 dev.children = new_lvs
8915 self.cfg.Update(self.instance, feedback_fn)
8918 if self.early_release:
8919 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8921 self._RemoveOldStorage(self.target_node, iv_names)
8922 # WARNING: we release both node locks here, do not do other RPCs
8923 # than WaitForSync to the primary node
8924 self._ReleaseNodeLock([self.target_node, self.other_node])
# This can fail as the old devices are degraded and _WaitForSync
# returns a combined result over all disks, so we don't check its return value
8929 self.lu.LogStep(cstep, steps_total, "Sync devices")
8931 _WaitForSync(self.lu, self.instance)
8933 # Check all devices manually
8934 self._CheckDevices(self.instance.primary_node, iv_names)
8936 # Step: remove old storage
8937 if not self.early_release:
8938 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8940 self._RemoveOldStorage(self.target_node, iv_names)
8942 def _ExecDrbd8Secondary(self, feedback_fn):
8943 """Replace the secondary node for DRBD 8.
8945 The algorithm for replace is quite complicated:
8946 - for all disks of the instance:
8947 - create new LVs on the new node with same names
8948 - shutdown the drbd device on the old secondary
8949 - disconnect the drbd network on the primary
8950 - create the drbd device on the new secondary
8951 - network attach the drbd on the primary, using an artifice:
8952 the drbd code for Attach() will connect to the network if it
8953 finds a device which is connected to the good local disks but
8955 - wait for sync across all devices
8956 - remove all disks from the old secondary
8958 Failures are not very well handled.
# Step: check device existence
8964 self.lu.LogStep(1, steps_total, "Check device existence")
8965 self._CheckDisksExistence([self.instance.primary_node])
8966 self._CheckVolumeGroup([self.instance.primary_node])
8968 # Step: check other node consistency
8969 self.lu.LogStep(2, steps_total, "Check peer consistency")
8970 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8972 # Step: create new storage
8973 self.lu.LogStep(3, steps_total, "Allocate new storage")
8974 for idx, dev in enumerate(self.instance.disks):
8975 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8976 (self.new_node, idx))
8977 # we pass force_create=True to force LVM creation
8978 for new_lv in dev.children:
8979 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8980 _GetInstanceInfoText(self.instance), False)
# Step 4: drbd minors and drbd setup changes
8983 # after this, we must manually remove the drbd minors on both the
8984 # error and the success paths
8985 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8986 minors = self.cfg.AllocateDRBDMinor([self.new_node
8987 for dev in self.instance.disks],
8989 logging.debug("Allocated minors %r", minors)
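# The minors are only tentatively allocated here; every error path below
# must give them back via ReleaseDRBDMinors, otherwise they would leak in
# the configuration.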
8992 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
self.lu.LogInfo("Activating a new drbd on %s for disk/%d" %
8994 (self.new_node, idx))
8995 # create new devices on new_node; note that we create two IDs:
8996 # one without port, so the drbd will be activated without
8997 # networking information on the new node at this stage, and one
# with network, for the later activation in step 4
8999 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9000 if self.instance.primary_node == o_node1:
9003 assert self.instance.primary_node == o_node2, "Three-node instance?"
9006 new_alone_id = (self.instance.primary_node, self.new_node, None,
9007 p_minor, new_minor, o_secret)
9008 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9009 p_minor, new_minor, o_secret)
9011 iv_names[idx] = (dev, dev.children, new_net_id)
9012 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9014 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9015 logical_id=new_alone_id,
9016 children=dev.children,
9019 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9020 _GetInstanceInfoText(self.instance), False)
9021 except errors.GenericError:
9022 self.cfg.ReleaseDRBDMinors(self.instance.name)
9025 # We have new devices, shutdown the drbd on the old secondary
9026 for idx, dev in enumerate(self.instance.disks):
9027 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9028 self.cfg.SetDiskID(dev, self.target_node)
9029 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
self.lu.LogWarning("Failed to shut down drbd for disk/%d on old"
" node: %s" % (idx, msg),
9033 hint=("Please cleanup this device manually as"
9034 " soon as possible"))
9036 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9037 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9038 self.node_secondary_ip,
9039 self.instance.disks)\
9040 [self.instance.primary_node]
9042 msg = result.fail_msg
9044 # detaches didn't succeed (unlikely)
9045 self.cfg.ReleaseDRBDMinors(self.instance.name)
9046 raise errors.OpExecError("Can't detach the disks from the network on"
9047 " old node: %s" % (msg,))
9049 # if we managed to detach at least one, we update all the disks of
9050 # the instance to point to the new secondary
9051 self.lu.LogInfo("Updating instance configuration")
9052 for dev, _, new_logical_id in iv_names.itervalues():
9053 dev.logical_id = new_logical_id
9054 self.cfg.SetDiskID(dev, self.instance.primary_node)
9056 self.cfg.Update(self.instance, feedback_fn)
9058 # and now perform the drbd attach
9059 self.lu.LogInfo("Attaching primary drbds to new secondary"
9060 " (standalone => connected)")
9061 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9063 self.node_secondary_ip,
9064 self.instance.disks,
9067 for to_node, to_result in result.items():
9068 msg = to_result.fail_msg
9070 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9072 hint=("please do a gnt-instance info to see the"
9073 " status of disks"))
9075 if self.early_release:
9076 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9078 self._RemoveOldStorage(self.target_node, iv_names)
9079 # WARNING: we release all node locks here, do not do other RPCs
9080 # than WaitForSync to the primary node
9081 self._ReleaseNodeLock([self.instance.primary_node,
# This can fail as the old devices are degraded and _WaitForSync
# returns a combined result over all disks, so we don't check its return value
9088 self.lu.LogStep(cstep, steps_total, "Sync devices")
9090 _WaitForSync(self.lu, self.instance)
9092 # Check all devices manually
9093 self._CheckDevices(self.instance.primary_node, iv_names)
9095 # Step: remove old storage
9096 if not self.early_release:
9097 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9098 self._RemoveOldStorage(self.target_node, iv_names)
9101 class LURepairNodeStorage(NoHooksLU):
9102 """Repairs the volume group on a node.
9107 def CheckArguments(self):
9108 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9110 storage_type = self.op.storage_type
9112 if (constants.SO_FIX_CONSISTENCY not in
9113 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
raise errors.OpPrereqError("Storage units of type '%s' cannot be"
9115 " repaired" % storage_type,
9118 def ExpandNames(self):
9119 self.needed_locks = {
9120 locking.LEVEL_NODE: [self.op.node_name],
9123 def _CheckFaultyDisks(self, instance, node_name):
9124 """Ensure faulty disks abort the opcode or at least warn."""
9126 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9128 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9129 " node '%s'" % (instance.name, node_name),
9131 except errors.OpPrereqError, err:
9132 if self.op.ignore_consistency:
9133 self.proc.LogWarning(str(err.args[0]))
9137 def CheckPrereq(self):
9138 """Check prerequisites.
9141 # Check whether any instance on this node has faulty disks
9142 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9143 if not inst.admin_up:
9145 check_nodes = set(inst.all_nodes)
9146 check_nodes.discard(self.op.node_name)
9147 for inst_node_name in check_nodes:
9148 self._CheckFaultyDisks(inst, inst_node_name)
9150 def Exec(self, feedback_fn):
9151 feedback_fn("Repairing storage unit '%s' on %s ..." %
9152 (self.op.name, self.op.node_name))
9154 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9155 result = self.rpc.call_storage_execute(self.op.node_name,
9156 self.op.storage_type, st_args,
9158 constants.SO_FIX_CONSISTENCY)
9159 result.Raise("Failed to repair storage unit '%s' on %s" %
9160 (self.op.name, self.op.node_name))
9163 class LUNodeEvacStrategy(NoHooksLU):
9164 """Computes the node evacuation strategy.
9169 def CheckArguments(self):
9170 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9172 def ExpandNames(self):
9173 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9174 self.needed_locks = locks = {}
9175 if self.op.remote_node is None:
9176 locks[locking.LEVEL_NODE] = locking.ALL_SET
9178 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9179 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9181 def Exec(self, feedback_fn):
9182 if self.op.remote_node is not None:
9184 for node in self.op.nodes:
9185 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9188 if i.primary_node == self.op.remote_node:
9189 raise errors.OpPrereqError("Node %s is the primary node of"
9190 " instance %s, cannot use it as"
9192 (self.op.remote_node, i.name),
9194 result.append([i.name, self.op.remote_node])
9196 ial = IAllocator(self.cfg, self.rpc,
9197 mode=constants.IALLOCATOR_MODE_MEVAC,
9198 evac_nodes=self.op.nodes)
9199 ial.Run(self.op.iallocator, validate=True)
9201 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
9207 class LUInstanceGrowDisk(LogicalUnit):
9208 """Grow a disk of an instance.
9212 HTYPE = constants.HTYPE_INSTANCE
9215 def ExpandNames(self):
9216 self._ExpandAndLockInstance()
9217 self.needed_locks[locking.LEVEL_NODE] = []
9218 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9220 def DeclareLocks(self, level):
9221 if level == locking.LEVEL_NODE:
9222 self._LockInstancesNodes()
9224 def BuildHooksEnv(self):
9227 This runs on the master, the primary and all the secondaries.
9231 "DISK": self.op.disk,
9232 "AMOUNT": self.op.amount,
9234 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9237 def BuildHooksNodes(self):
9238 """Build hooks nodes.
9241 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9244 def CheckPrereq(self):
9245 """Check prerequisites.
9247 This checks that the instance is in the cluster.
9250 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9251 assert instance is not None, \
9252 "Cannot retrieve locked instance %s" % self.op.instance_name
9253 nodenames = list(instance.all_nodes)
9254 for node in nodenames:
9255 _CheckNodeOnline(self, node)
9257 self.instance = instance
9259 if instance.disk_template not in constants.DTS_GROWABLE:
9260 raise errors.OpPrereqError("Instance's disk layout does not support"
9261 " growing.", errors.ECODE_INVAL)
9263 self.disk = instance.FindDisk(self.op.disk)
9265 if instance.disk_template not in (constants.DT_FILE,
9266 constants.DT_SHARED_FILE):
9267 # TODO: check the free disk space for file, when that feature will be
9269 _CheckNodesFreeDiskPerVG(self, nodenames,
9270 self.disk.ComputeGrowth(self.op.amount))
9272 def Exec(self, feedback_fn):
9273 """Execute disk grow.
9276 instance = self.instance
9279 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9281 raise errors.OpExecError("Cannot activate block device to grow")
9283 for node in instance.all_nodes:
9284 self.cfg.SetDiskID(disk, node)
9285 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
9286 result.Raise("Grow request failed to node %s" % node)
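# Note: self.op.amount is in mebibytes, like all Ganeti disk sizes.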
9288 # TODO: Rewrite code to work properly
9289 # DRBD goes into sync mode for a short amount of time after executing the
9290 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9291 # calling "resize" in sync mode fails. Sleeping for a short amount of
9292 # time is a work-around.
9295 disk.RecordGrow(self.op.amount)
9296 self.cfg.Update(instance, feedback_fn)
9297 if self.op.wait_for_sync:
9298 disk_abort = not _WaitForSync(self, instance, disks=[disk])
9300 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
9301 " status.\nPlease check the instance.")
9302 if not instance.admin_up:
9303 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9304 elif not instance.admin_up:
self.proc.LogWarning("Not shutting down the disk even if the instance is"
" not supposed to be running, because wait-for-sync"
" mode was not requested.")
9310 class LUInstanceQueryData(NoHooksLU):
9311 """Query runtime instance data.
9316 def ExpandNames(self):
9317 self.needed_locks = {}
9318 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9320 if self.op.instances:
9321 self.wanted_names = []
9322 for name in self.op.instances:
9323 full_name = _ExpandInstanceName(self.cfg, name)
9324 self.wanted_names.append(full_name)
9325 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9327 self.wanted_names = None
9328 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9330 self.needed_locks[locking.LEVEL_NODE] = []
9331 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9333 def DeclareLocks(self, level):
9334 if level == locking.LEVEL_NODE:
9335 self._LockInstancesNodes()
9337 def CheckPrereq(self):
9338 """Check prerequisites.
9340 This only checks the optional instance list against the existing names.
9343 if self.wanted_names is None:
9344 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
9346 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
9347 in self.wanted_names]
9349 def _ComputeBlockdevStatus(self, node, instance_name, dev):
9350 """Returns the status of a block device
9353 if self.op.static or not node:
9354 return None
9356 self.cfg.SetDiskID(dev, node)
9358 result = self.rpc.call_blockdev_find(node, dev)
9362 result.Raise("Can't compute disk status for %s" % instance_name)
9364 status = result.payload
9368 return (status.dev_path, status.major, status.minor,
9369 status.sync_percent, status.estimated_time,
9370 status.is_degraded, status.ldisk_status)
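# With hypothetical values, a successful call returns a tuple such as
#   ("/dev/drbd0", 147, 0, 90.0, 12.5, False, constants.LDS_OKAY)
# i.e. (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
# ldisk_status), or None when only static information was requested.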
9372 def _ComputeDiskStatus(self, instance, snode, dev):
9373 """Compute block device status.
9376 if dev.dev_type in constants.LDS_DRBD:
9377 # we change the snode then (otherwise we use the one passed in)
9378 if dev.logical_id[0] == instance.primary_node:
9379 snode = dev.logical_id[1]
9380 else:
9381 snode = dev.logical_id[0]
9383 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9384 instance.name, dev)
9385 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9388 dev_children = [self._ComputeDiskStatus(instance, snode, child)
9389 for child in dev.children]
9394 "iv_name": dev.iv_name,
9395 "dev_type": dev.dev_type,
9396 "logical_id": dev.logical_id,
9397 "physical_id": dev.physical_id,
9398 "pstatus": dev_pstatus,
9399 "sstatus": dev_sstatus,
9400 "children": dev_children,
9407 def Exec(self, feedback_fn):
9408 """Gather and return data"""
9411 cluster = self.cfg.GetClusterInfo()
9413 for instance in self.wanted_instances:
9414 if not self.op.static:
9415 remote_info = self.rpc.call_instance_info(instance.primary_node,
9417 instance.hypervisor)
9418 remote_info.Raise("Error checking node %s" % instance.primary_node)
9419 remote_info = remote_info.payload
9420 if remote_info and "state" in remote_info:
9423 remote_state = "down"
9426 if instance.admin_up:
9429 config_state = "down"
9431 disks = [self._ComputeDiskStatus(instance, None, device)
9432 for device in instance.disks]
9435 "name": instance.name,
9436 "config_state": config_state,
9437 "run_state": remote_state,
9438 "pnode": instance.primary_node,
9439 "snodes": instance.secondary_nodes,
9441 # this happens to be the same format used for hooks
9442 "nics": _NICListToTuple(self, instance.nics),
9443 "disk_template": instance.disk_template,
9445 "hypervisor": instance.hypervisor,
9446 "network_port": instance.network_port,
9447 "hv_instance": instance.hvparams,
9448 "hv_actual": cluster.FillHV(instance, skip_globals=True),
9449 "be_instance": instance.beparams,
9450 "be_actual": cluster.FillBE(instance),
9451 "os_instance": instance.osparams,
9452 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9453 "serial_no": instance.serial_no,
9454 "mtime": instance.mtime,
9455 "ctime": instance.ctime,
9456 "uuid": instance.uuid,
9459 result[instance.name] = idict
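# A sketch of the mapping built above, with hypothetical values and only a
# subset of the keys:
#
#   {"inst1.example.com": {"name": "inst1.example.com",
#                          "config_state": "up",
#                          "run_state": "down",
#                          "pnode": "node1.example.com",
#                          "snodes": ["node2.example.com"],
#                          "disks": [...],
#                          ...}}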
9464 class LUInstanceSetParams(LogicalUnit):
9465 """Modifies an instances's parameters.
9468 HPATH = "instance-modify"
9469 HTYPE = constants.HTYPE_INSTANCE
9472 def CheckArguments(self):
9473 if not (self.op.nics or self.op.disks or self.op.disk_template or
9474 self.op.hvparams or self.op.beparams or self.op.os_name):
9475 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9477 if self.op.hvparams:
9478 _CheckGlobalHvParams(self.op.hvparams)
9482 for disk_op, disk_dict in self.op.disks:
9483 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9484 if disk_op == constants.DDM_REMOVE:
9487 elif disk_op == constants.DDM_ADD:
9490 if not isinstance(disk_op, int):
9491 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9492 if not isinstance(disk_dict, dict):
9493 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9494 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9496 if disk_op == constants.DDM_ADD:
9497 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9498 if mode not in constants.DISK_ACCESS_SET:
9499 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9501 size = disk_dict.get('size', None)
9503 raise errors.OpPrereqError("Required disk parameter size missing",
9505 try:
9506 size = int(size)
9507 except (TypeError, ValueError), err:
9508 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9509 str(err), errors.ECODE_INVAL)
9510 disk_dict['size'] = size
9512 # modification of disk
9513 if 'size' in disk_dict:
9514 raise errors.OpPrereqError("Disk size change not possible, use"
9515 " grow-disk", errors.ECODE_INVAL)
9517 if disk_addremove > 1:
9518 raise errors.OpPrereqError("Only one disk add or remove operation"
9519 " supported at a time", errors.ECODE_INVAL)
9521 if self.op.disks and self.op.disk_template is not None:
9522 raise errors.OpPrereqError("Disk template conversion and other disk"
9523 " changes not supported at the same time",
9526 if (self.op.disk_template and
9527 self.op.disk_template in constants.DTS_INT_MIRROR and
9528 self.op.remote_node is None):
9529 raise errors.OpPrereqError("Changing the disk template to a mirrored"
9530 " one requires specifying a secondary node",
9535 for nic_op, nic_dict in self.op.nics:
9536 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9537 if nic_op == constants.DDM_REMOVE:
9540 elif nic_op == constants.DDM_ADD:
9543 if not isinstance(nic_op, int):
9544 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9545 if not isinstance(nic_dict, dict):
9546 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9547 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9549 # nic_dict should be a dict
9550 nic_ip = nic_dict.get('ip', None)
9551 if nic_ip is not None:
9552 if nic_ip.lower() == constants.VALUE_NONE:
9553 nic_dict['ip'] = None
9555 if not netutils.IPAddress.IsValid(nic_ip):
9556 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9559 nic_bridge = nic_dict.get('bridge', None)
9560 nic_link = nic_dict.get('link', None)
9561 if nic_bridge and nic_link:
9562 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9563 " at the same time", errors.ECODE_INVAL)
9564 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9565 nic_dict['bridge'] = None
9566 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9567 nic_dict['link'] = None
9569 if nic_op == constants.DDM_ADD:
9570 nic_mac = nic_dict.get('mac', None)
9571 if nic_mac is None:
9572 nic_dict['mac'] = constants.VALUE_AUTO
9574 if 'mac' in nic_dict:
9575 nic_mac = nic_dict['mac']
9576 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9577 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9579 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9580 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9581 " modifying an existing nic",
9584 if nic_addremove > 1:
9585 raise errors.OpPrereqError("Only one NIC add or remove operation"
9586 " supported at a time", errors.ECODE_INVAL)
9588 def ExpandNames(self):
9589 self._ExpandAndLockInstance()
9590 self.needed_locks[locking.LEVEL_NODE] = []
9591 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9593 def DeclareLocks(self, level):
9594 if level == locking.LEVEL_NODE:
9595 self._LockInstancesNodes()
9596 if self.op.disk_template and self.op.remote_node:
9597 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9598 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9600 def BuildHooksEnv(self):
9603 This runs on the master, primary and secondaries.
9607 if constants.BE_MEMORY in self.be_new:
9608 args['memory'] = self.be_new[constants.BE_MEMORY]
9609 if constants.BE_VCPUS in self.be_new:
9610 args['vcpus'] = self.be_new[constants.BE_VCPUS]
9611 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9612 # information at all.
9615 nic_override = dict(self.op.nics)
9616 for idx, nic in enumerate(self.instance.nics):
9617 if idx in nic_override:
9618 this_nic_override = nic_override[idx]
9620 this_nic_override = {}
9621 if 'ip' in this_nic_override:
9622 ip = this_nic_override['ip']
9625 if 'mac' in this_nic_override:
9626 mac = this_nic_override['mac']
9629 if idx in self.nic_pnew:
9630 nicparams = self.nic_pnew[idx]
9632 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9633 mode = nicparams[constants.NIC_MODE]
9634 link = nicparams[constants.NIC_LINK]
9635 args['nics'].append((ip, mac, mode, link))
9636 if constants.DDM_ADD in nic_override:
9637 ip = nic_override[constants.DDM_ADD].get('ip', None)
9638 mac = nic_override[constants.DDM_ADD]['mac']
9639 nicparams = self.nic_pnew[constants.DDM_ADD]
9640 mode = nicparams[constants.NIC_MODE]
9641 link = nicparams[constants.NIC_LINK]
9642 args['nics'].append((ip, mac, mode, link))
9643 elif constants.DDM_REMOVE in nic_override:
9644 del args['nics'][-1]
9646 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9647 if self.op.disk_template:
9648 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9652 def BuildHooksNodes(self):
9653 """Build hooks nodes.
9656 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9657 return (nl, nl)
9659 def CheckPrereq(self):
9660 """Check prerequisites.
9662 This checks the requested parameter changes against the current instance
9663 and cluster configuration.
9665 # checking the new params on the primary/secondary nodes
9667 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9668 cluster = self.cluster = self.cfg.GetClusterInfo()
9669 assert self.instance is not None, \
9670 "Cannot retrieve locked instance %s" % self.op.instance_name
9671 pnode = instance.primary_node
9672 nodelist = list(instance.all_nodes)
9675 if self.op.os_name and not self.op.force:
9676 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9677 self.op.force_variant)
9678 instance_os = self.op.os_name
9679 else:
9680 instance_os = instance.os
9682 if self.op.disk_template:
9683 if instance.disk_template == self.op.disk_template:
9684 raise errors.OpPrereqError("Instance already has disk template %s" %
9685 instance.disk_template, errors.ECODE_INVAL)
9687 if (instance.disk_template,
9688 self.op.disk_template) not in self._DISK_CONVERSIONS:
9689 raise errors.OpPrereqError("Unsupported disk template conversion from"
9690 " %s to %s" % (instance.disk_template,
9691 self.op.disk_template),
9693 _CheckInstanceDown(self, instance, "cannot change disk template")
9694 if self.op.disk_template in constants.DTS_INT_MIRROR:
9695 if self.op.remote_node == pnode:
9696 raise errors.OpPrereqError("Given new secondary node %s is the same"
9697 " as the primary node of the instance" %
9698 self.op.remote_node, errors.ECODE_STATE)
9699 _CheckNodeOnline(self, self.op.remote_node)
9700 _CheckNodeNotDrained(self, self.op.remote_node)
9701 # FIXME: here we assume that the old instance type is DT_PLAIN
9702 assert instance.disk_template == constants.DT_PLAIN
9703 disks = [{"size": d.size, "vg": d.logical_id[0]}
9704 for d in instance.disks]
9705 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9706 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9708 # hvparams processing
9709 if self.op.hvparams:
9710 hv_type = instance.hypervisor
9711 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9712 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9713 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9716 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9717 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9718 self.hv_new = hv_new # the new actual values
9719 self.hv_inst = i_hvdict # the new dict (without defaults)
9720 else:
9721 self.hv_new = self.hv_inst = {}
9723 # beparams processing
9724 if self.op.beparams:
9725 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9727 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9728 be_new = cluster.SimpleFillBE(i_bedict)
9729 self.be_new = be_new # the new actual values
9730 self.be_inst = i_bedict # the new dict (without defaults)
9731 else:
9732 self.be_new = self.be_inst = {}
9734 # osparams processing
9735 if self.op.osparams:
9736 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9737 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9738 self.os_inst = i_osdict # the new dict (without defaults)
9744 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9745 mem_check_list = [pnode]
9746 if be_new[constants.BE_AUTO_BALANCE]:
9747 # either we changed auto_balance to yes or it was from before
9748 mem_check_list.extend(instance.secondary_nodes)
9749 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9750 instance.hypervisor)
9751 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9752 instance.hypervisor)
9753 pninfo = nodeinfo[pnode]
9754 msg = pninfo.fail_msg
9755 if msg:
9756 # Assume the primary node is unreachable and go ahead
9757 self.warn.append("Can't get info from primary node %s: %s" %
9758 (pnode, msg))
9759 elif not isinstance(pninfo.payload.get('memory_free', None), int):
9760 self.warn.append("Node data from primary node %s doesn't contain"
9761 " free memory information" % pnode)
9762 elif instance_info.fail_msg:
9763 self.warn.append("Can't get instance runtime information: %s" %
9764 instance_info.fail_msg)
9766 if instance_info.payload:
9767 current_mem = int(instance_info.payload['memory'])
9769 # Assume instance not running
9770 # (there is a slight race condition here, but it's not very probable,
9771 # and we have no other way to check)
9773 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9774 pninfo.payload['memory_free'])
9776 raise errors.OpPrereqError("This change will prevent the instance"
9777 " from starting, due to %d MB of memory"
9778 " missing on its primary node" % miss_mem,
9781 if be_new[constants.BE_AUTO_BALANCE]:
9782 for node, nres in nodeinfo.items():
9783 if node not in instance.secondary_nodes:
9787 self.warn.append("Can't get info from secondary node %s: %s" %
9789 elif not isinstance(nres.payload.get('memory_free', None), int):
9790 self.warn.append("Secondary node %s didn't return free"
9791 " memory information" % node)
9792 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9793 self.warn.append("Not enough memory to failover instance to"
9794 " secondary node %s" % node)
9799 for nic_op, nic_dict in self.op.nics:
9800 if nic_op == constants.DDM_REMOVE:
9801 if not instance.nics:
9802 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9805 if nic_op != constants.DDM_ADD:
9807 if not instance.nics:
9808 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9809 " no NICs" % nic_op,
9811 if nic_op < 0 or nic_op >= len(instance.nics):
9812 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9814 (nic_op, len(instance.nics) - 1),
9816 old_nic_params = instance.nics[nic_op].nicparams
9817 old_nic_ip = instance.nics[nic_op].ip
9822 update_params_dict = dict([(key, nic_dict[key])
9823 for key in constants.NICS_PARAMETERS
9824 if key in nic_dict])
9826 if 'bridge' in nic_dict:
9827 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9829 new_nic_params = _GetUpdatedParams(old_nic_params,
9831 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9832 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9833 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9834 self.nic_pinst[nic_op] = new_nic_params
9835 self.nic_pnew[nic_op] = new_filled_nic_params
9836 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9838 if new_nic_mode == constants.NIC_MODE_BRIDGED:
9839 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9840 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9842 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9844 self.warn.append(msg)
9846 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9847 if new_nic_mode == constants.NIC_MODE_ROUTED:
9848 if 'ip' in nic_dict:
9849 nic_ip = nic_dict['ip']
9853 raise errors.OpPrereqError("Cannot set the nic ip to None"
9854 " on a routed nic", errors.ECODE_INVAL)
9855 if 'mac' in nic_dict:
9856 nic_mac = nic_dict['mac']
9858 raise errors.OpPrereqError("Cannot set the nic mac to None",
9860 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9861 # otherwise generate the mac
9862 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9864 # or validate/reserve the current one
9866 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9867 except errors.ReservationError:
9868 raise errors.OpPrereqError("MAC address %s already in use"
9869 " in cluster" % nic_mac,
9870 errors.ECODE_NOTUNIQUE)
9873 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9874 raise errors.OpPrereqError("Disk operations not supported for"
9875 " diskless instances",
9877 for disk_op, _ in self.op.disks:
9878 if disk_op == constants.DDM_REMOVE:
9879 if len(instance.disks) == 1:
9880 raise errors.OpPrereqError("Cannot remove the last disk of"
9881 " an instance", errors.ECODE_INVAL)
9882 _CheckInstanceDown(self, instance, "cannot remove disks")
9884 if (disk_op == constants.DDM_ADD and
9885 len(instance.disks) >= constants.MAX_DISKS):
9886 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9887 " add more" % constants.MAX_DISKS,
9889 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9891 if disk_op < 0 or disk_op >= len(instance.disks):
9892 raise errors.OpPrereqError("Invalid disk index %s, valid values"
9894 (disk_op, len(instance.disks)),
9899 def _ConvertPlainToDrbd(self, feedback_fn):
9900 """Converts an instance from plain to drbd.
9903 feedback_fn("Converting template to drbd")
9904 instance = self.instance
9905 pnode = instance.primary_node
9906 snode = self.op.remote_node
9908 # create a fake disk info for _GenerateDiskTemplate
9909 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9910 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9911 instance.name, pnode, [snode],
9912 disk_info, None, None, 0, feedback_fn)
9913 info = _GetInstanceInfoText(instance)
9914 feedback_fn("Creating aditional volumes...")
9915 # first, create the missing data and meta devices
9916 for disk in new_disks:
9917 # unfortunately this is... not too nice
9918 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9920 for child in disk.children:
9921 _CreateSingleBlockDev(self, snode, instance, child, info, True)
9922 # at this stage, all new LVs have been created, we can rename the
9924 feedback_fn("Renaming original volumes...")
9925 rename_list = [(o, n.children[0].logical_id)
9926 for (o, n) in zip(instance.disks, new_disks)]
9927 result = self.rpc.call_blockdev_rename(pnode, rename_list)
9928 result.Raise("Failed to rename original LVs")
9930 feedback_fn("Initializing DRBD devices...")
9931 # all child devices are in place, we can now create the DRBD devices
9932 for disk in new_disks:
9933 for node in [pnode, snode]:
9934 f_create = node == pnode
9935 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9937 # at this point, the instance has been modified
9938 instance.disk_template = constants.DT_DRBD8
9939 instance.disks = new_disks
9940 self.cfg.Update(instance, feedback_fn)
9942 # disks are created, waiting for sync
9943 disk_abort = not _WaitForSync(self, instance)
9945 raise errors.OpExecError("There are some degraded disks for"
9946 " this instance, please cleanup manually")
9948 def _ConvertDrbdToPlain(self, feedback_fn):
9949 """Converts an instance from drbd to plain.
9952 instance = self.instance
9953 assert len(instance.secondary_nodes) == 1
9954 pnode = instance.primary_node
9955 snode = instance.secondary_nodes[0]
9956 feedback_fn("Converting template to plain")
9958 old_disks = instance.disks
9959 new_disks = [d.children[0] for d in old_disks]
9961 # copy over size and mode
9962 for parent, child in zip(old_disks, new_disks):
9963 child.size = parent.size
9964 child.mode = parent.mode
9966 # update instance structure
9967 instance.disks = new_disks
9968 instance.disk_template = constants.DT_PLAIN
9969 self.cfg.Update(instance, feedback_fn)
9971 feedback_fn("Removing volumes on the secondary node...")
9972 for disk in old_disks:
9973 self.cfg.SetDiskID(disk, snode)
9974 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9976 self.LogWarning("Could not remove block device %s on node %s,"
9977 " continuing anyway: %s", disk.iv_name, snode, msg)
9979 feedback_fn("Removing unneeded volumes on the primary node...")
9980 for idx, disk in enumerate(old_disks):
9981 meta = disk.children[1]
9982 self.cfg.SetDiskID(meta, pnode)
9983 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9985 self.LogWarning("Could not remove metadata for disk %d on node %s,"
9986 " continuing anyway: %s", idx, pnode, msg)
9988 def Exec(self, feedback_fn):
9989 """Modifies an instance.
9991 All parameters take effect only at the next restart of the instance.
9994 # Process here the warnings from CheckPrereq, as we don't have a
9995 # feedback_fn there.
9996 for warn in self.warn:
9997 feedback_fn("WARNING: %s" % warn)
10000 instance = self.instance
10002 for disk_op, disk_dict in self.op.disks:
10003 if disk_op == constants.DDM_REMOVE:
10004 # remove the last disk
10005 device = instance.disks.pop()
10006 device_idx = len(instance.disks)
10007 for node, disk in device.ComputeNodeTree(instance.primary_node):
10008 self.cfg.SetDiskID(disk, node)
10009 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10011 self.LogWarning("Could not remove disk/%d on node %s: %s,"
10012 " continuing anyway", device_idx, node, msg)
10013 result.append(("disk/%d" % device_idx, "remove"))
10014 elif disk_op == constants.DDM_ADD:
10016 if instance.disk_template in (constants.DT_FILE,
10017 constants.DT_SHARED_FILE):
10018 file_driver, file_path = instance.disks[0].logical_id
10019 file_path = os.path.dirname(file_path)
10020 else:
10021 file_driver = file_path = None
10022 disk_idx_base = len(instance.disks)
10023 new_disk = _GenerateDiskTemplate(self,
10024 instance.disk_template,
10025 instance.name, instance.primary_node,
10026 instance.secondary_nodes,
10030 disk_idx_base, feedback_fn)[0]
10031 instance.disks.append(new_disk)
10032 info = _GetInstanceInfoText(instance)
10034 logging.info("Creating volume %s for instance %s",
10035 new_disk.iv_name, instance.name)
10036 # Note: this needs to be kept in sync with _CreateDisks
10038 for node in instance.all_nodes:
10039 f_create = node == instance.primary_node
10041 _CreateBlockDev(self, node, instance, new_disk,
10042 f_create, info, f_create)
10043 except errors.OpExecError, err:
10044 self.LogWarning("Failed to create volume %s (%s) on"
10046 new_disk.iv_name, new_disk, node, err)
10047 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10048 (new_disk.size, new_disk.mode)))
10050 # change a given disk
10051 instance.disks[disk_op].mode = disk_dict['mode']
10052 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
10054 if self.op.disk_template:
10055 r_shut = _ShutdownInstanceDisks(self, instance)
10057 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10058 " proceed with disk template conversion")
10059 mode = (instance.disk_template, self.op.disk_template)
10061 self._DISK_CONVERSIONS[mode](self, feedback_fn)
10063 self.cfg.ReleaseDRBDMinors(instance.name)
10065 result.append(("disk_template", self.op.disk_template))
10068 for nic_op, nic_dict in self.op.nics:
10069 if nic_op == constants.DDM_REMOVE:
10070 # remove the last nic
10071 del instance.nics[-1]
10072 result.append(("nic.%d" % len(instance.nics), "remove"))
10073 elif nic_op == constants.DDM_ADD:
10074 # mac and bridge should be set, by now
10075 mac = nic_dict['mac']
10076 ip = nic_dict.get('ip', None)
10077 nicparams = self.nic_pinst[constants.DDM_ADD]
10078 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10079 instance.nics.append(new_nic)
10080 result.append(("nic.%d" % (len(instance.nics) - 1),
10081 "add:mac=%s,ip=%s,mode=%s,link=%s" %
10082 (new_nic.mac, new_nic.ip,
10083 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10084 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10087 for key in 'mac', 'ip':
10088 if key in nic_dict:
10089 setattr(instance.nics[nic_op], key, nic_dict[key])
10090 if nic_op in self.nic_pinst:
10091 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10092 for key, val in nic_dict.iteritems():
10093 result.append(("nic.%s/%d" % (key, nic_op), val))
10096 if self.op.hvparams:
10097 instance.hvparams = self.hv_inst
10098 for key, val in self.op.hvparams.iteritems():
10099 result.append(("hv/%s" % key, val))
10102 if self.op.beparams:
10103 instance.beparams = self.be_inst
10104 for key, val in self.op.beparams.iteritems():
10105 result.append(("be/%s" % key, val))
10108 if self.op.os_name:
10109 instance.os = self.op.os_name
10112 if self.op.osparams:
10113 instance.osparams = self.os_inst
10114 for key, val in self.op.osparams.iteritems():
10115 result.append(("os/%s" % key, val))
10117 self.cfg.Update(instance, feedback_fn)
10121 _DISK_CONVERSIONS = {
10122 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10123 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
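# Only the two conversions registered above are supported. A sketch of the
# CLI path that ends up here (hypothetical names):
#   $ gnt-instance modify -t drbd -n node2.example.com inst1.example.com
# which reaches Exec() with mode == (constants.DT_PLAIN, constants.DT_DRBD8).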
10127 class LUBackupQuery(NoHooksLU):
10128 """Query the exports list
10133 def ExpandNames(self):
10134 self.needed_locks = {}
10135 self.share_locks[locking.LEVEL_NODE] = 1
10136 if not self.op.nodes:
10137 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10139 self.needed_locks[locking.LEVEL_NODE] = \
10140 _GetWantedNodes(self, self.op.nodes)
10142 def Exec(self, feedback_fn):
10143 """Compute the list of all the exported system images.
10146 @return: a dictionary with the structure node->(export-list)
10147 where export-list is a list of the instances exported on
10151 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
10152 rpcresult = self.rpc.call_export_list(self.nodes)
10154 for node in rpcresult:
10155 if rpcresult[node].fail_msg:
10156 result[node] = False
10157 else:
10158 result[node] = rpcresult[node].payload
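# The returned mapping has node names as keys and either a list of export
# names or False (on RPC failure) as values, e.g. (hypothetical values):
#   {"node1.example.com": ["inst1.example.com"], "node2.example.com": False}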
10163 class LUBackupPrepare(NoHooksLU):
10164 """Prepares an instance for an export and returns useful information.
10169 def ExpandNames(self):
10170 self._ExpandAndLockInstance()
10172 def CheckPrereq(self):
10173 """Check prerequisites.
10176 instance_name = self.op.instance_name
10178 self.instance = self.cfg.GetInstanceInfo(instance_name)
10179 assert self.instance is not None, \
10180 "Cannot retrieve locked instance %s" % self.op.instance_name
10181 _CheckNodeOnline(self, self.instance.primary_node)
10183 self._cds = _GetClusterDomainSecret()
10185 def Exec(self, feedback_fn):
10186 """Prepares an instance for an export.
10189 instance = self.instance
10191 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10192 salt = utils.GenerateSecret(8)
10194 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10195 result = self.rpc.call_x509_cert_create(instance.primary_node,
10196 constants.RIE_CERT_VALIDITY)
10197 result.Raise("Can't create X509 key and certificate on %s" % result.node)
10199 (name, cert_pem) = result.payload
10201 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10205 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10206 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10208 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10214 class LUBackupExport(LogicalUnit):
10215 """Export an instance to an image in the cluster.
10218 HPATH = "instance-export"
10219 HTYPE = constants.HTYPE_INSTANCE
10222 def CheckArguments(self):
10223 """Check the arguments.
10226 self.x509_key_name = self.op.x509_key_name
10227 self.dest_x509_ca_pem = self.op.destination_x509_ca
10229 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10230 if not self.x509_key_name:
10231 raise errors.OpPrereqError("Missing X509 key name for encryption",
10232 errors.ECODE_INVAL)
10234 if not self.dest_x509_ca_pem:
10235 raise errors.OpPrereqError("Missing destination X509 CA",
10236 errors.ECODE_INVAL)
10238 def ExpandNames(self):
10239 self._ExpandAndLockInstance()
10241 # Lock all nodes for local exports
10242 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10243 # FIXME: lock only instance primary and destination node
10245 # Sad but true, for now we have to lock all nodes, as we don't know where
10246 # the previous export might be, and in this LU we search for it and
10247 # remove it from its current node. In the future we could fix this by:
10248 # - making a tasklet to search (share-lock all), then create the
10249 # new one, then one to remove, after
10250 # - removing the removal operation altogether
10251 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10253 def DeclareLocks(self, level):
10254 """Last minute lock declaration."""
10255 # All nodes are locked anyway, so nothing to do here.
10257 def BuildHooksEnv(self):
10258 """Build hooks env.
10260 This will run on the master, primary node and target node.
10264 "EXPORT_MODE": self.op.mode,
10265 "EXPORT_NODE": self.op.target_node,
10266 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10267 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10268 # TODO: Generic function for boolean env variables
10269 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10272 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10276 def BuildHooksNodes(self):
10277 """Build hooks nodes.
10280 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10282 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10283 nl.append(self.op.target_node)
10285 return (nl, nl)
10287 def CheckPrereq(self):
10288 """Check prerequisites.
10290 This checks that the instance and node names are valid.
10293 instance_name = self.op.instance_name
10295 self.instance = self.cfg.GetInstanceInfo(instance_name)
10296 assert self.instance is not None, \
10297 "Cannot retrieve locked instance %s" % self.op.instance_name
10298 _CheckNodeOnline(self, self.instance.primary_node)
10300 if (self.op.remove_instance and self.instance.admin_up and
10301 not self.op.shutdown):
10302 raise errors.OpPrereqError("Can not remove instance without shutting it"
10305 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10306 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10307 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10308 assert self.dst_node is not None
10310 _CheckNodeOnline(self, self.dst_node.name)
10311 _CheckNodeNotDrained(self, self.dst_node.name)
10314 self.dest_disk_info = None
10315 self.dest_x509_ca = None
10317 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10318 self.dst_node = None
10320 if len(self.op.target_node) != len(self.instance.disks):
10321 raise errors.OpPrereqError(("Received destination information for %s"
10322 " disks, but instance %s has %s disks") %
10323 (len(self.op.target_node), instance_name,
10324 len(self.instance.disks)),
10325 errors.ECODE_INVAL)
10327 cds = _GetClusterDomainSecret()
10329 # Check X509 key name
10331 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10332 except (TypeError, ValueError), err:
10333 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10335 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10336 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10337 errors.ECODE_INVAL)
10339 # Load and verify CA
10341 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10342 except OpenSSL.crypto.Error, err:
10343 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10344 (err, ), errors.ECODE_INVAL)
10346 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10347 if errcode is not None:
10348 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10349 (msg, ), errors.ECODE_INVAL)
10351 self.dest_x509_ca = cert
10353 # Verify target information
10355 for idx, disk_data in enumerate(self.op.target_node):
10357 (host, port, magic) = \
10358 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10359 except errors.GenericError, err:
10360 raise errors.OpPrereqError("Target info for disk %s: %s" %
10361 (idx, err), errors.ECODE_INVAL)
10363 disk_info.append((host, port, magic))
10365 assert len(disk_info) == len(self.op.target_node)
10366 self.dest_disk_info = disk_info
10369 raise errors.ProgrammerError("Unhandled export mode %r" %
10372 # instance disk type verification
10373 # TODO: Implement export support for file-based disks
10374 for disk in self.instance.disks:
10375 if disk.dev_type == constants.LD_FILE:
10376 raise errors.OpPrereqError("Export not supported for instances with"
10377 " file-based disks", errors.ECODE_INVAL)
10379 def _CleanupExports(self, feedback_fn):
10380 """Removes exports of current instance from all other nodes.
10382 If an instance in a cluster with nodes A..D was exported to node C, its
10383 exports will be removed from the nodes A, B and D.
10386 assert self.op.mode != constants.EXPORT_MODE_REMOTE
10388 nodelist = self.cfg.GetNodeList()
10389 nodelist.remove(self.dst_node.name)
10391 # on one-node clusters nodelist will be empty after the removal
10392 # if we proceed, the backup would be removed because OpBackupQuery
10393 # substitutes an empty list with the full cluster node list.
10394 iname = self.instance.name
10396 feedback_fn("Removing old exports for instance %s" % iname)
10397 exportlist = self.rpc.call_export_list(nodelist)
10398 for node in exportlist:
10399 if exportlist[node].fail_msg:
10401 if iname in exportlist[node].payload:
10402 msg = self.rpc.call_export_remove(node, iname).fail_msg
10404 self.LogWarning("Could not remove older export for instance %s"
10405 " on node %s: %s", iname, node, msg)
10407 def Exec(self, feedback_fn):
10408 """Export an instance to an image in the cluster.
10411 assert self.op.mode in constants.EXPORT_MODES
10413 instance = self.instance
10414 src_node = instance.primary_node
10416 if self.op.shutdown:
10417 # shutdown the instance, but not the disks
10418 feedback_fn("Shutting down instance %s" % instance.name)
10419 result = self.rpc.call_instance_shutdown(src_node, instance,
10420 self.op.shutdown_timeout)
10421 # TODO: Maybe ignore failures if ignore_remove_failures is set
10422 result.Raise("Could not shutdown instance %s on"
10423 " node %s" % (instance.name, src_node))
10425 # set the disks ID correctly since call_instance_start needs the
10426 # correct drbd minor to create the symlinks
10427 for disk in instance.disks:
10428 self.cfg.SetDiskID(disk, src_node)
10430 activate_disks = (not instance.admin_up)
10433 # Activate the instance disks if we're exporting a stopped instance
10434 feedback_fn("Activating disks for %s" % instance.name)
10435 _StartInstanceDisks(self, instance, None)
10438 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10441 helper.CreateSnapshots()
10443 if (self.op.shutdown and instance.admin_up and
10444 not self.op.remove_instance):
10445 assert not activate_disks
10446 feedback_fn("Starting instance %s" % instance.name)
10447 result = self.rpc.call_instance_start(src_node, instance, None, None)
10448 msg = result.fail_msg
10450 feedback_fn("Failed to start instance: %s" % msg)
10451 _ShutdownInstanceDisks(self, instance)
10452 raise errors.OpExecError("Could not start instance: %s" % msg)
10454 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10455 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10456 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10457 connect_timeout = constants.RIE_CONNECT_TIMEOUT
10458 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10460 (key_name, _, _) = self.x509_key_name
10463 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10466 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10467 key_name, dest_ca_pem,
10472 # Check for backwards compatibility
10473 assert len(dresults) == len(instance.disks)
10474 assert compat.all(isinstance(i, bool) for i in dresults), \
10475 "Not all results are boolean: %r" % dresults
10479 feedback_fn("Deactivating disks for %s" % instance.name)
10480 _ShutdownInstanceDisks(self, instance)
10482 if not (compat.all(dresults) and fin_resu):
10485 failures.append("export finalization")
10486 if not compat.all(dresults):
10487 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10489 failures.append("disk export: disk(s) %s" % fdsk)
10491 raise errors.OpExecError("Export failed, errors in %s" %
10492 utils.CommaJoin(failures))
10494 # At this point, the export was successful, we can cleanup/finish
10496 # Remove instance if requested
10497 if self.op.remove_instance:
10498 feedback_fn("Removing instance %s" % instance.name)
10499 _RemoveInstance(self, feedback_fn, instance,
10500 self.op.ignore_remove_failures)
10502 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10503 self._CleanupExports(feedback_fn)
10505 return fin_resu, dresults
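# Callers thus receive (fin_resu, dresults): the finalization status plus
# one boolean per disk, e.g. (True, [True, True]) for a successful export of
# a two-disk instance.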
10508 class LUBackupRemove(NoHooksLU):
10509 """Remove exports related to the named instance.
10514 def ExpandNames(self):
10515 self.needed_locks = {}
10516 # We need all nodes to be locked in order for RemoveExport to work, but we
10517 # don't need to lock the instance itself, as nothing will happen to it (and
10518 # we can remove exports also for a removed instance)
10519 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10521 def Exec(self, feedback_fn):
10522 """Remove any export.
10525 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10526 # If the instance was not found we'll try with the name that was passed in.
10527 # This will only work if it was an FQDN, though.
10529 if not instance_name:
10530 fqdn_warn = True
10531 instance_name = self.op.instance_name
10533 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10534 exportlist = self.rpc.call_export_list(locked_nodes)
10536 for node in exportlist:
10537 msg = exportlist[node].fail_msg
10539 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10541 if instance_name in exportlist[node].payload:
10543 result = self.rpc.call_export_remove(node, instance_name)
10544 msg = result.fail_msg
10546 logging.error("Could not remove export for instance %s"
10547 " on node %s: %s", instance_name, node, msg)
10549 if fqdn_warn and not found:
10550 feedback_fn("Export not found. If trying to remove an export belonging"
10551 " to a deleted instance please use its Fully Qualified"
10555 class LUGroupAdd(LogicalUnit):
10556 """Logical unit for creating node groups.
10559 HPATH = "group-add"
10560 HTYPE = constants.HTYPE_GROUP
10563 def ExpandNames(self):
10564 # We need the new group's UUID here so that we can create and acquire the
10565 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10566 # that it should not check whether the UUID exists in the configuration.
10567 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10568 self.needed_locks = {}
10569 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10571 def CheckPrereq(self):
10572 """Check prerequisites.
10574 This checks that the given group name is not an existing node group
10579 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10580 except errors.OpPrereqError:
10583 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10584 " node group (UUID: %s)" %
10585 (self.op.group_name, existing_uuid),
10586 errors.ECODE_EXISTS)
10588 if self.op.ndparams:
10589 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10591 def BuildHooksEnv(self):
10592 """Build hooks env.
10596 "GROUP_NAME": self.op.group_name,
10599 def BuildHooksNodes(self):
10600 """Build hooks nodes.
10603 mn = self.cfg.GetMasterNode()
10604 return ([mn], [mn])
10606 def Exec(self, feedback_fn):
10607 """Add the node group to the cluster.
10610 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10611 uuid=self.group_uuid,
10612 alloc_policy=self.op.alloc_policy,
10613 ndparams=self.op.ndparams)
10615 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10616 del self.remove_locks[locking.LEVEL_NODEGROUP]
10619 class LUGroupAssignNodes(NoHooksLU):
10620 """Logical unit for assigning nodes to groups.
10625 def ExpandNames(self):
10626 # These raise errors.OpPrereqError on their own:
10627 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10628 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10630 # We want to lock all the affected nodes and groups. We have readily
10631 # available the list of nodes, and the *destination* group. To gather the
10632 # list of "source" groups, we need to fetch node information.
10633 self.node_data = self.cfg.GetAllNodesInfo()
10634 affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10635 affected_groups.add(self.group_uuid)
10637 self.needed_locks = {
10638 locking.LEVEL_NODEGROUP: list(affected_groups),
10639 locking.LEVEL_NODE: self.op.nodes,
10642 def CheckPrereq(self):
10643 """Check prerequisites.
10646 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10647 instance_data = self.cfg.GetAllInstancesInfo()
10649 if self.group is None:
10650 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10651 (self.op.group_name, self.group_uuid))
10653 (new_splits, previous_splits) = \
10654 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10655 for node in self.op.nodes],
10656 self.node_data, instance_data)
10659 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10661 if not self.op.force:
10662 raise errors.OpExecError("The following instances get split by this"
10663 " change and --force was not given: %s" %
10666 self.LogWarning("This operation will split the following instances: %s",
10669 if previous_splits:
10670 self.LogWarning("In addition, these already-split instances continue"
10671 " to be spit across groups: %s",
10672 utils.CommaJoin(utils.NiceSort(previous_splits)))
10674 def Exec(self, feedback_fn):
10675 """Assign nodes to a new group.
10678 for node in self.op.nodes:
10679 self.node_data[node].group = self.group_uuid
10681 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10684 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10685 """Check for split instances after a node assignment.
10687 This method considers a series of node assignments as an atomic operation,
10688 and returns information about split instances after applying the set of
10691 In particular, it returns information about newly split instances, and
10692 instances that were already split, and remain so after the change.
10694 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
10697 @type changes: list of (node_name, new_group_uuid) pairs.
10698 @param changes: list of node assignments to consider.
10699 @param node_data: a dict with data for all nodes
10700 @param instance_data: a dict with all instances to consider
10701 @rtype: a two-tuple
10702 @return: a list of instances that were previously okay and end up split as a
10703 consequence of this change, and a list of instances that were previously
10704 split and this change does not fix.
10707 changed_nodes = dict((node, group) for node, group in changes
10708 if node_data[node].group != group)
10710 all_split_instances = set()
10711 previously_split_instances = set()
10713 def InstanceNodes(instance):
10714 return [instance.primary_node] + list(instance.secondary_nodes)
10716 for inst in instance_data.values():
10717 if inst.disk_template not in constants.DTS_INT_MIRROR:
10720 instance_nodes = InstanceNodes(inst)
10722 if len(set(node_data[node].group for node in instance_nodes)) > 1:
10723 previously_split_instances.add(inst.name)
10725 if len(set(changed_nodes.get(node, node_data[node].group)
10726 for node in instance_nodes)) > 1:
10727 all_split_instances.add(inst.name)
10729 return (list(all_split_instances - previously_split_instances),
10730 list(previously_split_instances & all_split_instances))
10733 class _GroupQuery(_QueryBase):
10734 FIELDS = query.GROUP_FIELDS
10736 def ExpandNames(self, lu):
10737 lu.needed_locks = {}
10739 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10740 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10743 self.wanted = [name_to_uuid[name]
10744 for name in utils.NiceSort(name_to_uuid.keys())]
10746 # Accept names to be either names or UUIDs.
10749 all_uuid = frozenset(self._all_groups.keys())
10751 for name in self.names:
10752 if name in all_uuid:
10753 self.wanted.append(name)
10754 elif name in name_to_uuid:
10755 self.wanted.append(name_to_uuid[name])
10757 missing.append(name)
10760 raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10761 errors.ECODE_NOENT)
10763 def DeclareLocks(self, lu, level):
10766 def _GetQueryData(self, lu):
10767 """Computes the list of node groups and their attributes.
10770 do_nodes = query.GQ_NODE in self.requested_data
10771 do_instances = query.GQ_INST in self.requested_data
10773 group_to_nodes = None
10774 group_to_instances = None
10776 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10777 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10778 # latter GetAllInstancesInfo() is not enough, for we have to go through
10779 # instance->node. Hence, we will need to process nodes even if we only need
10780 # instance information.
10781 if do_nodes or do_instances:
10782 all_nodes = lu.cfg.GetAllNodesInfo()
10783 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10786 for node in all_nodes.values():
10787 if node.group in group_to_nodes:
10788 group_to_nodes[node.group].append(node.name)
10789 node_to_group[node.name] = node.group
10792 all_instances = lu.cfg.GetAllInstancesInfo()
10793 group_to_instances = dict((uuid, []) for uuid in self.wanted)
10795 for instance in all_instances.values():
10796 node = instance.primary_node
10797 if node in node_to_group:
10798 group_to_instances[node_to_group[node]].append(instance.name)
10801 # Do not pass on node information if it was not requested.
10802 group_to_nodes = None
10804 return query.GroupQueryData([self._all_groups[uuid]
10805 for uuid in self.wanted],
10806 group_to_nodes, group_to_instances)
10809 class LUGroupQuery(NoHooksLU):
10810 """Logical unit for querying node groups.
10815 def CheckArguments(self):
10816 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
10817 self.op.output_fields, False)
10819 def ExpandNames(self):
10820 self.gq.ExpandNames(self)
10822 def Exec(self, feedback_fn):
10823 return self.gq.OldStyleQuery(self)
10826 class LUGroupSetParams(LogicalUnit):
10827 """Modifies the parameters of a node group.
10830 HPATH = "group-modify"
10831 HTYPE = constants.HTYPE_GROUP
10834 def CheckArguments(self):
10835 all_changes = [
10836 self.op.ndparams,
10837 self.op.alloc_policy,
10838 ]
10840 if all_changes.count(None) == len(all_changes):
10841 raise errors.OpPrereqError("Please pass at least one modification",
10842 errors.ECODE_INVAL)
10844 def ExpandNames(self):
10845 # This raises errors.OpPrereqError on its own:
10846 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10848 self.needed_locks = {
10849 locking.LEVEL_NODEGROUP: [self.group_uuid],
10852 def CheckPrereq(self):
10853 """Check prerequisites.
10856 self.group = self.cfg.GetNodeGroup(self.group_uuid)
10858 if self.group is None:
10859 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10860 (self.op.group_name, self.group_uuid))
10862 if self.op.ndparams:
10863 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10864 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10865 self.new_ndparams = new_ndparams
10867 def BuildHooksEnv(self):
10868 """Build hooks env.
10872 "GROUP_NAME": self.op.group_name,
10873 "NEW_ALLOC_POLICY": self.op.alloc_policy,
10876 def BuildHooksNodes(self):
10877 """Build hooks nodes.
10880 mn = self.cfg.GetMasterNode()
10881 return ([mn], [mn])
10883 def Exec(self, feedback_fn):
10884 """Modifies the node group.
10889 if self.op.ndparams:
10890 self.group.ndparams = self.new_ndparams
10891 result.append(("ndparams", str(self.group.ndparams)))
10893 if self.op.alloc_policy:
10894 self.group.alloc_policy = self.op.alloc_policy
10896 self.cfg.Update(self.group, feedback_fn)
10901 class LUGroupRemove(LogicalUnit):
10902 HPATH = "group-remove"
10903 HTYPE = constants.HTYPE_GROUP
10906 def ExpandNames(self):
10907 # This raises errors.OpPrereqError on its own:
10908 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10909 self.needed_locks = {
10910 locking.LEVEL_NODEGROUP: [self.group_uuid],
10913 def CheckPrereq(self):
10914 """Check prerequisites.
10916 This checks that the given group name exists as a node group, that it is
10917 empty (i.e., contains no nodes), and that it is not the last group of the
10921 # Verify that the group is empty.
10922 group_nodes = [node.name
10923 for node in self.cfg.GetAllNodesInfo().values()
10924 if node.group == self.group_uuid]
10927 raise errors.OpPrereqError("Group '%s' not empty, has the following"
10929 (self.op.group_name,
10930 utils.CommaJoin(utils.NiceSort(group_nodes))),
10931 errors.ECODE_STATE)
10933 # Verify the cluster would not be left group-less.
10934 if len(self.cfg.GetNodeGroupList()) == 1:
10935 raise errors.OpPrereqError("Group '%s' is the only group,"
10936 " cannot be removed" %
10937 self.op.group_name,
10938 errors.ECODE_STATE)
10940 def BuildHooksEnv(self):
10941 """Build hooks env.
10945 "GROUP_NAME": self.op.group_name,
10948 def BuildHooksNodes(self):
10949 """Build hooks nodes.
10952 mn = self.cfg.GetMasterNode()
10953 return ([mn], [mn])
10955 def Exec(self, feedback_fn):
10956 """Remove the node group.
10960 self.cfg.RemoveNodeGroup(self.group_uuid)
10961 except errors.ConfigurationError:
10962 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10963 (self.op.group_name, self.group_uuid))
10965 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10968 class LUGroupRename(LogicalUnit):
10969 HPATH = "group-rename"
10970 HTYPE = constants.HTYPE_GROUP
10973 def ExpandNames(self):
10974 # This raises errors.OpPrereqError on its own:
10975 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10977 self.needed_locks = {
10978 locking.LEVEL_NODEGROUP: [self.group_uuid],
10981 def CheckPrereq(self):
10982 """Check prerequisites.
10984 Ensures requested new name is not yet used.
10988 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10989 except errors.OpPrereqError:
10992 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10993 " node group (UUID: %s)" %
10994 (self.op.new_name, new_name_uuid),
10995 errors.ECODE_EXISTS)
10997 def BuildHooksEnv(self):
10998 """Build hooks env.
11002 "OLD_NAME": self.op.group_name,
11003 "NEW_NAME": self.op.new_name,
11006 def BuildHooksNodes(self):
11007 """Build hooks nodes.
11010 mn = self.cfg.GetMasterNode()
11012 all_nodes = self.cfg.GetAllNodesInfo()
11013 all_nodes.pop(mn, None)
11016 run_nodes.extend(node.name for node in all_nodes.values()
11017 if node.group == self.group_uuid)
11019 return (run_nodes, run_nodes)
11021 def Exec(self, feedback_fn):
11022 """Rename the node group.
11025 group = self.cfg.GetNodeGroup(self.group_uuid)
11028 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11029 (self.op.group_name, self.group_uuid))
11031 group.name = self.op.new_name
11032 self.cfg.Update(group, feedback_fn)
11034 return self.op.new_name
11037 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
11038 """Generic tags LU.
11040 This is an abstract class which is the parent of all the other tags LUs.
11044 def ExpandNames(self):
11045 self.needed_locks = {}
11046 if self.op.kind == constants.TAG_NODE:
11047 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
11048 self.needed_locks[locking.LEVEL_NODE] = self.op.name
11049 elif self.op.kind == constants.TAG_INSTANCE:
11050 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
11051 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
11053 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
11054 # not possible to acquire the BGL based on opcode parameters)
11056 def CheckPrereq(self):
11057 """Check prerequisites.
11060 if self.op.kind == constants.TAG_CLUSTER:
11061 self.target = self.cfg.GetClusterInfo()
11062 elif self.op.kind == constants.TAG_NODE:
11063 self.target = self.cfg.GetNodeInfo(self.op.name)
11064 elif self.op.kind == constants.TAG_INSTANCE:
11065 self.target = self.cfg.GetInstanceInfo(self.op.name)
11067 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
11068 str(self.op.kind), errors.ECODE_INVAL)
11071 class LUTagsGet(TagsLU):
11072 """Returns the tags of a given object.
11077 def ExpandNames(self):
11078 TagsLU.ExpandNames(self)
11080 # Share locks as this is only a read operation
11081 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
11083 def Exec(self, feedback_fn):
11084 """Returns the tag list.
11087 return list(self.target.GetTags())
11090 class LUTagsSearch(NoHooksLU):
11091 """Searches the tags for a given pattern.
11096 def ExpandNames(self):
11097 self.needed_locks = {}
11099 def CheckPrereq(self):
11100 """Check prerequisites.
11102 This checks the pattern passed for validity by compiling it.
11106 self.re = re.compile(self.op.pattern)
11107 except re.error, err:
11108 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11109 (self.op.pattern, err), errors.ECODE_INVAL)
11111 def Exec(self, feedback_fn):
11112 """Returns the tag list.
11116 tgts = [("/cluster", cfg.GetClusterInfo())]
11117 ilist = cfg.GetAllInstancesInfo().values()
11118 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11119 nlist = cfg.GetAllNodesInfo().values()
11120 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11122 for path, target in tgts:
11123 for tag in target.GetTags():
11124 if self.re.search(tag):
11125 results.append((path, tag))
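# The result is a list of (path, tag) pairs, with paths of the form
# /cluster, /nodes/<name> or /instances/<name> as built above, e.g.
# (hypothetical values): [("/instances/inst1.example.com", "staging")]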
11129 class LUTagsSet(TagsLU):
11130 """Sets a tag on a given object.
11135 def CheckPrereq(self):
11136 """Check prerequisites.
11138 This checks the type and length of the tag name and value.
11141 TagsLU.CheckPrereq(self)
11142 for tag in self.op.tags:
11143 objects.TaggableObject.ValidateTag(tag)
11145 def Exec(self, feedback_fn):
11150 for tag in self.op.tags:
11151 self.target.AddTag(tag)
11152 except errors.TagError, err:
11153 raise errors.OpExecError("Error while setting tag: %s" % str(err))
11154 self.cfg.Update(self.target, feedback_fn)
11157 class LUTagsDel(TagsLU):
11158 """Delete a list of tags from a given object.
11163 def CheckPrereq(self):
11164 """Check prerequisites.
11166 This checks that we have the given tag.
11169 TagsLU.CheckPrereq(self)
11170 for tag in self.op.tags:
11171 objects.TaggableObject.ValidateTag(tag)
11172 del_tags = frozenset(self.op.tags)
11173 cur_tags = self.target.GetTags()
11175 diff_tags = del_tags - cur_tags
11177 diff_names = ("'%s'" % i for i in sorted(diff_tags))
11178 raise errors.OpPrereqError("Tag(s) %s not found" %
11179 (utils.CommaJoin(diff_names), ),
11180 errors.ECODE_NOENT)
11182 def Exec(self, feedback_fn):
11183 """Remove the tag from the object.
11186 for tag in self.op.tags:
11187 self.target.RemoveTag(tag)
11188 self.cfg.Update(self.target, feedback_fn)
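# The tag LUs above serve commands such as (hypothetical tag and names):
#   $ gnt-instance add-tags inst1.example.com staging
#   $ gnt-instance remove-tags inst1.example.com staging
#   $ gnt-cluster search-tags ^stag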
11191 class LUTestDelay(NoHooksLU):
11192 """Sleep for a specified amount of time.
11194 This LU sleeps on the master and/or nodes for a specified amount of
11195 time.
11200 def ExpandNames(self):
11201 """Expand names and set required locks.
11203 This expands the node list, if any.
11206 self.needed_locks = {}
11207 if self.op.on_nodes:
11208 # _GetWantedNodes can be used here, but is not always appropriate to use
11209 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
11210 # more information.
11211 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
11212 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
11214 def _TestDelay(self):
11215 """Do the actual sleep.
11218 if self.op.on_master:
11219 if not utils.TestDelay(self.op.duration):
11220 raise errors.OpExecError("Error during master delay test")
11221 if self.op.on_nodes:
11222 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
11223 for node, node_result in result.items():
11224 node_result.Raise("Failure during rpc call to node %s" % node)
11226 def Exec(self, feedback_fn):
11227 """Execute the test delay opcode, with the wanted repetitions.
11230 if self.op.repeat == 0:
11233 top_value = self.op.repeat - 1
11234 for i in range(self.op.repeat):
11235 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
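

# A test client is expected to interact with LUTestJqueue roughly as follows
# (illustrative sketch, not defined in this module): it reads the socket path
# from the job's ELOG_JQUEUE_TEST log entry, connects to it within
# _CLIENT_CONNECT_TIMEOUT to acknowledge the notification, and then confirms
# (by sending a byte or closing the connection) within
# _CLIENT_CONFIRM_TIMEOUT so the LU can proceed.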


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    self.mode = mode
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.name = None
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)
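
  # Illustrative instantiation (sketch; the names are made-up examples):
  #   IAllocator(cfg, rpc, constants.IALLOCATOR_MODE_RELOC,
  #              name="instance1.example.com",
  #              relocate_from=["node2.example.com"])
  # The constructor builds self.in_text immediately via _BuildInputData.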

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data
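
  # Shape of the resulting in_data (illustrative):
  #   {"version": constants.IALLOCATOR_VERSION, "cluster_name": ...,
  #    "cluster_tags": [...], "enabled_hypervisors": [...],
  #    "nodegroups": {uuid: {...}}, "nodes": {name: {...}},
  #    "instances": {name: {...}}}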

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = {}
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
      ng[guuid] = {
        "name": gdata.name,
        "alloc_policy": gdata.alloc_policy,
        }
    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    node_results = {}
    for ninfo in node_cfg.values():
      # fill in static (config-based) values
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        "group": ninfo.group,
        "master_capable": ninfo.master_capable,
        "vm_capable": ninfo.vm_capable,
        }

      node_results[ninfo.name] = pnr

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results
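
  # Worked example for the free-memory correction above (illustrative): an
  # instance configured for 512 MiB of which the hypervisor currently reports
  # only 400 MiB in use yields i_mem_diff = 112, so 112 MiB are subtracted
  # from the node's reported free memory, since the instance may grow back to
  # its configured size.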

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _AllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _IAllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
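
  # An in_text for a relocation request serializes roughly as (illustrative):
  #   {"version": ..., "cluster_name": ..., "nodes": {...}, "instances": {...},
  #    "request": {"type": "relocate", "name": "instance1.example.com",
  #                "relocate_from": ["node2.example.com"],
  #                "required_nodes": 1, "disk_space_total": ...}}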

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " not a list")
    self.out_data = rdict
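
  # A well-formed allocator reply thus parses to (illustrative):
  #   {"success": true, "info": "...", "result": ["node1.example.com", ...]}
  # with legacy scripts returning the list under "nodes" instead of "result".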


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)