4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
38 from ganeti import ssh
39 from ganeti import utils
40 from ganeti import errors
41 from ganeti import hypervisor
42 from ganeti import locking
43 from ganeti import constants
44 from ganeti import objects
45 from ganeti import serializer
46 from ganeti import ssconf
49 class LogicalUnit(object):
50 """Logical Unit base class.
52 Subclasses must follow these rules:
53 - implement ExpandNames
54 - implement CheckPrereq (except when tasklets are used)
55 - implement Exec (except when tasklets are used)
56 - implement BuildHooksEnv
57 - redefine HPATH and HTYPE
58 - optionally redefine their run requirements:
59 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
61 Note that all commands require root permissions.
63 @ivar dry_run_result: the value (if any) that will be returned to the caller
64 in dry-run mode (signalled by opcode dry_run parameter)
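# Illustrative sketch only (not part of the original module): a minimal LU
# following the rules above; the opcode name, hook path and lock choices are
# assumptions made for the example.
#
#   class LUExampleNoop(LogicalUnit):
#     HPATH = "example-noop"
#     HTYPE = constants.HTYPE_CLUSTER
#     _OP_REQP = []
#
#     def ExpandNames(self):
#       self.needed_locks = {}
#
#     def BuildHooksEnv(self):
#       env = {"OP_TARGET": self.cfg.GetClusterName()}
#       return env, [], [self.cfg.GetMasterNode()]
#
#     def CheckPrereq(self):
#       pass
#
#     def Exec(self, feedback_fn):
#       feedback_fn("doing nothing")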
72 def __init__(self, processor, op, context, rpc):
73 """Constructor for LogicalUnit.
This needs to be overridden in derived classes in order to check op
validity.
81 self.cfg = context.cfg
82 self.context = context
84 # Dicts used to declare locking needs to mcpu
85 self.needed_locks = None
86 self.acquired_locks = {}
87 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
89 self.remove_locks = {}
90 # Used to force good behavior when calling helper functions
91 self.recalculate_locks = {}
94 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
95 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
96 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
98 self.dry_run_result = None
99 # support for generic debug attribute
100 if (not hasattr(self.op, "debug_level") or
101 not isinstance(self.op.debug_level, int)):
102 self.op.debug_level = 0
for attr_name in self._OP_REQP:
  attr_val = getattr(op, attr_name, None)
  if attr_val is None:
    raise errors.OpPrereqError("Required parameter '%s' missing" %
                               attr_name, errors.ECODE_INVAL)
113 self.CheckArguments()
116 """Returns the SshRunner object
120 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
123 ssh = property(fget=__GetSSH)
125 def CheckArguments(self):
126 """Check syntactic validity for the opcode arguments.
This method is for doing a simple syntactic check and ensuring the
129 validity of opcode parameters, without any cluster-related
130 checks. While the same can be accomplished in ExpandNames and/or
CheckPrereq, doing these separately is better because:
- ExpandNames is left as a purely lock-related function
- CheckPrereq is run after we have acquired locks (and possibly
  waited for them)
137 The function is allowed to change the self.op attribute so that
later methods need no longer worry about missing parameters.
143 def ExpandNames(self):
144 """Expand names for this LU.
146 This method is called before starting to execute the opcode, and it should
147 update all the parameters of the opcode to their canonical form (e.g. a
148 short node name must be fully expanded after this method has successfully
completed). This way locking, hooks, logging, etc. can work correctly.
LUs which implement this method must also populate the self.needed_locks
member, a dict with lock levels as keys and lists of the needed lock
names as values. Rules:
155 - use an empty dict if you don't need any lock
156 - if you don't need any lock at a particular level omit that level
157 - don't put anything for the BGL level
158 - if you want all locks at a level use locking.ALL_SET as a value
160 If you need to share locks (rather than acquire them exclusively) at one
161 level you can modify self.share_locks, setting a true value (usually 1) for
162 that level. By default locks are not shared.
164 This function can also define a list of tasklets, which then will be
165 executed in order instead of the usual LU-level CheckPrereq and Exec
166 functions, if those are not defined by the LU.
# Acquire all nodes and one instance
self.needed_locks = {
  locking.LEVEL_NODE: locking.ALL_SET,
  locking.LEVEL_INSTANCE: ['instance1.example.tld'],
}
# Acquire just two nodes
self.needed_locks = {
  locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
}
# Acquire no locks
self.needed_locks = {} # No, you can't leave it to the default value None
# The implementation of this method is mandatory only if the new LU is
# concurrent, so that old LUs don't need to be changed all at the same
# time.
if self.REQ_BGL:
  self.needed_locks = {} # Exclusive LUs don't need locks.
else:
  raise NotImplementedError
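# Illustrative sketch only: an ExpandNames override that acquires all node
# locks in shared mode, using the lock levels and share_locks dict described
# above.
#
#   def ExpandNames(self):
#     self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
#     self.share_locks[locking.LEVEL_NODE] = 1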
191 def DeclareLocks(self, level):
192 """Declare LU locking needs for a level
194 While most LUs can just declare their locking needs at ExpandNames time,
195 sometimes there's the need to calculate some locks after having acquired
196 the ones before. This function is called just before acquiring locks at a
197 particular level, but after acquiring the ones at lower levels, and permits
198 such calculations. It can be used to modify self.needed_locks, and by
199 default it does nothing.
201 This function is only called if you have something already set in
202 self.needed_locks for the level.
204 @param level: Locking level which is going to be locked
205 @type level: member of ganeti.locking.LEVELS
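# Illustrative sketch only: a DeclareLocks implementation that computes the
# node locks from the instance locks acquired at the previous level (see also
# _LockInstancesNodes below).
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()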
209 def CheckPrereq(self):
210 """Check prerequisites for this LU.
212 This method should check that the prerequisites for the execution
of this LU are fulfilled. It can do internode communication, but
it should be idempotent - no cluster or system changes are allowed.
217 The method should raise errors.OpPrereqError in case something is
218 not fulfilled. Its return value is ignored.
220 This method should also update all the parameters of the opcode to
221 their canonical form if it hasn't been done by ExpandNames before.
if self.tasklets is not None:
  for (idx, tl) in enumerate(self.tasklets):
    logging.debug("Checking prerequisites for tasklet %s/%s",
                  idx + 1, len(self.tasklets))
    tl.CheckPrereq()
else:
  raise NotImplementedError
232 def Exec(self, feedback_fn):
"""Execute the LU.

This method should implement the actual work. It should raise
errors.OpExecError for failures that are somewhat dealt with in
code, or expected.
if self.tasklets is not None:
  for (idx, tl) in enumerate(self.tasklets):
    logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
    tl.Exec(feedback_fn)
else:
  raise NotImplementedError
247 def BuildHooksEnv(self):
248 """Build hooks environment for this LU.
This method should return a three-element tuple consisting of: a dict
251 containing the environment that will be used for running the
252 specific hook for this LU, a list of node names on which the hook
253 should run before the execution, and a list of node names on which
254 the hook should run after the execution.
The keys of the dict must not be prefixed with 'GANETI_', as this will
be handled in the hooks runner. Also note additional keys will be
258 added by the hooks runner. If the LU doesn't define any
259 environment, an empty dict (and not None) should be returned.
If no nodes are needed, an empty list (and not None) should be returned.
Note that if the HPATH for a LU class is None, this function will
not be called.

"""
raise NotImplementedError
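# Illustrative sketch only: a BuildHooksEnv returning the three-element tuple
# described above, for a hypothetical instance-level LU that has already
# looked up self.instance.
#
#   def BuildHooksEnv(self):
#     env = _BuildInstanceHookEnvByObject(self, self.instance)
#     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
#     return env, nl, nl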
269 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
270 """Notify the LU about the results of its hooks.
272 This method is called every time a hooks phase is executed, and notifies
273 the Logical Unit about the hooks' result. The LU can then use it to alter
274 its result based on the hooks. By default the method does nothing and the
275 previous result is passed back unchanged but any LU can define it if it
276 wants to use the local cluster hook-scripts somehow.
278 @param phase: one of L{constants.HOOKS_PHASE_POST} or
279 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
280 @param hook_results: the results of the multi-node hooks rpc call
@param feedback_fn: function used to send feedback back to the caller
282 @param lu_result: the previous Exec result this LU had, or None
284 @return: the new Exec result, based on the previous result
# API must be kept, thus we ignore the unused-argument and
# "could be a function" warnings
# pylint: disable-msg=W0613,R0201
return lu_result
293 def _ExpandAndLockInstance(self):
294 """Helper function to expand and lock an instance.
296 Many LUs that work on an instance take its name in self.op.instance_name
297 and need to expand it and then declare the expanded name for locking. This
298 function does it, and then updates self.op.instance_name to the expanded
name. It also initializes needed_locks as a dict, if this hasn't been done
before.
303 if self.needed_locks is None:
304 self.needed_locks = {}
306 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
307 "_ExpandAndLockInstance called with instance-level locks set"
308 self.op.instance_name = _ExpandInstanceName(self.cfg,
309 self.op.instance_name)
310 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
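# Illustrative sketch only: typical use of _ExpandAndLockInstance from an
# instance LU's ExpandNames, together with a deferred node-lock declaration
# (the recalculate_locks pattern also appears in LURepairDiskSizes below).
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE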
312 def _LockInstancesNodes(self, primary_only=False):
313 """Helper function to declare instances' nodes for locking.
315 This function should be called after locking one or more instances to lock
316 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
317 with all primary or secondary nodes for instances already locked and
318 present in self.needed_locks[locking.LEVEL_INSTANCE].
320 It should be called from DeclareLocks, and for safety only works if
321 self.recalculate_locks[locking.LEVEL_NODE] is set.
323 In the future it may grow parameters to just lock some instance's nodes, or
324 to just lock primaries or secondary nodes, if needed.
It should be called in DeclareLocks in a way similar to::
328 if level == locking.LEVEL_NODE:
329 self._LockInstancesNodes()
331 @type primary_only: boolean
332 @param primary_only: only lock primary nodes of locked instances
335 assert locking.LEVEL_NODE in self.recalculate_locks, \
336 "_LockInstancesNodes helper function called with no nodes to recalculate"
# TODO: check if we've really been called with the instance locks held
340 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
341 # future we might want to have different behaviors depending on the value
342 # of self.recalculate_locks[locking.LEVEL_NODE]
wanted_nodes = []
for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
  instance = self.context.cfg.GetInstanceInfo(instance_name)
  wanted_nodes.append(instance.primary_node)
  if not primary_only:
    wanted_nodes.extend(instance.secondary_nodes)
350 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
351 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
352 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
353 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
355 del self.recalculate_locks[locking.LEVEL_NODE]
358 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
359 """Simple LU which runs no hooks.
361 This LU is intended as a parent for other LogicalUnits which will
362 run no hooks, in order to reduce duplicate code.
368 def BuildHooksEnv(self):
369 """Empty BuildHooksEnv for NoHooksLu.
371 This just raises an error.
374 assert False, "BuildHooksEnv called for NoHooksLUs"
378 """Tasklet base class.
380 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
381 they can mix legacy code with tasklets. Locking needs to be done in the LU,
382 tasklets know nothing about locks.
384 Subclasses must follow these rules:
- Implement CheckPrereq
- Implement Exec (see the sketch after this class)
def __init__(self, lu):
  self.lu = lu

  # Shortcuts
  self.cfg = lu.cfg
  self.rpc = lu.rpc
396 def CheckPrereq(self):
397 """Check prerequisites for this tasklets.
399 This method should check whether the prerequisites for the execution of
400 this tasklet are fulfilled. It can do internode communication, but it
401 should be idempotent - no cluster or system changes are allowed.
403 The method should raise errors.OpPrereqError in case something is not
404 fulfilled. Its return value is ignored.
406 This method should also update all parameters to their canonical form if it
407 hasn't been done before.
410 raise NotImplementedError
412 def Exec(self, feedback_fn):
413 """Execute the tasklet.
415 This method should implement the actual work. It should raise
errors.OpExecError for failures that are somewhat dealt with in code, or
expected.
420 raise NotImplementedError
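# Illustrative sketch only (referenced from the rules above): a minimal
# tasklet; the instance lookup and the _ExampleTasklet name are assumptions
# made for the example.
#
#   class _ExampleTasklet(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       self.instance = self.cfg.GetInstanceInfo(self.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("working on %s" % self.instance.name)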
423 def _GetWantedNodes(lu, nodes):
424 """Returns list of checked and expanded node names.
426 @type lu: L{LogicalUnit}
427 @param lu: the logical unit on whose behalf we execute
429 @param nodes: list of node names or None for all nodes
431 @return: the list of nodes, sorted
432 @raise errors.ProgrammerError: if the nodes parameter is wrong type
if not isinstance(nodes, list):
  raise errors.OpPrereqError("Invalid argument type 'nodes'",
                             errors.ECODE_INVAL)

if not nodes:
  raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
                               " non-empty list of nodes whose name is to be"
                               " expanded.")
443 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
444 return utils.NiceSort(wanted)
447 def _GetWantedInstances(lu, instances):
448 """Returns list of checked and expanded instance names.
450 @type lu: L{LogicalUnit}
451 @param lu: the logical unit on whose behalf we execute
452 @type instances: list
453 @param instances: list of instance names or None for all instances
455 @return: the list of instances, sorted
456 @raise errors.OpPrereqError: if the instances parameter is wrong type
457 @raise errors.OpPrereqError: if any of the passed instances is not found
if not isinstance(instances, list):
  raise errors.OpPrereqError("Invalid argument type 'instances'",
                             errors.ECODE_INVAL)

if instances:
  wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
else:
  wanted = utils.NiceSort(lu.cfg.GetInstanceList())
return wanted
471 def _CheckOutputFields(static, dynamic, selected):
472 """Checks whether all selected fields are valid.
474 @type static: L{utils.FieldSet}
475 @param static: static fields set
476 @type dynamic: L{utils.FieldSet}
477 @param dynamic: dynamic fields set
f = utils.FieldSet()
f.Extend(static)
f.Extend(dynamic)

delta = f.NonMatching(selected)
if delta:
  raise errors.OpPrereqError("Unknown output fields selected: %s"
                             % ",".join(delta), errors.ECODE_INVAL)
490 def _CheckBooleanOpField(op, name):
491 """Validates boolean opcode parameters.
493 This will ensure that an opcode parameter is either a boolean value,
494 or None (but that it always exists).
497 val = getattr(op, name, None)
498 if not (val is None or isinstance(val, bool)):
499 raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
500 (name, str(val)), errors.ECODE_INVAL)
501 setattr(op, name, val)
504 def _CheckGlobalHvParams(params):
505 """Validates that given hypervisor params are not global ones.
This will ensure that instances don't get customised versions of
global parameters.

"""
used_globals = constants.HVC_GLOBALS.intersection(params)
if used_globals:
  msg = ("The following hypervisor parameters are global and cannot"
         " be customized at instance level, please modify them at"
         " cluster level: %s" % utils.CommaJoin(used_globals))
  raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
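# Illustrative example (hypothetical parameter names): if
# constants.HVC_GLOBALS contains "migration_port", then passing
# {"migration_port": 8102} here raises OpPrereqError, while purely
# per-instance parameters such as {"kernel_path": "..."} are accepted.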
519 def _CheckNodeOnline(lu, node):
520 """Ensure that a given node is online.
522 @param lu: the LU on behalf of which we make the check
523 @param node: the node to check
524 @raise errors.OpPrereqError: if the node is offline
527 if lu.cfg.GetNodeInfo(node).offline:
528 raise errors.OpPrereqError("Can't use offline node %s" % node,
532 def _CheckNodeNotDrained(lu, node):
533 """Ensure that a given node is not drained.
535 @param lu: the LU on behalf of which we make the check
536 @param node: the node to check
537 @raise errors.OpPrereqError: if the node is drained
540 if lu.cfg.GetNodeInfo(node).drained:
541 raise errors.OpPrereqError("Can't use drained node %s" % node,
545 def _CheckNodeHasOS(lu, node, os_name, force_variant):
546 """Ensure that a node supports a given OS.
548 @param lu: the LU on behalf of which we make the check
549 @param node: the node to check
550 @param os_name: the OS to query about
551 @param force_variant: whether to ignore variant errors
552 @raise errors.OpPrereqError: if the node is not supporting the OS
555 result = lu.rpc.call_os_get(node, os_name)
result.Raise("OS '%s' not in supported OS list for node %s" %
             (os_name, node),
             prereq=True, ecode=errors.ECODE_INVAL)
559 if not force_variant:
560 _CheckOSVariant(result.payload, os_name)
563 def _CheckDiskTemplate(template):
564 """Ensure a given disk template is valid.
567 if template not in constants.DISK_TEMPLATES:
568 msg = ("Invalid disk template name '%s', valid templates are: %s" %
569 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
570 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
573 def _CheckInstanceDown(lu, instance, reason):
574 """Ensure that an instance is not running."""
575 if instance.admin_up:
576 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
577 (instance.name, reason), errors.ECODE_STATE)
579 pnode = instance.primary_node
580 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
581 ins_l.Raise("Can't contact node %s for instance information" % pnode,
582 prereq=True, ecode=errors.ECODE_ENVIRON)
584 if instance.name in ins_l.payload:
585 raise errors.OpPrereqError("Instance %s is running, %s" %
586 (instance.name, reason), errors.ECODE_STATE)
589 def _ExpandItemName(fn, name, kind):
590 """Expand an item name.
592 @param fn: the function to use for expansion
593 @param name: requested item name
594 @param kind: text description ('Node' or 'Instance')
595 @return: the resolved (full) name
596 @raise errors.OpPrereqError: if the item is not found
full_name = fn(name)
if full_name is None:
  raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                             errors.ECODE_NOENT)
606 def _ExpandNodeName(cfg, name):
607 """Wrapper over L{_ExpandItemName} for nodes."""
608 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
611 def _ExpandInstanceName(cfg, name):
612 """Wrapper over L{_ExpandItemName} for instance."""
613 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
616 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
617 memory, vcpus, nics, disk_template, disks,
618 bep, hvp, hypervisor_name):
619 """Builds instance related env variables for hooks
621 This builds the hook environment from individual variables.
624 @param name: the name of the instance
625 @type primary_node: string
626 @param primary_node: the name of the instance's primary node
627 @type secondary_nodes: list
628 @param secondary_nodes: list of secondary nodes as strings
629 @type os_type: string
630 @param os_type: the name of the instance's OS
631 @type status: boolean
632 @param status: the should_run status of the instance
634 @param memory: the memory size of the instance
636 @param vcpus: the count of VCPUs the instance has
638 @param nics: list of tuples (ip, mac, mode, link) representing
639 the NICs the instance has
640 @type disk_template: string
641 @param disk_template: the disk template of the instance
643 @param disks: the list of (size, mode) pairs
645 @param bep: the backend parameters for the instance
647 @param hvp: the hypervisor parameters for the instance
648 @type hypervisor_name: string
649 @param hypervisor_name: the hypervisor for the instance
651 @return: the hook environment for this instance
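# Illustrative sketch only: for a single-NIC, single-disk instance the
# resulting dict contains (among others) keys along these lines; the names
# and values below are assumptions for the example.
#
#   {
#     "INSTANCE_NAME": "instance1.example.tld",
#     "INSTANCE_PRIMARY": "node1.example.tld",
#     "INSTANCE_NIC_COUNT": 1,
#     "INSTANCE_NIC0_MAC": "aa:00:00:35:01:02",
#     "INSTANCE_DISK_COUNT": 1,
#     "INSTANCE_DISK0_SIZE": 1024,
#   }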
660 "INSTANCE_NAME": name,
661 "INSTANCE_PRIMARY": primary_node,
662 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
663 "INSTANCE_OS_TYPE": os_type,
664 "INSTANCE_STATUS": str_status,
665 "INSTANCE_MEMORY": memory,
666 "INSTANCE_VCPUS": vcpus,
667 "INSTANCE_DISK_TEMPLATE": disk_template,
668 "INSTANCE_HYPERVISOR": hypervisor_name,
672 nic_count = len(nics)
673 for idx, (ip, mac, mode, link) in enumerate(nics):
676 env["INSTANCE_NIC%d_IP" % idx] = ip
677 env["INSTANCE_NIC%d_MAC" % idx] = mac
678 env["INSTANCE_NIC%d_MODE" % idx] = mode
679 env["INSTANCE_NIC%d_LINK" % idx] = link
680 if mode == constants.NIC_MODE_BRIDGED:
681 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
685 env["INSTANCE_NIC_COUNT"] = nic_count
688 disk_count = len(disks)
689 for idx, (size, mode) in enumerate(disks):
690 env["INSTANCE_DISK%d_SIZE" % idx] = size
691 env["INSTANCE_DISK%d_MODE" % idx] = mode
695 env["INSTANCE_DISK_COUNT"] = disk_count
697 for source, kind in [(bep, "BE"), (hvp, "HV")]:
698 for key, value in source.items():
699 env["INSTANCE_%s_%s" % (kind, key)] = value
704 def _NICListToTuple(lu, nics):
705 """Build a list of nic information tuples.
707 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
708 value in LUQueryInstanceData.
710 @type lu: L{LogicalUnit}
711 @param lu: the logical unit on whose behalf we execute
712 @type nics: list of L{objects.NIC}
713 @param nics: list of nics to convert to hooks tuples
hooks_nics = []
c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
for nic in nics:
  ip = nic.ip
  mac = nic.mac
  filled_params = objects.FillDict(c_nicparams, nic.nicparams)
  mode = filled_params[constants.NIC_MODE]
  link = filled_params[constants.NIC_LINK]
  hooks_nics.append((ip, mac, mode, link))
return hooks_nics
728 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
729 """Builds instance related env variables for hooks from an object.
731 @type lu: L{LogicalUnit}
732 @param lu: the logical unit on whose behalf we execute
733 @type instance: L{objects.Instance}
@param instance: the instance for which we should build the
  environment
@type override: dict
@param override: dictionary with key/values that will override
  our values
@rtype: dict
@return: the hook environment dictionary
743 cluster = lu.cfg.GetClusterInfo()
744 bep = cluster.FillBE(instance)
745 hvp = cluster.FillHV(instance)
args = {
  'name': instance.name,
748 'primary_node': instance.primary_node,
749 'secondary_nodes': instance.secondary_nodes,
750 'os_type': instance.os,
751 'status': instance.admin_up,
752 'memory': bep[constants.BE_MEMORY],
753 'vcpus': bep[constants.BE_VCPUS],
754 'nics': _NICListToTuple(lu, instance.nics),
755 'disk_template': instance.disk_template,
'disks': [(disk.size, disk.mode) for disk in instance.disks],
'bep': bep,
'hvp': hvp,
'hypervisor_name': instance.hypervisor,
}
if override:
  args.update(override)
763 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
766 def _AdjustCandidatePool(lu, exceptions):
767 """Adjust the candidate pool after node operations.
mod_list = lu.cfg.MaintainCandidatePool(exceptions)
if mod_list:
  lu.LogInfo("Promoted nodes to master candidate role: %s",
             utils.CommaJoin(node.name for node in mod_list))
774 for name in mod_list:
775 lu.context.ReaddNode(name)
mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
if mc_now > mc_max:
  lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
             (mc_now, mc_max))
782 def _DecideSelfPromotion(lu, exceptions=None):
783 """Decide whether I should promote myself as a master candidate.
786 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
787 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
788 # the new node will increase mc_max with one, so:
789 mc_should = min(mc_should + 1, cp_size)
790 return mc_now < mc_should
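# Illustrative worked example (assumed numbers): with candidate_pool_size=10
# and GetMasterCandidateStats returning mc_now=3, mc_should=4, the new node
# bumps mc_should to min(4 + 1, 10) = 5, and 3 < 5 means the node should
# promote itself to master candidate.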
793 def _CheckNicsBridgesExist(lu, target_nics, target_node,
794 profile=constants.PP_DEFAULT):
795 """Check that the brigdes needed by a list of nics exist.
798 c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
799 paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
800 for nic in target_nics]
801 brlist = [params[constants.NIC_LINK] for params in paramslist
802 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
804 result = lu.rpc.call_bridges_exist(target_node, brlist)
805 result.Raise("Error checking bridges on destination node '%s'" %
806 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
809 def _CheckInstanceBridgesExist(lu, instance, node=None):
810 """Check that the brigdes needed by an instance exist.
814 node = instance.primary_node
815 _CheckNicsBridgesExist(lu, instance.nics, node)
818 def _CheckOSVariant(os_obj, name):
819 """Check whether an OS name conforms to the os variants specification.
821 @type os_obj: L{objects.OS}
822 @param os_obj: OS object to check
824 @param name: OS name passed by the user, to check for validity
if not os_obj.supported_variants:
  return
try:
  variant = name.split("+", 1)[1]
except IndexError:
  raise errors.OpPrereqError("OS name must include a variant",
                             errors.ECODE_INVAL)

if variant not in os_obj.supported_variants:
  raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
839 def _GetNodeInstancesInner(cfg, fn):
840 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
843 def _GetNodeInstances(cfg, node_name):
844 """Returns a list of all primary and secondary instances on a node.
848 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
851 def _GetNodePrimaryInstances(cfg, node_name):
852 """Returns primary instances on a node.
855 return _GetNodeInstancesInner(cfg,
856 lambda inst: node_name == inst.primary_node)
859 def _GetNodeSecondaryInstances(cfg, node_name):
860 """Returns secondary instances on a node.
863 return _GetNodeInstancesInner(cfg,
864 lambda inst: node_name in inst.secondary_nodes)
867 def _GetStorageTypeArgs(cfg, storage_type):
868 """Returns the arguments for a storage type.
871 # Special case for file storage
872 if storage_type == constants.ST_FILE:
873 # storage.FileStorage wants a list of storage directories
return [[cfg.GetFileStorageDir()]]

return []
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
883 cfg.SetDiskID(dev, node_name)
885 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
886 result.Raise("Failed to get disk status from node %s" % node_name,
887 prereq=prereq, ecode=errors.ECODE_ENVIRON)
889 for idx, bdev_status in enumerate(result.payload):
if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
  faulty.append(idx)

return faulty
896 def _FormatTimestamp(secs):
897 """Formats a Unix timestamp with the local timezone.
900 return time.strftime("%F %T %Z", time.gmtime(secs))
903 class LUPostInitCluster(LogicalUnit):
904 """Logical unit for running hooks after cluster initialization.
907 HPATH = "cluster-init"
908 HTYPE = constants.HTYPE_CLUSTER
911 def BuildHooksEnv(self):
915 env = {"OP_TARGET": self.cfg.GetClusterName()}
916 mn = self.cfg.GetMasterNode()
919 def CheckPrereq(self):
920 """No prerequisites to check.
925 def Exec(self, feedback_fn):
932 class LUDestroyCluster(LogicalUnit):
933 """Logical unit for destroying the cluster.
936 HPATH = "cluster-destroy"
937 HTYPE = constants.HTYPE_CLUSTER
940 def BuildHooksEnv(self):
944 env = {"OP_TARGET": self.cfg.GetClusterName()}
947 def CheckPrereq(self):
948 """Check prerequisites.
950 This checks whether the cluster is empty.
952 Any errors are signaled by raising errors.OpPrereqError.
955 master = self.cfg.GetMasterNode()
957 nodelist = self.cfg.GetNodeList()
958 if len(nodelist) != 1 or nodelist[0] != master:
959 raise errors.OpPrereqError("There are still %d node(s) in"
960 " this cluster." % (len(nodelist) - 1),
962 instancelist = self.cfg.GetInstanceList()
964 raise errors.OpPrereqError("There are still %d instance(s) in"
965 " this cluster." % len(instancelist),
968 def Exec(self, feedback_fn):
969 """Destroys the cluster.
972 master = self.cfg.GetMasterNode()
973 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
975 # Run post hooks on master node before it's removed
976 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
978 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
980 # pylint: disable-msg=W0702
981 self.LogWarning("Errors occurred running hooks on %s" % master)
983 result = self.rpc.call_node_stop_master(master, False)
984 result.Raise("Could not disable the master role")
987 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
988 utils.CreateBackup(priv_key)
989 utils.CreateBackup(pub_key)
994 def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
995 warn_days=constants.SSL_CERT_EXPIRATION_WARN,
996 error_days=constants.SSL_CERT_EXPIRATION_ERROR):
997 """Verifies certificate details for LUVerifyCluster.
1001 msg = "Certificate %s is expired" % filename
1003 if not_before is not None and not_after is not None:
1004 msg += (" (valid from %s to %s)" %
1005 (_FormatTimestamp(not_before),
1006 _FormatTimestamp(not_after)))
1007 elif not_before is not None:
1008 msg += " (valid from %s)" % _FormatTimestamp(not_before)
1009 elif not_after is not None:
1010 msg += " (valid until %s)" % _FormatTimestamp(not_after)
1012 return (LUVerifyCluster.ETYPE_ERROR, msg)
1014 elif not_before is not None and not_before > now:
1015 return (LUVerifyCluster.ETYPE_WARNING,
1016 "Certificate %s not yet valid (valid from %s)" %
1017 (filename, _FormatTimestamp(not_before)))
1019 elif not_after is not None:
1020 remaining_days = int((not_after - now) / (24 * 3600))
1022 msg = ("Certificate %s expires in %d days" % (filename, remaining_days))
1024 if remaining_days <= error_days:
1025 return (LUVerifyCluster.ETYPE_ERROR, msg)
1027 if remaining_days <= warn_days:
return (LUVerifyCluster.ETYPE_WARNING, msg)

return (None, None)
1033 def _VerifyCertificate(filename):
1034 """Verifies a certificate for LUVerifyCluster.
1036 @type filename: string
1037 @param filename: Path to PEM file
1041 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1042 utils.ReadFile(filename))
1043 except Exception, err: # pylint: disable-msg=W0703
1044 return (LUVerifyCluster.ETYPE_ERROR,
1045 "Failed to load X509 certificate %s: %s" % (filename, err))
1047 # Depending on the pyOpenSSL version, this can just return (None, None)
1048 (not_before, not_after) = utils.GetX509CertValidity(cert)
1050 return _VerifyCertificateInner(filename, cert.has_expired(),
1051 not_before, not_after, time.time())
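# Illustrative sketch only: _VerifyCertificate returns either (None, None)
# when the certificate checks out, or a pair such as
# (LUVerifyCluster.ETYPE_WARNING, "Certificate ... expires in 12 days").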
1054 class LUVerifyCluster(LogicalUnit):
1055 """Verifies the cluster status.
1058 HPATH = "cluster-verify"
1059 HTYPE = constants.HTYPE_CLUSTER
1060 _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1063 TCLUSTER = "cluster"
TINSTANCE = "instance"
TNODE = "node"

ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1068 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1069 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1070 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1071 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1072 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1074 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1075 ENODEDRBD = (TNODE, "ENODEDRBD")
1076 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1077 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1078 ENODEHV = (TNODE, "ENODEHV")
1079 ENODELVM = (TNODE, "ENODELVM")
1080 ENODEN1 = (TNODE, "ENODEN1")
1081 ENODENET = (TNODE, "ENODENET")
1082 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1083 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1084 ENODERPC = (TNODE, "ENODERPC")
1085 ENODESSH = (TNODE, "ENODESSH")
1086 ENODEVERSION = (TNODE, "ENODEVERSION")
1087 ENODESETUP = (TNODE, "ENODESETUP")
1088 ENODETIME = (TNODE, "ENODETIME")
1090 ETYPE_FIELD = "code"
1091 ETYPE_ERROR = "ERROR"
1092 ETYPE_WARNING = "WARNING"
1094 def ExpandNames(self):
1095 self.needed_locks = {
1096 locking.LEVEL_NODE: locking.ALL_SET,
1097 locking.LEVEL_INSTANCE: locking.ALL_SET,
1099 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1101 def _Error(self, ecode, item, msg, *args, **kwargs):
1102 """Format an error message.
1104 Based on the opcode's error_codes parameter, either format a
1105 parseable error code, or a simpler error string.
1107 This must be called only from Exec and functions called from Exec.
1110 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1112 # first complete the msg
1115 # then format the whole message
1116 if self.op.error_codes:
1117 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1123 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1124 # and finally report it via the feedback_fn
1125 self._feedback_fn(" - %s" % msg)
1127 def _ErrorIf(self, cond, *args, **kwargs):
1128 """Log an error message if the passed condition is True.
cond = bool(cond) or self.op.debug_simulate_errors
if cond:
  self._Error(*args, **kwargs)
1134 # do not mark the operation as failed for WARN cases only
1135 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1136 self.bad = self.bad or cond
1138 def _VerifyNode(self, nodeinfo, file_list, local_cksum,
1139 node_result, master_files, drbd_map, vg_name):
1140 """Run multiple tests against a node.
1144 - compares ganeti version
1145 - checks vg existence and size > 20G
1146 - checks config file checksum
1147 - checks ssh to other nodes
1149 @type nodeinfo: L{objects.Node}
1150 @param nodeinfo: the node to check
1151 @param file_list: required list of files
1152 @param local_cksum: dictionary of local files and their checksums
1153 @param node_result: the results from the node
1154 @param master_files: list of files that only masters should have
1155 @param drbd_map: the useddrbd minors for this node, in
1156 form of minor: (instance, must_exist) which correspond to instances
1157 and their running status
1158 @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
1161 node = nodeinfo.name
1162 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1164 # main result, node_result should be a non-empty dict
1165 test = not node_result or not isinstance(node_result, dict)
1166 _ErrorIf(test, self.ENODERPC, node,
1167 "unable to verify node: no data returned")
1171 # compares ganeti version
1172 local_version = constants.PROTOCOL_VERSION
1173 remote_version = node_result.get('version', None)
1174 test = not (remote_version and
1175 isinstance(remote_version, (list, tuple)) and
1176 len(remote_version) == 2)
1177 _ErrorIf(test, self.ENODERPC, node,
1178 "connection to node returned invalid data")
1182 test = local_version != remote_version[0]
1183 _ErrorIf(test, self.ENODEVERSION, node,
1184 "incompatible protocol versions: master %s,"
1185 " node %s", local_version, remote_version[0])
1189 # node seems compatible, we can actually try to look into its results
1191 # full package version
1192 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1193 self.ENODEVERSION, node,
1194 "software version mismatch: master %s, node %s",
1195 constants.RELEASE_VERSION, remote_version[1],
1196 code=self.ETYPE_WARNING)
1198 # checks vg existence and size > 20G
1199 if vg_name is not None:
vglist = node_result.get(constants.NV_VGLIST, None)
test = not vglist
_ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
if not test:
  vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                        constants.MIN_VG_SIZE)
  _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1208 # checks config file checksum
1210 remote_cksum = node_result.get(constants.NV_FILELIST, None)
1211 test = not isinstance(remote_cksum, dict)
1212 _ErrorIf(test, self.ENODEFILECHECK, node,
1213 "node hasn't returned file checksum data")
1215 for file_name in file_list:
1216 node_is_mc = nodeinfo.master_candidate
1217 must_have = (file_name not in master_files) or node_is_mc
1219 test1 = file_name not in remote_cksum
1221 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1223 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1224 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1225 "file '%s' missing", file_name)
1226 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1227 "file '%s' has wrong checksum", file_name)
1228 # not candidate and this is not a must-have file
1229 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1230 "file '%s' should not exist on non master"
1231 " candidates (and the file is outdated)", file_name)
1232 # all good, except non-master/non-must have combination
1233 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1234 "file '%s' should not exist"
1235 " on non master candidates", file_name)
1239 test = constants.NV_NODELIST not in node_result
1240 _ErrorIf(test, self.ENODESSH, node,
1241 "node hasn't returned node ssh connectivity data")
1243 if node_result[constants.NV_NODELIST]:
1244 for a_node, a_msg in node_result[constants.NV_NODELIST].items():
1245 _ErrorIf(True, self.ENODESSH, node,
1246 "ssh communication with node '%s': %s", a_node, a_msg)
1248 test = constants.NV_NODENETTEST not in node_result
1249 _ErrorIf(test, self.ENODENET, node,
1250 "node hasn't returned node tcp connectivity data")
1252 if node_result[constants.NV_NODENETTEST]:
1253 nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
1255 _ErrorIf(True, self.ENODENET, node,
1256 "tcp communication with node '%s': %s",
1257 anode, node_result[constants.NV_NODENETTEST][anode])
1259 hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
1260 if isinstance(hyp_result, dict):
1261 for hv_name, hv_result in hyp_result.iteritems():
1262 test = hv_result is not None
1263 _ErrorIf(test, self.ENODEHV, node,
1264 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1266 # check used drbd list
1267 if vg_name is not None:
1268 used_minors = node_result.get(constants.NV_DRBDLIST, [])
1269 test = not isinstance(used_minors, (tuple, list))
1270 _ErrorIf(test, self.ENODEDRBD, node,
1271 "cannot parse drbd status file: %s", str(used_minors))
1273 for minor, (iname, must_exist) in drbd_map.items():
1274 test = minor not in used_minors and must_exist
1275 _ErrorIf(test, self.ENODEDRBD, node,
1276 "drbd minor %d of instance %s is not active",
1278 for minor in used_minors:
1279 test = minor not in drbd_map
1280 _ErrorIf(test, self.ENODEDRBD, node,
1281 "unallocated drbd minor %d is in use", minor)
1282 test = node_result.get(constants.NV_NODESETUP,
1283 ["Missing NODESETUP results"])
1284 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1288 if vg_name is not None:
1289 pvlist = node_result.get(constants.NV_PVLIST, None)
1290 test = pvlist is None
1291 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1293 # check that ':' is not present in PV names, since it's a
1294 # special character for lvcreate (denotes the range of PEs to
1296 for _, pvname, owner_vg in pvlist:
1297 test = ":" in pvname
1298 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1299 " '%s' of VG '%s'", pvname, owner_vg)
1301 def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
1302 node_instance, n_offline):
1303 """Verify an instance.
1305 This function checks to see if the required block devices are
1306 available on the instance's node.
1309 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1310 node_current = instanceconfig.primary_node
1312 node_vol_should = {}
1313 instanceconfig.MapLVsByNode(node_vol_should)
1315 for node in node_vol_should:
1316 if node in n_offline:
1317 # ignore missing volumes on offline nodes
1319 for volume in node_vol_should[node]:
1320 test = node not in node_vol_is or volume not in node_vol_is[node]
1321 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1322 "volume %s missing on node %s", volume, node)
1324 if instanceconfig.admin_up:
1325 test = ((node_current not in node_instance or
1326 not instance in node_instance[node_current]) and
1327 node_current not in n_offline)
1328 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1329 "instance not running on its primary node %s",
1332 for node in node_instance:
if node != node_current:
1334 test = instance in node_instance[node]
1335 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1336 "instance should not run on node %s", node)
1338 def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
1339 """Verify if there are any unknown volumes in the cluster.
1341 The .os, .swap and backup volumes are ignored. All other volumes are
1342 reported as unknown.
1345 for node in node_vol_is:
1346 for volume in node_vol_is[node]:
1347 test = (node not in node_vol_should or
1348 volume not in node_vol_should[node])
1349 self._ErrorIf(test, self.ENODEORPHANLV, node,
1350 "volume %s is unknown", volume)
1352 def _VerifyOrphanInstances(self, instancelist, node_instance):
1353 """Verify the list of running instances.
1355 This checks what instances are running but unknown to the cluster.
1358 for node in node_instance:
1359 for o_inst in node_instance[node]:
1360 test = o_inst not in instancelist
1361 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1362 "instance %s on node %s should not exist", o_inst, node)
1364 def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
1365 """Verify N+1 Memory Resilience.
1367 Check that if one single node dies we can still start all the instances it
1371 for node, nodeinfo in node_info.iteritems():
1372 # This code checks that every node which is now listed as secondary has
1373 # enough memory to host all instances it is supposed to should a single
1374 # other node in the cluster fail.
1375 # FIXME: not ready for failover to an arbitrary node
1376 # FIXME: does not support file-backed instances
1377 # WARNING: we currently take into account down instances as well as up
1378 # ones, considering that even if they're down someone might want to start
1379 # them even in the event of a node failure.
1380 for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
1382 for instance in instances:
1383 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1384 if bep[constants.BE_AUTO_BALANCE]:
1385 needed_mem += bep[constants.BE_MEMORY]
1386 test = nodeinfo['mfree'] < needed_mem
self._ErrorIf(test, self.ENODEN1, node,
              "not enough memory on this node to accommodate"
              " failovers should peer node %s fail", prinode)
1391 def CheckPrereq(self):
1392 """Check prerequisites.
1394 Transform the list of checks we're going to skip into a set and check that
1395 all its members are valid.
1398 self.skip_set = frozenset(self.op.skip_checks)
1399 if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1400 raise errors.OpPrereqError("Invalid checks to be skipped specified",
1403 def BuildHooksEnv(self):
1406 Cluster-Verify hooks just ran in the post phase and their failure makes
1407 the output be logged in the verify output and the verification to fail.
1410 all_nodes = self.cfg.GetNodeList()
1412 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1414 for node in self.cfg.GetAllNodesInfo().values():
1415 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1417 return env, [], all_nodes
1419 def Exec(self, feedback_fn):
1420 """Verify integrity of cluster, performing various test on nodes.
1424 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1425 verbose = self.op.verbose
1426 self._feedback_fn = feedback_fn
1427 feedback_fn("* Verifying global settings")
1428 for msg in self.cfg.VerifyConfig():
1429 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1431 # Check the cluster certificates
1432 for cert_filename in constants.ALL_CERT_FILES:
1433 (errcode, msg) = _VerifyCertificate(cert_filename)
1434 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1436 vg_name = self.cfg.GetVGName()
1437 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1438 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1439 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1440 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1441 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1442 for iname in instancelist)
1443 i_non_redundant = [] # Non redundant instances
1444 i_non_a_balanced = [] # Non auto-balanced instances
1445 n_offline = [] # List of offline nodes
1446 n_drained = [] # List of nodes being drained
1452 # FIXME: verify OS list
1453 # do local checksums
1454 master_files = [constants.CLUSTER_CONF_FILE]
1456 file_names = ssconf.SimpleStore().GetFileList()
1457 file_names.extend(constants.ALL_CERT_FILES)
1458 file_names.extend(master_files)
1460 local_checksums = utils.FingerprintFiles(file_names)
1462 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1463 node_verify_param = {
1464 constants.NV_FILELIST: file_names,
1465 constants.NV_NODELIST: [node.name for node in nodeinfo
1466 if not node.offline],
1467 constants.NV_HYPERVISOR: hypervisors,
1468 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1469 node.secondary_ip) for node in nodeinfo
1470 if not node.offline],
1471 constants.NV_INSTANCELIST: hypervisors,
1472 constants.NV_VERSION: None,
1473 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1474 constants.NV_NODESETUP: None,
1475 constants.NV_TIME: None,
1478 if vg_name is not None:
1479 node_verify_param[constants.NV_VGLIST] = None
1480 node_verify_param[constants.NV_LVLIST] = vg_name
1481 node_verify_param[constants.NV_PVLIST] = [vg_name]
1482 node_verify_param[constants.NV_DRBDLIST] = None
1484 # Due to the way our RPC system works, exact response times cannot be
1485 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1486 # time before and after executing the request, we can at least have a time
1488 nvinfo_starttime = time.time()
1489 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1490 self.cfg.GetClusterName())
1491 nvinfo_endtime = time.time()
1493 cluster = self.cfg.GetClusterInfo()
1494 master_node = self.cfg.GetMasterNode()
1495 all_drbd_map = self.cfg.ComputeDRBDMap()
1497 feedback_fn("* Verifying node status")
for node_i in nodeinfo:
  node = node_i.name

  if node_i.offline:
    feedback_fn("* Skipping offline node %s" % (node,))
    n_offline.append(node)
    continue
if node == master_node:
  ntype = "master"
elif node_i.master_candidate:
  ntype = "master candidate"
elif node_i.drained:
  ntype = "drained"
  n_drained.append(node)
else:
  ntype = "regular"
1517 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1519 msg = all_nvinfo[node].fail_msg
_ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
if msg:
  continue
1524 nresult = all_nvinfo[node].payload
node_drbd = {}
for minor, instance in all_drbd_map[node].items():
1527 test = instance not in instanceinfo
1528 _ErrorIf(test, self.ECLUSTERCFG, None,
1529 "ghost instance '%s' in temporary DRBD map", instance)
1530 # ghost instance should not be running, but otherwise we
1531 # don't give double warnings (both ghost instance and
1532 # unallocated minor in use)
if test:
  node_drbd[minor] = (instance, False)
else:
  instance = instanceinfo[instance]
  node_drbd[minor] = (instance.name, instance.admin_up)
1539 self._VerifyNode(node_i, file_names, local_checksums,
1540 nresult, master_files, node_drbd, vg_name)
lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
if vg_name is None:
  node_volume[node] = {}
1545 elif isinstance(lvdata, basestring):
1546 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1547 utils.SafeEncode(lvdata))
1548 node_volume[node] = {}
1549 elif not isinstance(lvdata, dict):
1550 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1553 node_volume[node] = lvdata
1556 idata = nresult.get(constants.NV_INSTANCELIST, None)
1557 test = not isinstance(idata, list)
1558 _ErrorIf(test, self.ENODEHV, node,
1559 "rpc call to node failed (instancelist): %s",
1560 utils.SafeEncode(str(idata)))
if test:
  continue

node_instance[node] = idata
1567 nodeinfo = nresult.get(constants.NV_HVINFO, None)
1568 test = not isinstance(nodeinfo, dict)
1569 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1574 ntime = nresult.get(constants.NV_TIME, None)
1576 ntime_merged = utils.MergeTime(ntime)
1577 except (ValueError, TypeError):
1578 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1580 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1581 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1582 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1583 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1587 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1588 "Node time diverges by at least %s from master node time",
1591 if ntime_diff is not None:
1596 "mfree": int(nodeinfo['memory_free']),
1599 # dictionary holding all instances this node is secondary for,
1600 # grouped by their primary node. Each key is a cluster node, and each
1601 # value is a list of instances which have the key as primary and the
1602 # current node as secondary. this is handy to calculate N+1 memory
1603 # availability if you can only failover from a primary to its
1605 "sinst-by-pnode": {},
1607 # FIXME: devise a free space model for file based instances as well
1608 if vg_name is not None:
1609 test = (constants.NV_VGLIST not in nresult or
1610 vg_name not in nresult[constants.NV_VGLIST])
1611 _ErrorIf(test, self.ENODELVM, node,
1612 "node didn't return data for the volume group '%s'"
1613 " - it is either missing or broken", vg_name)
1616 node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1617 except (ValueError, KeyError):
1618 _ErrorIf(True, self.ENODERPC, node,
1619 "node returned invalid nodeinfo, check lvm/hypervisor")
1622 node_vol_should = {}
1624 feedback_fn("* Verifying instance status")
1625 for instance in instancelist:
1627 feedback_fn("* Verifying instance %s" % instance)
1628 inst_config = instanceinfo[instance]
1629 self._VerifyInstance(instance, inst_config, node_volume,
1630 node_instance, n_offline)
1631 inst_nodes_offline = []
1633 inst_config.MapLVsByNode(node_vol_should)
1635 instance_cfg[instance] = inst_config
1637 pnode = inst_config.primary_node
1638 _ErrorIf(pnode not in node_info and pnode not in n_offline,
1639 self.ENODERPC, pnode, "instance %s, connection to"
1640 " primary node failed", instance)
1641 if pnode in node_info:
1642 node_info[pnode]['pinst'].append(instance)
1644 if pnode in n_offline:
1645 inst_nodes_offline.append(pnode)
1647 # If the instance is non-redundant we cannot survive losing its primary
1648 # node, so we are not N+1 compliant. On the other hand we have no disk
1649 # templates with more than one secondary so that situation is not well
1651 # FIXME: does not support file-backed instances
1652 if len(inst_config.secondary_nodes) == 0:
1653 i_non_redundant.append(instance)
1654 _ErrorIf(len(inst_config.secondary_nodes) > 1,
1655 self.EINSTANCELAYOUT, instance,
1656 "instance has multiple secondary nodes", code="WARNING")
1658 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1659 i_non_a_balanced.append(instance)
1661 for snode in inst_config.secondary_nodes:
1662 _ErrorIf(snode not in node_info and snode not in n_offline,
1663 self.ENODERPC, snode,
1664 "instance %s, connection to secondary node"
1665 " failed", instance)
1667 if snode in node_info:
1668 node_info[snode]['sinst'].append(instance)
1669 if pnode not in node_info[snode]['sinst-by-pnode']:
1670 node_info[snode]['sinst-by-pnode'][pnode] = []
1671 node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1673 if snode in n_offline:
1674 inst_nodes_offline.append(snode)
1676 # warn that the instance lives on offline nodes
1677 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1678 "instance lives on offline node(s) %s",
1679 utils.CommaJoin(inst_nodes_offline))
1681 feedback_fn("* Verifying orphan volumes")
1682 self._VerifyOrphanVolumes(node_vol_should, node_volume)
1684 feedback_fn("* Verifying remaining instances")
1685 self._VerifyOrphanInstances(instancelist, node_instance)
1687 if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1688 feedback_fn("* Verifying N+1 Memory redundancy")
1689 self._VerifyNPlusOneMemory(node_info, instance_cfg)
1691 feedback_fn("* Other Notes")
if i_non_redundant:
  feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
              % len(i_non_redundant))
1696 if i_non_a_balanced:
1697 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
1698 % len(i_non_a_balanced))
if n_offline:
  feedback_fn(" - NOTICE: %d offline node(s) found." % len(n_offline))
if n_drained:
  feedback_fn(" - NOTICE: %d drained node(s) found." % len(n_drained))

return not self.bad
1708 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1709 """Analyze the post-hooks' result
1711 This method analyses the hook result, handles it, and sends some
1712 nicely-formatted feedback back to the user.
1714 @param phase: one of L{constants.HOOKS_PHASE_POST} or
1715 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1716 @param hooks_results: the results of the multi-node hooks rpc call
@param feedback_fn: function used to send feedback back to the caller
1718 @param lu_result: previous Exec result
1719 @return: the new Exec result, based on the previous result
# We only really run POST phase hooks, and are only interested in
# their results
1725 if phase == constants.HOOKS_PHASE_POST:
1726 # Used to change hooks' output to proper indentation
1727 indent_re = re.compile('^', re.M)
1728 feedback_fn("* Hooks Results")
1729 assert hooks_results, "invalid result from hooks"
1731 for node_name in hooks_results:
1732 res = hooks_results[node_name]
msg = res.fail_msg
test = msg and not res.offline
1735 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1736 "Communication failure in hooks execution: %s", msg)
1737 if res.offline or msg:
1738 # No need to investigate payload if node is offline or gave an error.
1739 # override manually lu_result here as _ErrorIf only
# overrides self.bad
lu_result = 1
continue
1743 for script, hkr, output in res.payload:
1744 test = hkr == constants.HKR_FAIL
1745 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1746 "Script %s failed, output:", script)
if test:
  output = indent_re.sub(' ', output)
  feedback_fn("%s" % output)
  lu_result = 0

return lu_result
1755 class LUVerifyDisks(NoHooksLU):
1756 """Verifies the cluster disks status.
1762 def ExpandNames(self):
1763 self.needed_locks = {
1764 locking.LEVEL_NODE: locking.ALL_SET,
1765 locking.LEVEL_INSTANCE: locking.ALL_SET,
1767 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1769 def CheckPrereq(self):
1770 """Check prerequisites.
1772 This has no prerequisites.
1777 def Exec(self, feedback_fn):
1778 """Verify integrity of cluster disks.
1780 @rtype: tuple of three items
1781 @return: a tuple of (dict of node-to-node_error, list of instances
which need activate-disks, dict of instance: (node, volume) for
missing volumes
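# Illustrative sketch only: a possible return value, with made-up names --
# ({"node3.example.tld": "rpc failure"}, ["instance2.example.tld"],
#  {"instance5.example.tld": [("node1.example.tld", "xenvg/disk0")]})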
1786 result = res_nodes, res_instances, res_missing = {}, [], {}
1788 vg_name = self.cfg.GetVGName()
1789 nodes = utils.NiceSort(self.cfg.GetNodeList())
1790 instances = [self.cfg.GetInstanceInfo(name)
1791 for name in self.cfg.GetInstanceList()]
nv_dict = {}
for inst in instances:
  inst_lvs = {}
  if (not inst.admin_up or
      inst.disk_template not in constants.DTS_NET_MIRROR):
    continue
  inst.MapLVsByNode(inst_lvs)
1800 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1801 for node, vol_list in inst_lvs.iteritems():
1802 for vol in vol_list:
1803 nv_dict[(node, vol)] = inst
1808 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
for node in nodes:
  node_res = node_lvs[node]
  if node_res.offline:
    continue
  msg = node_res.fail_msg
  if msg:
    logging.warning("Error enumerating LVs on node %s: %s", node, msg)
    res_nodes[node] = msg
    continue
1821 lvs = node_res.payload
1822 for lv_name, (_, _, lv_online) in lvs.items():
1823 inst = nv_dict.pop((node, lv_name), None)
1824 if (not lv_online and inst is not None
1825 and inst.name not in res_instances):
1826 res_instances.append(inst.name)
1828 # any leftover items in nv_dict are missing LVs, let's arrange the
1830 for key, inst in nv_dict.iteritems():
1831 if inst.name not in res_missing:
1832 res_missing[inst.name] = []
res_missing[inst.name].append(key)

return result
1838 class LURepairDiskSizes(NoHooksLU):
1839 """Verifies the cluster disks sizes.
1842 _OP_REQP = ["instances"]
1845 def ExpandNames(self):
1846 if not isinstance(self.op.instances, list):
raise errors.OpPrereqError("Invalid argument type 'instances'",
                           errors.ECODE_INVAL)
1850 if self.op.instances:
1851 self.wanted_names = []
1852 for name in self.op.instances:
1853 full_name = _ExpandInstanceName(self.cfg, name)
1854 self.wanted_names.append(full_name)
1855 self.needed_locks = {
1856 locking.LEVEL_NODE: [],
1857 locking.LEVEL_INSTANCE: self.wanted_names,
1859 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
else:
  self.wanted_names = None
1862 self.needed_locks = {
1863 locking.LEVEL_NODE: locking.ALL_SET,
1864 locking.LEVEL_INSTANCE: locking.ALL_SET,
1866 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
1868 def DeclareLocks(self, level):
1869 if level == locking.LEVEL_NODE and self.wanted_names is not None:
1870 self._LockInstancesNodes(primary_only=True)
1872 def CheckPrereq(self):
1873 """Check prerequisites.
1875 This only checks the optional instance list against the existing names.
1878 if self.wanted_names is None:
1879 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
1881 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
1882 in self.wanted_names]
1884 def _EnsureChildSizes(self, disk):
1885 """Ensure children of the disk have the needed disk size.
1887 This is valid mainly for DRBD8 and fixes an issue where the
1888 children have a smaller disk size than the parent.
1890 @param disk: an L{ganeti.objects.Disk} object
1893 if disk.dev_type == constants.LD_DRBD8:
1894 assert disk.children, "Empty children for DRBD8?"
1895 fchild = disk.children[0]
1896 mismatch = fchild.size < disk.size
1898 self.LogInfo("Child disk has size %d, parent %d, fixing",
1899 fchild.size, disk.size)
1900 fchild.size = disk.size
1902 # and we recurse on this child only, not on the metadev
1903 return self._EnsureChildSizes(fchild) or mismatch
1907 def Exec(self, feedback_fn):
1908 """Verify the size of cluster disks.
1911 # TODO: check child disks too
1912 # TODO: check differences in size between primary/secondary nodes
1914 for instance in self.wanted_instances:
1915 pnode = instance.primary_node
1916 if pnode not in per_node_disks:
1917 per_node_disks[pnode] = []
1918 for idx, disk in enumerate(instance.disks):
1919 per_node_disks[pnode].append((instance, idx, disk))
1922 for node, dskl in per_node_disks.items():
1923 newl = [v[2].Copy() for v in dskl]
1925 self.cfg.SetDiskID(dsk, node)
1926 result = self.rpc.call_blockdev_getsizes(node, newl)
1928 self.LogWarning("Failure in blockdev_getsizes call to node"
1929 " %s, ignoring", node)
1931 if len(result.data) != len(dskl):
1932 self.LogWarning("Invalid result from node %s, ignoring node results",
1935 for ((instance, idx, disk), size) in zip(dskl, result.data):
1937 self.LogWarning("Disk %d of instance %s did not return size"
1938 " information, ignoring", idx, instance.name)
1940 if not isinstance(size, (int, long)):
1941 self.LogWarning("Disk %d of instance %s did not return valid"
1942 " size information, ignoring", idx, instance.name)
1945 if size != disk.size:
1946 self.LogInfo("Disk %d of instance %s has mismatched size,"
1947 " correcting: recorded %d, actual %d", idx,
1948 instance.name, disk.size, size)
1950 self.cfg.Update(instance, feedback_fn)
1951 changed.append((instance.name, idx, size))
1952 if self._EnsureChildSizes(disk):
1953 self.cfg.Update(instance, feedback_fn)
1954 changed.append((instance.name, idx, disk.size))
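# Illustrative sketch (not part of the original module): the size-repair rule
# applied by _EnsureChildSizes above, reduced to a pair of plain sizes. The
# helper name is hypothetical.
def _ExampleGrowChildToParent(parent_size, child_size):
  """Return (new_child_size, changed) so the child is never smaller."""
  if child_size < parent_size:
    return parent_size, True
  return child_size, False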
1958 class LURenameCluster(LogicalUnit):
1959 """Rename the cluster.
1962 HPATH = "cluster-rename"
1963 HTYPE = constants.HTYPE_CLUSTER
1966 def BuildHooksEnv(self):
1971 "OP_TARGET": self.cfg.GetClusterName(),
1972 "NEW_NAME": self.op.name,
1974 mn = self.cfg.GetMasterNode()
1975 all_nodes = self.cfg.GetNodeList()
1976 return env, [mn], all_nodes
1978 def CheckPrereq(self):
1979 """Verify that the passed name is a valid one.
1982 hostname = utils.GetHostInfo(self.op.name)
1984 new_name = hostname.name
1985 self.ip = new_ip = hostname.ip
1986 old_name = self.cfg.GetClusterName()
1987 old_ip = self.cfg.GetMasterIP()
1988 if new_name == old_name and new_ip == old_ip:
1989 raise errors.OpPrereqError("Neither the name nor the IP address of the"
1990 " cluster has changed",
1992 if new_ip != old_ip:
1993 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1994 raise errors.OpPrereqError("The given cluster IP address (%s) is"
1995 " reachable on the network. Aborting." %
1996 new_ip, errors.ECODE_NOTUNIQUE)
1998 self.op.name = new_name
2000 def Exec(self, feedback_fn):
2001 """Rename the cluster.
2004 clustername = self.op.name
2007 # shutdown the master IP
2008 master = self.cfg.GetMasterNode()
2009 result = self.rpc.call_node_stop_master(master, False)
2010 result.Raise("Could not disable the master role")
2013 cluster = self.cfg.GetClusterInfo()
2014 cluster.cluster_name = clustername
2015 cluster.master_ip = ip
2016 self.cfg.Update(cluster, feedback_fn)
2018 # update the known hosts file
2019 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2020 node_list = self.cfg.GetNodeList()
2022 node_list.remove(master)
2025 result = self.rpc.call_upload_file(node_list,
2026 constants.SSH_KNOWN_HOSTS_FILE)
2027 for to_node, to_result in result.iteritems():
2028 msg = to_result.fail_msg
2030 msg = ("Copy of file %s to node %s failed: %s" %
2031 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2032 self.proc.LogWarning(msg)
2035 result = self.rpc.call_node_start_master(master, False, False)
2036 msg = result.fail_msg
2038 self.LogWarning("Could not re-enable the master role on"
2039 " the master, please restart manually: %s", msg)
2042 def _RecursiveCheckIfLVMBased(disk):
2043 """Check if the given disk or its children are lvm-based.
2045 @type disk: L{objects.Disk}
2046 @param disk: the disk to check
2048 @return: boolean indicating whether an LD_LV dev_type was found or not
2052 for chdisk in disk.children:
2053 if _RecursiveCheckIfLVMBased(chdisk):
2055 return disk.dev_type == constants.LD_LV
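# Illustrative sketch (not part of the original module): the same bottom-up
# recursion as _RecursiveCheckIfLVMBased, applied to plain
# (dev_type, children) tuples instead of objects.Disk; the string "lvm"
# stands in for constants.LD_LV and is hypothetical.
def _ExampleIsLvmBased(dev):
  dev_type, children = dev
  for child in children:
    if _ExampleIsLvmBased(child):
      return True
  return dev_type == "lvm"

# e.g. a DRBD disk over two LVM children:
# _ExampleIsLvmBased(("drbd8", [("lvm", []), ("lvm", [])])) -> True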
2058 class LUSetClusterParams(LogicalUnit):
2059 """Change the parameters of the cluster.
2062 HPATH = "cluster-modify"
2063 HTYPE = constants.HTYPE_CLUSTER
2067 def CheckArguments(self):
2071 if not hasattr(self.op, "candidate_pool_size"):
2072 self.op.candidate_pool_size = None
2073 if self.op.candidate_pool_size is not None:
2075 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2076 except (ValueError, TypeError), err:
2077 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2078 str(err), errors.ECODE_INVAL)
2079 if self.op.candidate_pool_size < 1:
2080 raise errors.OpPrereqError("At least one master candidate needed",
2083 def ExpandNames(self):
2084 # FIXME: in the future maybe other cluster params won't require checking on
2085 # all nodes to be modified.
2086 self.needed_locks = {
2087 locking.LEVEL_NODE: locking.ALL_SET,
2089 self.share_locks[locking.LEVEL_NODE] = 1
2091 def BuildHooksEnv(self):
2096 "OP_TARGET": self.cfg.GetClusterName(),
2097 "NEW_VG_NAME": self.op.vg_name,
2099 mn = self.cfg.GetMasterNode()
2100 return env, [mn], [mn]
2102 def CheckPrereq(self):
2103 """Check prerequisites.
2105 This checks that the given parameters don't conflict and
2106 that the given volume group is valid.
2109 if self.op.vg_name is not None and not self.op.vg_name:
2110 instances = self.cfg.GetAllInstancesInfo().values()
2111 for inst in instances:
2112 for disk in inst.disks:
2113 if _RecursiveCheckIfLVMBased(disk):
2114 raise errors.OpPrereqError("Cannot disable lvm storage while"
2115 " lvm-based instances exist",
2118 node_list = self.acquired_locks[locking.LEVEL_NODE]
2120 # if vg_name is not None, check the given volume group on all nodes
2122 vglist = self.rpc.call_vg_list(node_list)
2123 for node in node_list:
2124 msg = vglist[node].fail_msg
2126 # ignoring down node
2127 self.LogWarning("Error while gathering data on node %s"
2128 " (ignoring node): %s", node, msg)
2130 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2132 constants.MIN_VG_SIZE)
2134 raise errors.OpPrereqError("Error on node '%s': %s" %
2135 (node, vgstatus), errors.ECODE_ENVIRON)
2137 self.cluster = cluster = self.cfg.GetClusterInfo()
2138 # validate params changes
2139 if self.op.beparams:
2140 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2141 self.new_beparams = objects.FillDict(
2142 cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2144 if self.op.nicparams:
2145 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2146 self.new_nicparams = objects.FillDict(
2147 cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2148 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2151 # check all instances for consistency
2152 for instance in self.cfg.GetAllInstancesInfo().values():
2153 for nic_idx, nic in enumerate(instance.nics):
2154 params_copy = copy.deepcopy(nic.nicparams)
2155 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2157 # check parameter syntax
2159 objects.NIC.CheckParameterSyntax(params_filled)
2160 except errors.ConfigurationError, err:
2161 nic_errors.append("Instance %s, nic/%d: %s" %
2162 (instance.name, nic_idx, err))
2164 # if we're moving instances to routed, check that they have an ip
2165 target_mode = params_filled[constants.NIC_MODE]
2166 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2167 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2168 (instance.name, nic_idx))
2170 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2171 "\n".join(nic_errors))
2173 # hypervisor list/parameters
2174 self.new_hvparams = objects.FillDict(cluster.hvparams, {})
2175 if self.op.hvparams:
2176 if not isinstance(self.op.hvparams, dict):
2177 raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2179 for hv_name, hv_dict in self.op.hvparams.items():
2180 if hv_name not in self.new_hvparams:
2181 self.new_hvparams[hv_name] = hv_dict
2183 self.new_hvparams[hv_name].update(hv_dict)
2185 # os hypervisor parameters
2186 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2188 if not isinstance(self.op.os_hvp, dict):
2189 raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2191 for os_name, hvs in self.op.os_hvp.items():
2192 if not isinstance(hvs, dict):
2193 raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2194 " input"), errors.ECODE_INVAL)
2195 if os_name not in self.new_os_hvp:
2196 self.new_os_hvp[os_name] = hvs
2198 for hv_name, hv_dict in hvs.items():
2199 if hv_name not in self.new_os_hvp[os_name]:
2200 self.new_os_hvp[os_name][hv_name] = hv_dict
2202 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2204 if self.op.enabled_hypervisors is not None:
2205 self.hv_list = self.op.enabled_hypervisors
2206 if not self.hv_list:
2207 raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2208 " least one member",
2210 invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2212 raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2214 utils.CommaJoin(invalid_hvs),
2217 self.hv_list = cluster.enabled_hypervisors
2219 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2220 # either the enabled list has changed, or the parameters have, validate
2221 for hv_name, hv_params in self.new_hvparams.items():
2222 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2223 (self.op.enabled_hypervisors and
2224 hv_name in self.op.enabled_hypervisors)):
2225 # either this is a new hypervisor, or its parameters have changed
2226 hv_class = hypervisor.GetHypervisor(hv_name)
2227 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2228 hv_class.CheckParameterSyntax(hv_params)
2229 _CheckHVParams(self, node_list, hv_name, hv_params)
2232 # no need to check any newly-enabled hypervisors, since the
2233 # defaults have already been checked in the above code-block
2234 for os_name, os_hvp in self.new_os_hvp.items():
2235 for hv_name, hv_params in os_hvp.items():
2236 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2237 # we need to fill in the new os_hvp on top of the actual hv_p
2238 cluster_defaults = self.new_hvparams.get(hv_name, {})
2239 new_osp = objects.FillDict(cluster_defaults, hv_params)
2240 hv_class = hypervisor.GetHypervisor(hv_name)
2241 hv_class.CheckParameterSyntax(new_osp)
2242 _CheckHVParams(self, node_list, hv_name, new_osp)
2245 def Exec(self, feedback_fn):
2246 """Change the parameters of the cluster.
2249 if self.op.vg_name is not None:
2250 new_volume = self.op.vg_name
2253 if new_volume != self.cfg.GetVGName():
2254 self.cfg.SetVGName(new_volume)
2256 feedback_fn("Cluster LVM configuration already in desired"
2257 " state, not changing")
2258 if self.op.hvparams:
2259 self.cluster.hvparams = self.new_hvparams
2261 self.cluster.os_hvp = self.new_os_hvp
2262 if self.op.enabled_hypervisors is not None:
2263 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2264 if self.op.beparams:
2265 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2266 if self.op.nicparams:
2267 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2269 if self.op.candidate_pool_size is not None:
2270 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2271 # we need to update the pool size here, otherwise the save will fail
2272 _AdjustCandidatePool(self, [])
2274 self.cfg.Update(self.cluster, feedback_fn)
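# Illustrative sketch (not part of the original module): the "defaults plus
# overrides" layering that the parameter checks above rely on (via
# objects.FillDict), written against plain dictionaries. The helper name is
# hypothetical.
def _ExampleFillDict(defaults, overrides):
  """Return a copy of defaults updated with overrides, mutating neither."""
  result = dict(defaults)
  result.update(overrides)
  return result

# e.g. _ExampleFillDict({"mode": "bridged", "link": "xen-br0"}, {"link": "br1"})
# -> {"mode": "bridged", "link": "br1"}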
2277 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2278 """Distribute additional files which are part of the cluster configuration.
2280 ConfigWriter takes care of distributing the config and ssconf files, but
2281 there are more files which should be distributed to all nodes. This function
2282 makes sure those are copied.
2284 @param lu: calling logical unit
2285 @param additional_nodes: list of nodes not in the config to distribute to
2288 # 1. Gather target nodes
2289 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2290 dist_nodes = lu.cfg.GetOnlineNodeList()
2291 if additional_nodes is not None:
2292 dist_nodes.extend(additional_nodes)
2293 if myself.name in dist_nodes:
2294 dist_nodes.remove(myself.name)
2296 # 2. Gather files to distribute
2297 dist_files = set([constants.ETC_HOSTS,
2298 constants.SSH_KNOWN_HOSTS_FILE,
2299 constants.RAPI_CERT_FILE,
2300 constants.RAPI_USERS_FILE,
2301 constants.CONFD_HMAC_KEY,
2304 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2305 for hv_name in enabled_hypervisors:
2306 hv_class = hypervisor.GetHypervisor(hv_name)
2307 dist_files.update(hv_class.GetAncillaryFiles())
2309 # 3. Perform the files upload
2310 for fname in dist_files:
2311 if os.path.exists(fname):
2312 result = lu.rpc.call_upload_file(dist_nodes, fname)
2313 for to_node, to_result in result.items():
2314 msg = to_result.fail_msg
2316 msg = ("Copy of file %s to node %s failed: %s" %
2317 (fname, to_node, msg))
2318 lu.proc.LogWarning(msg)
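# Illustrative sketch (not part of the original module): the target-node
# computation done at the top of _RedistributeAncillaryFiles, on plain lists.
# All names are hypothetical.
def _ExampleDistributionTargets(online_nodes, master_name, additional=None):
  """Online nodes plus any extra nodes, with the master itself removed."""
  targets = list(online_nodes)
  if additional:
    targets.extend(additional)
  return [name for name in targets if name != master_name]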
2321 class LURedistributeConfig(NoHooksLU):
2322 """Force the redistribution of cluster configuration.
2324 This is a very simple LU.
2330 def ExpandNames(self):
2331 self.needed_locks = {
2332 locking.LEVEL_NODE: locking.ALL_SET,
2334 self.share_locks[locking.LEVEL_NODE] = 1
2336 def CheckPrereq(self):
2337 """Check prerequisites.
2341 def Exec(self, feedback_fn):
2342 """Redistribute the configuration.
2345 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2346 _RedistributeAncillaryFiles(self)
2349 def _WaitForSync(lu, instance, oneshot=False):
2350 """Sleep and poll for an instance's disk to sync.
2353 if not instance.disks:
2357 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2359 node = instance.primary_node
2361 for dev in instance.disks:
2362 lu.cfg.SetDiskID(dev, node)
2364 # TODO: Convert to utils.Retry
2367 degr_retries = 10 # in seconds, as we sleep 1 second each time
2371 cumul_degraded = False
2372 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2373 msg = rstats.fail_msg
2375 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2378 raise errors.RemoteError("Can't contact node %s for mirror data,"
2379 " aborting." % node)
2382 rstats = rstats.payload
2384 for i, mstat in enumerate(rstats):
2386 lu.LogWarning("Can't compute data for node %s/%s",
2387 node, instance.disks[i].iv_name)
2390 cumul_degraded = (cumul_degraded or
2391 (mstat.is_degraded and mstat.sync_percent is None))
2392 if mstat.sync_percent is not None:
2394 if mstat.estimated_time is not None:
2395 rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2396 max_time = mstat.estimated_time
2398 rem_time = "no time estimate"
2399 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2400 (instance.disks[i].iv_name, mstat.sync_percent,
2403 # if we're done but degraded, let's do a few small retries, to
2404 # make sure we see a stable and not a transient situation; therefore
2405 # we force a restart of the loop
2406 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2407 logging.info("Degraded disks found, %d retries left", degr_retries)
2415 time.sleep(min(60, max_time))
2418 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2419 return not cumul_degraded
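# Illustrative sketch (not part of the original module): the shape of the
# polling loop in _WaitForSync, with the RPC replaced by a caller-supplied
# poll function returning (done, degraded) and the sleep made injectable.
# All names are hypothetical and the sketch ignores time estimates.
def _ExampleWaitUntilSynced(poll_fn, sleep_fn, degr_retries=10):
  """Poll until done; allow a few extra rounds while still degraded."""
  while True:
    done, degraded = poll_fn()
    if done and degraded and degr_retries > 0:
      # apparently finished but degraded: retry a few times to make sure
      # this is a stable state and not a transient one
      degr_retries -= 1
      sleep_fn(1)
      continue
    if done:
      return not degraded
    sleep_fn(1)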
2422 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2423 """Check that mirrors are not degraded.
2425 The ldisk parameter, if True, will change the test from the
2426 is_degraded attribute (which represents overall non-ok status for
2427 the device(s)) to the ldisk (representing the local storage status).
2430 lu.cfg.SetDiskID(dev, node)
2434 if on_primary or dev.AssembleOnSecondary():
2435 rstats = lu.rpc.call_blockdev_find(node, dev)
2436 msg = rstats.fail_msg
2438 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2440 elif not rstats.payload:
2441 lu.LogWarning("Can't find disk on node %s", node)
2445 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2447 result = result and not rstats.payload.is_degraded
2450 for child in dev.children:
2451 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
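# Illustrative sketch (not part of the original module): the attribute
# selection made by _CheckDiskConsistency, on plain booleans instead of the
# RPC payload. Names are hypothetical.
def _ExampleDiskHealthy(ldisk_ok, is_degraded, ldisk=False):
  """With ldisk=True report local-storage health, otherwise overall status."""
  if ldisk:
    return ldisk_ok
  return not is_degraded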
2456 class LUDiagnoseOS(NoHooksLU):
2457 """Logical unit for OS diagnose/query.
2460 _OP_REQP = ["output_fields", "names"]
2462 _FIELDS_STATIC = utils.FieldSet()
2463 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2464 # Fields that need calculation of global os validity
2465 _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2467 def ExpandNames(self):
2469 raise errors.OpPrereqError("Selective OS query not supported",
2472 _CheckOutputFields(static=self._FIELDS_STATIC,
2473 dynamic=self._FIELDS_DYNAMIC,
2474 selected=self.op.output_fields)
2476 # Lock all nodes, in shared mode
2477 # Temporary removal of locks, should be reverted later
2478 # TODO: reintroduce locks when they are lighter-weight
2479 self.needed_locks = {}
2480 #self.share_locks[locking.LEVEL_NODE] = 1
2481 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2483 def CheckPrereq(self):
2484 """Check prerequisites.
2489 def _DiagnoseByOS(rlist):
2490 """Remaps a per-node return list into an a per-os per-node dictionary
2492 @param rlist: a map with node names as keys and OS objects as values
2495 @return: a dictionary with osnames as keys and as value another map, with
2496 nodes as keys and tuples of (path, status, diagnose) as values, eg::
2498 {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2499 (/srv/..., False, "invalid api")],
2500 "node2": [(/srv/..., True, "")]}
2505 # we build here the list of nodes that didn't fail the RPC (at RPC
2506 # level), so that nodes with a non-responding node daemon don't
2507 # make all OSes invalid
2508 good_nodes = [node_name for node_name in rlist
2509 if not rlist[node_name].fail_msg]
2510 for node_name, nr in rlist.items():
2511 if nr.fail_msg or not nr.payload:
2513 for name, path, status, diagnose, variants in nr.payload:
2514 if name not in all_os:
2515 # build a list of nodes for this os containing empty lists
2516 # for each node in node_list
2518 for nname in good_nodes:
2519 all_os[name][nname] = []
2520 all_os[name][node_name].append((path, status, diagnose, variants))
2523 def Exec(self, feedback_fn):
2524 """Compute the list of OSes.
2527 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2528 node_data = self.rpc.call_os_diagnose(valid_nodes)
2529 pol = self._DiagnoseByOS(node_data)
2531 calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2532 calc_variants = "variants" in self.op.output_fields
2534 for os_name, os_data in pol.items():
2539 for osl in os_data.values():
2540 valid = valid and osl and osl[0][1]
2545 node_variants = osl[0][3]
2546 if variants is None:
2547 variants = node_variants
2549 variants = [v for v in variants if v in node_variants]
2551 for field in self.op.output_fields:
2554 elif field == "valid":
2556 elif field == "node_status":
2557 # this is just a copy of the dict
2559 for node_name, nos_list in os_data.items():
2560 val[node_name] = nos_list
2561 elif field == "variants":
2564 raise errors.ParameterError(field)
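# Illustrative sketch (not part of the original module): how LUDiagnoseOS.Exec
# above narrows an OS variant list to the variants supported on every node,
# shown on plain lists. The helper name is hypothetical.
def _ExampleCommonVariants(per_node_variants):
  """Intersect variant lists node by node, keeping the first list's order."""
  variants = None
  for node_variants in per_node_variants:
    if variants is None:
      variants = list(node_variants)
    else:
      variants = [v for v in variants if v in node_variants]
  return variants or []

# e.g. _ExampleCommonVariants([["lenny", "squeeze"], ["squeeze"]]) -> ["squeeze"]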
2571 class LURemoveNode(LogicalUnit):
2572 """Logical unit for removing a node.
2575 HPATH = "node-remove"
2576 HTYPE = constants.HTYPE_NODE
2577 _OP_REQP = ["node_name"]
2579 def BuildHooksEnv(self):
2582 This doesn't run on the target node in the pre phase as a failed
2583 node would then be impossible to remove.
2587 "OP_TARGET": self.op.node_name,
2588 "NODE_NAME": self.op.node_name,
2590 all_nodes = self.cfg.GetNodeList()
2592 all_nodes.remove(self.op.node_name)
2594 logging.warning("Node %s which is about to be removed not found"
2595 " in the all nodes list", self.op.node_name)
2596 return env, all_nodes, all_nodes
2598 def CheckPrereq(self):
2599 """Check prerequisites.
2602 - the node exists in the configuration
2603 - it does not have primary or secondary instances
2604 - it's not the master
2606 Any errors are signaled by raising errors.OpPrereqError.
2609 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2610 node = self.cfg.GetNodeInfo(self.op.node_name)
2611 assert node is not None
2613 instance_list = self.cfg.GetInstanceList()
2615 masternode = self.cfg.GetMasterNode()
2616 if node.name == masternode:
2617 raise errors.OpPrereqError("Node is the master node,"
2618 " you need to failover first.",
2621 for instance_name in instance_list:
2622 instance = self.cfg.GetInstanceInfo(instance_name)
2623 if node.name in instance.all_nodes:
2624 raise errors.OpPrereqError("Instance %s is still running on the node,"
2625 " please remove first." % instance_name,
2627 self.op.node_name = node.name
2630 def Exec(self, feedback_fn):
2631 """Removes the node from the cluster.
2635 logging.info("Stopping the node daemon and removing configs from node %s",
2638 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2640 # Promote nodes to master candidate as needed
2641 _AdjustCandidatePool(self, exceptions=[node.name])
2642 self.context.RemoveNode(node.name)
2644 # Run post hooks on the node before it's removed
2645 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2647 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2649 # pylint: disable-msg=W0702
2650 self.LogWarning("Errors occurred running hooks on %s" % node.name)
2652 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2653 msg = result.fail_msg
2655 self.LogWarning("Errors encountered on the remote node while leaving"
2656 " the cluster: %s", msg)
2659 class LUQueryNodes(NoHooksLU):
2660 """Logical unit for querying nodes.
2663 # pylint: disable-msg=W0142
2664 _OP_REQP = ["output_fields", "names", "use_locking"]
2667 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2668 "master_candidate", "offline", "drained"]
2670 _FIELDS_DYNAMIC = utils.FieldSet(
2672 "mtotal", "mnode", "mfree",
2674 "ctotal", "cnodes", "csockets",
2677 _FIELDS_STATIC = utils.FieldSet(*[
2678 "pinst_cnt", "sinst_cnt",
2679 "pinst_list", "sinst_list",
2680 "pip", "sip", "tags",
2682 "role"] + _SIMPLE_FIELDS
2685 def ExpandNames(self):
2686 _CheckOutputFields(static=self._FIELDS_STATIC,
2687 dynamic=self._FIELDS_DYNAMIC,
2688 selected=self.op.output_fields)
2690 self.needed_locks = {}
2691 self.share_locks[locking.LEVEL_NODE] = 1
2694 self.wanted = _GetWantedNodes(self, self.op.names)
2696 self.wanted = locking.ALL_SET
2698 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2699 self.do_locking = self.do_node_query and self.op.use_locking
2701 # if we don't request only static fields, we need to lock the nodes
2702 self.needed_locks[locking.LEVEL_NODE] = self.wanted
2704 def CheckPrereq(self):
2705 """Check prerequisites.
2708 # The validation of the node list is done in _GetWantedNodes if the
2709 # list is non-empty; if it is empty, there is no validation to do
2712 def Exec(self, feedback_fn):
2713 """Computes the list of nodes and their attributes.
2716 all_info = self.cfg.GetAllNodesInfo()
2718 nodenames = self.acquired_locks[locking.LEVEL_NODE]
2719 elif self.wanted != locking.ALL_SET:
2720 nodenames = self.wanted
2721 missing = set(nodenames).difference(all_info.keys())
2723 raise errors.OpExecError(
2724 "Some nodes were removed before retrieving their data: %s" % missing)
2726 nodenames = all_info.keys()
2728 nodenames = utils.NiceSort(nodenames)
2729 nodelist = [all_info[name] for name in nodenames]
2731 # begin data gathering
2733 if self.do_node_query:
2735 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2736 self.cfg.GetHypervisorType())
2737 for name in nodenames:
2738 nodeinfo = node_data[name]
2739 if not nodeinfo.fail_msg and nodeinfo.payload:
2740 nodeinfo = nodeinfo.payload
2741 fn = utils.TryConvert
2743 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2744 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2745 "mfree": fn(int, nodeinfo.get('memory_free', None)),
2746 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2747 "dfree": fn(int, nodeinfo.get('vg_free', None)),
2748 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2749 "bootid": nodeinfo.get('bootid', None),
2750 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2751 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2754 live_data[name] = {}
2756 live_data = dict.fromkeys(nodenames, {})
2758 node_to_primary = dict([(name, set()) for name in nodenames])
2759 node_to_secondary = dict([(name, set()) for name in nodenames])
2761 inst_fields = frozenset(("pinst_cnt", "pinst_list",
2762 "sinst_cnt", "sinst_list"))
2763 if inst_fields & frozenset(self.op.output_fields):
2764 inst_data = self.cfg.GetAllInstancesInfo()
2766 for inst in inst_data.values():
2767 if inst.primary_node in node_to_primary:
2768 node_to_primary[inst.primary_node].add(inst.name)
2769 for secnode in inst.secondary_nodes:
2770 if secnode in node_to_secondary:
2771 node_to_secondary[secnode].add(inst.name)
2773 master_node = self.cfg.GetMasterNode()
2775 # end data gathering
2778 for node in nodelist:
2780 for field in self.op.output_fields:
2781 if field in self._SIMPLE_FIELDS:
2782 val = getattr(node, field)
2783 elif field == "pinst_list":
2784 val = list(node_to_primary[node.name])
2785 elif field == "sinst_list":
2786 val = list(node_to_secondary[node.name])
2787 elif field == "pinst_cnt":
2788 val = len(node_to_primary[node.name])
2789 elif field == "sinst_cnt":
2790 val = len(node_to_secondary[node.name])
2791 elif field == "pip":
2792 val = node.primary_ip
2793 elif field == "sip":
2794 val = node.secondary_ip
2795 elif field == "tags":
2796 val = list(node.GetTags())
2797 elif field == "master":
2798 val = node.name == master_node
2799 elif self._FIELDS_DYNAMIC.Matches(field):
2800 val = live_data[node.name].get(field, None)
2801 elif field == "role":
2802 if node.name == master_node:
2804 elif node.master_candidate:
2813 raise errors.ParameterError(field)
2814 node_output.append(val)
2815 output.append(node_output)
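# Illustrative sketch (not part of the original module): the per-node instance
# accounting used by LUQueryNodes for the pinst_*/sinst_* fields, on plain
# (name, primary_node, secondary_nodes) tuples. Names are hypothetical.
def _ExampleMapInstancesToNodes(node_names, instances):
  node_to_primary = dict((name, set()) for name in node_names)
  node_to_secondary = dict((name, set()) for name in node_names)
  for iname, pnode, snodes in instances:
    if pnode in node_to_primary:
      node_to_primary[pnode].add(iname)
    for snode in snodes:
      if snode in node_to_secondary:
        node_to_secondary[snode].add(iname)
  return node_to_primary, node_to_secondary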
2820 class LUQueryNodeVolumes(NoHooksLU):
2821 """Logical unit for getting volumes on node(s).
2824 _OP_REQP = ["nodes", "output_fields"]
2826 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2827 _FIELDS_STATIC = utils.FieldSet("node")
2829 def ExpandNames(self):
2830 _CheckOutputFields(static=self._FIELDS_STATIC,
2831 dynamic=self._FIELDS_DYNAMIC,
2832 selected=self.op.output_fields)
2834 self.needed_locks = {}
2835 self.share_locks[locking.LEVEL_NODE] = 1
2836 if not self.op.nodes:
2837 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2839 self.needed_locks[locking.LEVEL_NODE] = \
2840 _GetWantedNodes(self, self.op.nodes)
2842 def CheckPrereq(self):
2843 """Check prerequisites.
2845 This checks that the fields required are valid output fields.
2848 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2850 def Exec(self, feedback_fn):
2851 """Computes the list of nodes and their attributes.
2854 nodenames = self.nodes
2855 volumes = self.rpc.call_node_volumes(nodenames)
2857 ilist = [self.cfg.GetInstanceInfo(iname) for iname
2858 in self.cfg.GetInstanceList()]
2860 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2863 for node in nodenames:
2864 nresult = volumes[node]
2867 msg = nresult.fail_msg
2869 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2872 node_vols = nresult.payload[:]
2873 node_vols.sort(key=lambda vol: vol['dev'])
2875 for vol in node_vols:
2877 for field in self.op.output_fields:
2880 elif field == "phys":
2884 elif field == "name":
2886 elif field == "size":
2887 val = int(float(vol['size']))
2888 elif field == "instance":
2890 if node not in lv_by_node[inst]:
2892 if vol['name'] in lv_by_node[inst][node]:
2898 raise errors.ParameterError(field)
2899 node_output.append(str(val))
2901 output.append(node_output)
2906 class LUQueryNodeStorage(NoHooksLU):
2907 """Logical unit for getting information on storage units on node(s).
2910 _OP_REQP = ["nodes", "storage_type", "output_fields"]
2912 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
2914 def ExpandNames(self):
2915 storage_type = self.op.storage_type
2917 if storage_type not in constants.VALID_STORAGE_TYPES:
2918 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
2921 _CheckOutputFields(static=self._FIELDS_STATIC,
2922 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
2923 selected=self.op.output_fields)
2925 self.needed_locks = {}
2926 self.share_locks[locking.LEVEL_NODE] = 1
2929 self.needed_locks[locking.LEVEL_NODE] = \
2930 _GetWantedNodes(self, self.op.nodes)
2932 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2934 def CheckPrereq(self):
2935 """Check prerequisites.
2937 This checks that the fields required are valid output fields.
2940 self.op.name = getattr(self.op, "name", None)
2942 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2944 def Exec(self, feedback_fn):
2945 """Computes the list of nodes and their attributes.
2948 # Always get name to sort by
2949 if constants.SF_NAME in self.op.output_fields:
2950 fields = self.op.output_fields[:]
2952 fields = [constants.SF_NAME] + self.op.output_fields
2954 # Never ask for node or type as it's only known to the LU
2955 for extra in [constants.SF_NODE, constants.SF_TYPE]:
2956 while extra in fields:
2957 fields.remove(extra)
2959 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
2960 name_idx = field_idx[constants.SF_NAME]
2962 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2963 data = self.rpc.call_storage_list(self.nodes,
2964 self.op.storage_type, st_args,
2965 self.op.name, fields)
2969 for node in utils.NiceSort(self.nodes):
2970 nresult = data[node]
2974 msg = nresult.fail_msg
2976 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
2979 rows = dict([(row[name_idx], row) for row in nresult.payload])
2981 for name in utils.NiceSort(rows.keys()):
2986 for field in self.op.output_fields:
2987 if field == constants.SF_NODE:
2989 elif field == constants.SF_TYPE:
2990 val = self.op.storage_type
2991 elif field in field_idx:
2992 val = row[field_idx[field]]
2994 raise errors.ParameterError(field)
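# Illustrative sketch (not part of the original module): the field-list
# preparation done by LUQueryNodeStorage.Exec, using the plain strings
# "name", "node" and "type" in place of the SF_* constants. The helper name
# is hypothetical.
def _ExampleStorageRpcFields(output_fields):
  """Always query the name field; drop fields only the LU itself knows."""
  if "name" in output_fields:
    fields = list(output_fields)
  else:
    fields = ["name"] + list(output_fields)
  return [f for f in fields if f not in ("node", "type")]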
3003 class LUModifyNodeStorage(NoHooksLU):
3004 """Logical unit for modifying a storage volume on a node.
3007 _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3010 def CheckArguments(self):
3011 self.opnode_name = _ExpandNodeName(self.cfg, self.op.node_name)
3013 storage_type = self.op.storage_type
3014 if storage_type not in constants.VALID_STORAGE_TYPES:
3015 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
3018 def ExpandNames(self):
3019 self.needed_locks = {
3020 locking.LEVEL_NODE: self.op.node_name,
3023 def CheckPrereq(self):
3024 """Check prerequisites.
3027 storage_type = self.op.storage_type
3030 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3032 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3033 " modified" % storage_type,
3036 diff = set(self.op.changes.keys()) - modifiable
3038 raise errors.OpPrereqError("The following fields can not be modified for"
3039 " storage units of type '%s': %r" %
3040 (storage_type, list(diff)),
3043 def Exec(self, feedback_fn):
3044 """Computes the list of nodes and their attributes.
3047 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3048 result = self.rpc.call_storage_modify(self.op.node_name,
3049 self.op.storage_type, st_args,
3050 self.op.name, self.op.changes)
3051 result.Raise("Failed to modify storage unit '%s' on %s" %
3052 (self.op.name, self.op.node_name))
3055 class LUAddNode(LogicalUnit):
3056 """Logical unit for adding node to the cluster.
3060 HTYPE = constants.HTYPE_NODE
3061 _OP_REQP = ["node_name"]
3063 def CheckArguments(self):
3064 # validate/normalize the node name
3065 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3067 def BuildHooksEnv(self):
3070 This will run on all nodes before, and on all nodes + the new node after.
3074 "OP_TARGET": self.op.node_name,
3075 "NODE_NAME": self.op.node_name,
3076 "NODE_PIP": self.op.primary_ip,
3077 "NODE_SIP": self.op.secondary_ip,
3079 nodes_0 = self.cfg.GetNodeList()
3080 nodes_1 = nodes_0 + [self.op.node_name, ]
3081 return env, nodes_0, nodes_1
3083 def CheckPrereq(self):
3084 """Check prerequisites.
3087 - the new node is not already in the config
3089 - its parameters (single/dual homed) match the cluster
3091 Any errors are signaled by raising errors.OpPrereqError.
3094 node_name = self.op.node_name
3097 dns_data = utils.GetHostInfo(node_name)
3099 node = dns_data.name
3100 primary_ip = self.op.primary_ip = dns_data.ip
3101 secondary_ip = getattr(self.op, "secondary_ip", None)
3102 if secondary_ip is None:
3103 secondary_ip = primary_ip
3104 if not utils.IsValidIP(secondary_ip):
3105 raise errors.OpPrereqError("Invalid secondary IP given",
3107 self.op.secondary_ip = secondary_ip
3109 node_list = cfg.GetNodeList()
3110 if not self.op.readd and node in node_list:
3111 raise errors.OpPrereqError("Node %s is already in the configuration" %
3112 node, errors.ECODE_EXISTS)
3113 elif self.op.readd and node not in node_list:
3114 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3117 for existing_node_name in node_list:
3118 existing_node = cfg.GetNodeInfo(existing_node_name)
3120 if self.op.readd and node == existing_node_name:
3121 if (existing_node.primary_ip != primary_ip or
3122 existing_node.secondary_ip != secondary_ip):
3123 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3124 " address configuration as before",
3128 if (existing_node.primary_ip == primary_ip or
3129 existing_node.secondary_ip == primary_ip or
3130 existing_node.primary_ip == secondary_ip or
3131 existing_node.secondary_ip == secondary_ip):
3132 raise errors.OpPrereqError("New node ip address(es) conflict with"
3133 " existing node %s" % existing_node.name,
3134 errors.ECODE_NOTUNIQUE)
3136 # check that the type of the node (single versus dual homed) is the
3137 # same as for the master
3138 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3139 master_singlehomed = myself.secondary_ip == myself.primary_ip
3140 newbie_singlehomed = secondary_ip == primary_ip
3141 if master_singlehomed != newbie_singlehomed:
3142 if master_singlehomed:
3143 raise errors.OpPrereqError("The master has no private ip but the"
3144 " new node has one",
3147 raise errors.OpPrereqError("The master has a private ip but the"
3148 " new node doesn't have one",
3151 # checks reachability
3152 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3153 raise errors.OpPrereqError("Node not reachable by ping",
3154 errors.ECODE_ENVIRON)
3156 if not newbie_singlehomed:
3157 # check reachability from my secondary ip to newbie's secondary ip
3158 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3159 source=myself.secondary_ip):
3160 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3161 " based ping to noded port",
3162 errors.ECODE_ENVIRON)
3169 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3172 self.new_node = self.cfg.GetNodeInfo(node)
3173 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3175 self.new_node = objects.Node(name=node,
3176 primary_ip=primary_ip,
3177 secondary_ip=secondary_ip,
3178 master_candidate=self.master_candidate,
3179 offline=False, drained=False)
3181 def Exec(self, feedback_fn):
3182 """Adds the new node to the cluster.
3185 new_node = self.new_node
3186 node = new_node.name
3188 # for re-adds, reset the offline/drained/master-candidate flags;
3189 # we need to reset here, otherwise offline would prevent RPC calls
3190 # later in the procedure; this also means that if the re-add
3191 # fails, we are left with a non-offlined, broken node
3193 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3194 self.LogInfo("Readding a node, the offline/drained flags were reset")
3195 # if we demote the node, we do cleanup later in the procedure
3196 new_node.master_candidate = self.master_candidate
3198 # notify the user about any possible mc promotion
3199 if new_node.master_candidate:
3200 self.LogInfo("Node will be a master candidate")
3202 # check connectivity
3203 result = self.rpc.call_version([node])[node]
3204 result.Raise("Can't get version information from node %s" % node)
3205 if constants.PROTOCOL_VERSION == result.payload:
3206 logging.info("Communication to node %s fine, sw version %s match",
3207 node, result.payload)
3209 raise errors.OpExecError("Version mismatch master version %s,"
3210 " node version %s" %
3211 (constants.PROTOCOL_VERSION, result.payload))
3214 if self.cfg.GetClusterInfo().modify_ssh_setup:
3215 logging.info("Copy ssh key to node %s", node)
3216 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3218 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3219 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3223 keyarray.append(utils.ReadFile(i))
3225 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3226 keyarray[2], keyarray[3], keyarray[4],
3228 result.Raise("Cannot transfer ssh keys to the new node")
3230 # Add node to our /etc/hosts, and add key to known_hosts
3231 if self.cfg.GetClusterInfo().modify_etc_hosts:
3232 utils.AddHostToEtcHosts(new_node.name)
3234 if new_node.secondary_ip != new_node.primary_ip:
3235 result = self.rpc.call_node_has_ip_address(new_node.name,
3236 new_node.secondary_ip)
3237 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3238 prereq=True, ecode=errors.ECODE_ENVIRON)
3239 if not result.payload:
3240 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3241 " you gave (%s). Please fix and re-run this"
3242 " command." % new_node.secondary_ip)
3244 node_verify_list = [self.cfg.GetMasterNode()]
3245 node_verify_param = {
3246 constants.NV_NODELIST: [node],
3247 # TODO: do a node-net-test as well?
3250 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3251 self.cfg.GetClusterName())
3252 for verifier in node_verify_list:
3253 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3254 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3256 for failed in nl_payload:
3257 feedback_fn("ssh/hostname verification failed"
3258 " (checking from %s): %s" %
3259 (verifier, nl_payload[failed]))
3260 raise errors.OpExecError("ssh/hostname verification failed.")
3263 _RedistributeAncillaryFiles(self)
3264 self.context.ReaddNode(new_node)
3265 # make sure we redistribute the config
3266 self.cfg.Update(new_node, feedback_fn)
3267 # and make sure the new node will not have old files around
3268 if not new_node.master_candidate:
3269 result = self.rpc.call_node_demote_from_mc(new_node.name)
3270 msg = result.fail_msg
3272 self.LogWarning("Node failed to demote itself from master"
3273 " candidate status: %s" % msg)
3275 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3276 self.context.AddNode(new_node, self.proc.GetECId())
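# Illustrative sketch (not part of the original module): the single- versus
# dual-homed compatibility rule from LUAddNode.CheckPrereq, as a plain
# predicate on the four IP addresses involved. The helper name is
# hypothetical.
def _ExampleHomingMatches(master_pip, master_sip, new_pip, new_sip):
  """True when master and new node are both single- or both dual-homed."""
  return (master_pip == master_sip) == (new_pip == new_sip)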
3279 class LUSetNodeParams(LogicalUnit):
3280 """Modifies the parameters of a node.
3283 HPATH = "node-modify"
3284 HTYPE = constants.HTYPE_NODE
3285 _OP_REQP = ["node_name"]
3288 def CheckArguments(self):
3289 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3290 _CheckBooleanOpField(self.op, 'master_candidate')
3291 _CheckBooleanOpField(self.op, 'offline')
3292 _CheckBooleanOpField(self.op, 'drained')
3293 _CheckBooleanOpField(self.op, 'auto_promote')
3294 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3295 if all_mods.count(None) == 3:
3296 raise errors.OpPrereqError("Please pass at least one modification",
3298 if all_mods.count(True) > 1:
3299 raise errors.OpPrereqError("Can't set the node into more than one"
3300 " state at the same time",
3303 # Boolean value that tells us whether we're offlining or draining the node
3304 self.offline_or_drain = (self.op.offline == True or
3305 self.op.drained == True)
3306 self.deoffline_or_drain = (self.op.offline == False or
3307 self.op.drained == False)
3308 self.might_demote = (self.op.master_candidate == False or
3309 self.offline_or_drain)
3311 self.lock_all = self.op.auto_promote and self.might_demote
3314 def ExpandNames(self):
3316 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3318 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3320 def BuildHooksEnv(self):
3323 This runs on the master node.
3327 "OP_TARGET": self.op.node_name,
3328 "MASTER_CANDIDATE": str(self.op.master_candidate),
3329 "OFFLINE": str(self.op.offline),
3330 "DRAINED": str(self.op.drained),
3332 nl = [self.cfg.GetMasterNode(),
3336 def CheckPrereq(self):
3337 """Check prerequisites.
3339 This checks the requested flag changes against the node's current state.
3342 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3344 if (self.op.master_candidate is not None or
3345 self.op.drained is not None or
3346 self.op.offline is not None):
3347 # we can't change the master's node flags
3348 if self.op.node_name == self.cfg.GetMasterNode():
3349 raise errors.OpPrereqError("The master role can be changed"
3350 " only via masterfailover",
3354 if node.master_candidate and self.might_demote and not self.lock_all:
3355 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3356 # check if after removing the current node, we're missing master
3358 (mc_remaining, mc_should, _) = \
3359 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3360 if mc_remaining < mc_should:
3361 raise errors.OpPrereqError("Not enough master candidates, please"
3362 " pass auto_promote to allow promotion",
3365 if (self.op.master_candidate == True and
3366 ((node.offline and not self.op.offline == False) or
3367 (node.drained and not self.op.drained == False))):
3368 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3369 " to master_candidate" % node.name,
3372 # If we're being de-offlined or un-drained, promote ourselves to master candidate if needed
3373 if (self.deoffline_or_drain and not self.offline_or_drain and not
3374 self.op.master_candidate == True and not node.master_candidate):
3375 self.op.master_candidate = _DecideSelfPromotion(self)
3376 if self.op.master_candidate:
3377 self.LogInfo("Autopromoting node to master candidate")
3381 def Exec(self, feedback_fn):
3390 if self.op.offline is not None:
3391 node.offline = self.op.offline
3392 result.append(("offline", str(self.op.offline)))
3393 if self.op.offline == True:
3394 if node.master_candidate:
3395 node.master_candidate = False
3397 result.append(("master_candidate", "auto-demotion due to offline"))
3399 node.drained = False
3400 result.append(("drained", "clear drained status due to offline"))
3402 if self.op.master_candidate is not None:
3403 node.master_candidate = self.op.master_candidate
3405 result.append(("master_candidate", str(self.op.master_candidate)))
3406 if self.op.master_candidate == False:
3407 rrc = self.rpc.call_node_demote_from_mc(node.name)
3410 self.LogWarning("Node failed to demote itself: %s" % msg)
3412 if self.op.drained is not None:
3413 node.drained = self.op.drained
3414 result.append(("drained", str(self.op.drained)))
3415 if self.op.drained == True:
3416 if node.master_candidate:
3417 node.master_candidate = False
3419 result.append(("master_candidate", "auto-demotion due to drain"))
3420 rrc = self.rpc.call_node_demote_from_mc(node.name)
3423 self.LogWarning("Node failed to demote itself: %s" % msg)
3425 node.offline = False
3426 result.append(("offline", "clear offline status due to drain"))
3428 # we locked all nodes, we adjust the CP before updating this node
3430 _AdjustCandidatePool(self, [node.name])
3432 # this will trigger configuration file update, if needed
3433 self.cfg.Update(node, feedback_fn)
3435 # this will trigger job queue propagation or cleanup
3437 self.context.ReaddNode(node)
3442 class LUPowercycleNode(NoHooksLU):
3443 """Powercycles a node.
3446 _OP_REQP = ["node_name", "force"]
3449 def CheckArguments(self):
3450 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3451 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3452 raise errors.OpPrereqError("The node is the master and the force"
3453 " parameter was not set",
3456 def ExpandNames(self):
3457 """Locking for PowercycleNode.
3459 This is a last-resort option and shouldn't block on other
3460 jobs. Therefore, we grab no locks.
3463 self.needed_locks = {}
3465 def CheckPrereq(self):
3466 """Check prerequisites.
3468 This LU has no prereqs.
3473 def Exec(self, feedback_fn):
3477 result = self.rpc.call_node_powercycle(self.op.node_name,
3478 self.cfg.GetHypervisorType())
3479 result.Raise("Failed to schedule the reboot")
3480 return result.payload
3483 class LUQueryClusterInfo(NoHooksLU):
3484 """Query cluster configuration.
3490 def ExpandNames(self):
3491 self.needed_locks = {}
3493 def CheckPrereq(self):
3494 """No prerequsites needed for this LU.
3499 def Exec(self, feedback_fn):
3500 """Return cluster config.
3503 cluster = self.cfg.GetClusterInfo()
3506 # Filter just for enabled hypervisors
3507 for os_name, hv_dict in cluster.os_hvp.items():
3508 os_hvp[os_name] = {}
3509 for hv_name, hv_params in hv_dict.items():
3510 if hv_name in cluster.enabled_hypervisors:
3511 os_hvp[os_name][hv_name] = hv_params
3514 "software_version": constants.RELEASE_VERSION,
3515 "protocol_version": constants.PROTOCOL_VERSION,
3516 "config_version": constants.CONFIG_VERSION,
3517 "os_api_version": max(constants.OS_API_VERSIONS),
3518 "export_version": constants.EXPORT_VERSION,
3519 "architecture": (platform.architecture()[0], platform.machine()),
3520 "name": cluster.cluster_name,
3521 "master": cluster.master_node,
3522 "default_hypervisor": cluster.enabled_hypervisors[0],
3523 "enabled_hypervisors": cluster.enabled_hypervisors,
3524 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3525 for hypervisor_name in cluster.enabled_hypervisors]),
3527 "beparams": cluster.beparams,
3528 "nicparams": cluster.nicparams,
3529 "candidate_pool_size": cluster.candidate_pool_size,
3530 "master_netdev": cluster.master_netdev,
3531 "volume_group_name": cluster.volume_group_name,
3532 "file_storage_dir": cluster.file_storage_dir,
3533 "ctime": cluster.ctime,
3534 "mtime": cluster.mtime,
3535 "uuid": cluster.uuid,
3536 "tags": list(cluster.GetTags()),
3542 class LUQueryConfigValues(NoHooksLU):
3543 """Return configuration values.
3548 _FIELDS_DYNAMIC = utils.FieldSet()
3549 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3552 def ExpandNames(self):
3553 self.needed_locks = {}
3555 _CheckOutputFields(static=self._FIELDS_STATIC,
3556 dynamic=self._FIELDS_DYNAMIC,
3557 selected=self.op.output_fields)
3559 def CheckPrereq(self):
3560 """No prerequisites.
3565 def Exec(self, feedback_fn):
3566 """Dump a representation of the cluster config to the standard output.
3570 for field in self.op.output_fields:
3571 if field == "cluster_name":
3572 entry = self.cfg.GetClusterName()
3573 elif field == "master_node":
3574 entry = self.cfg.GetMasterNode()
3575 elif field == "drain_flag":
3576 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3577 elif field == "watcher_pause":
3578 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3580 raise errors.ParameterError(field)
3581 values.append(entry)
3585 class LUActivateInstanceDisks(NoHooksLU):
3586 """Bring up an instance's disks.
3589 _OP_REQP = ["instance_name"]
3592 def ExpandNames(self):
3593 self._ExpandAndLockInstance()
3594 self.needed_locks[locking.LEVEL_NODE] = []
3595 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3597 def DeclareLocks(self, level):
3598 if level == locking.LEVEL_NODE:
3599 self._LockInstancesNodes()
3601 def CheckPrereq(self):
3602 """Check prerequisites.
3604 This checks that the instance is in the cluster.
3607 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3608 assert self.instance is not None, \
3609 "Cannot retrieve locked instance %s" % self.op.instance_name
3610 _CheckNodeOnline(self, self.instance.primary_node)
3611 if not hasattr(self.op, "ignore_size"):
3612 self.op.ignore_size = False
3614 def Exec(self, feedback_fn):
3615 """Activate the disks.
3618 disks_ok, disks_info = \
3619 _AssembleInstanceDisks(self, self.instance,
3620 ignore_size=self.op.ignore_size)
3622 raise errors.OpExecError("Cannot activate block devices")
3627 def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3629 """Prepare the block devices for an instance.
3631 This sets up the block devices on all nodes.
3633 @type lu: L{LogicalUnit}
3634 @param lu: the logical unit on whose behalf we execute
3635 @type instance: L{objects.Instance}
3636 @param instance: the instance for whose disks we assemble
3637 @type ignore_secondaries: boolean
3638 @param ignore_secondaries: if true, errors on secondary nodes
3639 won't result in an error return from the function
3640 @type ignore_size: boolean
3641 @param ignore_size: if true, the current known size of the disk
3642 will not be used during the disk activation, useful for cases
3643 when the size is wrong
3644 @return: False if the operation failed, otherwise a list of
3645 (host, instance_visible_name, node_visible_name)
3646 with the mapping from node devices to instance devices
3651 iname = instance.name
3652 # With the two-pass mechanism we try to reduce the window of
3653 # opportunity for the race condition of switching DRBD to primary
3654 # before handshaking occurred, but we do not eliminate it
3656 # The proper fix would be to wait (with some limits) until the
3657 # connection has been made and drbd transitions from WFConnection
3658 # into any other network-connected state (Connected, SyncTarget,
3661 # 1st pass, assemble on all nodes in secondary mode
3662 for inst_disk in instance.disks:
3663 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3665 node_disk = node_disk.Copy()
3666 node_disk.UnsetSize()
3667 lu.cfg.SetDiskID(node_disk, node)
3668 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3669 msg = result.fail_msg
3671 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3672 " (is_primary=False, pass=1): %s",
3673 inst_disk.iv_name, node, msg)
3674 if not ignore_secondaries:
3677 # FIXME: race condition on drbd migration to primary
3679 # 2nd pass, do only the primary node
3680 for inst_disk in instance.disks:
3683 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3684 if node != instance.primary_node:
3687 node_disk = node_disk.Copy()
3688 node_disk.UnsetSize()
3689 lu.cfg.SetDiskID(node_disk, node)
3690 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3691 msg = result.fail_msg
3693 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3694 " (is_primary=True, pass=2): %s",
3695 inst_disk.iv_name, node, msg)
3698 dev_path = result.payload
3700 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3702 # leave the disks configured for the primary node
3703 # this is a workaround that would be fixed better by
3704 # improving the logical/physical id handling
3705 for disk in instance.disks:
3706 lu.cfg.SetDiskID(disk, instance.primary_node)
3708 return disks_ok, device_info
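# Illustrative sketch (not part of the original module): the two-pass ordering
# used by _AssembleInstanceDisks, reduced to its scheduling decision. Given
# the nodes of one disk, pass 1 assembles everywhere in secondary mode and
# pass 2 touches only the primary node. All names are hypothetical.
def _ExampleAssemblyPlan(disk_nodes, primary_node):
  """Return the (node, as_primary) calls in the order they would be made."""
  plan = [(node, False) for node in disk_nodes]
  plan.extend((node, True) for node in disk_nodes if node == primary_node)
  return plan

# e.g. _ExampleAssemblyPlan(["node2", "node1"], "node1")
# -> [("node2", False), ("node1", False), ("node1", True)]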
3711 def _StartInstanceDisks(lu, instance, force):
3712 """Start the disks of an instance.
3715 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3716 ignore_secondaries=force)
3718 _ShutdownInstanceDisks(lu, instance)
3719 if force is not None and not force:
3720 lu.proc.LogWarning("", hint="If the message above refers to a"
3722 " you can retry the operation using '--force'.")
3723 raise errors.OpExecError("Disk consistency error")
3726 class LUDeactivateInstanceDisks(NoHooksLU):
3727 """Shutdown an instance's disks.
3730 _OP_REQP = ["instance_name"]
3733 def ExpandNames(self):
3734 self._ExpandAndLockInstance()
3735 self.needed_locks[locking.LEVEL_NODE] = []
3736 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3738 def DeclareLocks(self, level):
3739 if level == locking.LEVEL_NODE:
3740 self._LockInstancesNodes()
3742 def CheckPrereq(self):
3743 """Check prerequisites.
3745 This checks that the instance is in the cluster.
3748 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3749 assert self.instance is not None, \
3750 "Cannot retrieve locked instance %s" % self.op.instance_name
3752 def Exec(self, feedback_fn):
3753 """Deactivate the disks
3756 instance = self.instance
3757 _SafeShutdownInstanceDisks(self, instance)
3760 def _SafeShutdownInstanceDisks(lu, instance):
3761 """Shutdown block devices of an instance.
3763 This function checks if an instance is running, before calling
3764 _ShutdownInstanceDisks.
3767 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
3768 _ShutdownInstanceDisks(lu, instance)
3771 def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3772 """Shutdown block devices of an instance.
3774 This does the shutdown on all nodes of the instance.
3776 If ignore_primary is false, errors on the primary node are
3781 for disk in instance.disks:
3782 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3783 lu.cfg.SetDiskID(top_disk, node)
3784 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3785 msg = result.fail_msg
3787 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3788 disk.iv_name, node, msg)
3789 if not ignore_primary or node != instance.primary_node:
3794 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3795 """Checks if a node has enough free memory.
3797 This function checks if a given node has the needed amount of free
3798 memory. In case the node has less memory or we cannot get the
3799 information from the node, this function raises an OpPrereqError
3802 @type lu: C{LogicalUnit}
3803 @param lu: a logical unit from which we get configuration data
3805 @param node: the node to check
3806 @type reason: C{str}
3807 @param reason: string to use in the error message
3808 @type requested: C{int}
3809 @param requested: the amount of memory in MiB to check for
3810 @type hypervisor_name: C{str}
3811 @param hypervisor_name: the hypervisor to ask for memory stats
3812 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3813 we cannot check the node
3816 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
3817 nodeinfo[node].Raise("Can't get data from node %s" % node,
3818 prereq=True, ecode=errors.ECODE_ENVIRON)
3819 free_mem = nodeinfo[node].payload.get('memory_free', None)
3820 if not isinstance(free_mem, int):
3821 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3822 " was '%s'" % (node, free_mem),
3823 errors.ECODE_ENVIRON)
3824 if requested > free_mem:
3825 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3826 " needed %s MiB, available %s MiB" %
3827 (node, reason, requested, free_mem),
3831 def _CheckNodesFreeDisk(lu, nodenames, requested):
3832 """Checks if nodes have enough free disk space in the default VG.
3834 This function checks if all given nodes have the needed amount of
3835 free disk. In case any node has less disk or we cannot get the
3836 information from the node, this function raises an OpPrereqError
3839 @type lu: C{LogicalUnit}
3840 @param lu: a logical unit from which we get configuration data
3841 @type nodenames: C{list}
3842 @param nodenames: the list of node names to check
3843 @type requested: C{int}
3844 @param requested: the amount of disk in MiB to check for
3845 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
3846 we cannot check the node
3849 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
3850 lu.cfg.GetHypervisorType())
3851 for node in nodenames:
3852 info = nodeinfo[node]
3853 info.Raise("Cannot get current information from node %s" % node,
3854 prereq=True, ecode=errors.ECODE_ENVIRON)
3855 vg_free = info.payload.get("vg_free", None)
3856 if not isinstance(vg_free, int):
3857 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
3858 " result was '%s'" % (node, vg_free),
3859 errors.ECODE_ENVIRON)
3860 if requested > vg_free:
3861 raise errors.OpPrereqError("Not enough disk space on target node %s:"
3862 " required %d MiB, available %d MiB" %
3863 (node, requested, vg_free),
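# Usage sketch (illustrative; the 2048 MiB requirement is a made-up figure,
# normally derived via _ComputeDiskSize below):
#   _CheckNodesFreeDisk(self, [pnode.name, snode.name], 2048)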
3867 class LUStartupInstance(LogicalUnit):
3868 """Starts an instance.
3871 HPATH = "instance-start"
3872 HTYPE = constants.HTYPE_INSTANCE
3873 _OP_REQP = ["instance_name", "force"]
3876 def ExpandNames(self):
3877 self._ExpandAndLockInstance()
3879 def BuildHooksEnv(self):
3882 This runs on master, primary and secondary nodes of the instance.
3886 "FORCE": self.op.force,
3888 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3889 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3892 def CheckPrereq(self):
3893 """Check prerequisites.
3895 This checks that the instance is in the cluster.
3898 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3899 assert self.instance is not None, \
3900 "Cannot retrieve locked instance %s" % self.op.instance_name
3903 self.beparams = getattr(self.op, "beparams", {})
3905 if not isinstance(self.beparams, dict):
3906 raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3907 " dict" % (type(self.beparams), ),
3909 # fill the beparams dict
3910 utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3911 self.op.beparams = self.beparams
3914 self.hvparams = getattr(self.op, "hvparams", {})
3916 if not isinstance(self.hvparams, dict):
3917 raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3918 " dict" % (type(self.hvparams), ),
3921 # check hypervisor parameter syntax (locally)
3922 cluster = self.cfg.GetClusterInfo()
3923 utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3924 filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3926 filled_hvp.update(self.hvparams)
3927 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3928 hv_type.CheckParameterSyntax(filled_hvp)
3929 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3930 self.op.hvparams = self.hvparams
3932 _CheckNodeOnline(self, instance.primary_node)
3934 bep = self.cfg.GetClusterInfo().FillBE(instance)
3935 # check bridges existence
3936 _CheckInstanceBridgesExist(self, instance)
3938 remote_info = self.rpc.call_instance_info(instance.primary_node,
3940 instance.hypervisor)
3941 remote_info.Raise("Error checking node %s" % instance.primary_node,
3942 prereq=True, ecode=errors.ECODE_ENVIRON)
3943 if not remote_info.payload: # not running already
3944 _CheckNodeFreeMemory(self, instance.primary_node,
3945 "starting instance %s" % instance.name,
3946 bep[constants.BE_MEMORY], instance.hypervisor)
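# Parameter-override sketch (hypothetical value): hvparams/beparams given on
# the opcode are one-off overrides for this start only; they are type-checked
# and merged on top of the cluster and instance defaults above, e.g.
#   self.op.beparams = {constants.BE_MEMORY: 512}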
3948 def Exec(self, feedback_fn):
3949 """Start the instance.
3952 instance = self.instance
3953 force = self.op.force
3955 self.cfg.MarkInstanceUp(instance.name)
3957 node_current = instance.primary_node
3959 _StartInstanceDisks(self, instance, force)
3961 result = self.rpc.call_instance_start(node_current, instance,
3962 self.hvparams, self.beparams)
3963 msg = result.fail_msg
3965 _ShutdownInstanceDisks(self, instance)
3966 raise errors.OpExecError("Could not start instance: %s" % msg)
3969 class LURebootInstance(LogicalUnit):
3970 """Reboot an instance.
3973 HPATH = "instance-reboot"
3974 HTYPE = constants.HTYPE_INSTANCE
3975 _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3978 def CheckArguments(self):
3979 """Check the arguments.
3982 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
3983 constants.DEFAULT_SHUTDOWN_TIMEOUT)
3985 def ExpandNames(self):
3986 if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3987 constants.INSTANCE_REBOOT_HARD,
3988 constants.INSTANCE_REBOOT_FULL]:
3989 raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3990 (constants.INSTANCE_REBOOT_SOFT,
3991 constants.INSTANCE_REBOOT_HARD,
3992 constants.INSTANCE_REBOOT_FULL))
3993 self._ExpandAndLockInstance()
3995 def BuildHooksEnv(self):
3998 This runs on master, primary and secondary nodes of the instance.
4002 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4003 "REBOOT_TYPE": self.op.reboot_type,
4004 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4006 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4007 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4010 def CheckPrereq(self):
4011 """Check prerequisites.
4013 This checks that the instance is in the cluster.
4016 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4017 assert self.instance is not None, \
4018 "Cannot retrieve locked instance %s" % self.op.instance_name
4020 _CheckNodeOnline(self, instance.primary_node)
4022 # check bridges existence
4023 _CheckInstanceBridgesExist(self, instance)
4025 def Exec(self, feedback_fn):
4026 """Reboot the instance.
4029 instance = self.instance
4030 ignore_secondaries = self.op.ignore_secondaries
4031 reboot_type = self.op.reboot_type
4033 node_current = instance.primary_node
4035 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4036 constants.INSTANCE_REBOOT_HARD]:
4037 for disk in instance.disks:
4038 self.cfg.SetDiskID(disk, node_current)
4039 result = self.rpc.call_instance_reboot(node_current, instance,
4041 self.shutdown_timeout)
4042 result.Raise("Could not reboot instance")
4044 result = self.rpc.call_instance_shutdown(node_current, instance,
4045 self.shutdown_timeout)
4046 result.Raise("Could not shutdown instance for full reboot")
4047 _ShutdownInstanceDisks(self, instance)
4048 _StartInstanceDisks(self, instance, ignore_secondaries)
4049 result = self.rpc.call_instance_start(node_current, instance, None, None)
4050 msg = result.fail_msg
4052 _ShutdownInstanceDisks(self, instance)
4053 raise errors.OpExecError("Could not start instance for"
4054 " full reboot: %s" % msg)
4056 self.cfg.MarkInstanceUp(instance.name)
4059 class LUShutdownInstance(LogicalUnit):
4060 """Shutdown an instance.
4063 HPATH = "instance-stop"
4064 HTYPE = constants.HTYPE_INSTANCE
4065 _OP_REQP = ["instance_name"]
4068 def CheckArguments(self):
4069 """Check the arguments.
4072 self.timeout = getattr(self.op, "timeout",
4073 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4075 def ExpandNames(self):
4076 self._ExpandAndLockInstance()
4078 def BuildHooksEnv(self):
4081 This runs on master, primary and secondary nodes of the instance.
4084 env = _BuildInstanceHookEnvByObject(self, self.instance)
4085 env["TIMEOUT"] = self.timeout
4086 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4089 def CheckPrereq(self):
4090 """Check prerequisites.
4092 This checks that the instance is in the cluster.
4095 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4096 assert self.instance is not None, \
4097 "Cannot retrieve locked instance %s" % self.op.instance_name
4098 _CheckNodeOnline(self, self.instance.primary_node)
4100 def Exec(self, feedback_fn):
4101 """Shutdown the instance.
4104 instance = self.instance
4105 node_current = instance.primary_node
4106 timeout = self.timeout
4107 self.cfg.MarkInstanceDown(instance.name)
4108 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4109 msg = result.fail_msg
4111 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4113 _ShutdownInstanceDisks(self, instance)
4116 class LUReinstallInstance(LogicalUnit):
4117 """Reinstall an instance.
4120 HPATH = "instance-reinstall"
4121 HTYPE = constants.HTYPE_INSTANCE
4122 _OP_REQP = ["instance_name"]
4125 def ExpandNames(self):
4126 self._ExpandAndLockInstance()
4128 def BuildHooksEnv(self):
4131 This runs on master, primary and secondary nodes of the instance.
4134 env = _BuildInstanceHookEnvByObject(self, self.instance)
4135 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4138 def CheckPrereq(self):
4139 """Check prerequisites.
4141 This checks that the instance is in the cluster and is not running.
4144 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4145 assert instance is not None, \
4146 "Cannot retrieve locked instance %s" % self.op.instance_name
4147 _CheckNodeOnline(self, instance.primary_node)
4149 if instance.disk_template == constants.DT_DISKLESS:
4150 raise errors.OpPrereqError("Instance '%s' has no disks" %
4151 self.op.instance_name,
4153 _CheckInstanceDown(self, instance, "cannot reinstall")
4155 self.op.os_type = getattr(self.op, "os_type", None)
4156 self.op.force_variant = getattr(self.op, "force_variant", False)
4157 if self.op.os_type is not None:
4159 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4160 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4162 self.instance = instance
4164 def Exec(self, feedback_fn):
4165 """Reinstall the instance.
4168 inst = self.instance
4170 if self.op.os_type is not None:
4171 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4172 inst.os = self.op.os_type
4173 self.cfg.Update(inst, feedback_fn)
4175 _StartInstanceDisks(self, inst, None)
4177 feedback_fn("Running the instance OS create scripts...")
4178 # FIXME: pass debug option from opcode to backend
4179 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4180 self.op.debug_level)
4181 result.Raise("Could not install OS for instance %s on node %s" %
4182 (inst.name, inst.primary_node))
4184 _ShutdownInstanceDisks(self, inst)
4187 class LURecreateInstanceDisks(LogicalUnit):
4188 """Recreate an instance's missing disks.
4191 HPATH = "instance-recreate-disks"
4192 HTYPE = constants.HTYPE_INSTANCE
4193 _OP_REQP = ["instance_name", "disks"]
4196 def CheckArguments(self):
4197 """Check the arguments.
4200 if not isinstance(self.op.disks, list):
4201 raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4202 for item in self.op.disks:
4203 if (not isinstance(item, int) or
4205 raise errors.OpPrereqError("Invalid disk specification '%s'" %
4206 str(item), errors.ECODE_INVAL)
4208 def ExpandNames(self):
4209 self._ExpandAndLockInstance()
4211 def BuildHooksEnv(self):
4214 This runs on master, primary and secondary nodes of the instance.
4217 env = _BuildInstanceHookEnvByObject(self, self.instance)
4218 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4221 def CheckPrereq(self):
4222 """Check prerequisites.
4224 This checks that the instance is in the cluster and is not running.
4227 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4228 assert instance is not None, \
4229 "Cannot retrieve locked instance %s" % self.op.instance_name
4230 _CheckNodeOnline(self, instance.primary_node)
4232 if instance.disk_template == constants.DT_DISKLESS:
4233 raise errors.OpPrereqError("Instance '%s' has no disks" %
4234 self.op.instance_name, errors.ECODE_INVAL)
4235 _CheckInstanceDown(self, instance, "cannot recreate disks")
4237 if not self.op.disks:
4238 self.op.disks = range(len(instance.disks))
4240 for idx in self.op.disks:
4241 if idx >= len(instance.disks):
4242 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4245 self.instance = instance
4247 def Exec(self, feedback_fn):
4248 """Recreate the disks.
4252 for idx, _ in enumerate(self.instance.disks):
4253 if idx not in self.op.disks: # disk idx has not been passed in
4257 _CreateDisks(self, self.instance, to_skip=to_skip)
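# Parameter sketch (illustrative): for a two-disk instance, passing
#   self.op.disks = [1]
# recreates only disk/1, while an empty list (the default applied in
# CheckPrereq) expands to range(len(instance.disks)), i.e. all disks.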
4260 class LURenameInstance(LogicalUnit):
4261 """Rename an instance.
4264 HPATH = "instance-rename"
4265 HTYPE = constants.HTYPE_INSTANCE
4266 _OP_REQP = ["instance_name", "new_name"]
4268 def BuildHooksEnv(self):
4271 This runs on master, primary and secondary nodes of the instance.
4274 env = _BuildInstanceHookEnvByObject(self, self.instance)
4275 env["INSTANCE_NEW_NAME"] = self.op.new_name
4276 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4279 def CheckPrereq(self):
4280 """Check prerequisites.
4282 This checks that the instance is in the cluster and is not running.
4285 self.op.instance_name = _ExpandInstanceName(self.cfg,
4286 self.op.instance_name)
4287 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4288 assert instance is not None
4289 _CheckNodeOnline(self, instance.primary_node)
4290 _CheckInstanceDown(self, instance, "cannot rename")
4291 self.instance = instance
4293 # new name verification
4294 name_info = utils.GetHostInfo(self.op.new_name)
4296 self.op.new_name = new_name = name_info.name
4297 instance_list = self.cfg.GetInstanceList()
4298 if new_name in instance_list:
4299 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4300 new_name, errors.ECODE_EXISTS)
4302 if not getattr(self.op, "ignore_ip", False):
4303 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4304 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4305 (name_info.ip, new_name),
4306 errors.ECODE_NOTUNIQUE)
4309 def Exec(self, feedback_fn):
4310 """Reinstall the instance.
4313 inst = self.instance
4314 old_name = inst.name
4316 if inst.disk_template == constants.DT_FILE:
4317 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4319 self.cfg.RenameInstance(inst.name, self.op.new_name)
4320 # Change the instance lock. This is definitely safe while we hold the BGL
4321 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4322 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4324 # re-read the instance from the configuration after rename
4325 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4327 if inst.disk_template == constants.DT_FILE:
4328 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4329 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4330 old_file_storage_dir,
4331 new_file_storage_dir)
4332 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4333 " (but the instance has been renamed in Ganeti)" %
4334 (inst.primary_node, old_file_storage_dir,
4335 new_file_storage_dir))
4337 _StartInstanceDisks(self, inst, None)
4339 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4340 old_name, self.op.debug_level)
4341 msg = result.fail_msg
4343 msg = ("Could not run OS rename script for instance %s on node %s"
4344 " (but the instance has been renamed in Ganeti): %s" %
4345 (inst.name, inst.primary_node, msg))
4346 self.proc.LogWarning(msg)
4348 _ShutdownInstanceDisks(self, inst)
4351 class LURemoveInstance(LogicalUnit):
4352 """Remove an instance.
4355 HPATH = "instance-remove"
4356 HTYPE = constants.HTYPE_INSTANCE
4357 _OP_REQP = ["instance_name", "ignore_failures"]
4360 def CheckArguments(self):
4361 """Check the arguments.
4364 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4365 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4367 def ExpandNames(self):
4368 self._ExpandAndLockInstance()
4369 self.needed_locks[locking.LEVEL_NODE] = []
4370 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4372 def DeclareLocks(self, level):
4373 if level == locking.LEVEL_NODE:
4374 self._LockInstancesNodes()
4376 def BuildHooksEnv(self):
4379 This runs on master, primary and secondary nodes of the instance.
4382 env = _BuildInstanceHookEnvByObject(self, self.instance)
4383 env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4384 nl = [self.cfg.GetMasterNode()]
4385 nl_post = list(self.instance.all_nodes) + nl
4386 return env, nl, nl_post
4388 def CheckPrereq(self):
4389 """Check prerequisites.
4391 This checks that the instance is in the cluster.
4394 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4395 assert self.instance is not None, \
4396 "Cannot retrieve locked instance %s" % self.op.instance_name
4398 def Exec(self, feedback_fn):
4399 """Remove the instance.
4402 instance = self.instance
4403 logging.info("Shutting down instance %s on node %s",
4404 instance.name, instance.primary_node)
4406 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4407 self.shutdown_timeout)
4408 msg = result.fail_msg
4410 if self.op.ignore_failures:
4411 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4413 raise errors.OpExecError("Could not shutdown instance %s on"
4415 (instance.name, instance.primary_node, msg))
4417 logging.info("Removing block devices for instance %s", instance.name)
4419 if not _RemoveDisks(self, instance):
4420 if self.op.ignore_failures:
4421 feedback_fn("Warning: can't remove instance's disks")
4423 raise errors.OpExecError("Can't remove instance's disks")
4425 logging.info("Removing instance %s out of cluster config", instance.name)
4427 self.cfg.RemoveInstance(instance.name)
4428 self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4431 class LUQueryInstances(NoHooksLU):
4432 """Logical unit for querying instances.
4435 # pylint: disable-msg=W0142
4436 _OP_REQP = ["output_fields", "names", "use_locking"]
4438 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4439 "serial_no", "ctime", "mtime", "uuid"]
4440 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4442 "disk_template", "ip", "mac", "bridge",
4443 "nic_mode", "nic_link",
4444 "sda_size", "sdb_size", "vcpus", "tags",
4445 "network_port", "beparams",
4446 r"(disk)\.(size)/([0-9]+)",
4447 r"(disk)\.(sizes)", "disk_usage",
4448 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4449 r"(nic)\.(bridge)/([0-9]+)",
4450 r"(nic)\.(macs|ips|modes|links|bridges)",
4451 r"(disk|nic)\.(count)",
4453 ] + _SIMPLE_FIELDS +
4455 for name in constants.HVS_PARAMETERS
4456 if name not in constants.HVC_GLOBALS] +
4458 for name in constants.BES_PARAMETERS])
4459 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
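# Field-selection sketch (illustrative): a query such as
#   output_fields = ["name", "pnode", "oper_state", "disk.sizes", "nic.macs"]
# mixes purely static fields (answered from the configuration) with the
# dynamic "oper_state", which forces the node query in Exec below.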
4462 def ExpandNames(self):
4463 _CheckOutputFields(static=self._FIELDS_STATIC,
4464 dynamic=self._FIELDS_DYNAMIC,
4465 selected=self.op.output_fields)
4467 self.needed_locks = {}
4468 self.share_locks[locking.LEVEL_INSTANCE] = 1
4469 self.share_locks[locking.LEVEL_NODE] = 1
4472 self.wanted = _GetWantedInstances(self, self.op.names)
4474 self.wanted = locking.ALL_SET
4476 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4477 self.do_locking = self.do_node_query and self.op.use_locking
4479 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4480 self.needed_locks[locking.LEVEL_NODE] = []
4481 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4483 def DeclareLocks(self, level):
4484 if level == locking.LEVEL_NODE and self.do_locking:
4485 self._LockInstancesNodes()
4487 def CheckPrereq(self):
4488 """Check prerequisites.
4493 def Exec(self, feedback_fn):
4494 """Computes the list of nodes and their attributes.
4497 # pylint: disable-msg=R0912
4498 # way too many branches here
4499 all_info = self.cfg.GetAllInstancesInfo()
4500 if self.wanted == locking.ALL_SET:
4501 # caller didn't specify instance names, so ordering is not important
4503 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4505 instance_names = all_info.keys()
4506 instance_names = utils.NiceSort(instance_names)
4508 # caller did specify names, so we must keep the ordering
4510 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4512 tgt_set = all_info.keys()
4513 missing = set(self.wanted).difference(tgt_set)
4515 raise errors.OpExecError("Some instances were removed before"
4516 " retrieving their data: %s" % missing)
4517 instance_names = self.wanted
4519 instance_list = [all_info[iname] for iname in instance_names]
4521 # begin data gathering
4523 nodes = frozenset([inst.primary_node for inst in instance_list])
4524 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4528 if self.do_node_query:
4530 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4532 result = node_data[name]
4534 # offline nodes will be in both lists
4535 off_nodes.append(name)
4537 bad_nodes.append(name)
4540 live_data.update(result.payload)
4541 # else no instance is alive
4543 live_data = dict([(name, {}) for name in instance_names])
4545 # end data gathering
4550 cluster = self.cfg.GetClusterInfo()
4551 for instance in instance_list:
4553 i_hv = cluster.FillHV(instance, skip_globals=True)
4554 i_be = cluster.FillBE(instance)
4555 i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4556 nic.nicparams) for nic in instance.nics]
4557 for field in self.op.output_fields:
4558 st_match = self._FIELDS_STATIC.Matches(field)
4559 if field in self._SIMPLE_FIELDS:
4560 val = getattr(instance, field)
4561 elif field == "pnode":
4562 val = instance.primary_node
4563 elif field == "snodes":
4564 val = list(instance.secondary_nodes)
4565 elif field == "admin_state":
4566 val = instance.admin_up
4567 elif field == "oper_state":
4568 if instance.primary_node in bad_nodes:
4571 val = bool(live_data.get(instance.name))
4572 elif field == "status":
4573 if instance.primary_node in off_nodes:
4574 val = "ERROR_nodeoffline"
4575 elif instance.primary_node in bad_nodes:
4576 val = "ERROR_nodedown"
4578 running = bool(live_data.get(instance.name))
4580 if instance.admin_up:
4585 if instance.admin_up:
4589 elif field == "oper_ram":
4590 if instance.primary_node in bad_nodes:
4592 elif instance.name in live_data:
4593 val = live_data[instance.name].get("memory", "?")
4596 elif field == "vcpus":
4597 val = i_be[constants.BE_VCPUS]
4598 elif field == "disk_template":
4599 val = instance.disk_template
4602 val = instance.nics[0].ip
4605 elif field == "nic_mode":
4607 val = i_nicp[0][constants.NIC_MODE]
4610 elif field == "nic_link":
4612 val = i_nicp[0][constants.NIC_LINK]
4615 elif field == "bridge":
4616 if (instance.nics and
4617 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4618 val = i_nicp[0][constants.NIC_LINK]
4621 elif field == "mac":
4623 val = instance.nics[0].mac
4626 elif field in ("sda_size", "sdb_size"):
4627 idx = ord(field[2]) - ord('a')
4629 val = instance.FindDisk(idx).size
4630 except errors.OpPrereqError:
4632 elif field == "disk_usage": # total disk usage per node
4633 disk_sizes = [{'size': disk.size} for disk in instance.disks]
4634 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4635 elif field == "tags":
4636 val = list(instance.GetTags())
4637 elif field == "hvparams":
4639 elif (field.startswith(HVPREFIX) and
4640 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4641 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4642 val = i_hv.get(field[len(HVPREFIX):], None)
4643 elif field == "beparams":
4645 elif (field.startswith(BEPREFIX) and
4646 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4647 val = i_be.get(field[len(BEPREFIX):], None)
4648 elif st_match and st_match.groups():
4649 # matches a variable list
4650 st_groups = st_match.groups()
4651 if st_groups and st_groups[0] == "disk":
4652 if st_groups[1] == "count":
4653 val = len(instance.disks)
4654 elif st_groups[1] == "sizes":
4655 val = [disk.size for disk in instance.disks]
4656 elif st_groups[1] == "size":
4658 val = instance.FindDisk(st_groups[2]).size
4659 except errors.OpPrereqError:
4662 assert False, "Unhandled disk parameter"
4663 elif st_groups[0] == "nic":
4664 if st_groups[1] == "count":
4665 val = len(instance.nics)
4666 elif st_groups[1] == "macs":
4667 val = [nic.mac for nic in instance.nics]
4668 elif st_groups[1] == "ips":
4669 val = [nic.ip for nic in instance.nics]
4670 elif st_groups[1] == "modes":
4671 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4672 elif st_groups[1] == "links":
4673 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4674 elif st_groups[1] == "bridges":
4677 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4678 val.append(nicp[constants.NIC_LINK])
4683 nic_idx = int(st_groups[2])
4684 if nic_idx >= len(instance.nics):
4687 if st_groups[1] == "mac":
4688 val = instance.nics[nic_idx].mac
4689 elif st_groups[1] == "ip":
4690 val = instance.nics[nic_idx].ip
4691 elif st_groups[1] == "mode":
4692 val = i_nicp[nic_idx][constants.NIC_MODE]
4693 elif st_groups[1] == "link":
4694 val = i_nicp[nic_idx][constants.NIC_LINK]
4695 elif st_groups[1] == "bridge":
4696 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4697 if nic_mode == constants.NIC_MODE_BRIDGED:
4698 val = i_nicp[nic_idx][constants.NIC_LINK]
4702 assert False, "Unhandled NIC parameter"
4704 assert False, ("Declared but unhandled variable parameter '%s'" %
4707 assert False, "Declared but unhandled parameter '%s'" % field
4714 class LUFailoverInstance(LogicalUnit):
4715 """Failover an instance.
4718 HPATH = "instance-failover"
4719 HTYPE = constants.HTYPE_INSTANCE
4720 _OP_REQP = ["instance_name", "ignore_consistency"]
4723 def CheckArguments(self):
4724 """Check the arguments.
4727 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4728 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4730 def ExpandNames(self):
4731 self._ExpandAndLockInstance()
4732 self.needed_locks[locking.LEVEL_NODE] = []
4733 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4735 def DeclareLocks(self, level):
4736 if level == locking.LEVEL_NODE:
4737 self._LockInstancesNodes()
4739 def BuildHooksEnv(self):
4742 This runs on master, primary and secondary nodes of the instance.
4745 instance = self.instance
4746 source_node = instance.primary_node
4747 target_node = instance.secondary_nodes[0]
4749 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4750 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4751 "OLD_PRIMARY": source_node,
4752 "OLD_SECONDARY": target_node,
4753 "NEW_PRIMARY": target_node,
4754 "NEW_SECONDARY": source_node,
4756 env.update(_BuildInstanceHookEnvByObject(self, instance))
4757 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4759 nl_post.append(source_node)
4760 return env, nl, nl_post
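# Hook-environment sketch (hypothetical node names): for an instance whose
# primary is node1 and secondary is node2, hooks receive OLD_PRIMARY=node1,
# OLD_SECONDARY=node2, NEW_PRIMARY=node2, NEW_SECONDARY=node1, plus the
# generic instance variables from _BuildInstanceHookEnvByObject.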
4762 def CheckPrereq(self):
4763 """Check prerequisites.
4765 This checks that the instance is in the cluster.
4768 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4769 assert self.instance is not None, \
4770 "Cannot retrieve locked instance %s" % self.op.instance_name
4772 bep = self.cfg.GetClusterInfo().FillBE(instance)
4773 if instance.disk_template not in constants.DTS_NET_MIRROR:
4774 raise errors.OpPrereqError("Instance's disk layout is not"
4775 " network mirrored, cannot failover.",
4778 secondary_nodes = instance.secondary_nodes
4779 if not secondary_nodes:
4780 raise errors.ProgrammerError("no secondary node but using "
4781 "a mirrored disk template")
4783 target_node = secondary_nodes[0]
4784 _CheckNodeOnline(self, target_node)
4785 _CheckNodeNotDrained(self, target_node)
4786 if instance.admin_up:
4787 # check memory requirements on the secondary node
4788 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4789 instance.name, bep[constants.BE_MEMORY],
4790 instance.hypervisor)
4792 self.LogInfo("Not checking memory on the secondary node as"
4793 " instance will not be started")
4795 # check bridge existence
4796 _CheckInstanceBridgesExist(self, instance, node=target_node)
4798 def Exec(self, feedback_fn):
4799 """Failover an instance.
4801 The failover is done by shutting it down on its present node and
4802 starting it on the secondary.
4805 instance = self.instance
4807 source_node = instance.primary_node
4808 target_node = instance.secondary_nodes[0]
4810 if instance.admin_up:
4811 feedback_fn("* checking disk consistency between source and target")
4812 for dev in instance.disks:
4813 # for drbd, these are drbd over lvm
4814 if not _CheckDiskConsistency(self, dev, target_node, False):
4815 if not self.op.ignore_consistency:
4816 raise errors.OpExecError("Disk %s is degraded on target node,"
4817 " aborting failover." % dev.iv_name)
4819 feedback_fn("* not checking disk consistency as instance is not running")
4821 feedback_fn("* shutting down instance on source node")
4822 logging.info("Shutting down instance %s on node %s",
4823 instance.name, source_node)
4825 result = self.rpc.call_instance_shutdown(source_node, instance,
4826 self.shutdown_timeout)
4827 msg = result.fail_msg
4829 if self.op.ignore_consistency:
4830 self.proc.LogWarning("Could not shutdown instance %s on node %s."
4831 " Proceeding anyway. Please make sure node"
4832 " %s is down. Error details: %s",
4833 instance.name, source_node, source_node, msg)
4835 raise errors.OpExecError("Could not shutdown instance %s on"
4837 (instance.name, source_node, msg))
4839 feedback_fn("* deactivating the instance's disks on source node")
4840 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
4841 raise errors.OpExecError("Can't shut down the instance's disks.")
4843 instance.primary_node = target_node
4844 # distribute new instance config to the other nodes
4845 self.cfg.Update(instance, feedback_fn)
4847 # Only start the instance if it's marked as up
4848 if instance.admin_up:
4849 feedback_fn("* activating the instance's disks on target node")
4850 logging.info("Starting instance %s on node %s",
4851 instance.name, target_node)
4853 disks_ok, _ = _AssembleInstanceDisks(self, instance,
4854 ignore_secondaries=True)
4856 _ShutdownInstanceDisks(self, instance)
4857 raise errors.OpExecError("Can't activate the instance's disks")
4859 feedback_fn("* starting the instance on the target node")
4860 result = self.rpc.call_instance_start(target_node, instance, None, None)
4861 msg = result.fail_msg
4863 _ShutdownInstanceDisks(self, instance)
4864 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4865 (instance.name, target_node, msg))
4868 class LUMigrateInstance(LogicalUnit):
4869 """Migrate an instance.
4871 This is migration without shutting the instance down, as opposed to
4872 failover, which requires a shutdown.
4875 HPATH = "instance-migrate"
4876 HTYPE = constants.HTYPE_INSTANCE
4877 _OP_REQP = ["instance_name", "live", "cleanup"]
4881 def ExpandNames(self):
4882 self._ExpandAndLockInstance()
4884 self.needed_locks[locking.LEVEL_NODE] = []
4885 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4887 self._migrater = TLMigrateInstance(self, self.op.instance_name,
4888 self.op.live, self.op.cleanup)
4889 self.tasklets = [self._migrater]
4891 def DeclareLocks(self, level):
4892 if level == locking.LEVEL_NODE:
4893 self._LockInstancesNodes()
4895 def BuildHooksEnv(self):
4898 This runs on master, primary and secondary nodes of the instance.
4901 instance = self._migrater.instance
4902 source_node = instance.primary_node
4903 target_node = instance.secondary_nodes[0]
4904 env = _BuildInstanceHookEnvByObject(self, instance)
4905 env["MIGRATE_LIVE"] = self.op.live
4906 env["MIGRATE_CLEANUP"] = self.op.cleanup
4908 "OLD_PRIMARY": source_node,
4909 "OLD_SECONDARY": target_node,
4910 "NEW_PRIMARY": target_node,
4911 "NEW_SECONDARY": source_node,
4913 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4915 nl_post.append(source_node)
4916 return env, nl, nl_post
4919 class LUMoveInstance(LogicalUnit):
4920 """Move an instance by data-copying.
4923 HPATH = "instance-move"
4924 HTYPE = constants.HTYPE_INSTANCE
4925 _OP_REQP = ["instance_name", "target_node"]
4928 def CheckArguments(self):
4929 """Check the arguments.
4932 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4933 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4935 def ExpandNames(self):
4936 self._ExpandAndLockInstance()
4937 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
4938 self.op.target_node = target_node
4939 self.needed_locks[locking.LEVEL_NODE] = [target_node]
4940 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4942 def DeclareLocks(self, level):
4943 if level == locking.LEVEL_NODE:
4944 self._LockInstancesNodes(primary_only=True)
4946 def BuildHooksEnv(self):
4949 This runs on master, primary and secondary nodes of the instance.
4953 "TARGET_NODE": self.op.target_node,
4954 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4956 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4957 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
4958 self.op.target_node]
4961 def CheckPrereq(self):
4962 """Check prerequisites.
4964 This checks that the instance is in the cluster.
4967 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4968 assert self.instance is not None, \
4969 "Cannot retrieve locked instance %s" % self.op.instance_name
4971 node = self.cfg.GetNodeInfo(self.op.target_node)
4972 assert node is not None, \
4973 "Cannot retrieve locked node %s" % self.op.target_node
4975 self.target_node = target_node = node.name
4977 if target_node == instance.primary_node:
4978 raise errors.OpPrereqError("Instance %s is already on the node %s" %
4979 (instance.name, target_node),
4982 bep = self.cfg.GetClusterInfo().FillBE(instance)
4984 for idx, dsk in enumerate(instance.disks):
4985 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
4986 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
4987 " cannot copy" % idx, errors.ECODE_STATE)
4989 _CheckNodeOnline(self, target_node)
4990 _CheckNodeNotDrained(self, target_node)
4992 if instance.admin_up:
4993 # check memory requirements on the target node
4994 _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
4995 instance.name, bep[constants.BE_MEMORY],
4996 instance.hypervisor)
4998 self.LogInfo("Not checking memory on the secondary node as"
4999 " instance will not be started")
5001 # check bridge existence
5002 _CheckInstanceBridgesExist(self, instance, node=target_node)
5004 def Exec(self, feedback_fn):
5005 """Move an instance.
5007 The move is done by shutting it down on its present node, copying
5008 the data over (slow) and starting it on the new node.
5011 instance = self.instance
5013 source_node = instance.primary_node
5014 target_node = self.target_node
5016 self.LogInfo("Shutting down instance %s on source node %s",
5017 instance.name, source_node)
5019 result = self.rpc.call_instance_shutdown(source_node, instance,
5020 self.shutdown_timeout)
5021 msg = result.fail_msg
5023 if self.op.ignore_consistency:
5024 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5025 " Proceeding anyway. Please make sure node"
5026 " %s is down. Error details: %s",
5027 instance.name, source_node, source_node, msg)
5029 raise errors.OpExecError("Could not shutdown instance %s on"
5031 (instance.name, source_node, msg))
5033 # create the target disks
5035 _CreateDisks(self, instance, target_node=target_node)
5036 except errors.OpExecError:
5037 self.LogWarning("Device creation failed, reverting...")
5039 _RemoveDisks(self, instance, target_node=target_node)
5041 self.cfg.ReleaseDRBDMinors(instance.name)
5044 cluster_name = self.cfg.GetClusterInfo().cluster_name
5047 # activate, get path, copy the data over
5048 for idx, disk in enumerate(instance.disks):
5049 self.LogInfo("Copying data for disk %d", idx)
5050 result = self.rpc.call_blockdev_assemble(target_node, disk,
5051 instance.name, True)
5053 self.LogWarning("Can't assemble newly created disk %d: %s",
5054 idx, result.fail_msg)
5055 errs.append(result.fail_msg)
5057 dev_path = result.payload
5058 result = self.rpc.call_blockdev_export(source_node, disk,
5059 target_node, dev_path,
5062 self.LogWarning("Can't copy data over for disk %d: %s",
5063 idx, result.fail_msg)
5064 errs.append(result.fail_msg)
5068 self.LogWarning("Some disks failed to copy, aborting")
5070 _RemoveDisks(self, instance, target_node=target_node)
5072 self.cfg.ReleaseDRBDMinors(instance.name)
5073 raise errors.OpExecError("Errors during disk copy: %s" %
5076 instance.primary_node = target_node
5077 self.cfg.Update(instance, feedback_fn)
5079 self.LogInfo("Removing the disks on the original node")
5080 _RemoveDisks(self, instance, target_node=source_node)
5082 # Only start the instance if it's marked as up
5083 if instance.admin_up:
5084 self.LogInfo("Starting instance %s on node %s",
5085 instance.name, target_node)
5087 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5088 ignore_secondaries=True)
5090 _ShutdownInstanceDisks(self, instance)
5091 raise errors.OpExecError("Can't activate the instance's disks")
5093 result = self.rpc.call_instance_start(target_node, instance, None, None)
5094 msg = result.fail_msg
5096 _ShutdownInstanceDisks(self, instance)
5097 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5098 (instance.name, target_node, msg))
5101 class LUMigrateNode(LogicalUnit):
5102 """Migrate all instances from a node.
5105 HPATH = "node-migrate"
5106 HTYPE = constants.HTYPE_NODE
5107 _OP_REQP = ["node_name", "live"]
5110 def ExpandNames(self):
5111 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5113 self.needed_locks = {
5114 locking.LEVEL_NODE: [self.op.node_name],
5117 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5119 # Create tasklets for migrating instances for all instances on this node
5123 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5124 logging.debug("Migrating instance %s", inst.name)
5125 names.append(inst.name)
5127 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5129 self.tasklets = tasklets
5131 # Declare instance locks
5132 self.needed_locks[locking.LEVEL_INSTANCE] = names
5134 def DeclareLocks(self, level):
5135 if level == locking.LEVEL_NODE:
5136 self._LockInstancesNodes()
5138 def BuildHooksEnv(self):
5141 This runs on the master, the primary and all the secondaries.
5145 "NODE_NAME": self.op.node_name,
5148 nl = [self.cfg.GetMasterNode()]
5150 return (env, nl, nl)
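# Tasklet-construction sketch: both LUMigrateInstance and LUMigrateNode above
# delegate the actual work to the tasklet defined below, e.g.
#   self.tasklets = [TLMigrateInstance(self, inst.name, self.op.live, False)]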
5153 class TLMigrateInstance(Tasklet):
5154 def __init__(self, lu, instance_name, live, cleanup):
5155 """Initializes this class.
5158 Tasklet.__init__(self, lu)
5161 self.instance_name = instance_name
5163 self.cleanup = cleanup
5165 def CheckPrereq(self):
5166 """Check prerequisites.
5168 This checks that the instance is in the cluster.
5171 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5172 instance = self.cfg.GetInstanceInfo(instance_name)
5173 assert instance is not None
5175 if instance.disk_template != constants.DT_DRBD8:
5176 raise errors.OpPrereqError("Instance's disk layout is not"
5177 " drbd8, cannot migrate.", errors.ECODE_STATE)
5179 secondary_nodes = instance.secondary_nodes
5180 if not secondary_nodes:
5181 raise errors.ConfigurationError("No secondary node but using"
5182 " drbd8 disk template")
5184 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5186 target_node = secondary_nodes[0]
5187 # check memory requirements on the secondary node
5188 _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5189 instance.name, i_be[constants.BE_MEMORY],
5190 instance.hypervisor)
5192 # check bridge existence
5193 _CheckInstanceBridgesExist(self, instance, node=target_node)
5195 if not self.cleanup:
5196 _CheckNodeNotDrained(self, target_node)
5197 result = self.rpc.call_instance_migratable(instance.primary_node,
5199 result.Raise("Can't migrate, please use failover",
5200 prereq=True, ecode=errors.ECODE_STATE)
5202 self.instance = instance
5204 def _WaitUntilSync(self):
5205 """Poll with custom rpc for disk sync.
5207 This uses our own step-based rpc call.
5210 self.feedback_fn("* wait until resync is done")
5214 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5216 self.instance.disks)
5218 for node, nres in result.items():
5219 nres.Raise("Cannot resync disks on node %s" % node)
5220 node_done, node_percent = nres.payload
5221 all_done = all_done and node_done
5222 if node_percent is not None:
5223 min_percent = min(min_percent, node_percent)
5225 if min_percent < 100:
5226 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5229 def _EnsureSecondary(self, node):
5230 """Demote a node to secondary.
5233 self.feedback_fn("* switching node %s to secondary mode" % node)
5235 for dev in self.instance.disks:
5236 self.cfg.SetDiskID(dev, node)
5238 result = self.rpc.call_blockdev_close(node, self.instance.name,
5239 self.instance.disks)
5240 result.Raise("Cannot change disk to secondary on node %s" % node)
5242 def _GoStandalone(self):
5243 """Disconnect from the network.
5246 self.feedback_fn("* changing into standalone mode")
5247 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5248 self.instance.disks)
5249 for node, nres in result.items():
5250 nres.Raise("Cannot disconnect disks node %s" % node)
5252 def _GoReconnect(self, multimaster):
5253 """Reconnect to the network.
5259 msg = "single-master"
5260 self.feedback_fn("* changing disks into %s mode" % msg)
5261 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5262 self.instance.disks,
5263 self.instance.name, multimaster)
5264 for node, nres in result.items():
5265 nres.Raise("Cannot change disks config on node %s" % node)
5267 def _ExecCleanup(self):
5268 """Try to cleanup after a failed migration.
5270 The cleanup is done by:
5271 - check that the instance is running only on one node
5272 (and update the config if needed)
5273 - change disks on its secondary node to secondary
5274 - wait until disks are fully synchronized
5275 - disconnect from the network
5276 - change disks into single-master mode
5277 - wait again until disks are fully synchronized
5280 instance = self.instance
5281 target_node = self.target_node
5282 source_node = self.source_node
5284 # check running on only one node
5285 self.feedback_fn("* checking where the instance actually runs"
5286 " (if this hangs, the hypervisor might be in"
5288 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5289 for node, result in ins_l.items():
5290 result.Raise("Can't contact node %s" % node)
5292 runningon_source = instance.name in ins_l[source_node].payload
5293 runningon_target = instance.name in ins_l[target_node].payload
5295 if runningon_source and runningon_target:
5296 raise errors.OpExecError("Instance seems to be running on two nodes,"
5297 " or the hypervisor is confused. You will have"
5298 " to ensure manually that it runs only on one"
5299 " and restart this operation.")
5301 if not (runningon_source or runningon_target):
5302 raise errors.OpExecError("Instance does not seem to be running at all."
5303 " In this case, it's safer to repair by"
5304 " running 'gnt-instance stop' to ensure disk"
5305 " shutdown, and then restarting it.")
5307 if runningon_target:
5308 # the migration has actually succeeded, we need to update the config
5309 self.feedback_fn("* instance running on secondary node (%s),"
5310 " updating config" % target_node)
5311 instance.primary_node = target_node
5312 self.cfg.Update(instance, self.feedback_fn)
5313 demoted_node = source_node
5315 self.feedback_fn("* instance confirmed to be running on its"
5316 " primary node (%s)" % source_node)
5317 demoted_node = target_node
5319 self._EnsureSecondary(demoted_node)
5321 self._WaitUntilSync()
5322 except errors.OpExecError:
5323 # we ignore here errors, since if the device is standalone, it
5324 # won't be able to sync
5326 self._GoStandalone()
5327 self._GoReconnect(False)
5328 self._WaitUntilSync()
5330 self.feedback_fn("* done")
5332 def _RevertDiskStatus(self):
5333 """Try to revert the disk status after a failed migration.
5336 target_node = self.target_node
5338 self._EnsureSecondary(target_node)
5339 self._GoStandalone()
5340 self._GoReconnect(False)
5341 self._WaitUntilSync()
5342 except errors.OpExecError, err:
5343 self.lu.LogWarning("Migration failed and I can't reconnect the"
5344 " drives: error '%s'\n"
5345 "Please look and recover the instance status" %
5348 def _AbortMigration(self):
5349 """Call the hypervisor code to abort a started migration.
5352 instance = self.instance
5353 target_node = self.target_node
5354 migration_info = self.migration_info
5356 abort_result = self.rpc.call_finalize_migration(target_node,
5360 abort_msg = abort_result.fail_msg
5362 logging.error("Aborting migration failed on target node %s: %s",
5363 target_node, abort_msg)
5364 # Don't raise an exception here, as we still have to try to revert the
5365 # disk status, even if this step failed.
5367 def _ExecMigration(self):
5368 """Migrate an instance.
5370 The migrate is done by:
5371 - change the disks into dual-master mode
5372 - wait until disks are fully synchronized again
5373 - migrate the instance
5374 - change disks on the new secondary node (the old primary) to secondary
5375 - wait until disks are fully synchronized
5376 - change disks into single-master mode
5379 instance = self.instance
5380 target_node = self.target_node
5381 source_node = self.source_node
5383 self.feedback_fn("* checking disk consistency between source and target")
5384 for dev in instance.disks:
5385 if not _CheckDiskConsistency(self, dev, target_node, False):
5386 raise errors.OpExecError("Disk %s is degraded or not fully"
5387 " synchronized on target node,"
5388 " aborting migrate." % dev.iv_name)
5390 # First get the migration information from the remote node
5391 result = self.rpc.call_migration_info(source_node, instance)
5392 msg = result.fail_msg
5394 log_err = ("Failed fetching source migration information from %s: %s" %
5396 logging.error(log_err)
5397 raise errors.OpExecError(log_err)
5399 self.migration_info = migration_info = result.payload
5401 # Then switch the disks to master/master mode
5402 self._EnsureSecondary(target_node)
5403 self._GoStandalone()
5404 self._GoReconnect(True)
5405 self._WaitUntilSync()
5407 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5408 result = self.rpc.call_accept_instance(target_node,
5411 self.nodes_ip[target_node])
5413 msg = result.fail_msg
5415 logging.error("Instance pre-migration failed, trying to revert"
5416 " disk status: %s", msg)
5417 self.feedback_fn("Pre-migration failed, aborting")
5418 self._AbortMigration()
5419 self._RevertDiskStatus()
5420 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5421 (instance.name, msg))
5423 self.feedback_fn("* migrating instance to %s" % target_node)
5425 result = self.rpc.call_instance_migrate(source_node, instance,
5426 self.nodes_ip[target_node],
5428 msg = result.fail_msg
5430 logging.error("Instance migration failed, trying to revert"
5431 " disk status: %s", msg)
5432 self.feedback_fn("Migration failed, aborting")
5433 self._AbortMigration()
5434 self._RevertDiskStatus()
5435 raise errors.OpExecError("Could not migrate instance %s: %s" %
5436 (instance.name, msg))
5439 instance.primary_node = target_node
5440 # distribute new instance config to the other nodes
5441 self.cfg.Update(instance, self.feedback_fn)
5443 result = self.rpc.call_finalize_migration(target_node,
5447 msg = result.fail_msg
5449 logging.error("Instance migration succeeded, but finalization failed:"
5451 raise errors.OpExecError("Could not finalize instance migration: %s" %
5454 self._EnsureSecondary(source_node)
5455 self._WaitUntilSync()
5456 self._GoStandalone()
5457 self._GoReconnect(False)
5458 self._WaitUntilSync()
5460 self.feedback_fn("* done")
5462 def Exec(self, feedback_fn):
5463 """Perform the migration.
5466 feedback_fn("Migrating instance %s" % self.instance.name)
5468 self.feedback_fn = feedback_fn
5470 self.source_node = self.instance.primary_node
5471 self.target_node = self.instance.secondary_nodes[0]
5472 self.all_nodes = [self.source_node, self.target_node]
5474 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5475 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5479 return self._ExecCleanup()
5481 return self._ExecMigration()
5484 def _CreateBlockDev(lu, node, instance, device, force_create,
5486 """Create a tree of block devices on a given node.
5488 If this device type has to be created on secondaries, create it and all its children.
5491 If not, just recurse to children keeping the same 'force' value.
5493 @param lu: the lu on whose behalf we execute
5494 @param node: the node on which to create the device
5495 @type instance: L{objects.Instance}
5496 @param instance: the instance which owns the device
5497 @type device: L{objects.Disk}
5498 @param device: the device to create
5499 @type force_create: boolean
5500 @param force_create: whether to force creation of this device; this
5501 will be changed to True whenever we find a device whose
5502 CreateOnSecondary() method returns True
5503 @param info: the extra 'metadata' we should attach to the device
5504 (this will be represented as a LVM tag)
5505 @type force_open: boolean
5506 @param force_open: this parameter will be passed to the
5507 L{backend.BlockdevCreate} function where it specifies
5508 whether we run on primary or not, and it affects both
5509 the child assembly and the device's own Open() execution
5512 if device.CreateOnSecondary():
5516 for child in device.children:
5517 _CreateBlockDev(lu, node, instance, child, force_create,
5520 if not force_create:
5523 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5526 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5527 """Create a single block device on a given node.
5529 This will not recurse over children of the device, so they must be created in advance.
5532 @param lu: the lu on whose behalf we execute
5533 @param node: the node on which to create the device
5534 @type instance: L{objects.Instance}
5535 @param instance: the instance which owns the device
5536 @type device: L{objects.Disk}
5537 @param device: the device to create
5538 @param info: the extra 'metadata' we should attach to the device
5539 (this will be represented as a LVM tag)
5540 @type force_open: boolean
5541 @param force_open: this parameter will be passed to the
5542 L{backend.BlockdevCreate} function where it specifies
5543 whether we run on primary or not, and it affects both
5544 the child assembly and the device's own Open() execution
5547 lu.cfg.SetDiskID(device, node)
5548 result = lu.rpc.call_blockdev_create(node, device, device.size,
5549 instance.name, force_open, info)
5550 result.Raise("Can't create block device %s on"
5551 " node %s for instance %s" % (device, node, instance.name))
5552 if device.physical_id is None:
5553 device.physical_id = result.payload
5556 def _GenerateUniqueNames(lu, exts):
5557 """Generate a suitable LV name.
5559 This will generate a logical volume name for the given instance.
5564 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5565 results.append("%s%s" % (new_id, val))
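# Result sketch (ids shortened and illustrative): for exts=[".disk0", ".disk1"]
# each extension gets its own freshly generated unique id, e.g.
#   ["f27c7701-....disk0", "d2a8e6c3-....disk1"]
# which the DRBD8 template below further suffixes with "_data" and "_meta".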
5569 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5571 """Generate a drbd8 device complete with its children.
5574 port = lu.cfg.AllocatePort()
5575 vgname = lu.cfg.GetVGName()
5576 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5577 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5578 logical_id=(vgname, names[0]))
5579 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5580 logical_id=(vgname, names[1]))
5581 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5582 logical_id=(primary, secondary, port,
5585 children=[dev_data, dev_meta],
5590 def _GenerateDiskTemplate(lu, template_name,
5591 instance_name, primary_node,
5592 secondary_nodes, disk_info,
5593 file_storage_dir, file_driver,
5595 """Generate the entire disk layout for a given template type.
5598 # TODO: compute space requirements
5600 vgname = lu.cfg.GetVGName()
5601 disk_count = len(disk_info)
5603 if template_name == constants.DT_DISKLESS:
5605 elif template_name == constants.DT_PLAIN:
5606 if len(secondary_nodes) != 0:
5607 raise errors.ProgrammerError("Wrong template configuration")
5609 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5610 for i in range(disk_count)])
5611 for idx, disk in enumerate(disk_info):
5612 disk_index = idx + base_index
5613 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5614 logical_id=(vgname, names[idx]),
5615 iv_name="disk/%d" % disk_index,
5617 disks.append(disk_dev)
5618 elif template_name == constants.DT_DRBD8:
5619 if len(secondary_nodes) != 1:
5620 raise errors.ProgrammerError("Wrong template configuration")
5621 remote_node = secondary_nodes[0]
5622 minors = lu.cfg.AllocateDRBDMinor(
5623 [primary_node, remote_node] * len(disk_info), instance_name)
5626 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5627 for i in range(disk_count)]):
5628 names.append(lv_prefix + "_data")
5629 names.append(lv_prefix + "_meta")
5630 for idx, disk in enumerate(disk_info):
5631 disk_index = idx + base_index
5632 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5633 disk["size"], names[idx*2:idx*2+2],
5634 "disk/%d" % disk_index,
5635 minors[idx*2], minors[idx*2+1])
5636 disk_dev.mode = disk["mode"]
5637 disks.append(disk_dev)
5638 elif template_name == constants.DT_FILE:
5639 if len(secondary_nodes) != 0:
5640 raise errors.ProgrammerError("Wrong template configuration")
5642 for idx, disk in enumerate(disk_info):
5643 disk_index = idx + base_index
5644 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5645 iv_name="disk/%d" % disk_index,
5646 logical_id=(file_driver,
5647 "%s/disk%d" % (file_storage_dir,
5650 disks.append(disk_dev)
5652 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
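# Input sketch (illustrative): disk_info is a list of dicts with at least
# "size" (in MiB) and "mode", e.g. [{"size": 1024, "mode": "rw"}]; base_index
# offsets the generated "disk/N" iv_names when disks are added to an
# existing instance.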
5656 def _GetInstanceInfoText(instance):
5657 """Compute that text that should be added to the disk's metadata.
5660 return "originstname+%s" % instance.name
5663 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5664 """Create all disks for an instance.
5666 This abstracts away some work from AddInstance.
5668 @type lu: L{LogicalUnit}
5669 @param lu: the logical unit on whose behalf we execute
5670 @type instance: L{objects.Instance}
5671 @param instance: the instance whose disks we should create
5673 @param to_skip: list of indices to skip
5674 @type target_node: string
5675 @param target_node: if passed, overrides the target node for creation
5677 @return: the success of the creation
5680 info = _GetInstanceInfoText(instance)
5681 if target_node is None:
5682 pnode = instance.primary_node
5683 all_nodes = instance.all_nodes
5688 if instance.disk_template == constants.DT_FILE:
5689 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5690 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5692 result.Raise("Failed to create directory '%s' on"
5693 " node %s" % (file_storage_dir, pnode))
5695 # Note: this needs to be kept in sync with adding of disks in
5696 # LUSetInstanceParams
5697 for idx, device in enumerate(instance.disks):
5698 if to_skip and idx in to_skip:
5700 logging.info("Creating volume %s for instance %s",
5701 device.iv_name, instance.name)
5703 for node in all_nodes:
5704 f_create = node == pnode
5705 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
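# Usage sketch (as in the move and recreate-disks paths above): creation can
# be redirected to another node or restricted to a subset of disks, e.g.
#   _CreateDisks(self, instance, target_node=target_node)
#   _CreateDisks(self, self.instance, to_skip=to_skip)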
5708 def _RemoveDisks(lu, instance, target_node=None):
5709 """Remove all disks for an instance.
5711 This abstracts away some work from `AddInstance()` and
5712 `RemoveInstance()`. Note that in case some of the devices couldn't
5713 be removed, the removal will continue with the other ones (compare
5714 with `_CreateDisks()`).
5716 @type lu: L{LogicalUnit}
5717 @param lu: the logical unit on whose behalf we execute
5718 @type instance: L{objects.Instance}
5719 @param instance: the instance whose disks we should remove
5720 @type target_node: string
5721 @param target_node: used to override the node on which to remove the disks
5723 @return: the success of the removal
5726 logging.info("Removing block devices for instance %s", instance.name)
5729 for device in instance.disks:
5731 edata = [(target_node, device)]
5733 edata = device.ComputeNodeTree(instance.primary_node)
5734 for node, disk in edata:
5735 lu.cfg.SetDiskID(disk, node)
5736 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5738 lu.LogWarning("Could not remove block device %s on node %s,"
5739 " continuing anyway: %s", device.iv_name, node, msg)
5742 if instance.disk_template == constants.DT_FILE:
5743 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5747 tgt = instance.primary_node
5748 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5750 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5751 file_storage_dir, instance.primary_node, result.fail_msg)
5757 def _ComputeDiskSize(disk_template, disks):
5758 """Compute disk size requirements in the volume group
5761 # Required free disk space as a function of disk and swap space
5763 constants.DT_DISKLESS: None,
5764 constants.DT_PLAIN: sum(d["size"] for d in disks),
5765 # 128 MB are added for drbd metadata for each disk
5766 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
5767 constants.DT_FILE: None,
5770 if disk_template not in req_size_dict:
5771 raise errors.ProgrammerError("Disk template '%s' size requirement"
5772 " is unknown" % disk_template)
5774 return req_size_dict[disk_template]
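# Worked example: a DRBD8 instance with two disks of 1024 and 2048 MiB needs
# (1024 + 128) + (2048 + 128) = 3328 MiB of free VG space, while diskless and
# file-based instances need no VG space at all (None).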
5777 def _CheckHVParams(lu, nodenames, hvname, hvparams):
5778 """Hypervisor parameter validation.
5780 This function abstracts the hypervisor parameter validation so it can be
5781 used in both instance create and instance modify.
5783 @type lu: L{LogicalUnit}
5784 @param lu: the logical unit for which we check
5785 @type nodenames: list
5786 @param nodenames: the list of nodes on which we should check
5787 @type hvname: string
5788 @param hvname: the name of the hypervisor we should use
5789 @type hvparams: dict
5790 @param hvparams: the parameters which we need to check
5791 @raise errors.OpPrereqError: if the parameters are not valid
5794 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5797 for node in nodenames:
5801 info.Raise("Hypervisor parameter validation failed on node %s" % node)
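# A typical call, mirroring how LUCreateInstance.CheckPrereq uses this helper
# further below (names taken from that context):
#
#   _CheckHVParams(self, [pnode.name] + self.secondaries,
#                  self.op.hypervisor, self.op.hvparams)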
5804 class LUCreateInstance(LogicalUnit):
5805 """Create an instance.
5808 HPATH = "instance-add"
5809 HTYPE = constants.HTYPE_INSTANCE
5810 _OP_REQP = ["instance_name", "disks", "disk_template",
5812 "wait_for_sync", "ip_check", "nics",
5813 "hvparams", "beparams"]
5816 def CheckArguments(self):
5820 # set optional parameters to None if they don't exist
5821 for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
5822 if not hasattr(self.op, attr):
5823 setattr(self.op, attr, None)
5825 # do not require name_check to ease forward/backward compatibility
5827 if not hasattr(self.op, "name_check"):
5828 self.op.name_check = True
5829 if not hasattr(self.op, "no_install"):
5830 self.op.no_install = False
5831 if self.op.no_install and self.op.start:
5832 self.LogInfo("No-installation mode selected, disabling startup")
5833 self.op.start = False
5834 # validate/normalize the instance name
5835 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
5836 if self.op.ip_check and not self.op.name_check:
5837 # TODO: make the ip check more flexible and not depend on the name check
5838 raise errors.OpPrereqError("Cannot do ip checks without a name check",
5840 if (self.op.disk_template == constants.DT_FILE and
5841 not constants.ENABLE_FILE_STORAGE):
5842 raise errors.OpPrereqError("File storage disabled at configure time",
5844 # check disk information: either all adopt, or no adopt
5845 has_adopt = has_no_adopt = False
5846 for disk in self.op.disks:
5851 if has_adopt and has_no_adopt:
5852 raise errors.OpPrereqError("Either all disks are adopted or none is",
5855 if self.op.disk_template != constants.DT_PLAIN:
5856 raise errors.OpPrereqError("Disk adoption is only supported for the"
5857 " 'plain' disk template",
5859 if self.op.iallocator is not None:
5860 raise errors.OpPrereqError("Disk adoption not allowed with an"
5861 " iallocator script", errors.ECODE_INVAL)
5862 if self.op.mode == constants.INSTANCE_IMPORT:
5863 raise errors.OpPrereqError("Disk adoption not allowed for"
5864 " instance import", errors.ECODE_INVAL)
5866 self.adopt_disks = has_adopt
5868 def ExpandNames(self):
5869 """ExpandNames for CreateInstance.
5871 Figure out the right locks for instance creation.
5874 self.needed_locks = {}
5876 # cheap checks, mostly valid constants given
5878 # verify creation mode
5879 if self.op.mode not in (constants.INSTANCE_CREATE,
5880 constants.INSTANCE_IMPORT):
5881 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
5882 self.op.mode, errors.ECODE_INVAL)
5884 # disk template and mirror node verification
5885 _CheckDiskTemplate(self.op.disk_template)
5887 if self.op.hypervisor is None:
5888 self.op.hypervisor = self.cfg.GetHypervisorType()
5890 cluster = self.cfg.GetClusterInfo()
5891 enabled_hvs = cluster.enabled_hypervisors
5892 if self.op.hypervisor not in enabled_hvs:
5893 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
5894 " cluster (%s)" % (self.op.hypervisor,
5895 ",".join(enabled_hvs)),
5898 # check hypervisor parameter syntax (locally)
5899 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5900 filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
5902 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
5903 hv_type.CheckParameterSyntax(filled_hvp)
5904 self.hv_full = filled_hvp
5905 # check that we don't specify global parameters on an instance
5906 _CheckGlobalHvParams(self.op.hvparams)
5908 # fill and remember the beparams dict
5909 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5910 self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
5913 #### instance parameters check
5915 # instance name verification
5916 if self.op.name_check:
5917 hostname1 = utils.GetHostInfo(self.op.instance_name)
5918 self.op.instance_name = instance_name = hostname1.name
5919 # used in CheckPrereq for ip ping check
5920 self.check_ip = hostname1.ip
5922 instance_name = self.op.instance_name
5923 self.check_ip = None
5925 # this is just a preventive check, but someone might still add this
5926 # instance in the meantime, and creation will fail at lock-add time
5927 if instance_name in self.cfg.GetInstanceList():
5928 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5929 instance_name, errors.ECODE_EXISTS)
5931 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
5935 for idx, nic in enumerate(self.op.nics):
5936 nic_mode_req = nic.get("mode", None)
5937 nic_mode = nic_mode_req
5938 if nic_mode is None:
5939 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5941 # in routed mode, for the first nic, the default ip is 'auto'
5942 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
5943 default_ip_mode = constants.VALUE_AUTO
5945 default_ip_mode = constants.VALUE_NONE
5947 # ip validity checks
5948 ip = nic.get("ip", default_ip_mode)
5949 if ip is None or ip.lower() == constants.VALUE_NONE:
5951 elif ip.lower() == constants.VALUE_AUTO:
5952 if not self.op.name_check:
5953 raise errors.OpPrereqError("IP address set to auto but name checks"
5954 " have been skipped. Aborting.",
5956 nic_ip = hostname1.ip
5958 if not utils.IsValidIP(ip):
5959 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
5960 " like a valid IP" % ip,
5964 # TODO: check the ip address for uniqueness
5965 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
5966 raise errors.OpPrereqError("Routed nic mode requires an ip address",
5969 # MAC address verification
5970 mac = nic.get("mac", constants.VALUE_AUTO)
5971 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5972 mac = utils.NormalizeAndValidateMac(mac)
5975 self.cfg.ReserveMAC(mac, self.proc.GetECId())
5976 except errors.ReservationError:
5977 raise errors.OpPrereqError("MAC address %s already in use"
5978 " in cluster" % mac,
5979 errors.ECODE_NOTUNIQUE)
5981 # bridge verification
5982 bridge = nic.get("bridge", None)
5983 link = nic.get("link", None)
5985 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
5986 " at the same time", errors.ECODE_INVAL)
5987 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
5988 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
5995 nicparams[constants.NIC_MODE] = nic_mode_req
5997 nicparams[constants.NIC_LINK] = link
5999 check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6001 objects.NIC.CheckParameterSyntax(check_params)
6002 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6004 # disk checks/pre-build
6006 for disk in self.op.disks:
6007 mode = disk.get("mode", constants.DISK_RDWR)
6008 if mode not in constants.DISK_ACCESS_SET:
6009 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6010 mode, errors.ECODE_INVAL)
6011 size = disk.get("size", None)
6013 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6016 except (TypeError, ValueError):
6017 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6019 new_disk = {"size": size, "mode": mode}
6021 new_disk["adopt"] = disk["adopt"]
6022 self.disks.append(new_disk)
6024 # file storage checks
6025 if (self.op.file_driver and
6026 not self.op.file_driver in constants.FILE_DRIVER):
6027 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6028 self.op.file_driver, errors.ECODE_INVAL)
6030 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6031 raise errors.OpPrereqError("File storage directory path not absolute",
6034 ### Node/iallocator related checks
6035 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6036 raise errors.OpPrereqError("One and only one of iallocator and primary"
6037 " node must be given",
6040 if self.op.iallocator:
6041 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6043 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6044 nodelist = [self.op.pnode]
6045 if self.op.snode is not None:
6046 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6047 nodelist.append(self.op.snode)
6048 self.needed_locks[locking.LEVEL_NODE] = nodelist
6050 # in case of import lock the source node too
6051 if self.op.mode == constants.INSTANCE_IMPORT:
6052 src_node = getattr(self.op, "src_node", None)
6053 src_path = getattr(self.op, "src_path", None)
6055 if src_path is None:
6056 self.op.src_path = src_path = self.op.instance_name
6058 if src_node is None:
6059 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6060 self.op.src_node = None
6061 if os.path.isabs(src_path):
6062 raise errors.OpPrereqError("Importing an instance from an absolute"
6063 " path requires a source node option.",
6066 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6067 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6068 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6069 if not os.path.isabs(src_path):
6070 self.op.src_path = src_path = \
6071 utils.PathJoin(constants.EXPORT_DIR, src_path)
6073 # On import force_variant must be True, because if we forced it at
6074 # initial install, our only chance when importing it back is that it
6076 self.op.force_variant = True
6078 if self.op.no_install:
6079 self.LogInfo("No-installation mode has no effect during import")
6081 else: # INSTANCE_CREATE
6082 if getattr(self.op, "os_type", None) is None:
6083 raise errors.OpPrereqError("No guest OS specified",
6085 self.op.force_variant = getattr(self.op, "force_variant", False)
6087 def _RunAllocator(self):
6088 """Run the allocator based on input opcode.
6091 nics = [n.ToDict() for n in self.nics]
6092 ial = IAllocator(self.cfg, self.rpc,
6093 mode=constants.IALLOCATOR_MODE_ALLOC,
6094 name=self.op.instance_name,
6095 disk_template=self.op.disk_template,
6098 vcpus=self.be_full[constants.BE_VCPUS],
6099 mem_size=self.be_full[constants.BE_MEMORY],
6102 hypervisor=self.op.hypervisor,
6105 ial.Run(self.op.iallocator)
6108 raise errors.OpPrereqError("Can't compute nodes using"
6109 " iallocator '%s': %s" %
6110 (self.op.iallocator, ial.info),
6112 if len(ial.result) != ial.required_nodes:
6113 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6114 " of nodes (%s), required %s" %
6115 (self.op.iallocator, len(ial.result),
6116 ial.required_nodes), errors.ECODE_FAULT)
6117 self.op.pnode = ial.result[0]
6118 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6119 self.op.instance_name, self.op.iallocator,
6120 utils.CommaJoin(ial.result))
6121 if ial.required_nodes == 2:
6122 self.op.snode = ial.result[1]
6124 def BuildHooksEnv(self):
6127 This runs on master, primary and secondary nodes of the instance.
6131 "ADD_MODE": self.op.mode,
6133 if self.op.mode == constants.INSTANCE_IMPORT:
6134 env["SRC_NODE"] = self.op.src_node
6135 env["SRC_PATH"] = self.op.src_path
6136 env["SRC_IMAGES"] = self.src_images
6138 env.update(_BuildInstanceHookEnv(
6139 name=self.op.instance_name,
6140 primary_node=self.op.pnode,
6141 secondary_nodes=self.secondaries,
6142 status=self.op.start,
6143 os_type=self.op.os_type,
6144 memory=self.be_full[constants.BE_MEMORY],
6145 vcpus=self.be_full[constants.BE_VCPUS],
6146 nics=_NICListToTuple(self, self.nics),
6147 disk_template=self.op.disk_template,
6148 disks=[(d["size"], d["mode"]) for d in self.disks],
6151 hypervisor_name=self.op.hypervisor,
6154 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6159 def CheckPrereq(self):
6160 """Check prerequisites.
6163 if (not self.cfg.GetVGName() and
6164 self.op.disk_template not in constants.DTS_NOT_LVM):
6165 raise errors.OpPrereqError("Cluster does not support lvm-based"
6166 " instances", errors.ECODE_STATE)
6168 if self.op.mode == constants.INSTANCE_IMPORT:
6169 src_node = self.op.src_node
6170 src_path = self.op.src_path
6172 if src_node is None:
6173 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6174 exp_list = self.rpc.call_export_list(locked_nodes)
6176 for node in exp_list:
6177 if exp_list[node].fail_msg:
6179 if src_path in exp_list[node].payload:
6181 self.op.src_node = src_node = node
6182 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6186 raise errors.OpPrereqError("No export found for relative path %s" %
6187 src_path, errors.ECODE_INVAL)
6189 _CheckNodeOnline(self, src_node)
6190 result = self.rpc.call_export_info(src_node, src_path)
6191 result.Raise("No export or invalid export found in dir %s" % src_path)
6193 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6194 if not export_info.has_section(constants.INISECT_EXP):
6195 raise errors.ProgrammerError("Corrupted export config",
6196 errors.ECODE_ENVIRON)
6198 ei_version = export_info.get(constants.INISECT_EXP, 'version')
6199 if (int(ei_version) != constants.EXPORT_VERSION):
6200 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6201 (ei_version, constants.EXPORT_VERSION),
6202 errors.ECODE_ENVIRON)
6204 # Check that the new instance doesn't have fewer disks than the export
6205 instance_disks = len(self.disks)
6206 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6207 if instance_disks < export_disks:
6208 raise errors.OpPrereqError("Not enough disks to import."
6209 " (instance: %d, export: %d)" %
6210 (instance_disks, export_disks),
6213 self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
6215 for idx in range(export_disks):
6216 option = 'disk%d_dump' % idx
6217 if export_info.has_option(constants.INISECT_INS, option):
6218 # FIXME: are the old os-es, disk sizes, etc. useful?
6219 export_name = export_info.get(constants.INISECT_INS, option)
6220 image = utils.PathJoin(src_path, export_name)
6221 disk_images.append(image)
6223 disk_images.append(False)
6225 self.src_images = disk_images
6227 old_name = export_info.get(constants.INISECT_INS, 'name')
6228 # FIXME: int() here could throw a ValueError on broken exports
6229 exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
6230 if self.op.instance_name == old_name:
6231 for idx, nic in enumerate(self.nics):
6232 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6233 nic_mac_ini = 'nic%d_mac' % idx
6234 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6236 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6238 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6239 if self.op.ip_check:
6240 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6241 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6242 (self.check_ip, self.op.instance_name),
6243 errors.ECODE_NOTUNIQUE)
6245 #### mac address generation
6246 # By generating the MAC address here, both the allocator and the hooks get
6247 # the real, final MAC address rather than the 'auto' or 'generate' value.
6248 # There is a race condition between the generation and the instance object
6249 # creation, which means that we know the mac is valid now, but we're not
6250 # sure it will be when we actually add the instance. If things go bad
6251 # adding the instance will abort because of a duplicate mac, and the
6252 # creation job will fail.
6253 for nic in self.nics:
6254 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6255 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6259 if self.op.iallocator is not None:
6260 self._RunAllocator()
6262 #### node related checks
6264 # check primary node
6265 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6266 assert self.pnode is not None, \
6267 "Cannot retrieve locked node %s" % self.op.pnode
6269 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6270 pnode.name, errors.ECODE_STATE)
6272 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6273 pnode.name, errors.ECODE_STATE)
6275 self.secondaries = []
6277 # mirror node verification
6278 if self.op.disk_template in constants.DTS_NET_MIRROR:
6279 if self.op.snode is None:
6280 raise errors.OpPrereqError("The networked disk templates need"
6281 " a mirror node", errors.ECODE_INVAL)
6282 if self.op.snode == pnode.name:
6283 raise errors.OpPrereqError("The secondary node cannot be the"
6284 " primary node.", errors.ECODE_INVAL)
6285 _CheckNodeOnline(self, self.op.snode)
6286 _CheckNodeNotDrained(self, self.op.snode)
6287 self.secondaries.append(self.op.snode)
6289 nodenames = [pnode.name] + self.secondaries
6291 req_size = _ComputeDiskSize(self.op.disk_template,
6294 # Check lv size requirements, if not adopting
6295 if req_size is not None and not self.adopt_disks:
6296 _CheckNodesFreeDisk(self, nodenames, req_size)
6298 if self.adopt_disks: # instead, we must check the adoption data
6299 all_lvs = set([i["adopt"] for i in self.disks])
6300 if len(all_lvs) != len(self.disks):
6301 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6303 for lv_name in all_lvs:
6305 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6306 except errors.ReservationError:
6307 raise errors.OpPrereqError("LV named %s used by another instance" %
6308 lv_name, errors.ECODE_NOTUNIQUE)
6310 node_lvs = self.rpc.call_lv_list([pnode.name],
6311 self.cfg.GetVGName())[pnode.name]
6312 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6313 node_lvs = node_lvs.payload
6314 delta = all_lvs.difference(node_lvs.keys())
6316 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6317 utils.CommaJoin(delta),
6319 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6321 raise errors.OpPrereqError("Online logical volumes found, cannot"
6322 " adopt: %s" % utils.CommaJoin(online_lvs),
6324 # update the size of the disks based on what is found
6325 for dsk in self.disks:
6326 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
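# Note (assumption inferred from the indexing above): the lv_list payload maps
# each LV name to a tuple whose first element is its size in MB (copied into
# dsk["size"]) and whose third element is the in-use/online flag checked by
# the adoption safety test.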
6328 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6330 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6332 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6334 # memory check on primary node
6336 _CheckNodeFreeMemory(self, self.pnode.name,
6337 "creating instance %s" % self.op.instance_name,
6338 self.be_full[constants.BE_MEMORY],
6341 self.dry_run_result = list(nodenames)
6343 def Exec(self, feedback_fn):
6344 """Create and add the instance to the cluster.
6347 instance = self.op.instance_name
6348 pnode_name = self.pnode.name
6350 ht_kind = self.op.hypervisor
6351 if ht_kind in constants.HTS_REQ_PORT:
6352 network_port = self.cfg.AllocatePort()
6356 ##if self.op.vnc_bind_address is None:
6357 ## self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
6359 # this is needed because os.path.join does not accept None arguments
6360 if self.op.file_storage_dir is None:
6361 string_file_storage_dir = ""
6363 string_file_storage_dir = self.op.file_storage_dir
6365 # build the full file storage dir path
6366 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6367 string_file_storage_dir, instance)
6370 disks = _GenerateDiskTemplate(self,
6371 self.op.disk_template,
6372 instance, pnode_name,
6376 self.op.file_driver,
6379 iobj = objects.Instance(name=instance, os=self.op.os_type,
6380 primary_node=pnode_name,
6381 nics=self.nics, disks=disks,
6382 disk_template=self.op.disk_template,
6384 network_port=network_port,
6385 beparams=self.op.beparams,
6386 hvparams=self.op.hvparams,
6387 hypervisor=self.op.hypervisor,
6390 if self.adopt_disks:
6391 # rename LVs to the newly-generated names; we need to construct
6392 # 'fake' LV disks with the old data, plus the new unique_id
6393 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6395 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
6396 rename_to.append(t_dsk.logical_id)
6397 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6398 self.cfg.SetDiskID(t_dsk, pnode_name)
6399 result = self.rpc.call_blockdev_rename(pnode_name,
6400 zip(tmp_disks, rename_to))
6401 result.Raise("Failed to rename adopted LVs")
6403 feedback_fn("* creating instance disks...")
6405 _CreateDisks(self, iobj)
6406 except errors.OpExecError:
6407 self.LogWarning("Device creation failed, reverting...")
6409 _RemoveDisks(self, iobj)
6411 self.cfg.ReleaseDRBDMinors(instance)
6414 feedback_fn("adding instance %s to cluster config" % instance)
6416 self.cfg.AddInstance(iobj, self.proc.GetECId())
6418 # Declare that we don't want to remove the instance lock anymore, as we've
6419 # added the instance to the config
6420 del self.remove_locks[locking.LEVEL_INSTANCE]
6421 # Unlock all the nodes
6422 if self.op.mode == constants.INSTANCE_IMPORT:
6423 nodes_keep = [self.op.src_node]
6424 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6425 if node != self.op.src_node]
6426 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6427 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6429 self.context.glm.release(locking.LEVEL_NODE)
6430 del self.acquired_locks[locking.LEVEL_NODE]
6432 if self.op.wait_for_sync:
6433 disk_abort = not _WaitForSync(self, iobj)
6434 elif iobj.disk_template in constants.DTS_NET_MIRROR:
6435 # make sure the disks are not degraded (still sync-ing is ok)
6437 feedback_fn("* checking mirrors status")
6438 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6443 _RemoveDisks(self, iobj)
6444 self.cfg.RemoveInstance(iobj.name)
6445 # Make sure the instance lock gets removed
6446 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6447 raise errors.OpExecError("There are some degraded disks for"
6450 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6451 if self.op.mode == constants.INSTANCE_CREATE:
6452 if not self.op.no_install:
6453 feedback_fn("* running the instance OS create scripts...")
6454 # FIXME: pass debug option from opcode to backend
6455 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6456 self.op.debug_level)
6457 result.Raise("Could not add os for instance %s"
6458 " on node %s" % (instance, pnode_name))
6460 elif self.op.mode == constants.INSTANCE_IMPORT:
6461 feedback_fn("* running the instance OS import scripts...")
6462 src_node = self.op.src_node
6463 src_images = self.src_images
6464 cluster_name = self.cfg.GetClusterName()
6465 # FIXME: pass debug option from opcode to backend
6466 import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6467 src_node, src_images,
6469 self.op.debug_level)
6470 msg = import_result.fail_msg
6472 self.LogWarning("Error while importing the disk images for instance"
6473 " %s on node %s: %s" % (instance, pnode_name, msg))
6475 # also checked in the prereq part
6476 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6480 iobj.admin_up = True
6481 self.cfg.Update(iobj, feedback_fn)
6482 logging.info("Starting instance %s on node %s", instance, pnode_name)
6483 feedback_fn("* starting instance...")
6484 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6485 result.Raise("Could not start instance")
6487 return list(iobj.all_nodes)
6490 class LUConnectConsole(NoHooksLU):
6491 """Connect to an instance's console.
6493 This is somewhat special in that it returns the command line that
6494 you need to run on the master node in order to connect to the
6498 _OP_REQP = ["instance_name"]
6501 def ExpandNames(self):
6502 self._ExpandAndLockInstance()
6504 def CheckPrereq(self):
6505 """Check prerequisites.
6507 This checks that the instance is in the cluster.
6510 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6511 assert self.instance is not None, \
6512 "Cannot retrieve locked instance %s" % self.op.instance_name
6513 _CheckNodeOnline(self, self.instance.primary_node)
6515 def Exec(self, feedback_fn):
6516 """Connect to the console of an instance
6519 instance = self.instance
6520 node = instance.primary_node
6522 node_insts = self.rpc.call_instance_list([node],
6523 [instance.hypervisor])[node]
6524 node_insts.Raise("Can't get node information from %s" % node)
6526 if instance.name not in node_insts.payload:
6527 raise errors.OpExecError("Instance %s is not running." % instance.name)
6529 logging.debug("Connecting to console of %s on %s", instance.name, node)
6531 hyper = hypervisor.GetHypervisor(instance.hypervisor)
6532 cluster = self.cfg.GetClusterInfo()
6533 # beparams and hvparams are passed separately, to avoid editing the
6534 # instance and then saving the defaults in the instance itself.
6535 hvparams = cluster.FillHV(instance)
6536 beparams = cluster.FillBE(instance)
6537 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6540 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
6543 class LUReplaceDisks(LogicalUnit):
6544 """Replace the disks of an instance.
6547 HPATH = "mirrors-replace"
6548 HTYPE = constants.HTYPE_INSTANCE
6549 _OP_REQP = ["instance_name", "mode", "disks"]
6552 def CheckArguments(self):
6553 if not hasattr(self.op, "remote_node"):
6554 self.op.remote_node = None
6555 if not hasattr(self.op, "iallocator"):
6556 self.op.iallocator = None
6557 if not hasattr(self.op, "early_release"):
6558 self.op.early_release = False
6560 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6563 def ExpandNames(self):
6564 self._ExpandAndLockInstance()
6566 if self.op.iallocator is not None:
6567 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6569 elif self.op.remote_node is not None:
6570 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6571 self.op.remote_node = remote_node
6573 # Warning: do not remove the locking of the new secondary here
6574 # unless DRBD8.AddChildren is changed to work in parallel;
6575 # currently it doesn't since parallel invocations of
6576 # FindUnusedMinor will conflict
6577 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6578 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6581 self.needed_locks[locking.LEVEL_NODE] = []
6582 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6584 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6585 self.op.iallocator, self.op.remote_node,
6586 self.op.disks, False, self.op.early_release)
6588 self.tasklets = [self.replacer]
6590 def DeclareLocks(self, level):
6591 # If we're not already locking all nodes in the set we have to declare the
6592 # instance's primary/secondary nodes.
6593 if (level == locking.LEVEL_NODE and
6594 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6595 self._LockInstancesNodes()
6597 def BuildHooksEnv(self):
6600 This runs on the master, the primary and all the secondaries.
6603 instance = self.replacer.instance
6605 "MODE": self.op.mode,
6606 "NEW_SECONDARY": self.op.remote_node,
6607 "OLD_SECONDARY": instance.secondary_nodes[0],
6609 env.update(_BuildInstanceHookEnvByObject(self, instance))
6611 self.cfg.GetMasterNode(),
6612 instance.primary_node,
6614 if self.op.remote_node is not None:
6615 nl.append(self.op.remote_node)
6619 class LUEvacuateNode(LogicalUnit):
6620 """Relocate the secondary instances from a node.
6623 HPATH = "node-evacuate"
6624 HTYPE = constants.HTYPE_NODE
6625 _OP_REQP = ["node_name"]
6628 def CheckArguments(self):
6629 if not hasattr(self.op, "remote_node"):
6630 self.op.remote_node = None
6631 if not hasattr(self.op, "iallocator"):
6632 self.op.iallocator = None
6633 if not hasattr(self.op, "early_release"):
6634 self.op.early_release = False
6636 TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6637 self.op.remote_node,
6640 def ExpandNames(self):
6641 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6643 self.needed_locks = {}
6645 # Declare node locks
6646 if self.op.iallocator is not None:
6647 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6649 elif self.op.remote_node is not None:
6650 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6652 # Warning: do not remove the locking of the new secondary here
6653 # unless DRBD8.AddChildren is changed to work in parallel;
6654 # currently it doesn't since parallel invocations of
6655 # FindUnusedMinor will conflict
6656 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
6657 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6660 raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
6662 # Create tasklets for replacing disks for all secondary instances on this
6667 for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
6668 logging.debug("Replacing disks for instance %s", inst.name)
6669 names.append(inst.name)
6671 replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
6672 self.op.iallocator, self.op.remote_node, [],
6673 True, self.op.early_release)
6674 tasklets.append(replacer)
6676 self.tasklets = tasklets
6677 self.instance_names = names
6679 # Declare instance locks
6680 self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
6682 def DeclareLocks(self, level):
6683 # If we're not already locking all nodes in the set we have to declare the
6684 # instance's primary/secondary nodes.
6685 if (level == locking.LEVEL_NODE and
6686 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6687 self._LockInstancesNodes()
6689 def BuildHooksEnv(self):
6692 This runs on the master, the primary and all the secondaries.
6696 "NODE_NAME": self.op.node_name,
6699 nl = [self.cfg.GetMasterNode()]
6701 if self.op.remote_node is not None:
6702 env["NEW_SECONDARY"] = self.op.remote_node
6703 nl.append(self.op.remote_node)
6705 return (env, nl, nl)
6708 class TLReplaceDisks(Tasklet):
6709 """Replaces disks for an instance.
6711 Note: Locking is not within the scope of this class.
6714 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
6715 disks, delay_iallocator, early_release):
6716 """Initializes this class.
6719 Tasklet.__init__(self, lu)
6722 self.instance_name = instance_name
6724 self.iallocator_name = iallocator_name
6725 self.remote_node = remote_node
6727 self.delay_iallocator = delay_iallocator
6728 self.early_release = early_release
6731 self.instance = None
6732 self.new_node = None
6733 self.target_node = None
6734 self.other_node = None
6735 self.remote_node_info = None
6736 self.node_secondary_ip = None
6739 def CheckArguments(mode, remote_node, iallocator):
6740 """Helper function for users of this class.
6743 # check for valid parameter combination
6744 if mode == constants.REPLACE_DISK_CHG:
6745 if remote_node is None and iallocator is None:
6746 raise errors.OpPrereqError("When changing the secondary either an"
6747 " iallocator script must be used or the"
6748 " new node given", errors.ECODE_INVAL)
6750 if remote_node is not None and iallocator is not None:
6751 raise errors.OpPrereqError("Give either the iallocator or the new"
6752 " secondary, not both", errors.ECODE_INVAL)
6754 elif remote_node is not None or iallocator is not None:
6755 # Not replacing the secondary
6756 raise errors.OpPrereqError("The iallocator and new node options can"
6757 " only be used when changing the"
6758 " secondary node", errors.ECODE_INVAL)
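# In short, the combinations accepted above are:
#   - REPLACE_DISK_PRI / REPLACE_DISK_SEC / REPLACE_DISK_AUTO: neither a new
#     secondary node nor an iallocator may be given;
#   - REPLACE_DISK_CHG: exactly one of the two must be given, e.g.
#     (hypothetical node name):
#       TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
#                                     "node2.example.com", None)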
6761 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
6762 """Compute a new secondary node using an IAllocator.
6765 ial = IAllocator(lu.cfg, lu.rpc,
6766 mode=constants.IALLOCATOR_MODE_RELOC,
6768 relocate_from=relocate_from)
6770 ial.Run(iallocator_name)
6773 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
6774 " %s" % (iallocator_name, ial.info),
6777 if len(ial.result) != ial.required_nodes:
6778 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6779 " of nodes (%s), required %s" %
6781 len(ial.result), ial.required_nodes),
6784 remote_node_name = ial.result[0]
6786 lu.LogInfo("Selected new secondary for instance '%s': %s",
6787 instance_name, remote_node_name)
6789 return remote_node_name
6791 def _FindFaultyDisks(self, node_name):
6792 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
6795 def CheckPrereq(self):
6796 """Check prerequisites.
6798 This checks that the instance is in the cluster.
6801 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
6802 assert instance is not None, \
6803 "Cannot retrieve locked instance %s" % self.instance_name
6805 if instance.disk_template != constants.DT_DRBD8:
6806 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
6807 " instances", errors.ECODE_INVAL)
6809 if len(instance.secondary_nodes) != 1:
6810 raise errors.OpPrereqError("The instance has a strange layout,"
6811 " expected one secondary but found %d" %
6812 len(instance.secondary_nodes),
6815 if not self.delay_iallocator:
6816 self._CheckPrereq2()
6818 def _CheckPrereq2(self):
6819 """Check prerequisites, second part.
6821 This function should always be part of CheckPrereq. It was separated and is
6822 now called from Exec because during node evacuation iallocator was only
6823 called with an unmodified cluster model, not taking planned changes into
6827 instance = self.instance
6828 secondary_node = instance.secondary_nodes[0]
6830 if self.iallocator_name is None:
6831 remote_node = self.remote_node
6833 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
6834 instance.name, instance.secondary_nodes)
6836 if remote_node is not None:
6837 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
6838 assert self.remote_node_info is not None, \
6839 "Cannot retrieve locked node %s" % remote_node
6841 self.remote_node_info = None
6843 if remote_node == self.instance.primary_node:
6844 raise errors.OpPrereqError("The specified node is the primary node of"
6845 " the instance.", errors.ECODE_INVAL)
6847 if remote_node == secondary_node:
6848 raise errors.OpPrereqError("The specified node is already the"
6849 " secondary node of the instance.",
6852 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
6853 constants.REPLACE_DISK_CHG):
6854 raise errors.OpPrereqError("Cannot specify disks to be replaced",
6857 if self.mode == constants.REPLACE_DISK_AUTO:
6858 faulty_primary = self._FindFaultyDisks(instance.primary_node)
6859 faulty_secondary = self._FindFaultyDisks(secondary_node)
6861 if faulty_primary and faulty_secondary:
6862 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
6863 " one node and can not be repaired"
6864 " automatically" % self.instance_name,
6868 self.disks = faulty_primary
6869 self.target_node = instance.primary_node
6870 self.other_node = secondary_node
6871 check_nodes = [self.target_node, self.other_node]
6872 elif faulty_secondary:
6873 self.disks = faulty_secondary
6874 self.target_node = secondary_node
6875 self.other_node = instance.primary_node
6876 check_nodes = [self.target_node, self.other_node]
6882 # Non-automatic modes
6883 if self.mode == constants.REPLACE_DISK_PRI:
6884 self.target_node = instance.primary_node
6885 self.other_node = secondary_node
6886 check_nodes = [self.target_node, self.other_node]
6888 elif self.mode == constants.REPLACE_DISK_SEC:
6889 self.target_node = secondary_node
6890 self.other_node = instance.primary_node
6891 check_nodes = [self.target_node, self.other_node]
6893 elif self.mode == constants.REPLACE_DISK_CHG:
6894 self.new_node = remote_node
6895 self.other_node = instance.primary_node
6896 self.target_node = secondary_node
6897 check_nodes = [self.new_node, self.other_node]
6899 _CheckNodeNotDrained(self.lu, remote_node)
6901 old_node_info = self.cfg.GetNodeInfo(secondary_node)
6902 assert old_node_info is not None
6903 if old_node_info.offline and not self.early_release:
6904 # doesn't make sense to delay the release
6905 self.early_release = True
6906 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
6907 " early-release mode", secondary_node)
6910 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
6913 # If not specified all disks should be replaced
6915 self.disks = range(len(self.instance.disks))
6917 for node in check_nodes:
6918 _CheckNodeOnline(self.lu, node)
6920 # Check whether disks are valid
6921 for disk_idx in self.disks:
6922 instance.FindDisk(disk_idx)
6924 # Get secondary node IP addresses
6927 for node_name in [self.target_node, self.other_node, self.new_node]:
6928 if node_name is not None:
6929 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
6931 self.node_secondary_ip = node_2nd_ip
6933 def Exec(self, feedback_fn):
6934 """Execute disk replacement.
6936 This dispatches the disk replacement to the appropriate handler.
6939 if self.delay_iallocator:
6940 self._CheckPrereq2()
6943 feedback_fn("No disks need replacement")
6946 feedback_fn("Replacing disk(s) %s for %s" %
6947 (utils.CommaJoin(self.disks), self.instance.name))
6949 activate_disks = (not self.instance.admin_up)
6951 # Activate the instance disks if we're replacing them on a down instance
6953 _StartInstanceDisks(self.lu, self.instance, True)
6956 # Should we replace the secondary node?
6957 if self.new_node is not None:
6958 fn = self._ExecDrbd8Secondary
6960 fn = self._ExecDrbd8DiskOnly
6962 return fn(feedback_fn)
6965 # Deactivate the instance disks if we're replacing them on a
6968 _SafeShutdownInstanceDisks(self.lu, self.instance)
6970 def _CheckVolumeGroup(self, nodes):
6971 self.lu.LogInfo("Checking volume groups")
6973 vgname = self.cfg.GetVGName()
6975 # Make sure volume group exists on all involved nodes
6976 results = self.rpc.call_vg_list(nodes)
6978 raise errors.OpExecError("Can't list volume groups on the nodes")
6982 res.Raise("Error checking node %s" % node)
6983 if vgname not in res.payload:
6984 raise errors.OpExecError("Volume group '%s' not found on node %s" %
6987 def _CheckDisksExistence(self, nodes):
6988 # Check disk existence
6989 for idx, dev in enumerate(self.instance.disks):
6990 if idx not in self.disks:
6994 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
6995 self.cfg.SetDiskID(dev, node)
6997 result = self.rpc.call_blockdev_find(node, dev)
6999 msg = result.fail_msg
7000 if msg or not result.payload:
7002 msg = "disk not found"
7003 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7006 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7007 for idx, dev in enumerate(self.instance.disks):
7008 if idx not in self.disks:
7011 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7014 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7016 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7017 " replace disks for instance %s" %
7018 (node_name, self.instance.name))
7020 def _CreateNewStorage(self, node_name):
7021 vgname = self.cfg.GetVGName()
7024 for idx, dev in enumerate(self.instance.disks):
7025 if idx not in self.disks:
7028 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7030 self.cfg.SetDiskID(dev, node_name)
7032 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7033 names = _GenerateUniqueNames(self.lu, lv_names)
7035 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7036 logical_id=(vgname, names[0]))
7037 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7038 logical_id=(vgname, names[1]))
7040 new_lvs = [lv_data, lv_meta]
7041 old_lvs = dev.children
7042 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7044 # we pass force_create=True to force the LVM creation
7045 for new_lv in new_lvs:
7046 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7047 _GetInstanceInfoText(self.instance), False)
7051 def _CheckDevices(self, node_name, iv_names):
7052 for name, (dev, _, _) in iv_names.iteritems():
7053 self.cfg.SetDiskID(dev, node_name)
7055 result = self.rpc.call_blockdev_find(node_name, dev)
7057 msg = result.fail_msg
7058 if msg or not result.payload:
7060 msg = "disk not found"
7061 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7064 if result.payload.is_degraded:
7065 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7067 def _RemoveOldStorage(self, node_name, iv_names):
7068 for name, (_, old_lvs, _) in iv_names.iteritems():
7069 self.lu.LogInfo("Remove logical volumes for %s" % name)
7072 self.cfg.SetDiskID(lv, node_name)
7074 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7076 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7077 hint="remove unused LVs manually")
7079 def _ReleaseNodeLock(self, node_name):
7080 """Releases the lock for a given node."""
7081 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7083 def _ExecDrbd8DiskOnly(self, feedback_fn):
7084 """Replace a disk on the primary or secondary for DRBD 8.
7086 The algorithm for replace is quite complicated:
7088 1. for each disk to be replaced:
7090 1. create new LVs on the target node with unique names
7091 1. detach old LVs from the drbd device
7092 1. rename old LVs to name_replaced.<time_t>
7093 1. rename new LVs to old LVs
7094 1. attach the new LVs (with the old names now) to the drbd device
7096 1. wait for sync across all devices
7098 1. for each modified disk:
7100 1. remove old LVs (which have the name name_replaced.<time_t>)
7102 Failures are not very well handled.
7107 # Step: check device activation
7108 self.lu.LogStep(1, steps_total, "Check device existence")
7109 self._CheckDisksExistence([self.other_node, self.target_node])
7110 self._CheckVolumeGroup([self.target_node, self.other_node])
7112 # Step: check other node consistency
7113 self.lu.LogStep(2, steps_total, "Check peer consistency")
7114 self._CheckDisksConsistency(self.other_node,
7115 self.other_node == self.instance.primary_node,
7118 # Step: create new storage
7119 self.lu.LogStep(3, steps_total, "Allocate new storage")
7120 iv_names = self._CreateNewStorage(self.target_node)
7122 # Step: for each lv, detach+rename*2+attach
7123 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7124 for dev, old_lvs, new_lvs in iv_names.itervalues():
7125 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7127 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7129 result.Raise("Can't detach drbd from local storage on node"
7130 " %s for device %s" % (self.target_node, dev.iv_name))
7132 #cfg.Update(instance)
7134 # ok, we created the new LVs, so now we know we have the needed
7135 # storage; as such, we proceed on the target node to rename
7136 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7137 # using the assumption that logical_id == physical_id (which in
7138 # turn is the unique_id on that node)
7140 # FIXME(iustin): use a better name for the replaced LVs
7141 temp_suffix = int(time.time())
7142 ren_fn = lambda d, suff: (d.physical_id[0],
7143 d.physical_id[1] + "_replaced-%s" % suff)
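# For example (hypothetical names): an old LV with physical_id
# ("xenvg", "disk0_data") is renamed by ren_fn to
# ("xenvg", "disk0_data_replaced-<time_t>"), freeing its original name for the
# freshly created replacement LV.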
7145 # Build the rename list based on what LVs exist on the node
7146 rename_old_to_new = []
7147 for to_ren in old_lvs:
7148 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7149 if not result.fail_msg and result.payload:
7151 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7153 self.lu.LogInfo("Renaming the old LVs on the target node")
7154 result = self.rpc.call_blockdev_rename(self.target_node,
7156 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7158 # Now we rename the new LVs to the old LVs
7159 self.lu.LogInfo("Renaming the new LVs on the target node")
7160 rename_new_to_old = [(new, old.physical_id)
7161 for old, new in zip(old_lvs, new_lvs)]
7162 result = self.rpc.call_blockdev_rename(self.target_node,
7164 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7166 for old, new in zip(old_lvs, new_lvs):
7167 new.logical_id = old.logical_id
7168 self.cfg.SetDiskID(new, self.target_node)
7170 for disk in old_lvs:
7171 disk.logical_id = ren_fn(disk, temp_suffix)
7172 self.cfg.SetDiskID(disk, self.target_node)
7174 # Now that the new lvs have the old name, we can add them to the device
7175 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7176 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7178 msg = result.fail_msg
7180 for new_lv in new_lvs:
7181 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7184 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7185 hint=("cleanup manually the unused logical"
7187 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7189 dev.children = new_lvs
7191 self.cfg.Update(self.instance, feedback_fn)
7194 if self.early_release:
7195 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7197 self._RemoveOldStorage(self.target_node, iv_names)
7198 # WARNING: we release both node locks here, do not do other RPCs
7199 # than WaitForSync to the primary node
7200 self._ReleaseNodeLock([self.target_node, self.other_node])
7203 # This can fail as the old devices are degraded and _WaitForSync
7204 # does a combined result over all disks, so we don't check its return value
7205 self.lu.LogStep(cstep, steps_total, "Sync devices")
7207 _WaitForSync(self.lu, self.instance)
7209 # Check all devices manually
7210 self._CheckDevices(self.instance.primary_node, iv_names)
7212 # Step: remove old storage
7213 if not self.early_release:
7214 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7216 self._RemoveOldStorage(self.target_node, iv_names)
7218 def _ExecDrbd8Secondary(self, feedback_fn):
7219 """Replace the secondary node for DRBD 8.
7221 The algorithm for replace is quite complicated:
7222 - for all disks of the instance:
7223 - create new LVs on the new node with same names
7224 - shutdown the drbd device on the old secondary
7225 - disconnect the drbd network on the primary
7226 - create the drbd device on the new secondary
7227 - network attach the drbd on the primary, using an artifice:
7228 the drbd code for Attach() will connect to the network if it
7229 finds a device which is connected to the good local disks but
7231 - wait for sync across all devices
7232 - remove all disks from the old secondary
7234 Failures are not very well handled.
7239 # Step: check device activation
7240 self.lu.LogStep(1, steps_total, "Check device existence")
7241 self._CheckDisksExistence([self.instance.primary_node])
7242 self._CheckVolumeGroup([self.instance.primary_node])
7244 # Step: check other node consistency
7245 self.lu.LogStep(2, steps_total, "Check peer consistency")
7246 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7248 # Step: create new storage
7249 self.lu.LogStep(3, steps_total, "Allocate new storage")
7250 for idx, dev in enumerate(self.instance.disks):
7251 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7252 (self.new_node, idx))
7253 # we pass force_create=True to force LVM creation
7254 for new_lv in dev.children:
7255 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7256 _GetInstanceInfoText(self.instance), False)
7258 # Step 4: drbd minors and drbd setup changes
7259 # after this, we must manually remove the drbd minors on both the
7260 # error and the success paths
7261 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7262 minors = self.cfg.AllocateDRBDMinor([self.new_node
7263 for dev in self.instance.disks],
7265 logging.debug("Allocated minors %r", minors)
7268 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7269 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7270 (self.new_node, idx))
7271 # create new devices on new_node; note that we create two IDs:
7272 # one without port, so the drbd will be activated without
7273 # networking information on the new node at this stage, and one
7274 # with network, for the later activation in step 4
7275 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7276 if self.instance.primary_node == o_node1:
7279 assert self.instance.primary_node == o_node2, "Three-node instance?"
7282 new_alone_id = (self.instance.primary_node, self.new_node, None,
7283 p_minor, new_minor, o_secret)
7284 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7285 p_minor, new_minor, o_secret)
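# Example (hypothetical values): with primary minor 3 and new minor 7,
# new_alone_id is (primary, new_node, None, 3, 7, secret) -- no port, so the
# new drbd comes up standalone -- while new_net_id is
# (primary, new_node, port, 3, 7, secret), carrying the network information
# used when the primary is re-attached later on.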
7287 iv_names[idx] = (dev, dev.children, new_net_id)
7288 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7290 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7291 logical_id=new_alone_id,
7292 children=dev.children,
7295 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7296 _GetInstanceInfoText(self.instance), False)
7297 except errors.GenericError:
7298 self.cfg.ReleaseDRBDMinors(self.instance.name)
7301 # We have new devices, shutdown the drbd on the old secondary
7302 for idx, dev in enumerate(self.instance.disks):
7303 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7304 self.cfg.SetDiskID(dev, self.target_node)
7305 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7307 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7308 " node: %s" % (idx, msg),
7309 hint=("Please cleanup this device manually as"
7310 " soon as possible"))
7312 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7313 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7314 self.node_secondary_ip,
7315 self.instance.disks)\
7316 [self.instance.primary_node]
7318 msg = result.fail_msg
7320 # detaches didn't succeed (unlikely)
7321 self.cfg.ReleaseDRBDMinors(self.instance.name)
7322 raise errors.OpExecError("Can't detach the disks from the network on"
7323 " old node: %s" % (msg,))
7325 # if we managed to detach at least one, we update all the disks of
7326 # the instance to point to the new secondary
7327 self.lu.LogInfo("Updating instance configuration")
7328 for dev, _, new_logical_id in iv_names.itervalues():
7329 dev.logical_id = new_logical_id
7330 self.cfg.SetDiskID(dev, self.instance.primary_node)
7332 self.cfg.Update(self.instance, feedback_fn)
7334 # and now perform the drbd attach
7335 self.lu.LogInfo("Attaching primary drbds to new secondary"
7336 " (standalone => connected)")
7337 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7339 self.node_secondary_ip,
7340 self.instance.disks,
7343 for to_node, to_result in result.items():
7344 msg = to_result.fail_msg
7346 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7348 hint=("please do a gnt-instance info to see the"
7349 " status of disks"))
7351 if self.early_release:
7352 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7354 self._RemoveOldStorage(self.target_node, iv_names)
7355 # WARNING: we release all node locks here, do not do other RPCs
7356 # than WaitForSync to the primary node
7357 self._ReleaseNodeLock([self.instance.primary_node,
7362 # This can fail as the old devices are degraded and _WaitForSync
7363 # does a combined result over all disks, so we don't check its return value
7364 self.lu.LogStep(cstep, steps_total, "Sync devices")
7366 _WaitForSync(self.lu, self.instance)
7368 # Check all devices manually
7369 self._CheckDevices(self.instance.primary_node, iv_names)
7371 # Step: remove old storage
7372 if not self.early_release:
7373 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7374 self._RemoveOldStorage(self.target_node, iv_names)
7377 class LURepairNodeStorage(NoHooksLU):
7378 """Repairs the volume group on a node.
7381 _OP_REQP = ["node_name"]
7384 def CheckArguments(self):
7385 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7387 def ExpandNames(self):
7388 self.needed_locks = {
7389 locking.LEVEL_NODE: [self.op.node_name],
7392 def _CheckFaultyDisks(self, instance, node_name):
7393 """Ensure faulty disks abort the opcode or at least warn."""
7395 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7397 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7398 " node '%s'" % (instance.name, node_name),
7400 except errors.OpPrereqError, err:
7401 if self.op.ignore_consistency:
7402 self.proc.LogWarning(str(err.args[0]))
7406 def CheckPrereq(self):
7407 """Check prerequisites.
7410 storage_type = self.op.storage_type
7412 if (constants.SO_FIX_CONSISTENCY not in
7413 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7414 raise errors.OpPrereqError("Storage units of type '%s' can not be"
7415 " repaired" % storage_type,
7418 # Check whether any instance on this node has faulty disks
7419 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7420 if not inst.admin_up:
7422 check_nodes = set(inst.all_nodes)
7423 check_nodes.discard(self.op.node_name)
7424 for inst_node_name in check_nodes:
7425 self._CheckFaultyDisks(inst, inst_node_name)
7427 def Exec(self, feedback_fn):
7428 feedback_fn("Repairing storage unit '%s' on %s ..." %
7429 (self.op.name, self.op.node_name))
7431 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7432 result = self.rpc.call_storage_execute(self.op.node_name,
7433 self.op.storage_type, st_args,
7435 constants.SO_FIX_CONSISTENCY)
7436 result.Raise("Failed to repair storage unit '%s' on %s" %
7437 (self.op.name, self.op.node_name))
7440 class LUNodeEvacuationStrategy(NoHooksLU):
7441 """Computes the node evacuation strategy.
7444 _OP_REQP = ["nodes"]
7447 def CheckArguments(self):
7448 if not hasattr(self.op, "remote_node"):
7449 self.op.remote_node = None
7450 if not hasattr(self.op, "iallocator"):
7451 self.op.iallocator = None
7452 if self.op.remote_node is not None and self.op.iallocator is not None:
7453 raise errors.OpPrereqError("Give either the iallocator or the new"
7454 " secondary, not both", errors.ECODE_INVAL)
7456 def ExpandNames(self):
7457 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7458 self.needed_locks = locks = {}
7459 if self.op.remote_node is None:
7460 locks[locking.LEVEL_NODE] = locking.ALL_SET
7462 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7463 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7465 def CheckPrereq(self):
7468 def Exec(self, feedback_fn):
7469 if self.op.remote_node is not None:
7471 for node in self.op.nodes:
7472 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7475 if i.primary_node == self.op.remote_node:
7476 raise errors.OpPrereqError("Node %s is the primary node of"
7477 " instance %s, cannot use it as"
7479 (self.op.remote_node, i.name),
7481 result.append([i.name, self.op.remote_node])
7483 ial = IAllocator(self.cfg, self.rpc,
7484 mode=constants.IALLOCATOR_MODE_MEVAC,
7485 evac_nodes=self.op.nodes)
7486 ial.Run(self.op.iallocator, validate=True)
7488 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
7494 class LUGrowDisk(LogicalUnit):
7495 """Grow a disk of an instance.
7499 HTYPE = constants.HTYPE_INSTANCE
7500 _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7503 def ExpandNames(self):
7504 self._ExpandAndLockInstance()
7505 self.needed_locks[locking.LEVEL_NODE] = []
7506 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7508 def DeclareLocks(self, level):
7509 if level == locking.LEVEL_NODE:
7510 self._LockInstancesNodes()
7512 def BuildHooksEnv(self):
7515 This runs on the master, the primary and all the secondaries.
7519 "DISK": self.op.disk,
7520 "AMOUNT": self.op.amount,
7522 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7523 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7526 def CheckPrereq(self):
7527 """Check prerequisites.
7529 This checks that the instance is in the cluster.
7532 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7533 assert instance is not None, \
7534 "Cannot retrieve locked instance %s" % self.op.instance_name
7535 nodenames = list(instance.all_nodes)
7536 for node in nodenames:
7537 _CheckNodeOnline(self, node)
7540 self.instance = instance
7542 if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
7543 raise errors.OpPrereqError("Instance's disk layout does not support"
7544 " growing.", errors.ECODE_INVAL)
7546 self.disk = instance.FindDisk(self.op.disk)
7548 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
7550 def Exec(self, feedback_fn):
7551 """Execute disk grow.
7554 instance = self.instance
7556 for node in instance.all_nodes:
7557 self.cfg.SetDiskID(disk, node)
7558 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7559 result.Raise("Grow request failed to node %s" % node)
7561 # TODO: Rewrite code to work properly
7562 # DRBD goes into sync mode for a short amount of time after executing the
7563 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7564 # calling "resize" in sync mode fails. Sleeping for a short amount of
7565 # time is a work-around.
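# once every node has accepted the grow, record the new size in the
# configuration; the (optional) wait for resync happens below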
7568 disk.RecordGrow(self.op.amount)
7569 self.cfg.Update(instance, feedback_fn)
7570 if self.op.wait_for_sync:
7571 disk_abort = not _WaitForSync(self, instance)
7573 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7574 " status.\nPlease check the instance.")
7577 class LUQueryInstanceData(NoHooksLU):
7578 """Query runtime instance data.
7581 _OP_REQP = ["instances", "static"]
7584 def ExpandNames(self):
7585 self.needed_locks = {}
7586 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7588 if not isinstance(self.op.instances, list):
7589 raise errors.OpPrereqError("Invalid argument type 'instances'",
7592 if self.op.instances:
7593 self.wanted_names = []
7594 for name in self.op.instances:
7595 full_name = _ExpandInstanceName(self.cfg, name)
7596 self.wanted_names.append(full_name)
7597 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7599 self.wanted_names = None
7600 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7602 self.needed_locks[locking.LEVEL_NODE] = []
7603 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7605 def DeclareLocks(self, level):
7606 if level == locking.LEVEL_NODE:
7607 self._LockInstancesNodes()
7609 def CheckPrereq(self):
7610 """Check prerequisites.
7612 This only checks the optional instance list against the existing names.
7615 if self.wanted_names is None:
7616 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7618 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7619 in self.wanted_names]
7622 def _ComputeBlockdevStatus(self, node, instance_name, dev):
7623 """Returns the status of a block device
7626 if self.op.static or not node:
7629 self.cfg.SetDiskID(dev, node)
7631 result = self.rpc.call_blockdev_find(node, dev)
7635 result.Raise("Can't compute disk status for %s" % instance_name)
7637 status = result.payload
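# the payload describes the device as found on the node; flatten it into
# the tuple format consumed by _ComputeDiskStatus below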
7641 return (status.dev_path, status.major, status.minor,
7642 status.sync_percent, status.estimated_time,
7643 status.is_degraded, status.ldisk_status)
7645 def _ComputeDiskStatus(self, instance, snode, dev):
7646 """Compute block device status.
7649 if dev.dev_type in constants.LDS_DRBD:
7650 # we change the snode then (otherwise we use the one passed in)
7651 if dev.logical_id[0] == instance.primary_node:
7652 snode = dev.logical_id[1]
7654 snode = dev.logical_id[0]
7656 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
7658 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
7661 dev_children = [self._ComputeDiskStatus(instance, snode, child)
7662 for child in dev.children]
7667 "iv_name": dev.iv_name,
7668 "dev_type": dev.dev_type,
7669 "logical_id": dev.logical_id,
7670 "physical_id": dev.physical_id,
7671 "pstatus": dev_pstatus,
7672 "sstatus": dev_sstatus,
7673 "children": dev_children,
7680 def Exec(self, feedback_fn):
7681 """Gather and return data"""
7684 cluster = self.cfg.GetClusterInfo()
7686 for instance in self.wanted_instances:
7687 if not self.op.static:
7688 remote_info = self.rpc.call_instance_info(instance.primary_node,
7690 instance.hypervisor)
7691 remote_info.Raise("Error checking node %s" % instance.primary_node)
7692 remote_info = remote_info.payload
7693 if remote_info and "state" in remote_info:
7696 remote_state = "down"
7699 if instance.admin_up:
7702 config_state = "down"
7704 disks = [self._ComputeDiskStatus(instance, None, device)
7705 for device in instance.disks]
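# build one dictionary per instance; these are collected into the LU
# result keyed by instance name (see result[instance.name] below)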
7708 "name": instance.name,
7709 "config_state": config_state,
7710 "run_state": remote_state,
7711 "pnode": instance.primary_node,
7712 "snodes": instance.secondary_nodes,
7714 # this happens to be the same format used for hooks
7715 "nics": _NICListToTuple(self, instance.nics),
7717 "hypervisor": instance.hypervisor,
7718 "network_port": instance.network_port,
7719 "hv_instance": instance.hvparams,
7720 "hv_actual": cluster.FillHV(instance, skip_globals=True),
7721 "be_instance": instance.beparams,
7722 "be_actual": cluster.FillBE(instance),
7723 "serial_no": instance.serial_no,
7724 "mtime": instance.mtime,
7725 "ctime": instance.ctime,
7726 "uuid": instance.uuid,
7729 result[instance.name] = idict
7734 class LUSetInstanceParams(LogicalUnit):
7735 """Modifies an instances's parameters.
7738 HPATH = "instance-modify"
7739 HTYPE = constants.HTYPE_INSTANCE
7740 _OP_REQP = ["instance_name"]
7743 def CheckArguments(self):
7744 if not hasattr(self.op, 'nics'):
7746 if not hasattr(self.op, 'disks'):
7748 if not hasattr(self.op, 'beparams'):
7749 self.op.beparams = {}
7750 if not hasattr(self.op, 'hvparams'):
7751 self.op.hvparams = {}
7752 if not hasattr(self.op, "disk_template"):
7753 self.op.disk_template = None
7754 if not hasattr(self.op, "remote_node"):
7755 self.op.remote_node = None
7756 if not hasattr(self.op, "os_name"):
7757 self.op.os_name = None
7758 if not hasattr(self.op, "force_variant"):
7759 self.op.force_variant = False
7760 self.op.force = getattr(self.op, "force", False)
7761 if not (self.op.nics or self.op.disks or self.op.disk_template or
7762 self.op.hvparams or self.op.beparams or self.op.os_name):
7763 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
7765 if self.op.hvparams:
7766 _CheckGlobalHvParams(self.op.hvparams)
7770 for disk_op, disk_dict in self.op.disks:
7771 if disk_op == constants.DDM_REMOVE:
7774 elif disk_op == constants.DDM_ADD:
7777 if not isinstance(disk_op, int):
7778 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
7779 if not isinstance(disk_dict, dict):
7780 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
7781 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7783 if disk_op == constants.DDM_ADD:
7784 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
7785 if mode not in constants.DISK_ACCESS_SET:
7786 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
7788 size = disk_dict.get('size', None)
7790 raise errors.OpPrereqError("Required disk parameter size missing",
7794 except (TypeError, ValueError), err:
7795 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
7796 str(err), errors.ECODE_INVAL)
7797 disk_dict['size'] = size
7799 # modification of disk
7800 if 'size' in disk_dict:
7801 raise errors.OpPrereqError("Disk size change not possible, use"
7802 " grow-disk", errors.ECODE_INVAL)
7804 if disk_addremove > 1:
7805 raise errors.OpPrereqError("Only one disk add or remove operation"
7806 " supported at a time", errors.ECODE_INVAL)
7808 if self.op.disks and self.op.disk_template is not None:
7809 raise errors.OpPrereqError("Disk template conversion and other disk"
7810 " changes not supported at the same time",
7813 if self.op.disk_template:
7814 _CheckDiskTemplate(self.op.disk_template)
7815 if (self.op.disk_template in constants.DTS_NET_MIRROR and
7816 self.op.remote_node is None):
7817 raise errors.OpPrereqError("Changing the disk template to a mirrored"
7818 " one requires specifying a secondary node",
7823 for nic_op, nic_dict in self.op.nics:
7824 if nic_op == constants.DDM_REMOVE:
7827 elif nic_op == constants.DDM_ADD:
7830 if not isinstance(nic_op, int):
7831 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
7832 if not isinstance(nic_dict, dict):
7833 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
7834 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7836 # nic_dict should be a dict
7837 nic_ip = nic_dict.get('ip', None)
7838 if nic_ip is not None:
7839 if nic_ip.lower() == constants.VALUE_NONE:
7840 nic_dict['ip'] = None
7842 if not utils.IsValidIP(nic_ip):
7843 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
7846 nic_bridge = nic_dict.get('bridge', None)
7847 nic_link = nic_dict.get('link', None)
7848 if nic_bridge and nic_link:
7849 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7850 " at the same time", errors.ECODE_INVAL)
7851 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
7852 nic_dict['bridge'] = None
7853 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
7854 nic_dict['link'] = None
7856 if nic_op == constants.DDM_ADD:
7857 nic_mac = nic_dict.get('mac', None)
7859 nic_dict['mac'] = constants.VALUE_AUTO
7861 if 'mac' in nic_dict:
7862 nic_mac = nic_dict['mac']
7863 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7864 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
7866 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
7867 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
7868 " modifying an existing nic",
7871 if nic_addremove > 1:
7872 raise errors.OpPrereqError("Only one NIC add or remove operation"
7873 " supported at a time", errors.ECODE_INVAL)
7875 def ExpandNames(self):
7876 self._ExpandAndLockInstance()
7877 self.needed_locks[locking.LEVEL_NODE] = []
7878 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7880 def DeclareLocks(self, level):
7881 if level == locking.LEVEL_NODE:
7882 self._LockInstancesNodes()
7883 if self.op.disk_template and self.op.remote_node:
7884 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7885 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
7887 def BuildHooksEnv(self):
7890 This runs on the master, primary and secondaries.
7894 if constants.BE_MEMORY in self.be_new:
7895 args['memory'] = self.be_new[constants.BE_MEMORY]
7896 if constants.BE_VCPUS in self.be_new:
7897 args['vcpus'] = self.be_new[constants.BE_VCPUS]
7898 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
7899 # information at all.
7902 nic_override = dict(self.op.nics)
7903 c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
7904 for idx, nic in enumerate(self.instance.nics):
7905 if idx in nic_override:
7906 this_nic_override = nic_override[idx]
7908 this_nic_override = {}
7909 if 'ip' in this_nic_override:
7910 ip = this_nic_override['ip']
7913 if 'mac' in this_nic_override:
7914 mac = this_nic_override['mac']
7917 if idx in self.nic_pnew:
7918 nicparams = self.nic_pnew[idx]
7920 nicparams = objects.FillDict(c_nicparams, nic.nicparams)
7921 mode = nicparams[constants.NIC_MODE]
7922 link = nicparams[constants.NIC_LINK]
7923 args['nics'].append((ip, mac, mode, link))
7924 if constants.DDM_ADD in nic_override:
7925 ip = nic_override[constants.DDM_ADD].get('ip', None)
7926 mac = nic_override[constants.DDM_ADD]['mac']
7927 nicparams = self.nic_pnew[constants.DDM_ADD]
7928 mode = nicparams[constants.NIC_MODE]
7929 link = nicparams[constants.NIC_LINK]
7930 args['nics'].append((ip, mac, mode, link))
7931 elif constants.DDM_REMOVE in nic_override:
7932 del args['nics'][-1]
7934 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
7935 if self.op.disk_template:
7936 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
7937 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7941 def _GetUpdatedParams(old_params, update_dict,
7942 default_values, parameter_types):
7943 """Return the new params dict for the given params.
7945 @type old_params: dict
7946 @param old_params: old parameters
7947 @type update_dict: dict
7948 @param update_dict: dict containing new parameter values,
7949 or constants.VALUE_DEFAULT to reset the
7950 parameter to its default value
7951 @type default_values: dict
7952 @param default_values: default values for the filled parameters
7953 @type parameter_types: dict
7954 @param parameter_types: dict mapping target dict keys to types
7955 in constants.ENFORCEABLE_TYPES
7956 @rtype: (dict, dict)
7957 @return: (new_parameters, filled_parameters)
7960 params_copy = copy.deepcopy(old_params)
7961 for key, val in update_dict.iteritems():
7962 if val == constants.VALUE_DEFAULT:
7964 del params_copy[key]
7968 params_copy[key] = val
7969 utils.ForceDictType(params_copy, parameter_types)
7970 params_filled = objects.FillDict(default_values, params_copy)
7971 return (params_copy, params_filled)
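# for example, with old_params={'memory': 512} and
# update_dict={'memory': constants.VALUE_DEFAULT}, the key is dropped from
# the returned private dict and the filled dict falls back to
# default_values['memory']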
7973 def CheckPrereq(self):
7974 """Check prerequisites.
7976 This only checks the instance list against the existing names.
7979 self.force = self.op.force
7981 # checking the new params on the primary/secondary nodes
7983 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7984 cluster = self.cluster = self.cfg.GetClusterInfo()
7985 assert self.instance is not None, \
7986 "Cannot retrieve locked instance %s" % self.op.instance_name
7987 pnode = instance.primary_node
7988 nodelist = list(instance.all_nodes)
7990 if self.op.disk_template:
7991 if instance.disk_template == self.op.disk_template:
7992 raise errors.OpPrereqError("Instance already has disk template %s" %
7993 instance.disk_template, errors.ECODE_INVAL)
7995 if (instance.disk_template,
7996 self.op.disk_template) not in self._DISK_CONVERSIONS:
7997 raise errors.OpPrereqError("Unsupported disk template conversion from"
7998 " %s to %s" % (instance.disk_template,
7999 self.op.disk_template),
8001 if self.op.disk_template in constants.DTS_NET_MIRROR:
8002 _CheckNodeOnline(self, self.op.remote_node)
8003 _CheckNodeNotDrained(self, self.op.remote_node)
8004 disks = [{"size": d.size} for d in instance.disks]
8005 required = _ComputeDiskSize(self.op.disk_template, disks)
8006 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8007 _CheckInstanceDown(self, instance, "cannot change disk template")
8009 # hvparams processing
8010 if self.op.hvparams:
8011 i_hvdict, hv_new = self._GetUpdatedParams(
8012 instance.hvparams, self.op.hvparams,
8013 cluster.hvparams[instance.hypervisor],
8014 constants.HVS_PARAMETER_TYPES)
8016 hypervisor.GetHypervisor(
8017 instance.hypervisor).CheckParameterSyntax(hv_new)
8018 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8019 self.hv_new = hv_new # the new actual values
8020 self.hv_inst = i_hvdict # the new dict (without defaults)
8022 self.hv_new = self.hv_inst = {}
8024 # beparams processing
8025 if self.op.beparams:
8026 i_bedict, be_new = self._GetUpdatedParams(
8027 instance.beparams, self.op.beparams,
8028 cluster.beparams[constants.PP_DEFAULT],
8029 constants.BES_PARAMETER_TYPES)
8030 self.be_new = be_new # the new actual values
8031 self.be_inst = i_bedict # the new dict (without defaults)
8033 self.be_new = self.be_inst = {}
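# if the memory size is being changed and force was not requested, verify
# that the primary node (and, when auto_balance is set, the secondaries
# too) still has enough free memory for the new value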
8037 if constants.BE_MEMORY in self.op.beparams and not self.force:
8038 mem_check_list = [pnode]
8039 if be_new[constants.BE_AUTO_BALANCE]:
8040 # either we changed auto_balance to yes or it was from before
8041 mem_check_list.extend(instance.secondary_nodes)
8042 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8043 instance.hypervisor)
8044 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8045 instance.hypervisor)
8046 pninfo = nodeinfo[pnode]
8047 msg = pninfo.fail_msg
8049 # Assume the primary node is unreachable and go ahead
8050 self.warn.append("Can't get info from primary node %s: %s" %
8052 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8053 self.warn.append("Node data from primary node %s doesn't contain"
8054 " free memory information" % pnode)
8055 elif instance_info.fail_msg:
8056 self.warn.append("Can't get instance runtime information: %s" %
8057 instance_info.fail_msg)
8059 if instance_info.payload:
8060 current_mem = int(instance_info.payload['memory'])
8062 # Assume instance not running
8063 # (there is a slight race condition here, but it's not very probable,
8064 # and we have no other way to check)
8066 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8067 pninfo.payload['memory_free'])
8069 raise errors.OpPrereqError("This change will prevent the instance"
8070 " from starting, due to %d MB of memory"
8071 " missing on its primary node" % miss_mem,
8074 if be_new[constants.BE_AUTO_BALANCE]:
8075 for node, nres in nodeinfo.items():
8076 if node not in instance.secondary_nodes:
8080 self.warn.append("Can't get info from secondary node %s: %s" %
8082 elif not isinstance(nres.payload.get('memory_free', None), int):
8083 self.warn.append("Secondary node %s didn't return free"
8084 " memory information" % node)
8085 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8086 self.warn.append("Not enough memory to failover instance to"
8087 " secondary node %s" % node)
8092 for nic_op, nic_dict in self.op.nics:
8093 if nic_op == constants.DDM_REMOVE:
8094 if not instance.nics:
8095 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8098 if nic_op != constants.DDM_ADD:
8100 if not instance.nics:
8101 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8102 " no NICs" % nic_op,
8104 if nic_op < 0 or nic_op >= len(instance.nics):
8105 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8107 (nic_op, len(instance.nics) - 1),
8109 old_nic_params = instance.nics[nic_op].nicparams
8110 old_nic_ip = instance.nics[nic_op].ip
8115 update_params_dict = dict([(key, nic_dict[key])
8116 for key in constants.NICS_PARAMETERS
8117 if key in nic_dict])
8119 if 'bridge' in nic_dict:
8120 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
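# 'bridge' is only accepted for backwards compatibility; it is folded
# into the 'link' nic parameter above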
8122 new_nic_params, new_filled_nic_params = \
8123 self._GetUpdatedParams(old_nic_params, update_params_dict,
8124 cluster.nicparams[constants.PP_DEFAULT],
8125 constants.NICS_PARAMETER_TYPES)
8126 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8127 self.nic_pinst[nic_op] = new_nic_params
8128 self.nic_pnew[nic_op] = new_filled_nic_params
8129 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8131 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8132 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8133 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8135 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8137 self.warn.append(msg)
8139 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8140 if new_nic_mode == constants.NIC_MODE_ROUTED:
8141 if 'ip' in nic_dict:
8142 nic_ip = nic_dict['ip']
8146 raise errors.OpPrereqError('Cannot set the nic ip to None'
8147 ' on a routed nic', errors.ECODE_INVAL)
8148 if 'mac' in nic_dict:
8149 nic_mac = nic_dict['mac']
8151 raise errors.OpPrereqError('Cannot set the nic mac to None',
8153 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8154 # otherwise generate the mac
8155 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8157 # or validate/reserve the current one
8159 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8160 except errors.ReservationError:
8161 raise errors.OpPrereqError("MAC address %s already in use"
8162 " in cluster" % nic_mac,
8163 errors.ECODE_NOTUNIQUE)
8166 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8167 raise errors.OpPrereqError("Disk operations not supported for"
8168 " diskless instances",
8170 for disk_op, _ in self.op.disks:
8171 if disk_op == constants.DDM_REMOVE:
8172 if len(instance.disks) == 1:
8173 raise errors.OpPrereqError("Cannot remove the last disk of"
8174 " an instance", errors.ECODE_INVAL)
8175 _CheckInstanceDown(self, instance, "cannot remove disks")
8177 if (disk_op == constants.DDM_ADD and
8178 len(instance.disks) >= constants.MAX_DISKS):
8179 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8180 " add more" % constants.MAX_DISKS,
8182 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8184 if disk_op < 0 or disk_op >= len(instance.disks):
8185 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8187 (disk_op, len(instance.disks)),
8191 if self.op.os_name and not self.op.force:
8192 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8193 self.op.force_variant)
8197 def _ConvertPlainToDrbd(self, feedback_fn):
8198 """Converts an instance from plain to drbd.
8201 feedback_fn("Converting template to drbd")
8202 instance = self.instance
8203 pnode = instance.primary_node
8204 snode = self.op.remote_node
8206 # create a fake disk info for _GenerateDiskTemplate
8207 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8208 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8209 instance.name, pnode, [snode],
8210 disk_info, None, None, 0)
8211 info = _GetInstanceInfoText(instance)
8212 feedback_fn("Creating additional volumes...")
8213 # first, create the missing data and meta devices
8214 for disk in new_disks:
8215 # unfortunately this is... not too nice
8216 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8218 for child in disk.children:
8219 _CreateSingleBlockDev(self, snode, instance, child, info, True)
8220 # at this stage, all new LVs have been created, we can rename the
8222 feedback_fn("Renaming original volumes...")
8223 rename_list = [(o, n.children[0].logical_id)
8224 for (o, n) in zip(instance.disks, new_disks)]
8225 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8226 result.Raise("Failed to rename original LVs")
8228 feedback_fn("Initializing DRBD devices...")
8229 # all child devices are in place, we can now create the DRBD devices
8230 for disk in new_disks:
8231 for node in [pnode, snode]:
8232 f_create = node == pnode
8233 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8235 # at this point, the instance has been modified
8236 instance.disk_template = constants.DT_DRBD8
8237 instance.disks = new_disks
8238 self.cfg.Update(instance, feedback_fn)
8240 # disks are created, waiting for sync
8241 disk_abort = not _WaitForSync(self, instance)
8243 raise errors.OpExecError("There are some degraded disks for"
8244 " this instance, please cleanup manually")
8246 def _ConvertDrbdToPlain(self, feedback_fn):
8247 """Converts an instance from drbd to plain.
8250 instance = self.instance
8251 assert len(instance.secondary_nodes) == 1
8252 pnode = instance.primary_node
8253 snode = instance.secondary_nodes[0]
8254 feedback_fn("Converting template to plain")
8256 old_disks = instance.disks
8257 new_disks = [d.children[0] for d in old_disks]
8259 # copy over size and mode
8260 for parent, child in zip(old_disks, new_disks):
8261 child.size = parent.size
8262 child.mode = parent.mode
8264 # update instance structure
8265 instance.disks = new_disks
8266 instance.disk_template = constants.DT_PLAIN
8267 self.cfg.Update(instance, feedback_fn)
8269 feedback_fn("Removing volumes on the secondary node...")
8270 for disk in old_disks:
8271 self.cfg.SetDiskID(disk, snode)
8272 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8274 self.LogWarning("Could not remove block device %s on node %s,"
8275 " continuing anyway: %s", disk.iv_name, snode, msg)
8277 feedback_fn("Removing unneeded volumes on the primary node...")
8278 for idx, disk in enumerate(old_disks):
8279 meta = disk.children[1]
8280 self.cfg.SetDiskID(meta, pnode)
8281 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8283 self.LogWarning("Could not remove metadata for disk %d on node %s,"
8284 " continuing anyway: %s", idx, pnode, msg)
8287 def Exec(self, feedback_fn):
8288 """Modifies an instance.
8290 All parameters take effect only at the next restart of the instance.
8293 # Process here the warnings from CheckPrereq, as we don't have a
8294 # feedback_fn there.
8295 for warn in self.warn:
8296 feedback_fn("WARNING: %s" % warn)
8299 instance = self.instance
8301 for disk_op, disk_dict in self.op.disks:
8302 if disk_op == constants.DDM_REMOVE:
8303 # remove the last disk
8304 device = instance.disks.pop()
8305 device_idx = len(instance.disks)
8306 for node, disk in device.ComputeNodeTree(instance.primary_node):
8307 self.cfg.SetDiskID(disk, node)
8308 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8310 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8311 " continuing anyway", device_idx, node, msg)
8312 result.append(("disk/%d" % device_idx, "remove"))
8313 elif disk_op == constants.DDM_ADD:
8315 if instance.disk_template == constants.DT_FILE:
8316 file_driver, file_path = instance.disks[0].logical_id
8317 file_path = os.path.dirname(file_path)
8319 file_driver = file_path = None
8320 disk_idx_base = len(instance.disks)
8321 new_disk = _GenerateDiskTemplate(self,
8322 instance.disk_template,
8323 instance.name, instance.primary_node,
8324 instance.secondary_nodes,
8329 instance.disks.append(new_disk)
8330 info = _GetInstanceInfoText(instance)
8332 logging.info("Creating volume %s for instance %s",
8333 new_disk.iv_name, instance.name)
8334 # Note: this needs to be kept in sync with _CreateDisks
8336 for node in instance.all_nodes:
8337 f_create = node == instance.primary_node
8339 _CreateBlockDev(self, node, instance, new_disk,
8340 f_create, info, f_create)
8341 except errors.OpExecError, err:
8342 self.LogWarning("Failed to create volume %s (%s) on"
8344 new_disk.iv_name, new_disk, node, err)
8345 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8346 (new_disk.size, new_disk.mode)))
8348 # change a given disk
8349 instance.disks[disk_op].mode = disk_dict['mode']
8350 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8352 if self.op.disk_template:
8353 r_shut = _ShutdownInstanceDisks(self, instance)
8355 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
8356 " proceed with disk template conversion")
8357 mode = (instance.disk_template, self.op.disk_template)
8359 self._DISK_CONVERSIONS[mode](self, feedback_fn)
8361 self.cfg.ReleaseDRBDMinors(instance.name)
8363 result.append(("disk_template", self.op.disk_template))
8366 for nic_op, nic_dict in self.op.nics:
8367 if nic_op == constants.DDM_REMOVE:
8368 # remove the last nic
8369 del instance.nics[-1]
8370 result.append(("nic.%d" % len(instance.nics), "remove"))
8371 elif nic_op == constants.DDM_ADD:
8372 # mac and bridge should be set by now
8373 mac = nic_dict['mac']
8374 ip = nic_dict.get('ip', None)
8375 nicparams = self.nic_pinst[constants.DDM_ADD]
8376 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8377 instance.nics.append(new_nic)
8378 result.append(("nic.%d" % (len(instance.nics) - 1),
8379 "add:mac=%s,ip=%s,mode=%s,link=%s" %
8380 (new_nic.mac, new_nic.ip,
8381 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8382 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8385 for key in 'mac', 'ip':
8387 setattr(instance.nics[nic_op], key, nic_dict[key])
8388 if nic_op in self.nic_pinst:
8389 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8390 for key, val in nic_dict.iteritems():
8391 result.append(("nic.%s/%d" % (key, nic_op), val))
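# hypervisor, backend and OS parameter changes below only touch the
# configuration; as documented in this method's docstring they take
# effect at the next restart of the instance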
8394 if self.op.hvparams:
8395 instance.hvparams = self.hv_inst
8396 for key, val in self.op.hvparams.iteritems():
8397 result.append(("hv/%s" % key, val))
8400 if self.op.beparams:
8401 instance.beparams = self.be_inst
8402 for key, val in self.op.beparams.iteritems():
8403 result.append(("be/%s" % key, val))
8407 instance.os = self.op.os_name
8409 self.cfg.Update(instance, feedback_fn)
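# the modified instance object has now been written back to the
# configuration; the accumulated (parameter, change) pairs are what this
# LU reports to the caller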
8413 _DISK_CONVERSIONS = {
8414 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8415 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8418 class LUQueryExports(NoHooksLU):
8419 """Query the exports list
8422 _OP_REQP = ['nodes']
8425 def ExpandNames(self):
8426 self.needed_locks = {}
8427 self.share_locks[locking.LEVEL_NODE] = 1
8428 if not self.op.nodes:
8429 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8431 self.needed_locks[locking.LEVEL_NODE] = \
8432 _GetWantedNodes(self, self.op.nodes)
8434 def CheckPrereq(self):
8435 """Check prerequisites.
8438 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8440 def Exec(self, feedback_fn):
8441 """Compute the list of all the exported system images.
8444 @return: a dictionary with the structure node->(export-list)
8445 where export-list is a list of the instances exported on
8449 rpcresult = self.rpc.call_export_list(self.nodes)
8451 for node in rpcresult:
8452 if rpcresult[node].fail_msg:
8453 result[node] = False
8455 result[node] = rpcresult[node].payload
8460 class LUExportInstance(LogicalUnit):
8461 """Export an instance to an image in the cluster.
8464 HPATH = "instance-export"
8465 HTYPE = constants.HTYPE_INSTANCE
8466 _OP_REQP = ["instance_name", "target_node", "shutdown"]
8469 def CheckArguments(self):
8470 """Check the arguments.
8473 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
8474 constants.DEFAULT_SHUTDOWN_TIMEOUT)
8476 def ExpandNames(self):
8477 self._ExpandAndLockInstance()
8478 # FIXME: lock only instance primary and destination node
8480 # Sad but true; for now we have to lock all nodes, as we don't know where
8481 # the previous export might be, and in this LU we search for it and
8482 # remove it from its current node. In the future we could fix this by:
8483 # - making a tasklet to search (share-lock all), then create the new one,
8484 # then one to remove, after
8485 # - removing the removal operation altogether
8486 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8488 def DeclareLocks(self, level):
8489 """Last minute lock declaration."""
8490 # All nodes are locked anyway, so nothing to do here.
8492 def BuildHooksEnv(self):
8495 This will run on the master, primary node and target node.
8499 "EXPORT_NODE": self.op.target_node,
8500 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
8501 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
8503 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8504 nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
8505 self.op.target_node]
8508 def CheckPrereq(self):
8509 """Check prerequisites.
8511 This checks that the instance and node names are valid.
8514 instance_name = self.op.instance_name
8515 self.instance = self.cfg.GetInstanceInfo(instance_name)
8516 assert self.instance is not None, \
8517 "Cannot retrieve locked instance %s" % self.op.instance_name
8518 _CheckNodeOnline(self, self.instance.primary_node)
8520 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8521 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
8522 assert self.dst_node is not None
8524 _CheckNodeOnline(self, self.dst_node.name)
8525 _CheckNodeNotDrained(self, self.dst_node.name)
8527 # instance disk type verification
8528 for disk in self.instance.disks:
8529 if disk.dev_type == constants.LD_FILE:
8530 raise errors.OpPrereqError("Export not supported for instances with"
8531 " file-based disks", errors.ECODE_INVAL)
8533 def Exec(self, feedback_fn):
8534 """Export an instance to an image in the cluster.
8537 instance = self.instance
8538 dst_node = self.dst_node
8539 src_node = instance.primary_node
8541 if self.op.shutdown:
8542 # shutdown the instance, but not the disks
8543 feedback_fn("Shutting down instance %s" % instance.name)
8544 result = self.rpc.call_instance_shutdown(src_node, instance,
8545 self.shutdown_timeout)
8546 result.Raise("Could not shutdown instance %s on"
8547 " node %s" % (instance.name, src_node))
8549 vgname = self.cfg.GetVGName()
8553 # set the disks ID correctly since call_instance_start needs the
8554 # correct drbd minor to create the symlinks
8555 for disk in instance.disks:
8556 self.cfg.SetDiskID(disk, src_node)
8558 activate_disks = (not instance.admin_up)
8561 # Activate the instance disks if we're exporting a stopped instance
8562 feedback_fn("Activating disks for %s" % instance.name)
8563 _StartInstanceDisks(self, instance, None)
8569 for idx, disk in enumerate(instance.disks):
8570 feedback_fn("Creating a snapshot of disk/%s on node %s" %
8573 # result.payload will be a snapshot of an lvm leaf of the one we
8575 result = self.rpc.call_blockdev_snapshot(src_node, disk)
8576 msg = result.fail_msg
8578 self.LogWarning("Could not snapshot disk/%s on node %s: %s",
8580 snap_disks.append(False)
8582 disk_id = (vgname, result.payload)
8583 new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
8584 logical_id=disk_id, physical_id=disk_id,
8585 iv_name=disk.iv_name)
8586 snap_disks.append(new_dev)
8589 if self.op.shutdown and instance.admin_up:
8590 feedback_fn("Starting instance %s" % instance.name)
8591 result = self.rpc.call_instance_start(src_node, instance, None, None)
8592 msg = result.fail_msg
8594 _ShutdownInstanceDisks(self, instance)
8595 raise errors.OpExecError("Could not start instance: %s" % msg)
8597 # TODO: check for size
8599 cluster_name = self.cfg.GetClusterName()
8600 for idx, dev in enumerate(snap_disks):
8601 feedback_fn("Exporting snapshot %s from %s to %s" %
8602 (idx, src_node, dst_node.name))
8604 # FIXME: pass debug from opcode to backend
8605 result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
8606 instance, cluster_name,
8607 idx, self.op.debug_level)
8608 msg = result.fail_msg
8610 self.LogWarning("Could not export disk/%s from node %s to"
8611 " node %s: %s", idx, src_node, dst_node.name, msg)
8612 dresults.append(False)
8614 dresults.append(True)
8615 msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
8617 self.LogWarning("Could not remove snapshot for disk/%d from node"
8618 " %s: %s", idx, src_node, msg)
8620 dresults.append(False)
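# dresults holds one boolean per disk: True if the snapshot was created
# and exported successfully, False otherwise; it is returned together
# with the finalize status below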
8622 feedback_fn("Finalizing export on %s" % dst_node.name)
8623 result = self.rpc.call_finalize_export(dst_node.name, instance,
8626 msg = result.fail_msg
8628 self.LogWarning("Could not finalize export for instance %s"
8629 " on node %s: %s", instance.name, dst_node.name, msg)
8634 feedback_fn("Deactivating disks for %s" % instance.name)
8635 _ShutdownInstanceDisks(self, instance)
8637 nodelist = self.cfg.GetNodeList()
8638 nodelist.remove(dst_node.name)
8640 # on one-node clusters nodelist will be empty after the removal
8641 # if we proceed the backup would be removed because OpQueryExports
8642 # substitutes an empty list with the full cluster node list.
8643 iname = instance.name
8645 feedback_fn("Removing old exports for instance %s" % iname)
8646 exportlist = self.rpc.call_export_list(nodelist)
8647 for node in exportlist:
8648 if exportlist[node].fail_msg:
8650 if iname in exportlist[node].payload:
8651 msg = self.rpc.call_export_remove(node, iname).fail_msg
8653 self.LogWarning("Could not remove older export for instance %s"
8654 " on node %s: %s", iname, node, msg)
8655 return fin_resu, dresults
8658 class LURemoveExport(NoHooksLU):
8659 """Remove exports related to the named instance.
8662 _OP_REQP = ["instance_name"]
8665 def ExpandNames(self):
8666 self.needed_locks = {}
8667 # We need all nodes to be locked in order for RemoveExport to work, but we
8668 # don't need to lock the instance itself, as nothing will happen to it (and
8669 # we can remove exports also for a removed instance)
8670 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8672 def CheckPrereq(self):
8673 """Check prerequisites.
8677 def Exec(self, feedback_fn):
8678 """Remove any export.
8681 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
8682 # If the instance was not found we'll try with the name that was passed in.
8683 # This will only work if it was an FQDN, though.
8685 if not instance_name:
8687 instance_name = self.op.instance_name
8689 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
8690 exportlist = self.rpc.call_export_list(locked_nodes)
8692 for node in exportlist:
8693 msg = exportlist[node].fail_msg
8695 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
8697 if instance_name in exportlist[node].payload:
8699 result = self.rpc.call_export_remove(node, instance_name)
8700 msg = result.fail_msg
8702 logging.error("Could not remove export for instance %s"
8703 " on node %s: %s", instance_name, node, msg)
8705 if fqdn_warn and not found:
8706 feedback_fn("Export not found. If trying to remove an export belonging"
8707 " to a deleted instance please use its Fully Qualified"
8711 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
8714 This is an abstract class which is the parent of all the other tags LUs.
8718 def ExpandNames(self):
8719 self.needed_locks = {}
8720 if self.op.kind == constants.TAG_NODE:
8721 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
8722 self.needed_locks[locking.LEVEL_NODE] = self.op.name
8723 elif self.op.kind == constants.TAG_INSTANCE:
8724 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
8725 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
8727 def CheckPrereq(self):
8728 """Check prerequisites.
8731 if self.op.kind == constants.TAG_CLUSTER:
8732 self.target = self.cfg.GetClusterInfo()
8733 elif self.op.kind == constants.TAG_NODE:
8734 self.target = self.cfg.GetNodeInfo(self.op.name)
8735 elif self.op.kind == constants.TAG_INSTANCE:
8736 self.target = self.cfg.GetInstanceInfo(self.op.name)
8738 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
8739 str(self.op.kind), errors.ECODE_INVAL)
8742 class LUGetTags(TagsLU):
8743 """Returns the tags of a given object.
8746 _OP_REQP = ["kind", "name"]
8749 def Exec(self, feedback_fn):
8750 """Returns the tag list.
8753 return list(self.target.GetTags())
8756 class LUSearchTags(NoHooksLU):
8757 """Searches the tags for a given pattern.
8760 _OP_REQP = ["pattern"]
8763 def ExpandNames(self):
8764 self.needed_locks = {}
8766 def CheckPrereq(self):
8767 """Check prerequisites.
8769 This checks the pattern passed for validity by compiling it.
8773 self.re = re.compile(self.op.pattern)
8774 except re.error, err:
8775 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
8776 (self.op.pattern, err), errors.ECODE_INVAL)
8778 def Exec(self, feedback_fn):
8779 """Returns the tag list.
8783 tgts = [("/cluster", cfg.GetClusterInfo())]
8784 ilist = cfg.GetAllInstancesInfo().values()
8785 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
8786 nlist = cfg.GetAllNodesInfo().values()
8787 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
8789 for path, target in tgts:
8790 for tag in target.GetTags():
8791 if self.re.search(tag):
8792 results.append((path, tag))
8796 class LUAddTags(TagsLU):
8797 """Sets a tag on a given object.
8800 _OP_REQP = ["kind", "name", "tags"]
8803 def CheckPrereq(self):
8804 """Check prerequisites.
8806 This checks the type and length of the tag name and value.
8809 TagsLU.CheckPrereq(self)
8810 for tag in self.op.tags:
8811 objects.TaggableObject.ValidateTag(tag)
8813 def Exec(self, feedback_fn):
8818 for tag in self.op.tags:
8819 self.target.AddTag(tag)
8820 except errors.TagError, err:
8821 raise errors.OpExecError("Error while setting tag: %s" % str(err))
8822 self.cfg.Update(self.target, feedback_fn)
8825 class LUDelTags(TagsLU):
8826 """Delete a list of tags from a given object.
8829 _OP_REQP = ["kind", "name", "tags"]
8832 def CheckPrereq(self):
8833 """Check prerequisites.
8835 This checks that we have the given tag.
8838 TagsLU.CheckPrereq(self)
8839 for tag in self.op.tags:
8840 objects.TaggableObject.ValidateTag(tag)
8841 del_tags = frozenset(self.op.tags)
8842 cur_tags = self.target.GetTags()
8843 if not del_tags <= cur_tags:
8844 diff_tags = del_tags - cur_tags
8845 diff_names = ["'%s'" % tag for tag in diff_tags]
8847 raise errors.OpPrereqError("Tag(s) %s not found" %
8848 (",".join(diff_names)), errors.ECODE_NOENT)
8850 def Exec(self, feedback_fn):
8851 """Remove the tag from the object.
8854 for tag in self.op.tags:
8855 self.target.RemoveTag(tag)
8856 self.cfg.Update(self.target, feedback_fn)
8859 class LUTestDelay(NoHooksLU):
8860 """Sleep for a specified amount of time.
8862 This LU sleeps on the master and/or nodes for a specified amount of
8866 _OP_REQP = ["duration", "on_master", "on_nodes"]
8869 def ExpandNames(self):
8870 """Expand names and set required locks.
8872 This expands the node list, if any.
8875 self.needed_locks = {}
8876 if self.op.on_nodes:
8877 # _GetWantedNodes can be used here, but is not always appropriate to use
8878 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
8880 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
8881 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
8883 def CheckPrereq(self):
8884 """Check prerequisites.
8888 def Exec(self, feedback_fn):
8889 """Do the actual sleep.
8892 if self.op.on_master:
8893 if not utils.TestDelay(self.op.duration):
8894 raise errors.OpExecError("Error during master delay test")
8895 if self.op.on_nodes:
8896 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
8897 for node, node_result in result.items():
8898 node_result.Raise("Failure during rpc call to node %s" % node)
8901 class IAllocator(object):
8902 """IAllocator framework.
8904 An IAllocator instance has four sets of attributes:
8905 - cfg that is needed to query the cluster
8906 - input data (all members of the _KEYS class attribute are required)
8907 - four buffer attributes (in|out_data|text), that represent the
8908 input (to the external script) in text and data structure format,
8909 and the output from it, again in two formats
8910 - the result variables from the script (success, info, nodes) for
8914 # pylint: disable-msg=R0902
8915 # lots of instance attributes
8917 "name", "mem_size", "disks", "disk_template",
8918 "os", "tags", "nics", "vcpus", "hypervisor",
8921 "name", "relocate_from",
8927 def __init__(self, cfg, rpc, mode, **kwargs):
8930 # init buffer variables
8931 self.in_text = self.out_text = self.in_data = self.out_data = None
8932 # init all input fields so that pylint is happy
8934 self.mem_size = self.disks = self.disk_template = None
8935 self.os = self.tags = self.nics = self.vcpus = None
8936 self.hypervisor = None
8937 self.relocate_from = None
8939 self.evac_nodes = None
8941 self.required_nodes = None
8942 # init result fields
8943 self.success = self.info = self.result = None
8944 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8945 keyset = self._ALLO_KEYS
8946 fn = self._AddNewInstance
8947 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8948 keyset = self._RELO_KEYS
8949 fn = self._AddRelocateInstance
8950 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
8951 keyset = self._EVAC_KEYS
8952 fn = self._AddEvacuateNodes
8954 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
8955 " IAllocator" % self.mode)
8957 if key not in keyset:
8958 raise errors.ProgrammerError("Invalid input parameter '%s' to"
8959 " IAllocator" % key)
8960 setattr(self, key, kwargs[key])
8963 if key not in kwargs:
8964 raise errors.ProgrammerError("Missing input parameter '%s' to"
8965 " IAllocator" % key)
8966 self._BuildInputData(fn)
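# Typical usage, following the callers earlier in this module (sketch):
#   ial = IAllocator(self.cfg, self.rpc,
#                    mode=constants.IALLOCATOR_MODE_MEVAC,
#                    evac_nodes=self.op.nodes)
#   ial.Run(self.op.iallocator, validate=True)
#   if not ial.success:
#     ...report ial.info...
#   ...use ial.result...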
8968 def _ComputeClusterData(self):
8969 """Compute the generic allocator input data.
8971 This is the data that is independent of the actual operation.
8975 cluster_info = cfg.GetClusterInfo()
8978 "version": constants.IALLOCATOR_VERSION,
8979 "cluster_name": cfg.GetClusterName(),
8980 "cluster_tags": list(cluster_info.GetTags()),
8981 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
8982 # we don't have job IDs
8984 iinfo = cfg.GetAllInstancesInfo().values()
8985 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
8989 node_list = cfg.GetNodeList()
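# which hypervisor the nodes are queried for depends on the request mode:
# the explicitly requested one for allocations, the instance's own for
# relocations, and the cluster's first enabled one for node evacuation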
8991 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8992 hypervisor_name = self.hypervisor
8993 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8994 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
8995 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
8996 hypervisor_name = cluster_info.enabled_hypervisors[0]
8998 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9001 self.rpc.call_all_instances_info(node_list,
9002 cluster_info.enabled_hypervisors)
9003 for nname, nresult in node_data.items():
9004 # first fill in static (config-based) values
9005 ninfo = cfg.GetNodeInfo(nname)
9007 "tags": list(ninfo.GetTags()),
9008 "primary_ip": ninfo.primary_ip,
9009 "secondary_ip": ninfo.secondary_ip,
9010 "offline": ninfo.offline,
9011 "drained": ninfo.drained,
9012 "master_candidate": ninfo.master_candidate,
9015 if not (ninfo.offline or ninfo.drained):
9016 nresult.Raise("Can't get data for node %s" % nname)
9017 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9019 remote_info = nresult.payload
9021 for attr in ['memory_total', 'memory_free', 'memory_dom0',
9022 'vg_size', 'vg_free', 'cpu_total']:
9023 if attr not in remote_info:
9024 raise errors.OpExecError("Node '%s' didn't return attribute"
9025 " '%s'" % (nname, attr))
9026 if not isinstance(remote_info[attr], int):
9027 raise errors.OpExecError("Node '%s' returned invalid value"
9029 (nname, attr, remote_info[attr]))
9030 # compute memory used by primary instances
9031 i_p_mem = i_p_up_mem = 0
9032 for iinfo, beinfo in i_list:
9033 if iinfo.primary_node == nname:
9034 i_p_mem += beinfo[constants.BE_MEMORY]
9035 if iinfo.name not in node_iinfo[nname].payload:
9038 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9039 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9040 remote_info['memory_free'] -= max(0, i_mem_diff)
9043 i_p_up_mem += beinfo[constants.BE_MEMORY]
9045 # compute memory used by instances
9047 "total_memory": remote_info['memory_total'],
9048 "reserved_memory": remote_info['memory_dom0'],
9049 "free_memory": remote_info['memory_free'],
9050 "total_disk": remote_info['vg_size'],
9051 "free_disk": remote_info['vg_free'],
9052 "total_cpus": remote_info['cpu_total'],
9053 "i_pri_memory": i_p_mem,
9054 "i_pri_up_memory": i_p_up_mem,
9058 node_results[nname] = pnr
9059 data["nodes"] = node_results
9063 for iinfo, beinfo in i_list:
9065 for nic in iinfo.nics:
9066 filled_params = objects.FillDict(
9067 cluster_info.nicparams[constants.PP_DEFAULT],
9069 nic_dict = {"mac": nic.mac,
9071 "mode": filled_params[constants.NIC_MODE],
9072 "link": filled_params[constants.NIC_LINK],
9074 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9075 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9076 nic_data.append(nic_dict)
9078 "tags": list(iinfo.GetTags()),
9079 "admin_up": iinfo.admin_up,
9080 "vcpus": beinfo[constants.BE_VCPUS],
9081 "memory": beinfo[constants.BE_MEMORY],
9083 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9085 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9086 "disk_template": iinfo.disk_template,
9087 "hypervisor": iinfo.hypervisor,
9089 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9091 instance_data[iinfo.name] = pir
9093 data["instances"] = instance_data
9097 def _AddNewInstance(self):
9098 """Add new instance data to allocator structure.
9100 This in combination with _ComputeClusterData will create the
9101 correct structure needed as input for the allocator.
9103 The checks for the completeness of the opcode must have already been
9107 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9109 if self.disk_template in constants.DTS_NET_MIRROR:
9110 self.required_nodes = 2
9112 self.required_nodes = 1
9115 "disk_template": self.disk_template,
9118 "vcpus": self.vcpus,
9119 "memory": self.mem_size,
9120 "disks": self.disks,
9121 "disk_space_total": disk_space,
9123 "required_nodes": self.required_nodes,
9127 def _AddRelocateInstance(self):
9128 """Add relocate instance data to allocator structure.
9130 This in combination with _ComputeClusterData will create the
9131 correct structure needed as input for the allocator.
9133 The checks for the completeness of the opcode must have already been
9137 instance = self.cfg.GetInstanceInfo(self.name)
9138 if instance is None:
9139 raise errors.ProgrammerError("Unknown instance '%s' passed to"
9140 " IAllocator" % self.name)
9142 if instance.disk_template not in constants.DTS_NET_MIRROR:
9143 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9146 if len(instance.secondary_nodes) != 1:
9147 raise errors.OpPrereqError("Instance does not have exactly one secondary node",
9150 self.required_nodes = 1
9151 disk_sizes = [{'size': disk.size} for disk in instance.disks]
9152 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9156 "disk_space_total": disk_space,
9157 "required_nodes": self.required_nodes,
9158 "relocate_from": self.relocate_from,
9162 def _AddEvacuateNodes(self):
9163 """Add evacuate nodes data to allocator structure.
9167 "evac_nodes": self.evac_nodes
9171 def _BuildInputData(self, fn):
9172 """Build input data structures.
9175 self._ComputeClusterData()
9178 request["type"] = self.mode
9179 self.in_data["request"] = request
9181 self.in_text = serializer.Dump(self.in_data)
9183 def Run(self, name, validate=True, call_fn=None):
9184 """Run an instance allocator and return the results.
9188 call_fn = self.rpc.call_iallocator_runner
9190 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9191 result.Raise("Failure while running the iallocator script")
9193 self.out_text = result.payload
9195 self._ValidateResult()
9197 def _ValidateResult(self):
9198 """Process the allocator results.
9200 This will process and if successful save the result in
9201 self.out_data and the other parameters.
9205 rdict = serializer.Load(self.out_text)
9206 except Exception, err:
9207 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9209 if not isinstance(rdict, dict):
9210 raise errors.OpExecError("Can't parse iallocator results: not a dict")
9212 # TODO: remove backwards compatibility in later versions
9213 if "nodes" in rdict and "result" not in rdict:
9214 rdict["result"] = rdict["nodes"]
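# whatever the mode, these three keys must be present in the answer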
9217 for key in "success", "info", "result":
9218 if key not in rdict:
9219 raise errors.OpExecError("Can't parse iallocator results:"
9220 " missing key '%s'" % key)
9221 setattr(self, key, rdict[key])
9223 if not isinstance(rdict["result"], list):
9224 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9226 self.out_data = rdict
9229 class LUTestAllocator(NoHooksLU):
9230 """Run allocator tests.
9232 This LU runs the allocator tests
9235 _OP_REQP = ["direction", "mode", "name"]
9237 def CheckPrereq(self):
9238 """Check prerequisites.
9240 This checks the opcode parameters depending on the direction and mode of the test.
9243 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9244 for attr in ["name", "mem_size", "disks", "disk_template",
9245 "os", "tags", "nics", "vcpus"]:
9246 if not hasattr(self.op, attr):
9247 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
9248 attr, errors.ECODE_INVAL)
9249 iname = self.cfg.ExpandInstanceName(self.op.name)
9250 if iname is not None:
9251 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
9252 iname, errors.ECODE_EXISTS)
9253 if not isinstance(self.op.nics, list):
9254 raise errors.OpPrereqError("Invalid parameter 'nics'",
9256 for row in self.op.nics:
9257 if (not isinstance(row, dict) or
9260 "bridge" not in row):
9261 raise errors.OpPrereqError("Invalid contents of the 'nics'"
9262 " parameter", errors.ECODE_INVAL)
9263 if not isinstance(self.op.disks, list):
9264 raise errors.OpPrereqError("Invalid parameter 'disks'",
9266 for row in self.op.disks:
9267 if (not isinstance(row, dict) or
9268 "size" not in row or
9269 not isinstance(row["size"], int) or
9270 "mode" not in row or
9271 row["mode"] not in ['r', 'w']):
9272 raise errors.OpPrereqError("Invalid contents of the 'disks'"
9273 " parameter", errors.ECODE_INVAL)
9274 if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
9275 self.op.hypervisor = self.cfg.GetHypervisorType()
9276 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9277 if not hasattr(self.op, "name"):
9278 raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
9280 fname = _ExpandInstanceName(self.cfg, self.op.name)
9281 self.op.name = fname
9282 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
9283 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9284 if not hasattr(self.op, "evac_nodes"):
9285 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
9286 " opcode input", errors.ECODE_INVAL)
9288 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
9289 self.op.mode, errors.ECODE_INVAL)
9291 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
9292 if not hasattr(self.op, "allocator") or self.op.allocator is None:
9293 raise errors.OpPrereqError("Missing allocator name",
9295 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
9296 raise errors.OpPrereqError("Wrong allocator test '%s'" %
9297 self.op.direction, errors.ECODE_INVAL)
9299 def Exec(self, feedback_fn):
9300 """Run the allocator test.
9303 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9304 ial = IAllocator(self.cfg, self.rpc,
9307 mem_size=self.op.mem_size,
9308 disks=self.op.disks,
9309 disk_template=self.op.disk_template,
9313 vcpus=self.op.vcpus,
9314 hypervisor=self.op.hypervisor,
9316 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9317 ial = IAllocator(self.cfg, self.rpc,
9320 relocate_from=list(self.relocate_from),
9322 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9323 ial = IAllocator(self.cfg, self.rpc,
9325 evac_nodes=self.op.evac_nodes)
9327 raise errors.ProgrammerError("Unhandled mode %s in"
9328 " LUTestAllocator.Exec", self.op.mode)
9330 if self.op.direction == constants.IALLOCATOR_DIR_IN:
9331 result = ial.in_text
9333 ial.Run(self.op.allocator, validate=False)
9334 result = ial.out_text