4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
38 from ganeti import ssh
39 from ganeti import utils
40 from ganeti import errors
41 from ganeti import hypervisor
42 from ganeti import locking
43 from ganeti import constants
44 from ganeti import objects
45 from ganeti import serializer
46 from ganeti import ssconf
49 class LogicalUnit(object):
50 """Logical Unit base class.
52 Subclasses must follow these rules:
53 - implement ExpandNames
54 - implement CheckPrereq (except when tasklets are used)
55 - implement Exec (except when tasklets are used)
56 - implement BuildHooksEnv
57 - redefine HPATH and HTYPE
58 - optionally redefine their run requirements:
59 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
61 Note that all commands require root permissions.
63 @ivar dry_run_result: the value (if any) that will be returned to the caller
64 in dry-run mode (signalled by opcode dry_run parameter)
72 def __init__(self, processor, op, context, rpc):
73 """Constructor for LogicalUnit.
75 This needs to be overridden in derived classes in order to check op validity.
81 self.cfg = context.cfg
82 self.context = context
84 # Dicts used to declare locking needs to mcpu
85 self.needed_locks = None
86 self.acquired_locks = {}
87 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
89 self.remove_locks = {}
90 # Used to force good behavior when calling helper functions
91 self.recalculate_locks = {}
94 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
95 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
96 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
98 self.dry_run_result = None
99 # support for generic debug attribute
100 if (not hasattr(self.op, "debug_level") or
101 not isinstance(self.op.debug_level, int)):
102 self.op.debug_level = 0
107 for attr_name in self._OP_REQP:
108 attr_val = getattr(op, attr_name, None)
110 raise errors.OpPrereqError("Required parameter '%s' missing" %
111 attr_name, errors.ECODE_INVAL)
113 self.CheckArguments()
116 """Returns the SshRunner object
120 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
123 ssh = property(fget=__GetSSH)
125 def CheckArguments(self):
126 """Check syntactic validity for the opcode arguments.
128 This method is for doing a simple syntactic check and ensuring the
129 validity of opcode parameters, without any cluster-related
130 checks. While the same can be accomplished in ExpandNames and/or
131 CheckPrereq, doing these separately is better because:
133 - ExpandNames is left as purely a lock-related function
134 - CheckPrereq is run after we have acquired locks (and possible
137 The function is allowed to change the self.op attribute so that
138 later methods no longer need to worry about missing parameters.
143 def ExpandNames(self):
144 """Expand names for this LU.
146 This method is called before starting to execute the opcode, and it should
147 update all the parameters of the opcode to their canonical form (e.g. a
148 short node name must be fully expanded after this method has successfully
149 completed). This way locking, hooks, logging, etc. can work correctly.
151 LUs which implement this method must also populate the self.needed_locks
152 member, as a dict with lock levels as keys, and a list of needed lock names
155 - use an empty dict if you don't need any lock
156 - if you don't need any lock at a particular level omit that level
157 - don't put anything for the BGL level
158 - if you want all locks at a level use locking.ALL_SET as a value
160 If you need to share locks (rather than acquire them exclusively) at one
161 level you can modify self.share_locks, setting a true value (usually 1) for
162 that level. By default locks are not shared.
164 This function can also define a list of tasklets, which then will be
165 executed in order instead of the usual LU-level CheckPrereq and Exec
166 functions, if those are not defined by the LU.
170 # Acquire all nodes and one instance
171 self.needed_locks = {
172 locking.LEVEL_NODE: locking.ALL_SET,
173 locking.LEVEL_INSTANCE: ['instance1.example.tld'],
175 # Acquire just two nodes
176 self.needed_locks = {
177 locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
180 self.needed_locks = {} # No, you can't leave it to the default value None
183 # The implementation of this method is mandatory only if the new LU is
184 # concurrent, so that old LUs don't need to be changed all at the same time.
187 self.needed_locks = {} # Exclusive LUs don't need locks.
189 raise NotImplementedError
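# Illustrative sketch (an assumption, not part of the original module): a
# concurrent LU operating on a single node might expand and lock it like
# this; "LUExampleNode" and the "node_name" opcode parameter are hypothetical
# names used only for the example.
#
#   class LUExampleNode(LogicalUnit):
#     _OP_REQP = ["node_name"]
#
#     def ExpandNames(self):
#       self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
#       self.needed_locks = {locking.LEVEL_NODE: [self.op.node_name]}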
191 def DeclareLocks(self, level):
192 """Declare LU locking needs for a level
194 While most LUs can just declare their locking needs at ExpandNames time,
195 sometimes there's the need to calculate some locks after having acquired
196 the ones before. This function is called just before acquiring locks at a
197 particular level, but after acquiring the ones at lower levels, and permits
198 such calculations. It can be used to modify self.needed_locks, and by
199 default it does nothing.
201 This function is only called if you have something already set in
202 self.needed_locks for the level.
204 @param level: Locking level which is going to be locked
205 @type level: member of ganeti.locking.LEVELS
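# Illustrative sketch (an assumption): a typical override recomputes the
# node-level locks once the lower-level (instance) locks are already held,
# leaving every other level untouched:
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()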
209 def CheckPrereq(self):
210 """Check prerequisites for this LU.
212 This method should check that the prerequisites for the execution
213 of this LU are fulfilled. It can do internode communication, but
214 it should be idempotent - no cluster or system changes are
217 The method should raise errors.OpPrereqError in case something is
218 not fulfilled. Its return value is ignored.
220 This method should also update all the parameters of the opcode to
221 their canonical form if it hasn't been done by ExpandNames before.
224 if self.tasklets is not None:
225 for (idx, tl) in enumerate(self.tasklets):
226 logging.debug("Checking prerequisites for tasklet %s/%s",
227 idx + 1, len(self.tasklets))
230 raise NotImplementedError
232 def Exec(self, feedback_fn):
235 This method should implement the actual work. It should raise
236 errors.OpExecError for failures that are somewhat dealt with in
240 if self.tasklets is not None:
241 for (idx, tl) in enumerate(self.tasklets):
242 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
245 raise NotImplementedError
247 def BuildHooksEnv(self):
248 """Build hooks environment for this LU.
250 This method should return a three-element tuple consisting of: a dict
251 containing the environment that will be used for running the
252 specific hook for this LU, a list of node names on which the hook
253 should run before the execution, and a list of node names on which
254 the hook should run after the execution.
256 The keys of the dict must not be prefixed with 'GANETI_', as this will
257 be handled in the hooks runner. Also note additional keys will be
258 added by the hooks runner. If the LU doesn't define any
259 environment, an empty dict (and not None) should be returned.
261 No nodes should be returned as an empty list (and not None).
263 Note that if the HPATH for a LU class is None, this function will not be called.
267 raise NotImplementedError
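# Illustrative sketch (an assumption): a minimal BuildHooksEnv could run the
# pre-hook on the master only and the post-hook on the master plus the target
# node; "self.op.node_name" is a hypothetical opcode parameter.
#
#   def BuildHooksEnv(self):
#     env = {"OP_TARGET": self.op.node_name}
#     mn = self.cfg.GetMasterNode()
#     return env, [mn], [mn, self.op.node_name]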
269 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
270 """Notify the LU about the results of its hooks.
272 This method is called every time a hooks phase is executed, and notifies
273 the Logical Unit about the hooks' result. The LU can then use it to alter
274 its result based on the hooks. By default the method does nothing and the
275 previous result is passed back unchanged but any LU can define it if it
276 wants to use the local cluster hook-scripts somehow.
278 @param phase: one of L{constants.HOOKS_PHASE_POST} or
279 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
280 @param hook_results: the results of the multi-node hooks rpc call
281 @param feedback_fn: function used to send feedback back to the caller
282 @param lu_result: the previous Exec result this LU had, or None
284 @return: the new Exec result, based on the previous result
288 # API must be kept, thus we ignore the unused-argument and
289 # could-be-a-function warnings
290 # pylint: disable-msg=W0613,R0201
293 def _ExpandAndLockInstance(self):
294 """Helper function to expand and lock an instance.
296 Many LUs that work on an instance take its name in self.op.instance_name
297 and need to expand it and then declare the expanded name for locking. This
298 function does it, and then updates self.op.instance_name to the expanded
299 name. It also initializes needed_locks as a dict, if this hasn't been done before.
303 if self.needed_locks is None:
304 self.needed_locks = {}
306 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
307 "_ExpandAndLockInstance called with instance-level locks set"
308 self.op.instance_name = _ExpandInstanceName(self.cfg,
309 self.op.instance_name)
310 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
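# Illustrative usage sketch (an assumption): an instance-level LU would call
# this helper from ExpandNames and, if it also needs the instance's nodes,
# request a node-lock recalculation which DeclareLocks can later satisfy via
# _LockInstancesNodes:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE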
312 def _LockInstancesNodes(self, primary_only=False):
313 """Helper function to declare instances' nodes for locking.
315 This function should be called after locking one or more instances to lock
316 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
317 with all primary or secondary nodes for instances already locked and
318 present in self.needed_locks[locking.LEVEL_INSTANCE].
320 It should be called from DeclareLocks, and for safety only works if
321 self.recalculate_locks[locking.LEVEL_NODE] is set.
323 In the future it may grow parameters to just lock some instance's nodes, or
324 to just lock primaries or secondary nodes, if needed.
326 It should be called in DeclareLocks in a way similar to::
328 if level == locking.LEVEL_NODE:
329 self._LockInstancesNodes()
331 @type primary_only: boolean
332 @param primary_only: only lock primary nodes of locked instances
335 assert locking.LEVEL_NODE in self.recalculate_locks, \
336 "_LockInstancesNodes helper function called with no nodes to recalculate"
338 # TODO: check if we've really been called with the instance locks held
340 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
341 # future we might want to have different behaviors depending on the value
342 # of self.recalculate_locks[locking.LEVEL_NODE]
344 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
345 instance = self.context.cfg.GetInstanceInfo(instance_name)
346 wanted_nodes.append(instance.primary_node)
348 wanted_nodes.extend(instance.secondary_nodes)
350 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
351 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
352 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
353 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
355 del self.recalculate_locks[locking.LEVEL_NODE]
358 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
359 """Simple LU which runs no hooks.
361 This LU is intended as a parent for other LogicalUnits which will
362 run no hooks, in order to reduce duplicate code.
368 def BuildHooksEnv(self):
369 """Empty BuildHooksEnv for NoHooksLu.
371 This just raises an error.
374 assert False, "BuildHooksEnv called for NoHooksLUs"
378 """Tasklet base class.
380 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
381 they can mix legacy code with tasklets. Locking needs to be done in the LU,
382 tasklets know nothing about locks.
384 Subclasses must follow these rules:
385 - Implement CheckPrereq
389 def __init__(self, lu):
396 def CheckPrereq(self):
397 """Check prerequisites for this tasklets.
399 This method should check whether the prerequisites for the execution of
400 this tasklet are fulfilled. It can do internode communication, but it
401 should be idempotent - no cluster or system changes are allowed.
403 The method should raise errors.OpPrereqError in case something is not
404 fulfilled. Its return value is ignored.
406 This method should also update all parameters to their canonical form if it
407 hasn't been done before.
410 raise NotImplementedError
412 def Exec(self, feedback_fn):
413 """Execute the tasklet.
415 This method should implement the actual work. It should raise
416 errors.OpExecError for failures that are somewhat dealt with in code, or
420 raise NotImplementedError
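# Illustrative sketch (an assumption): a minimal tasklet implements only
# CheckPrereq and Exec, while the owning LU stays responsible for locking.
# The class name and the instance_name attribute are hypothetical, and
# self.cfg is assumed to be set up by the Tasklet constructor.
#
#   class _ExampleTasklet(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       self.instance_name = _ExpandInstanceName(self.cfg, self.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Would act on instance %s" % self.instance_name)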
423 def _GetWantedNodes(lu, nodes):
424 """Returns list of checked and expanded node names.
426 @type lu: L{LogicalUnit}
427 @param lu: the logical unit on whose behalf we execute
429 @param nodes: list of node names or None for all nodes
431 @return: the list of nodes, sorted
432 @raise errors.ProgrammerError: if the nodes parameter is wrong type
435 if not isinstance(nodes, list):
436 raise errors.OpPrereqError("Invalid argument type 'nodes'",
440 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
441 " non-empty list of nodes whose name is to be expanded.")
443 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
444 return utils.NiceSort(wanted)
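# Illustrative usage sketch (an assumption): node-query LUs typically expand
# the user-supplied names in CheckPrereq, e.g.:
#
#   self.wanted = _GetWantedNodes(self, self.op.nodes)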
447 def _GetWantedInstances(lu, instances):
448 """Returns list of checked and expanded instance names.
450 @type lu: L{LogicalUnit}
451 @param lu: the logical unit on whose behalf we execute
452 @type instances: list
453 @param instances: list of instance names or None for all instances
455 @return: the list of instances, sorted
456 @raise errors.OpPrereqError: if the instances parameter is wrong type
457 @raise errors.OpPrereqError: if any of the passed instances is not found
460 if not isinstance(instances, list):
461 raise errors.OpPrereqError("Invalid argument type 'instances'",
465 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
467 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
471 def _CheckOutputFields(static, dynamic, selected):
472 """Checks whether all selected fields are valid.
474 @type static: L{utils.FieldSet}
475 @param static: static fields set
476 @type dynamic: L{utils.FieldSet}
477 @param dynamic: dynamic fields set
484 delta = f.NonMatching(selected)
486 raise errors.OpPrereqError("Unknown output fields selected: %s"
487 % ",".join(delta), errors.ECODE_INVAL)
490 def _CheckBooleanOpField(op, name):
491 """Validates boolean opcode parameters.
493 This will ensure that an opcode parameter is either a boolean value,
494 or None (but that it always exists).
497 val = getattr(op, name, None)
498 if not (val is None or isinstance(val, bool)):
499 raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
500 (name, str(val)), errors.ECODE_INVAL)
501 setattr(op, name, val)
504 def _CheckGlobalHvParams(params):
505 """Validates that given hypervisor params are not global ones.
507 This will ensure that instances don't get customised versions of
511 used_globals = constants.HVC_GLOBALS.intersection(params)
513 msg = ("The following hypervisor parameters are global and cannot"
514 " be customized at instance level, please modify them at"
515 " cluster level: %s" % utils.CommaJoin(used_globals))
516 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
519 def _CheckNodeOnline(lu, node):
520 """Ensure that a given node is online.
522 @param lu: the LU on behalf of which we make the check
523 @param node: the node to check
524 @raise errors.OpPrereqError: if the node is offline
527 if lu.cfg.GetNodeInfo(node).offline:
528 raise errors.OpPrereqError("Can't use offline node %s" % node,
532 def _CheckNodeNotDrained(lu, node):
533 """Ensure that a given node is not drained.
535 @param lu: the LU on behalf of which we make the check
536 @param node: the node to check
537 @raise errors.OpPrereqError: if the node is drained
540 if lu.cfg.GetNodeInfo(node).drained:
541 raise errors.OpPrereqError("Can't use drained node %s" % node,
545 def _CheckNodeHasOS(lu, node, os_name, force_variant):
546 """Ensure that a node supports a given OS.
548 @param lu: the LU on behalf of which we make the check
549 @param node: the node to check
550 @param os_name: the OS to query about
551 @param force_variant: whether to ignore variant errors
552 @raise errors.OpPrereqError: if the node is not supporting the OS
555 result = lu.rpc.call_os_get(node, os_name)
556 result.Raise("OS '%s' not in supported OS list for node %s" %
558 prereq=True, ecode=errors.ECODE_INVAL)
559 if not force_variant:
560 _CheckOSVariant(result.payload, os_name)
563 def _CheckDiskTemplate(template):
564 """Ensure a given disk template is valid.
567 if template not in constants.DISK_TEMPLATES:
568 msg = ("Invalid disk template name '%s', valid templates are: %s" %
569 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
570 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
571 if template == constants.DT_FILE and not constants.ENABLE_FILE_STORAGE:
572 raise errors.OpPrereqError("File storage disabled at configure time",
576 def _CheckInstanceDown(lu, instance, reason):
577 """Ensure that an instance is not running."""
578 if instance.admin_up:
579 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
580 (instance.name, reason), errors.ECODE_STATE)
582 pnode = instance.primary_node
583 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
584 ins_l.Raise("Can't contact node %s for instance information" % pnode,
585 prereq=True, ecode=errors.ECODE_ENVIRON)
587 if instance.name in ins_l.payload:
588 raise errors.OpPrereqError("Instance %s is running, %s" %
589 (instance.name, reason), errors.ECODE_STATE)
592 def _ExpandItemName(fn, name, kind):
593 """Expand an item name.
595 @param fn: the function to use for expansion
596 @param name: requested item name
597 @param kind: text description ('Node' or 'Instance')
598 @return: the resolved (full) name
599 @raise errors.OpPrereqError: if the item is not found
603 if full_name is None:
604 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
609 def _ExpandNodeName(cfg, name):
610 """Wrapper over L{_ExpandItemName} for nodes."""
611 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
614 def _ExpandInstanceName(cfg, name):
615 """Wrapper over L{_ExpandItemName} for instance."""
616 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
619 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
620 memory, vcpus, nics, disk_template, disks,
621 bep, hvp, hypervisor_name):
622 """Builds instance related env variables for hooks
624 This builds the hook environment from individual variables.
627 @param name: the name of the instance
628 @type primary_node: string
629 @param primary_node: the name of the instance's primary node
630 @type secondary_nodes: list
631 @param secondary_nodes: list of secondary nodes as strings
632 @type os_type: string
633 @param os_type: the name of the instance's OS
634 @type status: boolean
635 @param status: the should_run status of the instance
637 @param memory: the memory size of the instance
639 @param vcpus: the count of VCPUs the instance has
641 @param nics: list of tuples (ip, mac, mode, link) representing
642 the NICs the instance has
643 @type disk_template: string
644 @param disk_template: the disk template of the instance
646 @param disks: the list of (size, mode) pairs
648 @param bep: the backend parameters for the instance
650 @param hvp: the hypervisor parameters for the instance
651 @type hypervisor_name: string
652 @param hypervisor_name: the hypervisor for the instance
654 @return: the hook environment for this instance
663 "INSTANCE_NAME": name,
664 "INSTANCE_PRIMARY": primary_node,
665 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
666 "INSTANCE_OS_TYPE": os_type,
667 "INSTANCE_STATUS": str_status,
668 "INSTANCE_MEMORY": memory,
669 "INSTANCE_VCPUS": vcpus,
670 "INSTANCE_DISK_TEMPLATE": disk_template,
671 "INSTANCE_HYPERVISOR": hypervisor_name,
675 nic_count = len(nics)
676 for idx, (ip, mac, mode, link) in enumerate(nics):
679 env["INSTANCE_NIC%d_IP" % idx] = ip
680 env["INSTANCE_NIC%d_MAC" % idx] = mac
681 env["INSTANCE_NIC%d_MODE" % idx] = mode
682 env["INSTANCE_NIC%d_LINK" % idx] = link
683 if mode == constants.NIC_MODE_BRIDGED:
684 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
688 env["INSTANCE_NIC_COUNT"] = nic_count
691 disk_count = len(disks)
692 for idx, (size, mode) in enumerate(disks):
693 env["INSTANCE_DISK%d_SIZE" % idx] = size
694 env["INSTANCE_DISK%d_MODE" % idx] = mode
698 env["INSTANCE_DISK_COUNT"] = disk_count
700 for source, kind in [(bep, "BE"), (hvp, "HV")]:
701 for key, value in source.items():
702 env["INSTANCE_%s_%s" % (kind, key)] = value
707 def _NICListToTuple(lu, nics):
708 """Build a list of nic information tuples.
710 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
711 value in LUQueryInstanceData.
713 @type lu: L{LogicalUnit}
714 @param lu: the logical unit on whose behalf we execute
715 @type nics: list of L{objects.NIC}
716 @param nics: list of nics to convert to hooks tuples
720 c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
724 filled_params = objects.FillDict(c_nicparams, nic.nicparams)
725 mode = filled_params[constants.NIC_MODE]
726 link = filled_params[constants.NIC_LINK]
727 hooks_nics.append((ip, mac, mode, link))
731 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
732 """Builds instance related env variables for hooks from an object.
734 @type lu: L{LogicalUnit}
735 @param lu: the logical unit on whose behalf we execute
736 @type instance: L{objects.Instance}
737 @param instance: the instance for which we should build the
740 @param override: dictionary with key/values that will override
743 @return: the hook environment dictionary
746 cluster = lu.cfg.GetClusterInfo()
747 bep = cluster.FillBE(instance)
748 hvp = cluster.FillHV(instance)
750 'name': instance.name,
751 'primary_node': instance.primary_node,
752 'secondary_nodes': instance.secondary_nodes,
753 'os_type': instance.os,
754 'status': instance.admin_up,
755 'memory': bep[constants.BE_MEMORY],
756 'vcpus': bep[constants.BE_VCPUS],
757 'nics': _NICListToTuple(lu, instance.nics),
758 'disk_template': instance.disk_template,
759 'disks': [(disk.size, disk.mode) for disk in instance.disks],
762 'hypervisor_name': instance.hypervisor,
765 args.update(override)
766 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
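# Illustrative usage sketch (an assumption): instance LUs usually build their
# hook environment straight from the configuration object, optionally
# overriding individual arguments of _BuildInstanceHookEnv:
#
#   env = _BuildInstanceHookEnvByObject(self, self.instance,
#                                       override={"status": False})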
769 def _AdjustCandidatePool(lu, exceptions):
770 """Adjust the candidate pool after node operations.
773 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
775 lu.LogInfo("Promoted nodes to master candidate role: %s",
776 utils.CommaJoin(node.name for node in mod_list))
777 for name in mod_list:
778 lu.context.ReaddNode(name)
779 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
781 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
785 def _DecideSelfPromotion(lu, exceptions=None):
786 """Decide whether I should promote myself as a master candidate.
789 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
790 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
791 # the new node will increase mc_max by one, so:
792 mc_should = min(mc_should + 1, cp_size)
793 return mc_now < mc_should
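# Illustrative usage sketch (an assumption): node-addition code can use this
# helper to decide whether the node currently being processed should become a
# master candidate right away:
#
#   self.master_candidate = _DecideSelfPromotion(self)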
796 def _CheckNicsBridgesExist(lu, target_nics, target_node,
797 profile=constants.PP_DEFAULT):
798 """Check that the brigdes needed by a list of nics exist.
801 c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
802 paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
803 for nic in target_nics]
804 brlist = [params[constants.NIC_LINK] for params in paramslist
805 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
807 result = lu.rpc.call_bridges_exist(target_node, brlist)
808 result.Raise("Error checking bridges on destination node '%s'" %
809 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
812 def _CheckInstanceBridgesExist(lu, instance, node=None):
813 """Check that the brigdes needed by an instance exist.
817 node = instance.primary_node
818 _CheckNicsBridgesExist(lu, instance.nics, node)
821 def _CheckOSVariant(os_obj, name):
822 """Check whether an OS name conforms to the os variants specification.
824 @type os_obj: L{objects.OS}
825 @param os_obj: OS object to check
827 @param name: OS name passed by the user, to check for validity
830 if not os_obj.supported_variants:
833 variant = name.split("+", 1)[1]
835 raise errors.OpPrereqError("OS name must include a variant",
838 if variant not in os_obj.supported_variants:
839 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
842 def _GetNodeInstancesInner(cfg, fn):
843 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
846 def _GetNodeInstances(cfg, node_name):
847 """Returns a list of all primary and secondary instances on a node.
851 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
854 def _GetNodePrimaryInstances(cfg, node_name):
855 """Returns primary instances on a node.
858 return _GetNodeInstancesInner(cfg,
859 lambda inst: node_name == inst.primary_node)
862 def _GetNodeSecondaryInstances(cfg, node_name):
863 """Returns secondary instances on a node.
866 return _GetNodeInstancesInner(cfg,
867 lambda inst: node_name in inst.secondary_nodes)
870 def _GetStorageTypeArgs(cfg, storage_type):
871 """Returns the arguments for a storage type.
874 # Special case for file storage
875 if storage_type == constants.ST_FILE:
876 # storage.FileStorage wants a list of storage directories
877 return [[cfg.GetFileStorageDir()]]
882 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
885 for dev in instance.disks:
886 cfg.SetDiskID(dev, node_name)
888 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
889 result.Raise("Failed to get disk status from node %s" % node_name,
890 prereq=prereq, ecode=errors.ECODE_ENVIRON)
892 for idx, bdev_status in enumerate(result.payload):
893 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
899 def _FormatTimestamp(secs):
900 """Formats a Unix timestamp with the local timezone.
903 return time.strftime("%F %T %Z", time.gmtime(secs))
906 class LUPostInitCluster(LogicalUnit):
907 """Logical unit for running hooks after cluster initialization.
910 HPATH = "cluster-init"
911 HTYPE = constants.HTYPE_CLUSTER
914 def BuildHooksEnv(self):
918 env = {"OP_TARGET": self.cfg.GetClusterName()}
919 mn = self.cfg.GetMasterNode()
922 def CheckPrereq(self):
923 """No prerequisites to check.
928 def Exec(self, feedback_fn):
935 class LUDestroyCluster(LogicalUnit):
936 """Logical unit for destroying the cluster.
939 HPATH = "cluster-destroy"
940 HTYPE = constants.HTYPE_CLUSTER
943 def BuildHooksEnv(self):
947 env = {"OP_TARGET": self.cfg.GetClusterName()}
950 def CheckPrereq(self):
951 """Check prerequisites.
953 This checks whether the cluster is empty.
955 Any errors are signaled by raising errors.OpPrereqError.
958 master = self.cfg.GetMasterNode()
960 nodelist = self.cfg.GetNodeList()
961 if len(nodelist) != 1 or nodelist[0] != master:
962 raise errors.OpPrereqError("There are still %d node(s) in"
963 " this cluster." % (len(nodelist) - 1),
965 instancelist = self.cfg.GetInstanceList()
967 raise errors.OpPrereqError("There are still %d instance(s) in"
968 " this cluster." % len(instancelist),
971 def Exec(self, feedback_fn):
972 """Destroys the cluster.
975 master = self.cfg.GetMasterNode()
976 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
978 # Run post hooks on master node before it's removed
979 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
981 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
983 # pylint: disable-msg=W0702
984 self.LogWarning("Errors occurred running hooks on %s" % master)
986 result = self.rpc.call_node_stop_master(master, False)
987 result.Raise("Could not disable the master role")
990 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
991 utils.CreateBackup(priv_key)
992 utils.CreateBackup(pub_key)
997 def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
998 warn_days=constants.SSL_CERT_EXPIRATION_WARN,
999 error_days=constants.SSL_CERT_EXPIRATION_ERROR):
1000 """Verifies certificate details for LUVerifyCluster.
1004 msg = "Certificate %s is expired" % filename
1006 if not_before is not None and not_after is not None:
1007 msg += (" (valid from %s to %s)" %
1008 (_FormatTimestamp(not_before),
1009 _FormatTimestamp(not_after)))
1010 elif not_before is not None:
1011 msg += " (valid from %s)" % _FormatTimestamp(not_before)
1012 elif not_after is not None:
1013 msg += " (valid until %s)" % _FormatTimestamp(not_after)
1015 return (LUVerifyCluster.ETYPE_ERROR, msg)
1017 elif not_before is not None and not_before > now:
1018 return (LUVerifyCluster.ETYPE_WARNING,
1019 "Certificate %s not yet valid (valid from %s)" %
1020 (filename, _FormatTimestamp(not_before)))
1022 elif not_after is not None:
1023 remaining_days = int((not_after - now) / (24 * 3600))
1025 msg = ("Certificate %s expires in %d days" % (filename, remaining_days))
1027 if remaining_days <= error_days:
1028 return (LUVerifyCluster.ETYPE_ERROR, msg)
1030 if remaining_days <= warn_days:
1031 return (LUVerifyCluster.ETYPE_WARNING, msg)
1036 def _VerifyCertificate(filename):
1037 """Verifies a certificate for LUVerifyCluster.
1039 @type filename: string
1040 @param filename: Path to PEM file
1044 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1045 utils.ReadFile(filename))
1046 except Exception, err: # pylint: disable-msg=W0703
1047 return (LUVerifyCluster.ETYPE_ERROR,
1048 "Failed to load X509 certificate %s: %s" % (filename, err))
1050 # Depending on the pyOpenSSL version, this can just return (None, None)
1051 (not_before, not_after) = utils.GetX509CertValidity(cert)
1053 return _VerifyCertificateInner(filename, cert.has_expired(),
1054 not_before, not_after, time.time())
1057 class LUVerifyCluster(LogicalUnit):
1058 """Verifies the cluster status.
1061 HPATH = "cluster-verify"
1062 HTYPE = constants.HTYPE_CLUSTER
1063 _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1066 TCLUSTER = "cluster"
1068 TINSTANCE = "instance"
1070 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1071 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1072 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1073 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1074 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1075 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1077 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1078 ENODEDRBD = (TNODE, "ENODEDRBD")
1079 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1080 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1081 ENODEHV = (TNODE, "ENODEHV")
1082 ENODELVM = (TNODE, "ENODELVM")
1083 ENODEN1 = (TNODE, "ENODEN1")
1084 ENODENET = (TNODE, "ENODENET")
1085 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1086 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1087 ENODERPC = (TNODE, "ENODERPC")
1088 ENODESSH = (TNODE, "ENODESSH")
1089 ENODEVERSION = (TNODE, "ENODEVERSION")
1090 ENODESETUP = (TNODE, "ENODESETUP")
1091 ENODETIME = (TNODE, "ENODETIME")
1093 ETYPE_FIELD = "code"
1094 ETYPE_ERROR = "ERROR"
1095 ETYPE_WARNING = "WARNING"
1097 class NodeImage(object):
1098 """A class representing the logical and physical status of a node.
1100 @ivar volumes: a structure as returned from
1101 L{ganeti.backend.GetVolumeList} (runtime)
1102 @ivar instances: a list of running instances (runtime)
1103 @ivar pinst: list of configured primary instances (config)
1104 @ivar sinst: list of configured secondary instances (config)
1105 @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1106 of this node (config)
1107 @ivar mfree: free memory, as reported by hypervisor (runtime)
1108 @ivar dfree: free disk, as reported by the node (runtime)
1109 @ivar offline: the offline status (config)
1110 @type rpc_fail: boolean
1111 @ivar rpc_fail: whether the RPC verify call failed (overall,
1112 not whether the individual keys were correct) (runtime)
1113 @type lvm_fail: boolean
1114 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1115 @type hyp_fail: boolean
1116 @ivar hyp_fail: whether the RPC call didn't return the instance list
1117 @type ghost: boolean
1118 @ivar ghost: whether this is a known node or not (config)
1121 def __init__(self, offline=False):
1129 self.offline = offline
1130 self.rpc_fail = False
1131 self.lvm_fail = False
1132 self.hyp_fail = False
1135 def ExpandNames(self):
1136 self.needed_locks = {
1137 locking.LEVEL_NODE: locking.ALL_SET,
1138 locking.LEVEL_INSTANCE: locking.ALL_SET,
1140 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1142 def _Error(self, ecode, item, msg, *args, **kwargs):
1143 """Format an error message.
1145 Based on the opcode's error_codes parameter, either format a
1146 parseable error code, or a simpler error string.
1148 This must be called only from Exec and functions called from Exec.
1151 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1153 # first complete the msg
1156 # then format the whole message
1157 if self.op.error_codes:
1158 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1164 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1165 # and finally report it via the feedback_fn
1166 self._feedback_fn(" - %s" % msg)
1168 def _ErrorIf(self, cond, *args, **kwargs):
1169 """Log an error message if the passed condition is True.
1172 cond = bool(cond) or self.op.debug_simulate_errors
1174 self._Error(*args, **kwargs)
1175 # do not mark the operation as failed for WARN cases only
1176 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1177 self.bad = self.bad or cond
1179 def _VerifyNode(self, ninfo, nresult):
1180 """Run multiple tests against a node.
1184 - compares ganeti version
1185 - checks vg existence and size > 20G
1186 - checks config file checksum
1187 - checks ssh to other nodes
1189 @type ninfo: L{objects.Node}
1190 @param ninfo: the node to check
1191 @param nresult: the results from the node
1193 @return: whether overall this call was successful (and we can expect
1194 reasonable values in the response)
1198 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1200 # main result, nresult should be a non-empty dict
1201 test = not nresult or not isinstance(nresult, dict)
1202 _ErrorIf(test, self.ENODERPC, node,
1203 "unable to verify node: no data returned")
1207 # compares ganeti version
1208 local_version = constants.PROTOCOL_VERSION
1209 remote_version = nresult.get("version", None)
1210 test = not (remote_version and
1211 isinstance(remote_version, (list, tuple)) and
1212 len(remote_version) == 2)
1213 _ErrorIf(test, self.ENODERPC, node,
1214 "connection to node returned invalid data")
1218 test = local_version != remote_version[0]
1219 _ErrorIf(test, self.ENODEVERSION, node,
1220 "incompatible protocol versions: master %s,"
1221 " node %s", local_version, remote_version[0])
1225 # node seems compatible, we can actually try to look into its results
1227 # full package version
1228 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1229 self.ENODEVERSION, node,
1230 "software version mismatch: master %s, node %s",
1231 constants.RELEASE_VERSION, remote_version[1],
1232 code=self.ETYPE_WARNING)
1234 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1235 if isinstance(hyp_result, dict):
1236 for hv_name, hv_result in hyp_result.iteritems():
1237 test = hv_result is not None
1238 _ErrorIf(test, self.ENODEHV, node,
1239 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1242 test = nresult.get(constants.NV_NODESETUP,
1243 ["Missing NODESETUP results"])
1244 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1249 def _VerifyNodeTime(self, ninfo, nresult,
1250 nvinfo_starttime, nvinfo_endtime):
1251 """Check the node time.
1253 @type ninfo: L{objects.Node}
1254 @param ninfo: the node to check
1255 @param nresult: the remote results for the node
1256 @param nvinfo_starttime: the start time of the RPC call
1257 @param nvinfo_endtime: the end time of the RPC call
1261 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1263 ntime = nresult.get(constants.NV_TIME, None)
1265 ntime_merged = utils.MergeTime(ntime)
1266 except (ValueError, TypeError):
1267 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1270 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1271 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1272 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1273 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1277 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1278 "Node time diverges by at least %s from master node time",
1281 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1282 """Check the node time.
1284 @type ninfo: L{objects.Node}
1285 @param ninfo: the node to check
1286 @param nresult: the remote results for the node
1287 @param vg_name: the configured VG name
1294 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1296 # checks vg existence and size > 20G
1297 vglist = nresult.get(constants.NV_VGLIST, None)
1299 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1301 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1302 constants.MIN_VG_SIZE)
1303 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1306 pvlist = nresult.get(constants.NV_PVLIST, None)
1307 test = pvlist is None
1308 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1310 # check that ':' is not present in PV names, since it's a
1311 # special character for lvcreate (denotes the range of PEs to
1313 for _, pvname, owner_vg in pvlist:
1314 test = ":" in pvname
1315 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1316 " '%s' of VG '%s'", pvname, owner_vg)
1318 def _VerifyNodeNetwork(self, ninfo, nresult):
1319 """Check the node time.
1321 @type ninfo: L{objects.Node}
1322 @param ninfo: the node to check
1323 @param nresult: the remote results for the node
1327 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1329 test = constants.NV_NODELIST not in nresult
1330 _ErrorIf(test, self.ENODESSH, node,
1331 "node hasn't returned node ssh connectivity data")
1333 if nresult[constants.NV_NODELIST]:
1334 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1335 _ErrorIf(True, self.ENODESSH, node,
1336 "ssh communication with node '%s': %s", a_node, a_msg)
1338 test = constants.NV_NODENETTEST not in nresult
1339 _ErrorIf(test, self.ENODENET, node,
1340 "node hasn't returned node tcp connectivity data")
1342 if nresult[constants.NV_NODENETTEST]:
1343 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1345 _ErrorIf(True, self.ENODENET, node,
1346 "tcp communication with node '%s': %s",
1347 anode, nresult[constants.NV_NODENETTEST][anode])
1349 def _VerifyInstance(self, instance, instanceconfig, node_image):
1350 """Verify an instance.
1352 This function checks to see if the required block devices are
1353 available on the instance's node.
1356 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1357 node_current = instanceconfig.primary_node
1359 node_vol_should = {}
1360 instanceconfig.MapLVsByNode(node_vol_should)
1362 for node in node_vol_should:
1363 n_img = node_image[node]
1364 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1365 # ignore missing volumes on offline or broken nodes
1367 for volume in node_vol_should[node]:
1368 test = volume not in n_img.volumes
1369 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1370 "volume %s missing on node %s", volume, node)
1372 if instanceconfig.admin_up:
1373 pri_img = node_image[node_current]
1374 test = instance not in pri_img.instances and not pri_img.offline
1375 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1376 "instance not running on its primary node %s",
1379 for node, n_img in node_image.items():
1380 if (not node == node_current):
1381 test = instance in n_img.instances
1382 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1383 "instance should not run on node %s", node)
1385 def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1386 """Verify if there are any unknown volumes in the cluster.
1388 The .os, .swap and backup volumes are ignored. All other volumes are
1389 reported as unknown.
1392 for node, n_img in node_image.items():
1393 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1394 # skip non-healthy nodes
1396 for volume in n_img.volumes:
1397 test = (node not in node_vol_should or
1398 volume not in node_vol_should[node])
1399 self._ErrorIf(test, self.ENODEORPHANLV, node,
1400 "volume %s is unknown", volume)
1402 def _VerifyOrphanInstances(self, instancelist, node_image):
1403 """Verify the list of running instances.
1405 This checks what instances are running but unknown to the cluster.
1408 for node, n_img in node_image.items():
1409 for o_inst in n_img.instances:
1410 test = o_inst not in instancelist
1411 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1412 "instance %s on node %s should not exist", o_inst, node)
1414 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1415 """Verify N+1 Memory Resilience.
1417 Check that if one single node dies we can still start all the
1418 instances it was primary for.
1421 for node, n_img in node_image.items():
1422 # This code checks that every node which is now listed as
1423 # secondary has enough memory to host all instances it is
1424 # supposed to should a single other node in the cluster fail.
1425 # FIXME: not ready for failover to an arbitrary node
1426 # FIXME: does not support file-backed instances
1427 # WARNING: we currently take into account down instances as well
1428 # as up ones, considering that even if they're down someone
1429 # might want to start them even in the event of a node failure.
1430 for prinode, instances in n_img.sbp.items():
1432 for instance in instances:
1433 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1434 if bep[constants.BE_AUTO_BALANCE]:
1435 needed_mem += bep[constants.BE_MEMORY]
1436 test = n_img.mfree < needed_mem
1437 self._ErrorIf(test, self.ENODEN1, node,
1438 "not enough memory on to accommodate"
1439 " failovers should peer node %s fail", prinode)
1441 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1443 """Verifies and computes the node required file checksums.
1445 @type ninfo: L{objects.Node}
1446 @param ninfo: the node to check
1447 @param nresult: the remote results for the node
1448 @param file_list: required list of files
1449 @param local_cksum: dictionary of local files and their checksums
1450 @param master_files: list of files that only masters should have
1454 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1456 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1457 test = not isinstance(remote_cksum, dict)
1458 _ErrorIf(test, self.ENODEFILECHECK, node,
1459 "node hasn't returned file checksum data")
1463 for file_name in file_list:
1464 node_is_mc = ninfo.master_candidate
1465 must_have = (file_name not in master_files) or node_is_mc
1467 test1 = file_name not in remote_cksum
1469 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1471 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1472 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1473 "file '%s' missing", file_name)
1474 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1475 "file '%s' has wrong checksum", file_name)
1476 # not candidate and this is not a must-have file
1477 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1478 "file '%s' should not exist on non master"
1479 " candidates (and the file is outdated)", file_name)
1480 # all good, except non-master/non-must have combination
1481 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1482 "file '%s' should not exist"
1483 " on non master candidates", file_name)
1485 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1486 """Verifies and the node DRBD status.
1488 @type ninfo: L{objects.Node}
1489 @param ninfo: the node to check
1490 @param nresult: the remote results for the node
1491 @param instanceinfo: the dict of instances
1492 @param drbd_map: the DRBD map as returned by
1493 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1497 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1499 # compute the DRBD minors
1501 for minor, instance in drbd_map[node].items():
1502 test = instance not in instanceinfo
1503 _ErrorIf(test, self.ECLUSTERCFG, None,
1504 "ghost instance '%s' in temporary DRBD map", instance)
1505 # ghost instance should not be running, but otherwise we
1506 # don't give double warnings (both ghost instance and
1507 # unallocated minor in use)
1509 node_drbd[minor] = (instance, False)
1511 instance = instanceinfo[instance]
1512 node_drbd[minor] = (instance.name, instance.admin_up)
1514 # and now check them
1515 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1516 test = not isinstance(used_minors, (tuple, list))
1517 _ErrorIf(test, self.ENODEDRBD, node,
1518 "cannot parse drbd status file: %s", str(used_minors))
1520 # we cannot check drbd status
1523 for minor, (iname, must_exist) in node_drbd.items():
1524 test = minor not in used_minors and must_exist
1525 _ErrorIf(test, self.ENODEDRBD, node,
1526 "drbd minor %d of instance %s is not active", minor, iname)
1527 for minor in used_minors:
1528 test = minor not in node_drbd
1529 _ErrorIf(test, self.ENODEDRBD, node,
1530 "unallocated drbd minor %d is in use", minor)
1532 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1533 """Verifies and updates the node volume data.
1535 This function will update a L{NodeImage}'s internal structures
1536 with data from the remote call.
1538 @type ninfo: L{objects.Node}
1539 @param ninfo: the node to check
1540 @param nresult: the remote results for the node
1541 @param nimg: the node image object
1542 @param vg_name: the configured VG name
1546 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1548 nimg.lvm_fail = True
1549 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1552 elif isinstance(lvdata, basestring):
1553 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1554 utils.SafeEncode(lvdata))
1555 elif not isinstance(lvdata, dict):
1556 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1558 nimg.volumes = lvdata
1559 nimg.lvm_fail = False
1561 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1562 """Verifies and updates the node instance list.
1564 If the listing was successful, then updates this node's instance
1565 list. Otherwise, it marks the RPC call as failed for the instance
1568 @type ninfo: L{objects.Node}
1569 @param ninfo: the node to check
1570 @param nresult: the remote results for the node
1571 @param nimg: the node image object
1574 idata = nresult.get(constants.NV_INSTANCELIST, None)
1575 test = not isinstance(idata, list)
1576 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1577 " (instancelist): %s", utils.SafeEncode(str(idata)))
1579 nimg.hyp_fail = True
1581 nimg.instances = idata
1583 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1584 """Verifies and computes a node information map
1586 @type ninfo: L{objects.Node}
1587 @param ninfo: the node to check
1588 @param nresult: the remote results for the node
1589 @param nimg: the node image object
1590 @param vg_name: the configured VG name
1594 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1596 # try to read free memory (from the hypervisor)
1597 hv_info = nresult.get(constants.NV_HVINFO, None)
1598 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1599 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1602 nimg.mfree = int(hv_info["memory_free"])
1603 except (ValueError, TypeError):
1604 _ErrorIf(True, self.ENODERPC, node,
1605 "node returned invalid nodeinfo, check hypervisor")
1607 # FIXME: devise a free space model for file based instances as well
1608 if vg_name is not None:
1609 test = (constants.NV_VGLIST not in nresult or
1610 vg_name not in nresult[constants.NV_VGLIST])
1611 _ErrorIf(test, self.ENODELVM, node,
1612 "node didn't return data for the volume group '%s'"
1613 " - it is either missing or broken", vg_name)
1616 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1617 except (ValueError, TypeError):
1618 _ErrorIf(True, self.ENODERPC, node,
1619 "node returned invalid LVM info, check LVM status")
1621 def CheckPrereq(self):
1622 """Check prerequisites.
1624 Transform the list of checks we're going to skip into a set and check that
1625 all its members are valid.
1628 self.skip_set = frozenset(self.op.skip_checks)
1629 if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1630 raise errors.OpPrereqError("Invalid checks to be skipped specified",
1633 def BuildHooksEnv(self):
1636 Cluster-Verify hooks are run only in the post phase; when they fail, their
1637 output is logged in the verify output and the verification fails.
1640 all_nodes = self.cfg.GetNodeList()
1642 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1644 for node in self.cfg.GetAllNodesInfo().values():
1645 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1647 return env, [], all_nodes
1649 def Exec(self, feedback_fn):
1650 """Verify integrity of cluster, performing various test on nodes.
1654 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1655 verbose = self.op.verbose
1656 self._feedback_fn = feedback_fn
1657 feedback_fn("* Verifying global settings")
1658 for msg in self.cfg.VerifyConfig():
1659 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1661 # Check the cluster certificates
1662 for cert_filename in constants.ALL_CERT_FILES:
1663 (errcode, msg) = _VerifyCertificate(cert_filename)
1664 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1666 vg_name = self.cfg.GetVGName()
1667 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1668 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1669 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1670 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1671 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1672 for iname in instancelist)
1673 i_non_redundant = [] # Non redundant instances
1674 i_non_a_balanced = [] # Non auto-balanced instances
1675 n_offline = 0 # Count of offline nodes
1676 n_drained = 0 # Count of nodes being drained
1677 node_vol_should = {}
1679 # FIXME: verify OS list
1680 # do local checksums
1681 master_files = [constants.CLUSTER_CONF_FILE]
1683 file_names = ssconf.SimpleStore().GetFileList()
1684 file_names.extend(constants.ALL_CERT_FILES)
1685 file_names.extend(master_files)
1687 local_checksums = utils.FingerprintFiles(file_names)
1689 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1690 node_verify_param = {
1691 constants.NV_FILELIST: file_names,
1692 constants.NV_NODELIST: [node.name for node in nodeinfo
1693 if not node.offline],
1694 constants.NV_HYPERVISOR: hypervisors,
1695 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1696 node.secondary_ip) for node in nodeinfo
1697 if not node.offline],
1698 constants.NV_INSTANCELIST: hypervisors,
1699 constants.NV_VERSION: None,
1700 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1701 constants.NV_NODESETUP: None,
1702 constants.NV_TIME: None,
1705 if vg_name is not None:
1706 node_verify_param[constants.NV_VGLIST] = None
1707 node_verify_param[constants.NV_LVLIST] = vg_name
1708 node_verify_param[constants.NV_PVLIST] = [vg_name]
1709 node_verify_param[constants.NV_DRBDLIST] = None
1711 # Build our expected cluster state
1712 node_image = dict((node.name, self.NodeImage(offline=node.offline))
1713 for node in nodeinfo)
1715 for instance in instancelist:
1716 inst_config = instanceinfo[instance]
1718 for nname in inst_config.all_nodes:
1719 if nname not in node_image:
1721 gnode = self.NodeImage()
1723 node_image[nname] = gnode
1725 inst_config.MapLVsByNode(node_vol_should)
1727 pnode = inst_config.primary_node
1728 node_image[pnode].pinst.append(instance)
1730 for snode in inst_config.secondary_nodes:
1731 nimg = node_image[snode]
1732 nimg.sinst.append(instance)
1733 if pnode not in nimg.sbp:
1734 nimg.sbp[pnode] = []
1735 nimg.sbp[pnode].append(instance)
1737 # At this point, we have the in-memory data structures complete,
1738 # except for the runtime information, which we'll gather next
1740 # Due to the way our RPC system works, exact response times cannot be
1741 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1742 # time before and after executing the request, we can at least have a time
1744 nvinfo_starttime = time.time()
1745 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1746 self.cfg.GetClusterName())
1747 nvinfo_endtime = time.time()
1749 cluster = self.cfg.GetClusterInfo()
1750 master_node = self.cfg.GetMasterNode()
1751 all_drbd_map = self.cfg.ComputeDRBDMap()
1753 feedback_fn("* Verifying node status")
1754 for node_i in nodeinfo:
1756 nimg = node_image[node]
1760 feedback_fn("* Skipping offline node %s" % (node,))
1764 if node == master_node:
1766 elif node_i.master_candidate:
1767 ntype = "master candidate"
1768 elif node_i.drained:
1774 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1776 msg = all_nvinfo[node].fail_msg
1777 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1779 nimg.rpc_fail = True
1782 nresult = all_nvinfo[node].payload
1784 nimg.call_ok = self._VerifyNode(node_i, nresult)
1785 self._VerifyNodeNetwork(node_i, nresult)
1786 self._VerifyNodeLVM(node_i, nresult, vg_name)
1787 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1789 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1790 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1792 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1793 self._UpdateNodeInstances(node_i, nresult, nimg)
1794 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1796 feedback_fn("* Verifying instance status")
1797 for instance in instancelist:
1799 feedback_fn("* Verifying instance %s" % instance)
1800 inst_config = instanceinfo[instance]
1801 self._VerifyInstance(instance, inst_config, node_image)
1802 inst_nodes_offline = []
1804 pnode = inst_config.primary_node
1805 pnode_img = node_image[pnode]
1806 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1807 self.ENODERPC, pnode, "instance %s, connection to"
1808 " primary node failed", instance)
1810 if pnode_img.offline:
1811 inst_nodes_offline.append(pnode)
1813 # If the instance is non-redundant we cannot survive losing its primary
1814 # node, so we are not N+1 compliant. On the other hand we have no disk
1815 # templates with more than one secondary so that situation is not well
1817 # FIXME: does not support file-backed instances
1818 if not inst_config.secondary_nodes:
1819 i_non_redundant.append(instance)
1820 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
1821 instance, "instance has multiple secondary nodes: %s",
1822 utils.CommaJoin(inst_config.secondary_nodes),
1823 code=self.ETYPE_WARNING)
1825 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1826 i_non_a_balanced.append(instance)
1828 for snode in inst_config.secondary_nodes:
1829 s_img = node_image[snode]
1830 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
1831 "instance %s, connection to secondary node failed", instance)
1834 inst_nodes_offline.append(snode)
1836 # warn that the instance lives on offline nodes
1837 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1838 "instance lives on offline node(s) %s",
1839 utils.CommaJoin(inst_nodes_offline))
1840 # ... or ghost nodes
1841 for node in inst_config.all_nodes:
1842 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
1843 "instance lives on ghost node %s", node)
1845 feedback_fn("* Verifying orphan volumes")
1846 self._VerifyOrphanVolumes(node_vol_should, node_image)
1848 feedback_fn("* Verifying oprhan instances")
1849 self._VerifyOrphanInstances(instancelist, node_image)
1851 if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1852 feedback_fn("* Verifying N+1 Memory redundancy")
1853 self._VerifyNPlusOneMemory(node_image, instanceinfo)
1855 feedback_fn("* Other Notes")
1857 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
1858 % len(i_non_redundant))
1860 if i_non_a_balanced:
1861 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
1862 % len(i_non_a_balanced))
1865 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
1868 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
1872 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1873 """Analyze the post-hooks' result
1875 This method analyses the hook result, handles it, and sends some
1876 nicely-formatted feedback back to the user.
1878 @param phase: one of L{constants.HOOKS_PHASE_POST} or
1879 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1880 @param hooks_results: the results of the multi-node hooks rpc call
1881     @param feedback_fn: function used to send feedback back to the caller
1882 @param lu_result: previous Exec result
1883 @return: the new Exec result, based on the previous result
1887     # We only really run POST phase hooks, and are only interested in their results
1889 if phase == constants.HOOKS_PHASE_POST:
1890 # Used to change hooks' output to proper indentation
1891 indent_re = re.compile('^', re.M)
1892 feedback_fn("* Hooks Results")
1893 assert hooks_results, "invalid result from hooks"
1895 for node_name in hooks_results:
1896 res = hooks_results[node_name]
1898 test = msg and not res.offline
1899 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1900 "Communication failure in hooks execution: %s", msg)
1901 if res.offline or msg:
1902 # No need to investigate payload if node is offline or gave an error.
1903           # manually override lu_result here, as _ErrorIf only
1904 # overrides self.bad
1907 for script, hkr, output in res.payload:
1908 test = hkr == constants.HKR_FAIL
1909 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1910 "Script %s failed, output:", script)
1912 output = indent_re.sub(' ', output)
1913 feedback_fn("%s" % output)
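# A minimal standalone sketch of the re-indentation done in HooksCallBack
# above: re.M makes '^' match at the start of every line, so sub() prefixes
# each line of the hook output. The helper name is hypothetical and for
# illustration only.
def _ExampleIndentHookOutput(output, prefix="      "):
  """Return the hook output with every line prefixed (sketch)."""
  return re.compile("^", re.M).sub(prefix, output)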
1919 class LUVerifyDisks(NoHooksLU):
1920 """Verifies the cluster disks status.
1926 def ExpandNames(self):
1927 self.needed_locks = {
1928 locking.LEVEL_NODE: locking.ALL_SET,
1929 locking.LEVEL_INSTANCE: locking.ALL_SET,
1931 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1933 def CheckPrereq(self):
1934 """Check prerequisites.
1936 This has no prerequisites.
1941 def Exec(self, feedback_fn):
1942 """Verify integrity of cluster disks.
1944 @rtype: tuple of three items
1945 @return: a tuple of (dict of node-to-node_error, list of instances
1946 which need activate-disks, dict of instance: (node, volume) for
1950 result = res_nodes, res_instances, res_missing = {}, [], {}
1952 vg_name = self.cfg.GetVGName()
1953 nodes = utils.NiceSort(self.cfg.GetNodeList())
1954 instances = [self.cfg.GetInstanceInfo(name)
1955 for name in self.cfg.GetInstanceList()]
1958 for inst in instances:
1960 if (not inst.admin_up or
1961 inst.disk_template not in constants.DTS_NET_MIRROR):
1963 inst.MapLVsByNode(inst_lvs)
1964 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1965 for node, vol_list in inst_lvs.iteritems():
1966 for vol in vol_list:
1967 nv_dict[(node, vol)] = inst
1972 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
1976 node_res = node_lvs[node]
1977 if node_res.offline:
1979 msg = node_res.fail_msg
1981 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
1982 res_nodes[node] = msg
1985 lvs = node_res.payload
1986 for lv_name, (_, _, lv_online) in lvs.items():
1987 inst = nv_dict.pop((node, lv_name), None)
1988 if (not lv_online and inst is not None
1989 and inst.name not in res_instances):
1990 res_instances.append(inst.name)
1992     # any leftover items in nv_dict are missing LVs, let's arrange the data better
1994 for key, inst in nv_dict.iteritems():
1995 if inst.name not in res_missing:
1996 res_missing[inst.name] = []
1997 res_missing[inst.name].append(key)
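# Illustration of the value returned by LUVerifyDisks.Exec (hypothetical
# data): a dict of node errors, a list of instances needing activate-disks,
# and a dict mapping instances to their missing (node, volume) pairs, e.g.
#   ({"node2.example.com": "rpc failure"},
#    ["instance3.example.com"],
#    {"instance5.example.com": [("node1.example.com", "xenvg/lv_data")]})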
2002 class LURepairDiskSizes(NoHooksLU):
2003 """Verifies the cluster disks sizes.
2006 _OP_REQP = ["instances"]
2009 def ExpandNames(self):
2010 if not isinstance(self.op.instances, list):
2011 raise errors.OpPrereqError("Invalid argument type 'instances'",
2014 if self.op.instances:
2015 self.wanted_names = []
2016 for name in self.op.instances:
2017 full_name = _ExpandInstanceName(self.cfg, name)
2018 self.wanted_names.append(full_name)
2019 self.needed_locks = {
2020 locking.LEVEL_NODE: [],
2021 locking.LEVEL_INSTANCE: self.wanted_names,
2023 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2025 self.wanted_names = None
2026 self.needed_locks = {
2027 locking.LEVEL_NODE: locking.ALL_SET,
2028 locking.LEVEL_INSTANCE: locking.ALL_SET,
2030 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2032 def DeclareLocks(self, level):
2033 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2034 self._LockInstancesNodes(primary_only=True)
2036 def CheckPrereq(self):
2037 """Check prerequisites.
2039 This only checks the optional instance list against the existing names.
2042 if self.wanted_names is None:
2043 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2045 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2046 in self.wanted_names]
2048 def _EnsureChildSizes(self, disk):
2049 """Ensure children of the disk have the needed disk size.
2051 This is valid mainly for DRBD8 and fixes an issue where the
2052     children have a smaller disk size.
2054 @param disk: an L{ganeti.objects.Disk} object
2057 if disk.dev_type == constants.LD_DRBD8:
2058 assert disk.children, "Empty children for DRBD8?"
2059 fchild = disk.children[0]
2060 mismatch = fchild.size < disk.size
2062 self.LogInfo("Child disk has size %d, parent %d, fixing",
2063 fchild.size, disk.size)
2064 fchild.size = disk.size
2066 # and we recurse on this child only, not on the metadev
2067 return self._EnsureChildSizes(fchild) or mismatch
2071 def Exec(self, feedback_fn):
2072 """Verify the size of cluster disks.
2075 # TODO: check child disks too
2076 # TODO: check differences in size between primary/secondary nodes
2078 for instance in self.wanted_instances:
2079 pnode = instance.primary_node
2080 if pnode not in per_node_disks:
2081 per_node_disks[pnode] = []
2082 for idx, disk in enumerate(instance.disks):
2083 per_node_disks[pnode].append((instance, idx, disk))
2086 for node, dskl in per_node_disks.items():
2087 newl = [v[2].Copy() for v in dskl]
2089 self.cfg.SetDiskID(dsk, node)
2090 result = self.rpc.call_blockdev_getsizes(node, newl)
2092 self.LogWarning("Failure in blockdev_getsizes call to node"
2093 " %s, ignoring", node)
2095 if len(result.data) != len(dskl):
2096 self.LogWarning("Invalid result from node %s, ignoring node results",
2099 for ((instance, idx, disk), size) in zip(dskl, result.data):
2101 self.LogWarning("Disk %d of instance %s did not return size"
2102 " information, ignoring", idx, instance.name)
2104 if not isinstance(size, (int, long)):
2105 self.LogWarning("Disk %d of instance %s did not return valid"
2106 " size information, ignoring", idx, instance.name)
2109 if size != disk.size:
2110 self.LogInfo("Disk %d of instance %s has mismatched size,"
2111 " correcting: recorded %d, actual %d", idx,
2112 instance.name, disk.size, size)
2114 self.cfg.Update(instance, feedback_fn)
2115 changed.append((instance.name, idx, size))
2116 if self._EnsureChildSizes(disk):
2117 self.cfg.Update(instance, feedback_fn)
2118 changed.append((instance.name, idx, disk.size))
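# LURepairDiskSizes.Exec returns the 'changed' list built above; each entry
# is an (instance_name, disk_index, size_in_MiB) tuple, for example
# (illustrative values only):
#   [("instance1.example.com", 0, 10240)]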
2122 class LURenameCluster(LogicalUnit):
2123 """Rename the cluster.
2126 HPATH = "cluster-rename"
2127 HTYPE = constants.HTYPE_CLUSTER
2130 def BuildHooksEnv(self):
2135 "OP_TARGET": self.cfg.GetClusterName(),
2136 "NEW_NAME": self.op.name,
2138 mn = self.cfg.GetMasterNode()
2139 all_nodes = self.cfg.GetNodeList()
2140 return env, [mn], all_nodes
2142 def CheckPrereq(self):
2143 """Verify that the passed name is a valid one.
2146 hostname = utils.GetHostInfo(self.op.name)
2148 new_name = hostname.name
2149 self.ip = new_ip = hostname.ip
2150 old_name = self.cfg.GetClusterName()
2151 old_ip = self.cfg.GetMasterIP()
2152 if new_name == old_name and new_ip == old_ip:
2153 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2154 " cluster has changed",
2156 if new_ip != old_ip:
2157 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2158 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2159 " reachable on the network. Aborting." %
2160 new_ip, errors.ECODE_NOTUNIQUE)
2162 self.op.name = new_name
2164 def Exec(self, feedback_fn):
2165 """Rename the cluster.
2168 clustername = self.op.name
2171 # shutdown the master IP
2172 master = self.cfg.GetMasterNode()
2173 result = self.rpc.call_node_stop_master(master, False)
2174 result.Raise("Could not disable the master role")
2177 cluster = self.cfg.GetClusterInfo()
2178 cluster.cluster_name = clustername
2179 cluster.master_ip = ip
2180 self.cfg.Update(cluster, feedback_fn)
2182 # update the known hosts file
2183 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2184 node_list = self.cfg.GetNodeList()
2186 node_list.remove(master)
2189 result = self.rpc.call_upload_file(node_list,
2190 constants.SSH_KNOWN_HOSTS_FILE)
2191 for to_node, to_result in result.iteritems():
2192 msg = to_result.fail_msg
2194 msg = ("Copy of file %s to node %s failed: %s" %
2195 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2196 self.proc.LogWarning(msg)
2199 result = self.rpc.call_node_start_master(master, False, False)
2200 msg = result.fail_msg
2202 self.LogWarning("Could not re-enable the master role on"
2203 " the master, please restart manually: %s", msg)
2206 def _RecursiveCheckIfLVMBased(disk):
2207 """Check if the given disk or its children are lvm-based.
2209 @type disk: L{objects.Disk}
2210 @param disk: the disk to check
2212 @return: boolean indicating whether a LD_LV dev_type was found or not
2216 for chdisk in disk.children:
2217 if _RecursiveCheckIfLVMBased(chdisk):
2219 return disk.dev_type == constants.LD_LV
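# Usage sketch for _RecursiveCheckIfLVMBased (illustration only): the check
# performed by LUSetClusterParams.CheckPrereq when disabling LVM storage
# amounts to scanning every disk of every instance, e.g.:
def _ExampleAnyInstanceUsesLVM(instances):
  """Return True if any disk of any given instance is LVM-based (sketch)."""
  for inst in instances:
    for disk in inst.disks:
      if _RecursiveCheckIfLVMBased(disk):
        return True
  return False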
2222 class LUSetClusterParams(LogicalUnit):
2223 """Change the parameters of the cluster.
2226 HPATH = "cluster-modify"
2227 HTYPE = constants.HTYPE_CLUSTER
2231 def CheckArguments(self):
2235 if not hasattr(self.op, "candidate_pool_size"):
2236 self.op.candidate_pool_size = None
2237 if self.op.candidate_pool_size is not None:
2239 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2240 except (ValueError, TypeError), err:
2241 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2242 str(err), errors.ECODE_INVAL)
2243 if self.op.candidate_pool_size < 1:
2244 raise errors.OpPrereqError("At least one master candidate needed",
2246 _CheckBooleanOpField(self.op, "maintain_node_health")
2248 def ExpandNames(self):
2249 # FIXME: in the future maybe other cluster params won't require checking on
2250 # all nodes to be modified.
2251 self.needed_locks = {
2252 locking.LEVEL_NODE: locking.ALL_SET,
2254 self.share_locks[locking.LEVEL_NODE] = 1
2256 def BuildHooksEnv(self):
2261 "OP_TARGET": self.cfg.GetClusterName(),
2262 "NEW_VG_NAME": self.op.vg_name,
2264 mn = self.cfg.GetMasterNode()
2265 return env, [mn], [mn]
2267 def CheckPrereq(self):
2268 """Check prerequisites.
2270 This checks whether the given params don't conflict and
2271 if the given volume group is valid.
2274 if self.op.vg_name is not None and not self.op.vg_name:
2275 instances = self.cfg.GetAllInstancesInfo().values()
2276 for inst in instances:
2277 for disk in inst.disks:
2278 if _RecursiveCheckIfLVMBased(disk):
2279 raise errors.OpPrereqError("Cannot disable lvm storage while"
2280 " lvm-based instances exist",
2283 node_list = self.acquired_locks[locking.LEVEL_NODE]
2285     # if vg_name is not None, check the given volume group on all nodes
2287 vglist = self.rpc.call_vg_list(node_list)
2288 for node in node_list:
2289 msg = vglist[node].fail_msg
2291 # ignoring down node
2292 self.LogWarning("Error while gathering data on node %s"
2293 " (ignoring node): %s", node, msg)
2295 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2297 constants.MIN_VG_SIZE)
2299 raise errors.OpPrereqError("Error on node '%s': %s" %
2300 (node, vgstatus), errors.ECODE_ENVIRON)
2302 self.cluster = cluster = self.cfg.GetClusterInfo()
2303 # validate params changes
2304 if self.op.beparams:
2305 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2306 self.new_beparams = objects.FillDict(
2307 cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2309 if self.op.nicparams:
2310 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2311 self.new_nicparams = objects.FillDict(
2312 cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2313 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2316 # check all instances for consistency
2317 for instance in self.cfg.GetAllInstancesInfo().values():
2318 for nic_idx, nic in enumerate(instance.nics):
2319 params_copy = copy.deepcopy(nic.nicparams)
2320 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2322 # check parameter syntax
2324 objects.NIC.CheckParameterSyntax(params_filled)
2325 except errors.ConfigurationError, err:
2326 nic_errors.append("Instance %s, nic/%d: %s" %
2327 (instance.name, nic_idx, err))
2329 # if we're moving instances to routed, check that they have an ip
2330 target_mode = params_filled[constants.NIC_MODE]
2331 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2332           nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2333 (instance.name, nic_idx))
2335 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2336 "\n".join(nic_errors))
2338 # hypervisor list/parameters
2339 self.new_hvparams = objects.FillDict(cluster.hvparams, {})
2340 if self.op.hvparams:
2341 if not isinstance(self.op.hvparams, dict):
2342 raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2344 for hv_name, hv_dict in self.op.hvparams.items():
2345 if hv_name not in self.new_hvparams:
2346 self.new_hvparams[hv_name] = hv_dict
2348 self.new_hvparams[hv_name].update(hv_dict)
2350 # os hypervisor parameters
2351 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2353 if not isinstance(self.op.os_hvp, dict):
2354 raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2356 for os_name, hvs in self.op.os_hvp.items():
2357 if not isinstance(hvs, dict):
2358 raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2359 " input"), errors.ECODE_INVAL)
2360 if os_name not in self.new_os_hvp:
2361 self.new_os_hvp[os_name] = hvs
2363 for hv_name, hv_dict in hvs.items():
2364 if hv_name not in self.new_os_hvp[os_name]:
2365 self.new_os_hvp[os_name][hv_name] = hv_dict
2367 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2369 if self.op.enabled_hypervisors is not None:
2370 self.hv_list = self.op.enabled_hypervisors
2371 if not self.hv_list:
2372 raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2373 " least one member",
2375 invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2377 raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2379 utils.CommaJoin(invalid_hvs),
2382 self.hv_list = cluster.enabled_hypervisors
2384 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2385 # either the enabled list has changed, or the parameters have, validate
2386 for hv_name, hv_params in self.new_hvparams.items():
2387 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2388 (self.op.enabled_hypervisors and
2389 hv_name in self.op.enabled_hypervisors)):
2390 # either this is a new hypervisor, or its parameters have changed
2391 hv_class = hypervisor.GetHypervisor(hv_name)
2392 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2393 hv_class.CheckParameterSyntax(hv_params)
2394 _CheckHVParams(self, node_list, hv_name, hv_params)
2397 # no need to check any newly-enabled hypervisors, since the
2398 # defaults have already been checked in the above code-block
2399 for os_name, os_hvp in self.new_os_hvp.items():
2400 for hv_name, hv_params in os_hvp.items():
2401 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2402 # we need to fill in the new os_hvp on top of the actual hv_p
2403 cluster_defaults = self.new_hvparams.get(hv_name, {})
2404 new_osp = objects.FillDict(cluster_defaults, hv_params)
2405 hv_class = hypervisor.GetHypervisor(hv_name)
2406 hv_class.CheckParameterSyntax(new_osp)
2407 _CheckHVParams(self, node_list, hv_name, new_osp)
2410 def Exec(self, feedback_fn):
2411 """Change the parameters of the cluster.
2414 if self.op.vg_name is not None:
2415 new_volume = self.op.vg_name
2418 if new_volume != self.cfg.GetVGName():
2419 self.cfg.SetVGName(new_volume)
2421 feedback_fn("Cluster LVM configuration already in desired"
2422 " state, not changing")
2423 if self.op.hvparams:
2424 self.cluster.hvparams = self.new_hvparams
2426 self.cluster.os_hvp = self.new_os_hvp
2427 if self.op.enabled_hypervisors is not None:
2428 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2429 if self.op.beparams:
2430 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2431 if self.op.nicparams:
2432 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2434 if self.op.candidate_pool_size is not None:
2435 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2436 # we need to update the pool size here, otherwise the save will fail
2437 _AdjustCandidatePool(self, [])
2439 if self.op.maintain_node_health is not None:
2440 self.cluster.maintain_node_health = self.op.maintain_node_health
2442 self.cfg.Update(self.cluster, feedback_fn)
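# The parameter updates above (beparams, nicparams, hvparams, os_hvp) all
# follow a "cluster defaults overlaid by explicit overrides" pattern. A
# minimal sketch of those merge semantics for flat dictionaries, assuming
# this mirrors what objects.FillDict does for simple parameter dicts:
def _ExampleFillDict(defaults, custom):
  """Return a copy of DEFAULTS with CUSTOM values overriding it (sketch)."""
  merged = dict(defaults)
  merged.update(custom)
  return merged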
2445 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2446 """Distribute additional files which are part of the cluster configuration.
2448 ConfigWriter takes care of distributing the config and ssconf files, but
2449 there are more files which should be distributed to all nodes. This function
2450 makes sure those are copied.
2452 @param lu: calling logical unit
2453 @param additional_nodes: list of nodes not in the config to distribute to
2456 # 1. Gather target nodes
2457 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2458 dist_nodes = lu.cfg.GetOnlineNodeList()
2459 if additional_nodes is not None:
2460 dist_nodes.extend(additional_nodes)
2461 if myself.name in dist_nodes:
2462 dist_nodes.remove(myself.name)
2464 # 2. Gather files to distribute
2465 dist_files = set([constants.ETC_HOSTS,
2466 constants.SSH_KNOWN_HOSTS_FILE,
2467 constants.RAPI_CERT_FILE,
2468 constants.RAPI_USERS_FILE,
2469 constants.CONFD_HMAC_KEY,
2472 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2473 for hv_name in enabled_hypervisors:
2474 hv_class = hypervisor.GetHypervisor(hv_name)
2475 dist_files.update(hv_class.GetAncillaryFiles())
2477 # 3. Perform the files upload
2478 for fname in dist_files:
2479 if os.path.exists(fname):
2480 result = lu.rpc.call_upload_file(dist_nodes, fname)
2481 for to_node, to_result in result.items():
2482 msg = to_result.fail_msg
2484 msg = ("Copy of file %s to node %s failed: %s" %
2485 (fname, to_node, msg))
2486 lu.proc.LogWarning(msg)
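# Example calls (as done from LURedistributeConfig.Exec below and from
# LUAddNode.Exec later in this module):
#   _RedistributeAncillaryFiles(self)
#   _RedistributeAncillaryFiles(self, additional_nodes=[new_node_name])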
2489 class LURedistributeConfig(NoHooksLU):
2490 """Force the redistribution of cluster configuration.
2492 This is a very simple LU.
2498 def ExpandNames(self):
2499 self.needed_locks = {
2500 locking.LEVEL_NODE: locking.ALL_SET,
2502 self.share_locks[locking.LEVEL_NODE] = 1
2504 def CheckPrereq(self):
2505 """Check prerequisites.
2509 def Exec(self, feedback_fn):
2510 """Redistribute the configuration.
2513 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2514 _RedistributeAncillaryFiles(self)
2517 def _WaitForSync(lu, instance, oneshot=False):
2518 """Sleep and poll for an instance's disk to sync.
2521 if not instance.disks:
2525 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2527 node = instance.primary_node
2529 for dev in instance.disks:
2530 lu.cfg.SetDiskID(dev, node)
2532 # TODO: Convert to utils.Retry
2535 degr_retries = 10 # in seconds, as we sleep 1 second each time
2539 cumul_degraded = False
2540 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2541 msg = rstats.fail_msg
2543 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2546 raise errors.RemoteError("Can't contact node %s for mirror data,"
2547 " aborting." % node)
2550 rstats = rstats.payload
2552 for i, mstat in enumerate(rstats):
2554 lu.LogWarning("Can't compute data for node %s/%s",
2555 node, instance.disks[i].iv_name)
2558 cumul_degraded = (cumul_degraded or
2559 (mstat.is_degraded and mstat.sync_percent is None))
2560 if mstat.sync_percent is not None:
2562 if mstat.estimated_time is not None:
2563 rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2564 max_time = mstat.estimated_time
2566 rem_time = "no time estimate"
2567 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2568 (instance.disks[i].iv_name, mstat.sync_percent,
2571 # if we're done but degraded, let's do a few small retries, to
2572 # make sure we see a stable and not transient situation; therefore
2573 # we force restart of the loop
2574 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2575 logging.info("Degraded disks found, %d retries left", degr_retries)
2583 time.sleep(min(60, max_time))
2586 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2587 return not cumul_degraded
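# The loop above is a poll-and-sleep pattern: query the mirror status,
# report progress, then sleep up to the smallest estimated completion time
# (capped at 60 seconds). A stripped-down sketch of that control flow,
# assuming a hypothetical check function returning (done, seconds_hint):
def _ExamplePollUntilSynced(check_fn, cap=60):
  """Poll check_fn until it reports completion (sketch)."""
  while True:
    done, seconds_hint = check_fn()
    if done:
      return True
    time.sleep(min(cap, seconds_hint))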
2590 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2591 """Check that mirrors are not degraded.
2593 The ldisk parameter, if True, will change the test from the
2594 is_degraded attribute (which represents overall non-ok status for
2595 the device(s)) to the ldisk (representing the local storage status).
2598 lu.cfg.SetDiskID(dev, node)
2602 if on_primary or dev.AssembleOnSecondary():
2603 rstats = lu.rpc.call_blockdev_find(node, dev)
2604 msg = rstats.fail_msg
2606 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2608 elif not rstats.payload:
2609 lu.LogWarning("Can't find disk on node %s", node)
2613 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2615 result = result and not rstats.payload.is_degraded
2618 for child in dev.children:
2619 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
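# Usage sketch (illustration only): the same device can be checked for
# overall degradation or, with ldisk=True, for local storage health:
#   not_degraded = _CheckDiskConsistency(lu, dev, node, True)
#   local_ok = _CheckDiskConsistency(lu, dev, node, True, ldisk=True)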
2624 class LUDiagnoseOS(NoHooksLU):
2625 """Logical unit for OS diagnose/query.
2628 _OP_REQP = ["output_fields", "names"]
2630 _FIELDS_STATIC = utils.FieldSet()
2631 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2632 # Fields that need calculation of global os validity
2633 _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2635 def ExpandNames(self):
2637 raise errors.OpPrereqError("Selective OS query not supported",
2640 _CheckOutputFields(static=self._FIELDS_STATIC,
2641 dynamic=self._FIELDS_DYNAMIC,
2642 selected=self.op.output_fields)
2644 # Lock all nodes, in shared mode
2645 # Temporary removal of locks, should be reverted later
2646 # TODO: reintroduce locks when they are lighter-weight
2647 self.needed_locks = {}
2648 #self.share_locks[locking.LEVEL_NODE] = 1
2649 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2651 def CheckPrereq(self):
2652 """Check prerequisites.
2657 def _DiagnoseByOS(rlist):
2658 """Remaps a per-node return list into an a per-os per-node dictionary
2660 @param rlist: a map with node names as keys and OS objects as values
2663 @return: a dictionary with osnames as keys and as value another map, with
2664 nodes as keys and tuples of (path, status, diagnose) as values, eg::
2666 {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2667 (/srv/..., False, "invalid api")],
2668 "node2": [(/srv/..., True, "")]}
2673 # we build here the list of nodes that didn't fail the RPC (at RPC
2674 # level), so that nodes with a non-responding node daemon don't
2675 # make all OSes invalid
2676 good_nodes = [node_name for node_name in rlist
2677 if not rlist[node_name].fail_msg]
2678 for node_name, nr in rlist.items():
2679 if nr.fail_msg or not nr.payload:
2681 for name, path, status, diagnose, variants in nr.payload:
2682 if name not in all_os:
2683 # build a list of nodes for this os containing empty lists
2684 # for each node in node_list
2686 for nname in good_nodes:
2687 all_os[name][nname] = []
2688 all_os[name][node_name].append((path, status, diagnose, variants))
2691 def Exec(self, feedback_fn):
2692 """Compute the list of OSes.
2695 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2696 node_data = self.rpc.call_os_diagnose(valid_nodes)
2697 pol = self._DiagnoseByOS(node_data)
2699 calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2700 calc_variants = "variants" in self.op.output_fields
2702 for os_name, os_data in pol.items():
2707 for osl in os_data.values():
2708 valid = valid and osl and osl[0][1]
2713 node_variants = osl[0][3]
2714 if variants is None:
2715 variants = node_variants
2717 variants = [v for v in variants if v in node_variants]
2719 for field in self.op.output_fields:
2722 elif field == "valid":
2724 elif field == "node_status":
2725 # this is just a copy of the dict
2727 for node_name, nos_list in os_data.items():
2728 val[node_name] = nos_list
2729 elif field == "variants":
2732 raise errors.ParameterError(field)
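# The per-OS 'variants' value computed above is the intersection of the
# variant lists reported by each node carrying the OS. A minimal standalone
# sketch of that reduction (hypothetical helper, illustration only):
def _ExampleIntersectVariants(per_node_variants):
  """Return the variants common to all given per-node variant lists."""
  variants = None
  for node_variants in per_node_variants:
    if variants is None:
      variants = list(node_variants)
    else:
      variants = [v for v in variants if v in node_variants]
  return variants or []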
2739 class LURemoveNode(LogicalUnit):
2740 """Logical unit for removing a node.
2743 HPATH = "node-remove"
2744 HTYPE = constants.HTYPE_NODE
2745 _OP_REQP = ["node_name"]
2747 def BuildHooksEnv(self):
2750 This doesn't run on the target node in the pre phase as a failed
2751 node would then be impossible to remove.
2755 "OP_TARGET": self.op.node_name,
2756 "NODE_NAME": self.op.node_name,
2758 all_nodes = self.cfg.GetNodeList()
2760 all_nodes.remove(self.op.node_name)
2762 logging.warning("Node %s which is about to be removed not found"
2763 " in the all nodes list", self.op.node_name)
2764 return env, all_nodes, all_nodes
2766 def CheckPrereq(self):
2767 """Check prerequisites.
2770 - the node exists in the configuration
2771 - it does not have primary or secondary instances
2772 - it's not the master
2774 Any errors are signaled by raising errors.OpPrereqError.
2777 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2778 node = self.cfg.GetNodeInfo(self.op.node_name)
2779 assert node is not None
2781 instance_list = self.cfg.GetInstanceList()
2783 masternode = self.cfg.GetMasterNode()
2784 if node.name == masternode:
2785 raise errors.OpPrereqError("Node is the master node,"
2786 " you need to failover first.",
2789 for instance_name in instance_list:
2790 instance = self.cfg.GetInstanceInfo(instance_name)
2791 if node.name in instance.all_nodes:
2792 raise errors.OpPrereqError("Instance %s is still running on the node,"
2793 " please remove first." % instance_name,
2795 self.op.node_name = node.name
2798 def Exec(self, feedback_fn):
2799 """Removes the node from the cluster.
2803 logging.info("Stopping the node daemon and removing configs from node %s",
2806 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2808 # Promote nodes to master candidate as needed
2809 _AdjustCandidatePool(self, exceptions=[node.name])
2810 self.context.RemoveNode(node.name)
2812 # Run post hooks on the node before it's removed
2813 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2815 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2817 # pylint: disable-msg=W0702
2818 self.LogWarning("Errors occurred running hooks on %s" % node.name)
2820 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2821 msg = result.fail_msg
2823 self.LogWarning("Errors encountered on the remote node while leaving"
2824 " the cluster: %s", msg)
2827 class LUQueryNodes(NoHooksLU):
2828 """Logical unit for querying nodes.
2831 # pylint: disable-msg=W0142
2832 _OP_REQP = ["output_fields", "names", "use_locking"]
2835 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2836 "master_candidate", "offline", "drained"]
2838 _FIELDS_DYNAMIC = utils.FieldSet(
2840 "mtotal", "mnode", "mfree",
2842 "ctotal", "cnodes", "csockets",
2845 _FIELDS_STATIC = utils.FieldSet(*[
2846 "pinst_cnt", "sinst_cnt",
2847 "pinst_list", "sinst_list",
2848 "pip", "sip", "tags",
2850 "role"] + _SIMPLE_FIELDS
2853 def ExpandNames(self):
2854 _CheckOutputFields(static=self._FIELDS_STATIC,
2855 dynamic=self._FIELDS_DYNAMIC,
2856 selected=self.op.output_fields)
2858 self.needed_locks = {}
2859 self.share_locks[locking.LEVEL_NODE] = 1
2862 self.wanted = _GetWantedNodes(self, self.op.names)
2864 self.wanted = locking.ALL_SET
2866 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2867 self.do_locking = self.do_node_query and self.op.use_locking
2869 # if we don't request only static fields, we need to lock the nodes
2870 self.needed_locks[locking.LEVEL_NODE] = self.wanted
2872 def CheckPrereq(self):
2873 """Check prerequisites.
2876     # The validation of the node list is done in _GetWantedNodes if the list
2877     # is not empty; if it is empty, there's no validation to do
2880 def Exec(self, feedback_fn):
2881 """Computes the list of nodes and their attributes.
2884 all_info = self.cfg.GetAllNodesInfo()
2886 nodenames = self.acquired_locks[locking.LEVEL_NODE]
2887 elif self.wanted != locking.ALL_SET:
2888 nodenames = self.wanted
2889 missing = set(nodenames).difference(all_info.keys())
2891 raise errors.OpExecError(
2892 "Some nodes were removed before retrieving their data: %s" % missing)
2894 nodenames = all_info.keys()
2896 nodenames = utils.NiceSort(nodenames)
2897 nodelist = [all_info[name] for name in nodenames]
2899 # begin data gathering
2901 if self.do_node_query:
2903 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2904 self.cfg.GetHypervisorType())
2905 for name in nodenames:
2906 nodeinfo = node_data[name]
2907 if not nodeinfo.fail_msg and nodeinfo.payload:
2908 nodeinfo = nodeinfo.payload
2909 fn = utils.TryConvert
2911 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2912 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2913 "mfree": fn(int, nodeinfo.get('memory_free', None)),
2914 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2915 "dfree": fn(int, nodeinfo.get('vg_free', None)),
2916 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2917 "bootid": nodeinfo.get('bootid', None),
2918 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2919 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2922 live_data[name] = {}
2924 live_data = dict.fromkeys(nodenames, {})
2926 node_to_primary = dict([(name, set()) for name in nodenames])
2927 node_to_secondary = dict([(name, set()) for name in nodenames])
2929 inst_fields = frozenset(("pinst_cnt", "pinst_list",
2930 "sinst_cnt", "sinst_list"))
2931 if inst_fields & frozenset(self.op.output_fields):
2932 inst_data = self.cfg.GetAllInstancesInfo()
2934 for inst in inst_data.values():
2935 if inst.primary_node in node_to_primary:
2936 node_to_primary[inst.primary_node].add(inst.name)
2937 for secnode in inst.secondary_nodes:
2938 if secnode in node_to_secondary:
2939 node_to_secondary[secnode].add(inst.name)
2941 master_node = self.cfg.GetMasterNode()
2943 # end data gathering
2946 for node in nodelist:
2948 for field in self.op.output_fields:
2949 if field in self._SIMPLE_FIELDS:
2950 val = getattr(node, field)
2951 elif field == "pinst_list":
2952 val = list(node_to_primary[node.name])
2953 elif field == "sinst_list":
2954 val = list(node_to_secondary[node.name])
2955 elif field == "pinst_cnt":
2956 val = len(node_to_primary[node.name])
2957 elif field == "sinst_cnt":
2958 val = len(node_to_secondary[node.name])
2959 elif field == "pip":
2960 val = node.primary_ip
2961 elif field == "sip":
2962 val = node.secondary_ip
2963 elif field == "tags":
2964 val = list(node.GetTags())
2965 elif field == "master":
2966 val = node.name == master_node
2967 elif self._FIELDS_DYNAMIC.Matches(field):
2968 val = live_data[node.name].get(field, None)
2969 elif field == "role":
2970 if node.name == master_node:
2972 elif node.master_candidate:
2981 raise errors.ParameterError(field)
2982 node_output.append(val)
2983 output.append(node_output)
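# The query result is a list of rows, one per node, with the values ordered
# exactly like self.op.output_fields. Illustrative example (hypothetical
# data) for output_fields=["name", "pinst_cnt", "master"]:
#   [["node1.example.com", 2, True], ["node2.example.com", 0, False]]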
2988 class LUQueryNodeVolumes(NoHooksLU):
2989 """Logical unit for getting volumes on node(s).
2992 _OP_REQP = ["nodes", "output_fields"]
2994 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2995 _FIELDS_STATIC = utils.FieldSet("node")
2997 def ExpandNames(self):
2998 _CheckOutputFields(static=self._FIELDS_STATIC,
2999 dynamic=self._FIELDS_DYNAMIC,
3000 selected=self.op.output_fields)
3002 self.needed_locks = {}
3003 self.share_locks[locking.LEVEL_NODE] = 1
3004 if not self.op.nodes:
3005 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3007 self.needed_locks[locking.LEVEL_NODE] = \
3008 _GetWantedNodes(self, self.op.nodes)
3010 def CheckPrereq(self):
3011 """Check prerequisites.
3013 This checks that the fields required are valid output fields.
3016 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3018 def Exec(self, feedback_fn):
3019 """Computes the list of nodes and their attributes.
3022 nodenames = self.nodes
3023 volumes = self.rpc.call_node_volumes(nodenames)
3025 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3026 in self.cfg.GetInstanceList()]
3028 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3031 for node in nodenames:
3032 nresult = volumes[node]
3035 msg = nresult.fail_msg
3037 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3040 node_vols = nresult.payload[:]
3041 node_vols.sort(key=lambda vol: vol['dev'])
3043 for vol in node_vols:
3045 for field in self.op.output_fields:
3048 elif field == "phys":
3052 elif field == "name":
3054 elif field == "size":
3055 val = int(float(vol['size']))
3056 elif field == "instance":
3058 if node not in lv_by_node[inst]:
3060 if vol['name'] in lv_by_node[inst][node]:
3066 raise errors.ParameterError(field)
3067 node_output.append(str(val))
3069 output.append(node_output)
3074 class LUQueryNodeStorage(NoHooksLU):
3075 """Logical unit for getting information on storage units on node(s).
3078 _OP_REQP = ["nodes", "storage_type", "output_fields"]
3080 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3082 def ExpandNames(self):
3083 storage_type = self.op.storage_type
3085 if storage_type not in constants.VALID_STORAGE_TYPES:
3086 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
3089 _CheckOutputFields(static=self._FIELDS_STATIC,
3090 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3091 selected=self.op.output_fields)
3093 self.needed_locks = {}
3094 self.share_locks[locking.LEVEL_NODE] = 1
3097 self.needed_locks[locking.LEVEL_NODE] = \
3098 _GetWantedNodes(self, self.op.nodes)
3100 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3102 def CheckPrereq(self):
3103 """Check prerequisites.
3105 This checks that the fields required are valid output fields.
3108 self.op.name = getattr(self.op, "name", None)
3110 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3112 def Exec(self, feedback_fn):
3113 """Computes the list of nodes and their attributes.
3116 # Always get name to sort by
3117 if constants.SF_NAME in self.op.output_fields:
3118 fields = self.op.output_fields[:]
3120 fields = [constants.SF_NAME] + self.op.output_fields
3122 # Never ask for node or type as it's only known to the LU
3123 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3124 while extra in fields:
3125 fields.remove(extra)
3127 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3128 name_idx = field_idx[constants.SF_NAME]
3130 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3131 data = self.rpc.call_storage_list(self.nodes,
3132 self.op.storage_type, st_args,
3133 self.op.name, fields)
3137 for node in utils.NiceSort(self.nodes):
3138 nresult = data[node]
3142 msg = nresult.fail_msg
3144 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3147 rows = dict([(row[name_idx], row) for row in nresult.payload])
3149 for name in utils.NiceSort(rows.keys()):
3154 for field in self.op.output_fields:
3155 if field == constants.SF_NODE:
3157 elif field == constants.SF_TYPE:
3158 val = self.op.storage_type
3159 elif field in field_idx:
3160 val = row[field_idx[field]]
3162 raise errors.ParameterError(field)
3171 class LUModifyNodeStorage(NoHooksLU):
3172 """Logical unit for modifying a storage volume on a node.
3175 _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3178 def CheckArguments(self):
3179 self.opnode_name = _ExpandNodeName(self.cfg, self.op.node_name)
3181 storage_type = self.op.storage_type
3182 if storage_type not in constants.VALID_STORAGE_TYPES:
3183 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
3186 def ExpandNames(self):
3187 self.needed_locks = {
3188 locking.LEVEL_NODE: self.op.node_name,
3191 def CheckPrereq(self):
3192 """Check prerequisites.
3195 storage_type = self.op.storage_type
3198 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3200 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3201 " modified" % storage_type,
3204 diff = set(self.op.changes.keys()) - modifiable
3206 raise errors.OpPrereqError("The following fields can not be modified for"
3207 " storage units of type '%s': %r" %
3208 (storage_type, list(diff)),
3211 def Exec(self, feedback_fn):
3212 """Computes the list of nodes and their attributes.
3215 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3216 result = self.rpc.call_storage_modify(self.op.node_name,
3217 self.op.storage_type, st_args,
3218 self.op.name, self.op.changes)
3219 result.Raise("Failed to modify storage unit '%s' on %s" %
3220 (self.op.name, self.op.node_name))
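# Illustrative opcode usage (hypothetical values): 'changes' is a dict of
# storage fields to new values, restricted to the entries listed in
# constants.MODIFIABLE_STORAGE_FIELDS for the given storage type, e.g.
#   changes = {"allocatable": True}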
3223 class LUAddNode(LogicalUnit):
3224 """Logical unit for adding node to the cluster.
3228 HTYPE = constants.HTYPE_NODE
3229 _OP_REQP = ["node_name"]
3231 def CheckArguments(self):
3232 # validate/normalize the node name
3233 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3235 def BuildHooksEnv(self):
3238 This will run on all nodes before, and on all nodes + the new node after.
3242 "OP_TARGET": self.op.node_name,
3243 "NODE_NAME": self.op.node_name,
3244 "NODE_PIP": self.op.primary_ip,
3245 "NODE_SIP": self.op.secondary_ip,
3247 nodes_0 = self.cfg.GetNodeList()
3248 nodes_1 = nodes_0 + [self.op.node_name, ]
3249 return env, nodes_0, nodes_1
3251 def CheckPrereq(self):
3252 """Check prerequisites.
3255 - the new node is not already in the config
3257     - its parameters (single/dual homed) match the cluster
3259 Any errors are signaled by raising errors.OpPrereqError.
3262 node_name = self.op.node_name
3265 dns_data = utils.GetHostInfo(node_name)
3267 node = dns_data.name
3268 primary_ip = self.op.primary_ip = dns_data.ip
3269 secondary_ip = getattr(self.op, "secondary_ip", None)
3270 if secondary_ip is None:
3271 secondary_ip = primary_ip
3272 if not utils.IsValidIP(secondary_ip):
3273 raise errors.OpPrereqError("Invalid secondary IP given",
3275 self.op.secondary_ip = secondary_ip
3277 node_list = cfg.GetNodeList()
3278 if not self.op.readd and node in node_list:
3279 raise errors.OpPrereqError("Node %s is already in the configuration" %
3280 node, errors.ECODE_EXISTS)
3281 elif self.op.readd and node not in node_list:
3282 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3285 for existing_node_name in node_list:
3286 existing_node = cfg.GetNodeInfo(existing_node_name)
3288 if self.op.readd and node == existing_node_name:
3289 if (existing_node.primary_ip != primary_ip or
3290 existing_node.secondary_ip != secondary_ip):
3291 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3292 " address configuration as before",
3296 if (existing_node.primary_ip == primary_ip or
3297 existing_node.secondary_ip == primary_ip or
3298 existing_node.primary_ip == secondary_ip or
3299 existing_node.secondary_ip == secondary_ip):
3300 raise errors.OpPrereqError("New node ip address(es) conflict with"
3301 " existing node %s" % existing_node.name,
3302 errors.ECODE_NOTUNIQUE)
3304 # check that the type of the node (single versus dual homed) is the
3305 # same as for the master
3306 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3307 master_singlehomed = myself.secondary_ip == myself.primary_ip
3308 newbie_singlehomed = secondary_ip == primary_ip
3309 if master_singlehomed != newbie_singlehomed:
3310 if master_singlehomed:
3311 raise errors.OpPrereqError("The master has no private ip but the"
3312 " new node has one",
3315 raise errors.OpPrereqError("The master has a private ip but the"
3316 " new node doesn't have one",
3319 # checks reachability
3320 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3321 raise errors.OpPrereqError("Node not reachable by ping",
3322 errors.ECODE_ENVIRON)
3324 if not newbie_singlehomed:
3325 # check reachability from my secondary ip to newbie's secondary ip
3326 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3327 source=myself.secondary_ip):
3328 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3329 " based ping to noded port",
3330 errors.ECODE_ENVIRON)
3337 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3340 self.new_node = self.cfg.GetNodeInfo(node)
3341 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3343 self.new_node = objects.Node(name=node,
3344 primary_ip=primary_ip,
3345 secondary_ip=secondary_ip,
3346 master_candidate=self.master_candidate,
3347 offline=False, drained=False)
3349 def Exec(self, feedback_fn):
3350 """Adds the new node to the cluster.
3353 new_node = self.new_node
3354 node = new_node.name
3356 # for re-adds, reset the offline/drained/master-candidate flags;
3357 # we need to reset here, otherwise offline would prevent RPC calls
3358 # later in the procedure; this also means that if the re-add
3359 # fails, we are left with a non-offlined, broken node
3361 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3362 self.LogInfo("Readding a node, the offline/drained flags were reset")
3363 # if we demote the node, we do cleanup later in the procedure
3364 new_node.master_candidate = self.master_candidate
3366 # notify the user about any possible mc promotion
3367 if new_node.master_candidate:
3368 self.LogInfo("Node will be a master candidate")
3370 # check connectivity
3371 result = self.rpc.call_version([node])[node]
3372 result.Raise("Can't get version information from node %s" % node)
3373 if constants.PROTOCOL_VERSION == result.payload:
3374 logging.info("Communication to node %s fine, sw version %s match",
3375 node, result.payload)
3377 raise errors.OpExecError("Version mismatch master version %s,"
3378 " node version %s" %
3379 (constants.PROTOCOL_VERSION, result.payload))
3382 if self.cfg.GetClusterInfo().modify_ssh_setup:
3383 logging.info("Copy ssh key to node %s", node)
3384 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3386 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3387 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3391 keyarray.append(utils.ReadFile(i))
3393 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3394 keyarray[2], keyarray[3], keyarray[4],
3396 result.Raise("Cannot transfer ssh keys to the new node")
3398 # Add node to our /etc/hosts, and add key to known_hosts
3399 if self.cfg.GetClusterInfo().modify_etc_hosts:
3400 utils.AddHostToEtcHosts(new_node.name)
3402 if new_node.secondary_ip != new_node.primary_ip:
3403 result = self.rpc.call_node_has_ip_address(new_node.name,
3404 new_node.secondary_ip)
3405 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3406 prereq=True, ecode=errors.ECODE_ENVIRON)
3407 if not result.payload:
3408 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3409 " you gave (%s). Please fix and re-run this"
3410 " command." % new_node.secondary_ip)
3412 node_verify_list = [self.cfg.GetMasterNode()]
3413 node_verify_param = {
3414 constants.NV_NODELIST: [node],
3415 # TODO: do a node-net-test as well?
3418 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3419 self.cfg.GetClusterName())
3420 for verifier in node_verify_list:
3421 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3422 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3424 for failed in nl_payload:
3425 feedback_fn("ssh/hostname verification failed"
3426 " (checking from %s): %s" %
3427 (verifier, nl_payload[failed]))
3428 raise errors.OpExecError("ssh/hostname verification failed.")
3431 _RedistributeAncillaryFiles(self)
3432 self.context.ReaddNode(new_node)
3433 # make sure we redistribute the config
3434 self.cfg.Update(new_node, feedback_fn)
3435 # and make sure the new node will not have old files around
3436 if not new_node.master_candidate:
3437 result = self.rpc.call_node_demote_from_mc(new_node.name)
3438 msg = result.fail_msg
3440 self.LogWarning("Node failed to demote itself from master"
3441 " candidate status: %s" % msg)
3443 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3444 self.context.AddNode(new_node, self.proc.GetECId())
3447 class LUSetNodeParams(LogicalUnit):
3448 """Modifies the parameters of a node.
3451 HPATH = "node-modify"
3452 HTYPE = constants.HTYPE_NODE
3453 _OP_REQP = ["node_name"]
3456 def CheckArguments(self):
3457 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3458 _CheckBooleanOpField(self.op, 'master_candidate')
3459 _CheckBooleanOpField(self.op, 'offline')
3460 _CheckBooleanOpField(self.op, 'drained')
3461 _CheckBooleanOpField(self.op, 'auto_promote')
3462 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3463 if all_mods.count(None) == 3:
3464 raise errors.OpPrereqError("Please pass at least one modification",
3466 if all_mods.count(True) > 1:
3467 raise errors.OpPrereqError("Can't set the node into more than one"
3468 " state at the same time",
3471 # Boolean value that tells us whether we're offlining or draining the node
3472 self.offline_or_drain = (self.op.offline == True or
3473 self.op.drained == True)
3474 self.deoffline_or_drain = (self.op.offline == False or
3475 self.op.drained == False)
3476 self.might_demote = (self.op.master_candidate == False or
3477 self.offline_or_drain)
3479 self.lock_all = self.op.auto_promote and self.might_demote
3482 def ExpandNames(self):
3484 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3486 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3488 def BuildHooksEnv(self):
3491 This runs on the master node.
3495 "OP_TARGET": self.op.node_name,
3496 "MASTER_CANDIDATE": str(self.op.master_candidate),
3497 "OFFLINE": str(self.op.offline),
3498 "DRAINED": str(self.op.drained),
3500 nl = [self.cfg.GetMasterNode(),
3504 def CheckPrereq(self):
3505 """Check prerequisites.
3507 This only checks the instance list against the existing names.
3510 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3512 if (self.op.master_candidate is not None or
3513 self.op.drained is not None or
3514 self.op.offline is not None):
3515 # we can't change the master's node flags
3516 if self.op.node_name == self.cfg.GetMasterNode():
3517 raise errors.OpPrereqError("The master role can be changed"
3518 " only via masterfailover",
3522 if node.master_candidate and self.might_demote and not self.lock_all:
3523 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3524       # check if after removing the current node, we're missing master candidates
3526 (mc_remaining, mc_should, _) = \
3527 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3528 if mc_remaining < mc_should:
3529 raise errors.OpPrereqError("Not enough master candidates, please"
3530 " pass auto_promote to allow promotion",
3533 if (self.op.master_candidate == True and
3534 ((node.offline and not self.op.offline == False) or
3535 (node.drained and not self.op.drained == False))):
3536 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3537 " to master_candidate" % node.name,
3540 # If we're being deofflined/drained, we'll MC ourself if needed
3541 if (self.deoffline_or_drain and not self.offline_or_drain and not
3542 self.op.master_candidate == True and not node.master_candidate):
3543 self.op.master_candidate = _DecideSelfPromotion(self)
3544 if self.op.master_candidate:
3545 self.LogInfo("Autopromoting node to master candidate")
3549 def Exec(self, feedback_fn):
3558 if self.op.offline is not None:
3559 node.offline = self.op.offline
3560 result.append(("offline", str(self.op.offline)))
3561 if self.op.offline == True:
3562 if node.master_candidate:
3563 node.master_candidate = False
3565 result.append(("master_candidate", "auto-demotion due to offline"))
3567 node.drained = False
3568 result.append(("drained", "clear drained status due to offline"))
3570 if self.op.master_candidate is not None:
3571 node.master_candidate = self.op.master_candidate
3573 result.append(("master_candidate", str(self.op.master_candidate)))
3574 if self.op.master_candidate == False:
3575 rrc = self.rpc.call_node_demote_from_mc(node.name)
3578 self.LogWarning("Node failed to demote itself: %s" % msg)
3580 if self.op.drained is not None:
3581 node.drained = self.op.drained
3582 result.append(("drained", str(self.op.drained)))
3583 if self.op.drained == True:
3584 if node.master_candidate:
3585 node.master_candidate = False
3587 result.append(("master_candidate", "auto-demotion due to drain"))
3588 rrc = self.rpc.call_node_demote_from_mc(node.name)
3591 self.LogWarning("Node failed to demote itself: %s" % msg)
3593 node.offline = False
3594 result.append(("offline", "clear offline status due to drain"))
3596 # we locked all nodes, we adjust the CP before updating this node
3598 _AdjustCandidatePool(self, [node.name])
3600 # this will trigger configuration file update, if needed
3601 self.cfg.Update(node, feedback_fn)
3603 # this will trigger job queue propagation or cleanup
3605 self.context.ReaddNode(node)
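# LUSetNodeParams.Exec returns the 'result' list built above: a list of
# (parameter, new value) pairs describing what was changed, for example
# (illustrative):
#   [("offline", "True"), ("master_candidate", "auto-demotion due to offline")]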
3610 class LUPowercycleNode(NoHooksLU):
3611 """Powercycles a node.
3614 _OP_REQP = ["node_name", "force"]
3617 def CheckArguments(self):
3618 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3619 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3620 raise errors.OpPrereqError("The node is the master and the force"
3621 " parameter was not set",
3624 def ExpandNames(self):
3625 """Locking for PowercycleNode.
3627 This is a last-resort option and shouldn't block on other
3628 jobs. Therefore, we grab no locks.
3631 self.needed_locks = {}
3633 def CheckPrereq(self):
3634 """Check prerequisites.
3636 This LU has no prereqs.
3641 def Exec(self, feedback_fn):
3645 result = self.rpc.call_node_powercycle(self.op.node_name,
3646 self.cfg.GetHypervisorType())
3647 result.Raise("Failed to schedule the reboot")
3648 return result.payload
3651 class LUQueryClusterInfo(NoHooksLU):
3652 """Query cluster configuration.
3658 def ExpandNames(self):
3659 self.needed_locks = {}
3661 def CheckPrereq(self):
3662 """No prerequsites needed for this LU.
3667 def Exec(self, feedback_fn):
3668 """Return cluster config.
3671 cluster = self.cfg.GetClusterInfo()
3674 # Filter just for enabled hypervisors
3675 for os_name, hv_dict in cluster.os_hvp.items():
3676 os_hvp[os_name] = {}
3677 for hv_name, hv_params in hv_dict.items():
3678 if hv_name in cluster.enabled_hypervisors:
3679 os_hvp[os_name][hv_name] = hv_params
3682 "software_version": constants.RELEASE_VERSION,
3683 "protocol_version": constants.PROTOCOL_VERSION,
3684 "config_version": constants.CONFIG_VERSION,
3685 "os_api_version": max(constants.OS_API_VERSIONS),
3686 "export_version": constants.EXPORT_VERSION,
3687 "architecture": (platform.architecture()[0], platform.machine()),
3688 "name": cluster.cluster_name,
3689 "master": cluster.master_node,
3690 "default_hypervisor": cluster.enabled_hypervisors[0],
3691 "enabled_hypervisors": cluster.enabled_hypervisors,
3692 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3693 for hypervisor_name in cluster.enabled_hypervisors]),
3695 "beparams": cluster.beparams,
3696 "nicparams": cluster.nicparams,
3697 "candidate_pool_size": cluster.candidate_pool_size,
3698 "master_netdev": cluster.master_netdev,
3699 "volume_group_name": cluster.volume_group_name,
3700 "file_storage_dir": cluster.file_storage_dir,
3701 "maintain_node_health": cluster.maintain_node_health,
3702 "ctime": cluster.ctime,
3703 "mtime": cluster.mtime,
3704 "uuid": cluster.uuid,
3705 "tags": list(cluster.GetTags()),
3711 class LUQueryConfigValues(NoHooksLU):
3712 """Return configuration values.
3717 _FIELDS_DYNAMIC = utils.FieldSet()
3718 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3721 def ExpandNames(self):
3722 self.needed_locks = {}
3724 _CheckOutputFields(static=self._FIELDS_STATIC,
3725 dynamic=self._FIELDS_DYNAMIC,
3726 selected=self.op.output_fields)
3728 def CheckPrereq(self):
3729 """No prerequisites.
3734 def Exec(self, feedback_fn):
3735 """Dump a representation of the cluster config to the standard output.
3739 for field in self.op.output_fields:
3740 if field == "cluster_name":
3741 entry = self.cfg.GetClusterName()
3742 elif field == "master_node":
3743 entry = self.cfg.GetMasterNode()
3744 elif field == "drain_flag":
3745 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3746 elif field == "watcher_pause":
3747 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3749 raise errors.ParameterError(field)
3750 values.append(entry)
3754 class LUActivateInstanceDisks(NoHooksLU):
3755 """Bring up an instance's disks.
3758 _OP_REQP = ["instance_name"]
3761 def ExpandNames(self):
3762 self._ExpandAndLockInstance()
3763 self.needed_locks[locking.LEVEL_NODE] = []
3764 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3766 def DeclareLocks(self, level):
3767 if level == locking.LEVEL_NODE:
3768 self._LockInstancesNodes()
3770 def CheckPrereq(self):
3771 """Check prerequisites.
3773 This checks that the instance is in the cluster.
3776 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3777 assert self.instance is not None, \
3778 "Cannot retrieve locked instance %s" % self.op.instance_name
3779 _CheckNodeOnline(self, self.instance.primary_node)
3780 if not hasattr(self.op, "ignore_size"):
3781 self.op.ignore_size = False
3783 def Exec(self, feedback_fn):
3784 """Activate the disks.
3787 disks_ok, disks_info = \
3788 _AssembleInstanceDisks(self, self.instance,
3789 ignore_size=self.op.ignore_size)
3791 raise errors.OpExecError("Cannot activate block devices")
3796 def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3798 """Prepare the block devices for an instance.
3800 This sets up the block devices on all nodes.
3802 @type lu: L{LogicalUnit}
3803 @param lu: the logical unit on whose behalf we execute
3804 @type instance: L{objects.Instance}
3805 @param instance: the instance for whose disks we assemble
3806 @type ignore_secondaries: boolean
3807 @param ignore_secondaries: if true, errors on secondary nodes
3808 won't result in an error return from the function
3809 @type ignore_size: boolean
3810 @param ignore_size: if true, the current known size of the disk
3811 will not be used during the disk activation, useful for cases
3812 when the size is wrong
3813 @return: False if the operation failed, otherwise a list of
3814 (host, instance_visible_name, node_visible_name)
3815 with the mapping from node devices to instance devices
3820 iname = instance.name
3821   # With the two-pass mechanism we try to reduce the window of
3822 # opportunity for the race condition of switching DRBD to primary
3823   # before handshaking occurred, but we do not eliminate it
3825 # The proper fix would be to wait (with some limits) until the
3826 # connection has been made and drbd transitions from WFConnection
3827 # into any other network-connected state (Connected, SyncTarget,
3830 # 1st pass, assemble on all nodes in secondary mode
3831 for inst_disk in instance.disks:
3832 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3834 node_disk = node_disk.Copy()
3835 node_disk.UnsetSize()
3836 lu.cfg.SetDiskID(node_disk, node)
3837 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3838 msg = result.fail_msg
3840 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3841 " (is_primary=False, pass=1): %s",
3842 inst_disk.iv_name, node, msg)
3843 if not ignore_secondaries:
3846 # FIXME: race condition on drbd migration to primary
3848 # 2nd pass, do only the primary node
3849 for inst_disk in instance.disks:
3852 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3853 if node != instance.primary_node:
3856 node_disk = node_disk.Copy()
3857 node_disk.UnsetSize()
3858 lu.cfg.SetDiskID(node_disk, node)
3859 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3860 msg = result.fail_msg
3862 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3863 " (is_primary=True, pass=2): %s",
3864 inst_disk.iv_name, node, msg)
3867 dev_path = result.payload
3869 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3871 # leave the disks configured for the primary node
3872 # this is a workaround that would be better fixed by
3873 # improving the logical/physical id handling
3874 for disk in instance.disks:
3875 lu.cfg.SetDiskID(disk, instance.primary_node)
3877 return disks_ok, device_info
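# Callers get back (disks_ok, device_info); device_info is a list of
# (node, iv_name, device_path) tuples for the primary node's disks, and the
# usual pattern is to raise OpExecError when disks_ok is False, as
# LUActivateInstanceDisks above and _StartInstanceDisks below do.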
3880 def _StartInstanceDisks(lu, instance, force):
3881 """Start the disks of an instance.
3884 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3885 ignore_secondaries=force)
3887 _ShutdownInstanceDisks(lu, instance)
3888 if force is not None and not force:
3889 lu.proc.LogWarning("", hint="If the message above refers to a"
3891 " you can retry the operation using '--force'.")
3892 raise errors.OpExecError("Disk consistency error")
3895 class LUDeactivateInstanceDisks(NoHooksLU):
3896 """Shutdown an instance's disks.
3899 _OP_REQP = ["instance_name"]
3902 def ExpandNames(self):
3903 self._ExpandAndLockInstance()
3904 self.needed_locks[locking.LEVEL_NODE] = []
3905 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3907 def DeclareLocks(self, level):
3908 if level == locking.LEVEL_NODE:
3909 self._LockInstancesNodes()
3911 def CheckPrereq(self):
3912 """Check prerequisites.
3914 This checks that the instance is in the cluster.
3917 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3918 assert self.instance is not None, \
3919 "Cannot retrieve locked instance %s" % self.op.instance_name
3921 def Exec(self, feedback_fn):
3922 """Deactivate the disks
3925 instance = self.instance
3926 _SafeShutdownInstanceDisks(self, instance)
3929 def _SafeShutdownInstanceDisks(lu, instance):
3930 """Shutdown block devices of an instance.
3932 This function checks if an instance is running, before calling
3933 _ShutdownInstanceDisks.
3936 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
3937 _ShutdownInstanceDisks(lu, instance)
3940 def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3941 """Shutdown block devices of an instance.
3943 This does the shutdown on all nodes of the instance.
3945 If ignore_primary is false, errors on the primary node are
3950 for disk in instance.disks:
3951 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3952 lu.cfg.SetDiskID(top_disk, node)
3953 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3954 msg = result.fail_msg
3956 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3957 disk.iv_name, node, msg)
3958 if not ignore_primary or node != instance.primary_node:
3963 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3964 """Checks if a node has enough free memory.
3966 This function checks if a given node has the needed amount of free
3967 memory. In case the node has less memory or we cannot get the
3968 information from the node, this function raises an OpPrereqError
3971 @type lu: C{LogicalUnit}
3972 @param lu: a logical unit from which we get configuration data
3974 @param node: the node to check
3975 @type reason: C{str}
3976 @param reason: string to use in the error message
3977 @type requested: C{int}
3978 @param requested: the amount of memory in MiB to check for
3979 @type hypervisor_name: C{str}
3980 @param hypervisor_name: the hypervisor to ask for memory stats
3981 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3982 we cannot check the node
3985 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
3986 nodeinfo[node].Raise("Can't get data from node %s" % node,
3987 prereq=True, ecode=errors.ECODE_ENVIRON)
3988 free_mem = nodeinfo[node].payload.get('memory_free', None)
3989 if not isinstance(free_mem, int):
3990 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3991 " was '%s'" % (node, free_mem),
3992 errors.ECODE_ENVIRON)
3993 if requested > free_mem:
3994 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3995 " needed %s MiB, available %s MiB" %
3996 (node, reason, requested, free_mem),
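# Typical call site (values are illustrative): a LU checks the primary node
# before starting an instance, e.g.
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)
# as LUStartupInstance.CheckPrereq does below.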
4000 def _CheckNodesFreeDisk(lu, nodenames, requested):
4001 """Checks if nodes have enough free disk space in the default VG.
4003 This function checks if all given nodes have the needed amount of
4004 free disk. In case any node has less disk or we cannot get the
4005 information from the node, this function raises an OpPrereqError
4008 @type lu: C{LogicalUnit}
4009 @param lu: a logical unit from which we get configuration data
4010 @type nodenames: C{list}
4011 @param nodenames: the list of node names to check
4012 @type requested: C{int}
4013 @param requested: the amount of disk in MiB to check for
4014 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4015 we cannot check the node
4018 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4019 lu.cfg.GetHypervisorType())
4020 for node in nodenames:
4021 info = nodeinfo[node]
4022 info.Raise("Cannot get current information from node %s" % node,
4023 prereq=True, ecode=errors.ECODE_ENVIRON)
4024 vg_free = info.payload.get("vg_free", None)
4025 if not isinstance(vg_free, int):
4026 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4027 " result was '%s'" % (node, vg_free),
4028 errors.ECODE_ENVIRON)
4029 if requested > vg_free:
4030 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4031 " required %d MiB, available %d MiB" %
4032 (node, requested, vg_free),
4036 class LUStartupInstance(LogicalUnit):
4037 """Starts an instance.
4040 HPATH = "instance-start"
4041 HTYPE = constants.HTYPE_INSTANCE
4042 _OP_REQP = ["instance_name", "force"]
4045 def ExpandNames(self):
4046 self._ExpandAndLockInstance()
4048 def BuildHooksEnv(self):
4051 This runs on master, primary and secondary nodes of the instance.
4055 "FORCE": self.op.force,
4057 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4058 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4061 def CheckPrereq(self):
4062 """Check prerequisites.
4064 This checks that the instance is in the cluster.
4067 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4068 assert self.instance is not None, \
4069 "Cannot retrieve locked instance %s" % self.op.instance_name
4072 self.beparams = getattr(self.op, "beparams", {})
4074 if not isinstance(self.beparams, dict):
4075 raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4076 " dict" % (type(self.beparams), ),
4078 # fill the beparams dict
4079 utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
4080 self.op.beparams = self.beparams
4083 self.hvparams = getattr(self.op, "hvparams", {})
4085 if not isinstance(self.hvparams, dict):
4086 raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4087 " dict" % (type(self.hvparams), ),
4090 # check hypervisor parameter syntax (locally)
4091 cluster = self.cfg.GetClusterInfo()
4092 utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
4093 filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
4095 filled_hvp.update(self.hvparams)
4096 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4097 hv_type.CheckParameterSyntax(filled_hvp)
4098 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4099 self.op.hvparams = self.hvparams
4101 _CheckNodeOnline(self, instance.primary_node)
4103 bep = self.cfg.GetClusterInfo().FillBE(instance)
4104 # check bridges existence
4105 _CheckInstanceBridgesExist(self, instance)
4107 remote_info = self.rpc.call_instance_info(instance.primary_node,
4109 instance.hypervisor)
4110 remote_info.Raise("Error checking node %s" % instance.primary_node,
4111 prereq=True, ecode=errors.ECODE_ENVIRON)
4112 if not remote_info.payload: # not running already
4113 _CheckNodeFreeMemory(self, instance.primary_node,
4114 "starting instance %s" % instance.name,
4115 bep[constants.BE_MEMORY], instance.hypervisor)
4117 def Exec(self, feedback_fn):
4118 """Start the instance.
4121 instance = self.instance
4122 force = self.op.force
4124 self.cfg.MarkInstanceUp(instance.name)
4126 node_current = instance.primary_node
4128 _StartInstanceDisks(self, instance, force)
4130 result = self.rpc.call_instance_start(node_current, instance,
4131 self.hvparams, self.beparams)
4132 msg = result.fail_msg
4134 _ShutdownInstanceDisks(self, instance)
4135 raise errors.OpExecError("Could not start instance: %s" % msg)
4138 class LURebootInstance(LogicalUnit):
4139 """Reboot an instance.
4142 HPATH = "instance-reboot"
4143 HTYPE = constants.HTYPE_INSTANCE
4144 _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
4147 def CheckArguments(self):
4148 """Check the arguments.
4151 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4152 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4154 def ExpandNames(self):
4155 if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
4156 constants.INSTANCE_REBOOT_HARD,
4157 constants.INSTANCE_REBOOT_FULL]:
4158 raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
4159 (constants.INSTANCE_REBOOT_SOFT,
4160 constants.INSTANCE_REBOOT_HARD,
4161 constants.INSTANCE_REBOOT_FULL))
4162 self._ExpandAndLockInstance()
4164 def BuildHooksEnv(self):
4167 This runs on master, primary and secondary nodes of the instance.
4171 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4172 "REBOOT_TYPE": self.op.reboot_type,
4173 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4175 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4176 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4179 def CheckPrereq(self):
4180 """Check prerequisites.
4182 This checks that the instance is in the cluster.
4185 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4186 assert self.instance is not None, \
4187 "Cannot retrieve locked instance %s" % self.op.instance_name
4189 _CheckNodeOnline(self, instance.primary_node)
4191 # check bridges existence
4192 _CheckInstanceBridgesExist(self, instance)
4194 def Exec(self, feedback_fn):
4195 """Reboot the instance.
4198 instance = self.instance
4199 ignore_secondaries = self.op.ignore_secondaries
4200 reboot_type = self.op.reboot_type
4202 node_current = instance.primary_node
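# Soft and hard reboots are delegated to the hypervisor through
# call_instance_reboot; a full reboot is emulated by shutting the instance
# down, cycling its disks and starting it again.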
4204 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4205 constants.INSTANCE_REBOOT_HARD]:
4206 for disk in instance.disks:
4207 self.cfg.SetDiskID(disk, node_current)
4208 result = self.rpc.call_instance_reboot(node_current, instance,
4210 self.shutdown_timeout)
4211 result.Raise("Could not reboot instance")
4213 result = self.rpc.call_instance_shutdown(node_current, instance,
4214 self.shutdown_timeout)
4215 result.Raise("Could not shutdown instance for full reboot")
4216 _ShutdownInstanceDisks(self, instance)
4217 _StartInstanceDisks(self, instance, ignore_secondaries)
4218 result = self.rpc.call_instance_start(node_current, instance, None, None)
4219 msg = result.fail_msg
4221 _ShutdownInstanceDisks(self, instance)
4222 raise errors.OpExecError("Could not start instance for"
4223 " full reboot: %s" % msg)
4225 self.cfg.MarkInstanceUp(instance.name)
4228 class LUShutdownInstance(LogicalUnit):
4229 """Shutdown an instance.
4232 HPATH = "instance-stop"
4233 HTYPE = constants.HTYPE_INSTANCE
4234 _OP_REQP = ["instance_name"]
4237 def CheckArguments(self):
4238 """Check the arguments.
4241 self.timeout = getattr(self.op, "timeout",
4242 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4244 def ExpandNames(self):
4245 self._ExpandAndLockInstance()
4247 def BuildHooksEnv(self):
4250 This runs on master, primary and secondary nodes of the instance.
4253 env = _BuildInstanceHookEnvByObject(self, self.instance)
4254 env["TIMEOUT"] = self.timeout
4255 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4258 def CheckPrereq(self):
4259 """Check prerequisites.
4261 This checks that the instance is in the cluster.
4264 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4265 assert self.instance is not None, \
4266 "Cannot retrieve locked instance %s" % self.op.instance_name
4267 _CheckNodeOnline(self, self.instance.primary_node)
4269 def Exec(self, feedback_fn):
4270 """Shutdown the instance.
4273 instance = self.instance
4274 node_current = instance.primary_node
4275 timeout = self.timeout
4276 self.cfg.MarkInstanceDown(instance.name)
4277 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4278 msg = result.fail_msg
4280 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4282 _ShutdownInstanceDisks(self, instance)
4285 class LUReinstallInstance(LogicalUnit):
4286 """Reinstall an instance.
4289 HPATH = "instance-reinstall"
4290 HTYPE = constants.HTYPE_INSTANCE
4291 _OP_REQP = ["instance_name"]
4294 def ExpandNames(self):
4295 self._ExpandAndLockInstance()
4297 def BuildHooksEnv(self):
4300 This runs on master, primary and secondary nodes of the instance.
4303 env = _BuildInstanceHookEnvByObject(self, self.instance)
4304 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4307 def CheckPrereq(self):
4308 """Check prerequisites.
4310 This checks that the instance is in the cluster and is not running.
4313 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4314 assert instance is not None, \
4315 "Cannot retrieve locked instance %s" % self.op.instance_name
4316 _CheckNodeOnline(self, instance.primary_node)
4318 if instance.disk_template == constants.DT_DISKLESS:
4319 raise errors.OpPrereqError("Instance '%s' has no disks" %
4320 self.op.instance_name,
4322 _CheckInstanceDown(self, instance, "cannot reinstall")
4324 self.op.os_type = getattr(self.op, "os_type", None)
4325 self.op.force_variant = getattr(self.op, "force_variant", False)
4326 if self.op.os_type is not None:
4328 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4329 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4331 self.instance = instance
4333 def Exec(self, feedback_fn):
4334 """Reinstall the instance.
4337 inst = self.instance
4339 if self.op.os_type is not None:
4340 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4341 inst.os = self.op.os_type
4342 self.cfg.Update(inst, feedback_fn)
4344 _StartInstanceDisks(self, inst, None)
4346 feedback_fn("Running the instance OS create scripts...")
4347 # FIXME: pass debug option from opcode to backend
4348 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4349 self.op.debug_level)
4350 result.Raise("Could not install OS for instance %s on node %s" %
4351 (inst.name, inst.primary_node))
4353 _ShutdownInstanceDisks(self, inst)
4356 class LURecreateInstanceDisks(LogicalUnit):
4357 """Recreate an instance's missing disks.
4360 HPATH = "instance-recreate-disks"
4361 HTYPE = constants.HTYPE_INSTANCE
4362 _OP_REQP = ["instance_name", "disks"]
4365 def CheckArguments(self):
4366 """Check the arguments.
4369 if not isinstance(self.op.disks, list):
4370 raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4371 for item in self.op.disks:
4372 if (not isinstance(item, int) or
4374 raise errors.OpPrereqError("Invalid disk specification '%s'" %
4375 str(item), errors.ECODE_INVAL)
4377 def ExpandNames(self):
4378 self._ExpandAndLockInstance()
4380 def BuildHooksEnv(self):
4383 This runs on master, primary and secondary nodes of the instance.
4386 env = _BuildInstanceHookEnvByObject(self, self.instance)
4387 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4390 def CheckPrereq(self):
4391 """Check prerequisites.
4393 This checks that the instance is in the cluster and is not running.
4396 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4397 assert instance is not None, \
4398 "Cannot retrieve locked instance %s" % self.op.instance_name
4399 _CheckNodeOnline(self, instance.primary_node)
4401 if instance.disk_template == constants.DT_DISKLESS:
4402 raise errors.OpPrereqError("Instance '%s' has no disks" %
4403 self.op.instance_name, errors.ECODE_INVAL)
4404 _CheckInstanceDown(self, instance, "cannot recreate disks")
4406 if not self.op.disks:
4407 self.op.disks = range(len(instance.disks))
4409 for idx in self.op.disks:
4410 if idx >= len(instance.disks):
4411 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4414 self.instance = instance
4416 def Exec(self, feedback_fn):
4417 """Recreate the disks.
4421 for idx, _ in enumerate(self.instance.disks):
4422 if idx not in self.op.disks: # disk idx has not been passed in
4426 _CreateDisks(self, self.instance, to_skip=to_skip)
4429 class LURenameInstance(LogicalUnit):
4430 """Rename an instance.
4433 HPATH = "instance-rename"
4434 HTYPE = constants.HTYPE_INSTANCE
4435 _OP_REQP = ["instance_name", "new_name"]
4437 def BuildHooksEnv(self):
4440 This runs on master, primary and secondary nodes of the instance.
4443 env = _BuildInstanceHookEnvByObject(self, self.instance)
4444 env["INSTANCE_NEW_NAME"] = self.op.new_name
4445 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4448 def CheckPrereq(self):
4449 """Check prerequisites.
4451 This checks that the instance is in the cluster and is not running.
4454 self.op.instance_name = _ExpandInstanceName(self.cfg,
4455 self.op.instance_name)
4456 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4457 assert instance is not None
4458 _CheckNodeOnline(self, instance.primary_node)
4459 _CheckInstanceDown(self, instance, "cannot rename")
4460 self.instance = instance
4462 # new name verification
4463 name_info = utils.GetHostInfo(self.op.new_name)
4465 self.op.new_name = new_name = name_info.name
4466 instance_list = self.cfg.GetInstanceList()
4467 if new_name in instance_list:
4468 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4469 new_name, errors.ECODE_EXISTS)
4471 if not getattr(self.op, "ignore_ip", False):
4472 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4473 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4474 (name_info.ip, new_name),
4475 errors.ECODE_NOTUNIQUE)
4478 def Exec(self, feedback_fn):
4479 """Reinstall the instance.
4482 inst = self.instance
4483 old_name = inst.name
4485 if inst.disk_template == constants.DT_FILE:
4486 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4488 self.cfg.RenameInstance(inst.name, self.op.new_name)
4489 # Change the instance lock. This is definitely safe while we hold the BGL
4490 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4491 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4493 # re-read the instance from the configuration after rename
4494 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4496 if inst.disk_template == constants.DT_FILE:
4497 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4498 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4499 old_file_storage_dir,
4500 new_file_storage_dir)
4501 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4502 " (but the instance has been renamed in Ganeti)" %
4503 (inst.primary_node, old_file_storage_dir,
4504 new_file_storage_dir))
4506 _StartInstanceDisks(self, inst, None)
4508 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4509 old_name, self.op.debug_level)
4510 msg = result.fail_msg
4512 msg = ("Could not run OS rename script for instance %s on node %s"
4513 " (but the instance has been renamed in Ganeti): %s" %
4514 (inst.name, inst.primary_node, msg))
4515 self.proc.LogWarning(msg)
4517 _ShutdownInstanceDisks(self, inst)
4520 class LURemoveInstance(LogicalUnit):
4521 """Remove an instance.
4524 HPATH = "instance-remove"
4525 HTYPE = constants.HTYPE_INSTANCE
4526 _OP_REQP = ["instance_name", "ignore_failures"]
4529 def CheckArguments(self):
4530 """Check the arguments.
4533 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4534 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4536 def ExpandNames(self):
4537 self._ExpandAndLockInstance()
4538 self.needed_locks[locking.LEVEL_NODE] = []
4539 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4541 def DeclareLocks(self, level):
4542 if level == locking.LEVEL_NODE:
4543 self._LockInstancesNodes()
4545 def BuildHooksEnv(self):
4548 This runs on master, primary and secondary nodes of the instance.
4551 env = _BuildInstanceHookEnvByObject(self, self.instance)
4552 env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4553 nl = [self.cfg.GetMasterNode()]
4554 nl_post = list(self.instance.all_nodes) + nl
4555 return env, nl, nl_post
4557 def CheckPrereq(self):
4558 """Check prerequisites.
4560 This checks that the instance is in the cluster.
4563 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4564 assert self.instance is not None, \
4565 "Cannot retrieve locked instance %s" % self.op.instance_name
4567 def Exec(self, feedback_fn):
4568 """Remove the instance.
4571 instance = self.instance
4572 logging.info("Shutting down instance %s on node %s",
4573 instance.name, instance.primary_node)
4575 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4576 self.shutdown_timeout)
4577 msg = result.fail_msg
4579 if self.op.ignore_failures:
4580 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4582 raise errors.OpExecError("Could not shutdown instance %s on"
4584 (instance.name, instance.primary_node, msg))
4586 logging.info("Removing block devices for instance %s", instance.name)
4588 if not _RemoveDisks(self, instance):
4589 if self.op.ignore_failures:
4590 feedback_fn("Warning: can't remove instance's disks")
4592 raise errors.OpExecError("Can't remove instance's disks")
4594 logging.info("Removing instance %s out of cluster config", instance.name)
4596 self.cfg.RemoveInstance(instance.name)
4597 self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4600 class LUQueryInstances(NoHooksLU):
4601 """Logical unit for querying instances.
4604 # pylint: disable-msg=W0142
4605 _OP_REQP = ["output_fields", "names", "use_locking"]
4607 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4608 "serial_no", "ctime", "mtime", "uuid"]
4609 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4611 "disk_template", "ip", "mac", "bridge",
4612 "nic_mode", "nic_link",
4613 "sda_size", "sdb_size", "vcpus", "tags",
4614 "network_port", "beparams",
4615 r"(disk)\.(size)/([0-9]+)",
4616 r"(disk)\.(sizes)", "disk_usage",
4617 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4618 r"(nic)\.(bridge)/([0-9]+)",
4619 r"(nic)\.(macs|ips|modes|links|bridges)",
4620 r"(disk|nic)\.(count)",
4622 ] + _SIMPLE_FIELDS +
4624 for name in constants.HVS_PARAMETERS
4625 if name not in constants.HVC_GLOBALS] +
4627 for name in constants.BES_PARAMETERS])
4628 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
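# The patterns above describe parametrised fields: for example "disk.size/0"
# is the size of the first disk, "nic.mac/1" the MAC address of the second
# NIC and "disk.count"/"nic.count" the number of devices, while the
# HVPREFIX-/BEPREFIX-prefixed fields expose individual hypervisor and
# backend parameters (see the field handling in Exec below).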
4631 def ExpandNames(self):
4632 _CheckOutputFields(static=self._FIELDS_STATIC,
4633 dynamic=self._FIELDS_DYNAMIC,
4634 selected=self.op.output_fields)
4636 self.needed_locks = {}
4637 self.share_locks[locking.LEVEL_INSTANCE] = 1
4638 self.share_locks[locking.LEVEL_NODE] = 1
4641 self.wanted = _GetWantedInstances(self, self.op.names)
4643 self.wanted = locking.ALL_SET
4645 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4646 self.do_locking = self.do_node_query and self.op.use_locking
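# Instance and node locks are only needed when live data has to be fetched
# from the nodes (some requested field is not static) and the opcode asked
# for locking; otherwise the query is answered from the configuration
# without acquiring them.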
4648 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4649 self.needed_locks[locking.LEVEL_NODE] = []
4650 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4652 def DeclareLocks(self, level):
4653 if level == locking.LEVEL_NODE and self.do_locking:
4654 self._LockInstancesNodes()
4656 def CheckPrereq(self):
4657 """Check prerequisites.
4662 def Exec(self, feedback_fn):
4663 """Computes the list of nodes and their attributes.
4666 # pylint: disable-msg=R0912
4667 # way too many branches here
4668 all_info = self.cfg.GetAllInstancesInfo()
4669 if self.wanted == locking.ALL_SET:
4670 # caller didn't specify instance names, so ordering is not important
4672 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4674 instance_names = all_info.keys()
4675 instance_names = utils.NiceSort(instance_names)
4677 # caller did specify names, so we must keep the ordering
4679 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4681 tgt_set = all_info.keys()
4682 missing = set(self.wanted).difference(tgt_set)
4684 raise errors.OpExecError("Some instances were removed before"
4685 " retrieving their data: %s" % missing)
4686 instance_names = self.wanted
4688 instance_list = [all_info[iname] for iname in instance_names]
4690 # begin data gathering
4692 nodes = frozenset([inst.primary_node for inst in instance_list])
4693 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4697 if self.do_node_query:
4699 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4701 result = node_data[name]
4703 # offline nodes will be in both lists
4704 off_nodes.append(name)
4706 bad_nodes.append(name)
4709 live_data.update(result.payload)
4710 # else no instance is alive
4712 live_data = dict([(name, {}) for name in instance_names])
4714 # end data gathering
4719 cluster = self.cfg.GetClusterInfo()
4720 for instance in instance_list:
4722 i_hv = cluster.FillHV(instance, skip_globals=True)
4723 i_be = cluster.FillBE(instance)
4724 i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4725 nic.nicparams) for nic in instance.nics]
4726 for field in self.op.output_fields:
4727 st_match = self._FIELDS_STATIC.Matches(field)
4728 if field in self._SIMPLE_FIELDS:
4729 val = getattr(instance, field)
4730 elif field == "pnode":
4731 val = instance.primary_node
4732 elif field == "snodes":
4733 val = list(instance.secondary_nodes)
4734 elif field == "admin_state":
4735 val = instance.admin_up
4736 elif field == "oper_state":
4737 if instance.primary_node in bad_nodes:
4740 val = bool(live_data.get(instance.name))
4741 elif field == "status":
4742 if instance.primary_node in off_nodes:
4743 val = "ERROR_nodeoffline"
4744 elif instance.primary_node in bad_nodes:
4745 val = "ERROR_nodedown"
4747 running = bool(live_data.get(instance.name))
4749 if instance.admin_up:
4754 if instance.admin_up:
4758 elif field == "oper_ram":
4759 if instance.primary_node in bad_nodes:
4761 elif instance.name in live_data:
4762 val = live_data[instance.name].get("memory", "?")
4765 elif field == "vcpus":
4766 val = i_be[constants.BE_VCPUS]
4767 elif field == "disk_template":
4768 val = instance.disk_template
4771 val = instance.nics[0].ip
4774 elif field == "nic_mode":
4776 val = i_nicp[0][constants.NIC_MODE]
4779 elif field == "nic_link":
4781 val = i_nicp[0][constants.NIC_LINK]
4784 elif field == "bridge":
4785 if (instance.nics and
4786 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4787 val = i_nicp[0][constants.NIC_LINK]
4790 elif field == "mac":
4792 val = instance.nics[0].mac
4795 elif field == "sda_size" or field == "sdb_size":
4796 idx = ord(field[2]) - ord('a')
4798 val = instance.FindDisk(idx).size
4799 except errors.OpPrereqError:
4801 elif field == "disk_usage": # total disk usage per node
4802 disk_sizes = [{'size': disk.size} for disk in instance.disks]
4803 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4804 elif field == "tags":
4805 val = list(instance.GetTags())
4806 elif field == "hvparams":
4808 elif (field.startswith(HVPREFIX) and
4809 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4810 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4811 val = i_hv.get(field[len(HVPREFIX):], None)
4812 elif field == "beparams":
4814 elif (field.startswith(BEPREFIX) and
4815 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4816 val = i_be.get(field[len(BEPREFIX):], None)
4817 elif st_match and st_match.groups():
4818 # matches a variable list
4819 st_groups = st_match.groups()
4820 if st_groups and st_groups[0] == "disk":
4821 if st_groups[1] == "count":
4822 val = len(instance.disks)
4823 elif st_groups[1] == "sizes":
4824 val = [disk.size for disk in instance.disks]
4825 elif st_groups[1] == "size":
4827 val = instance.FindDisk(st_groups[2]).size
4828 except errors.OpPrereqError:
4831 assert False, "Unhandled disk parameter"
4832 elif st_groups[0] == "nic":
4833 if st_groups[1] == "count":
4834 val = len(instance.nics)
4835 elif st_groups[1] == "macs":
4836 val = [nic.mac for nic in instance.nics]
4837 elif st_groups[1] == "ips":
4838 val = [nic.ip for nic in instance.nics]
4839 elif st_groups[1] == "modes":
4840 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4841 elif st_groups[1] == "links":
4842 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4843 elif st_groups[1] == "bridges":
4846 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4847 val.append(nicp[constants.NIC_LINK])
4852 nic_idx = int(st_groups[2])
4853 if nic_idx >= len(instance.nics):
4856 if st_groups[1] == "mac":
4857 val = instance.nics[nic_idx].mac
4858 elif st_groups[1] == "ip":
4859 val = instance.nics[nic_idx].ip
4860 elif st_groups[1] == "mode":
4861 val = i_nicp[nic_idx][constants.NIC_MODE]
4862 elif st_groups[1] == "link":
4863 val = i_nicp[nic_idx][constants.NIC_LINK]
4864 elif st_groups[1] == "bridge":
4865 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4866 if nic_mode == constants.NIC_MODE_BRIDGED:
4867 val = i_nicp[nic_idx][constants.NIC_LINK]
4871 assert False, "Unhandled NIC parameter"
4873 assert False, ("Declared but unhandled variable parameter '%s'" %
4876 assert False, "Declared but unhandled parameter '%s'" % field
4883 class LUFailoverInstance(LogicalUnit):
4884 """Failover an instance.
4887 HPATH = "instance-failover"
4888 HTYPE = constants.HTYPE_INSTANCE
4889 _OP_REQP = ["instance_name", "ignore_consistency"]
4892 def CheckArguments(self):
4893 """Check the arguments.
4896 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4897 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4899 def ExpandNames(self):
4900 self._ExpandAndLockInstance()
4901 self.needed_locks[locking.LEVEL_NODE] = []
4902 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4904 def DeclareLocks(self, level):
4905 if level == locking.LEVEL_NODE:
4906 self._LockInstancesNodes()
4908 def BuildHooksEnv(self):
4911 This runs on master, primary and secondary nodes of the instance.
4914 instance = self.instance
4915 source_node = instance.primary_node
4916 target_node = instance.secondary_nodes[0]
4918 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4919 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4920 "OLD_PRIMARY": source_node,
4921 "OLD_SECONDARY": target_node,
4922 "NEW_PRIMARY": target_node,
4923 "NEW_SECONDARY": source_node,
4925 env.update(_BuildInstanceHookEnvByObject(self, instance))
4926 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4928 nl_post.append(source_node)
4929 return env, nl, nl_post
4931 def CheckPrereq(self):
4932 """Check prerequisites.
4934 This checks that the instance is in the cluster.
4937 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4938 assert self.instance is not None, \
4939 "Cannot retrieve locked instance %s" % self.op.instance_name
4941 bep = self.cfg.GetClusterInfo().FillBE(instance)
4942 if instance.disk_template not in constants.DTS_NET_MIRROR:
4943 raise errors.OpPrereqError("Instance's disk layout is not"
4944 " network mirrored, cannot failover.",
4947 secondary_nodes = instance.secondary_nodes
4948 if not secondary_nodes:
4949 raise errors.ProgrammerError("no secondary node but using "
4950 "a mirrored disk template")
4952 target_node = secondary_nodes[0]
4953 _CheckNodeOnline(self, target_node)
4954 _CheckNodeNotDrained(self, target_node)
4955 if instance.admin_up:
4956 # check memory requirements on the secondary node
4957 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4958 instance.name, bep[constants.BE_MEMORY],
4959 instance.hypervisor)
4961 self.LogInfo("Not checking memory on the secondary node as"
4962 " instance will not be started")
4964 # check bridge existence
4965 _CheckInstanceBridgesExist(self, instance, node=target_node)
4967 def Exec(self, feedback_fn):
4968 """Failover an instance.
4970 The failover is done by shutting it down on its present node and
4971 starting it on the secondary.
4974 instance = self.instance
4976 source_node = instance.primary_node
4977 target_node = instance.secondary_nodes[0]
4979 if instance.admin_up:
4980 feedback_fn("* checking disk consistency between source and target")
4981 for dev in instance.disks:
4982 # for drbd, these are drbd over lvm
4983 if not _CheckDiskConsistency(self, dev, target_node, False):
4984 if not self.op.ignore_consistency:
4985 raise errors.OpExecError("Disk %s is degraded on target node,"
4986 " aborting failover." % dev.iv_name)
4988 feedback_fn("* not checking disk consistency as instance is not running")
4990 feedback_fn("* shutting down instance on source node")
4991 logging.info("Shutting down instance %s on node %s",
4992 instance.name, source_node)
4994 result = self.rpc.call_instance_shutdown(source_node, instance,
4995 self.shutdown_timeout)
4996 msg = result.fail_msg
4998 if self.op.ignore_consistency:
4999 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5000 " Proceeding anyway. Please make sure node"
5001 " %s is down. Error details: %s",
5002 instance.name, source_node, source_node, msg)
5004 raise errors.OpExecError("Could not shutdown instance %s on"
5006 (instance.name, source_node, msg))
5008 feedback_fn("* deactivating the instance's disks on source node")
5009 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5010 raise errors.OpExecError("Can't shut down the instance's disks.")
5012 instance.primary_node = target_node
5013 # distribute new instance config to the other nodes
5014 self.cfg.Update(instance, feedback_fn)
5016 # Only start the instance if it's marked as up
5017 if instance.admin_up:
5018 feedback_fn("* activating the instance's disks on target node")
5019 logging.info("Starting instance %s on node %s",
5020 instance.name, target_node)
5022 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5023 ignore_secondaries=True)
5025 _ShutdownInstanceDisks(self, instance)
5026 raise errors.OpExecError("Can't activate the instance's disks")
5028 feedback_fn("* starting the instance on the target node")
5029 result = self.rpc.call_instance_start(target_node, instance, None, None)
5030 msg = result.fail_msg
5032 _ShutdownInstanceDisks(self, instance)
5033 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5034 (instance.name, target_node, msg))
5037 class LUMigrateInstance(LogicalUnit):
5038 """Migrate an instance.
5040 This is migration without shutting down, compared to the failover,
5041 which is done with shutdown.
5044 HPATH = "instance-migrate"
5045 HTYPE = constants.HTYPE_INSTANCE
5046 _OP_REQP = ["instance_name", "live", "cleanup"]
5050 def ExpandNames(self):
5051 self._ExpandAndLockInstance()
5053 self.needed_locks[locking.LEVEL_NODE] = []
5054 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5056 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5057 self.op.live, self.op.cleanup)
5058 self.tasklets = [self._migrater]
5060 def DeclareLocks(self, level):
5061 if level == locking.LEVEL_NODE:
5062 self._LockInstancesNodes()
5064 def BuildHooksEnv(self):
5067 This runs on master, primary and secondary nodes of the instance.
5070 instance = self._migrater.instance
5071 source_node = instance.primary_node
5072 target_node = instance.secondary_nodes[0]
5073 env = _BuildInstanceHookEnvByObject(self, instance)
5074 env["MIGRATE_LIVE"] = self.op.live
5075 env["MIGRATE_CLEANUP"] = self.op.cleanup
5077 "OLD_PRIMARY": source_node,
5078 "OLD_SECONDARY": target_node,
5079 "NEW_PRIMARY": target_node,
5080 "NEW_SECONDARY": source_node,
5082 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5084 nl_post.append(source_node)
5085 return env, nl, nl_post
5088 class LUMoveInstance(LogicalUnit):
5089 """Move an instance by data-copying.
5092 HPATH = "instance-move"
5093 HTYPE = constants.HTYPE_INSTANCE
5094 _OP_REQP = ["instance_name", "target_node"]
5097 def CheckArguments(self):
5098 """Check the arguments.
5101 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5102 constants.DEFAULT_SHUTDOWN_TIMEOUT)
5104 def ExpandNames(self):
5105 self._ExpandAndLockInstance()
5106 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5107 self.op.target_node = target_node
5108 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5109 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5111 def DeclareLocks(self, level):
5112 if level == locking.LEVEL_NODE:
5113 self._LockInstancesNodes(primary_only=True)
5115 def BuildHooksEnv(self):
5118 This runs on master, primary and secondary nodes of the instance.
5122 "TARGET_NODE": self.op.target_node,
5123 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5125 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5126 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5127 self.op.target_node]
5130 def CheckPrereq(self):
5131 """Check prerequisites.
5133 This checks that the instance is in the cluster.
5136 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5137 assert self.instance is not None, \
5138 "Cannot retrieve locked instance %s" % self.op.instance_name
5140 node = self.cfg.GetNodeInfo(self.op.target_node)
5141 assert node is not None, \
5142 "Cannot retrieve locked node %s" % self.op.target_node
5144 self.target_node = target_node = node.name
5146 if target_node == instance.primary_node:
5147 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5148 (instance.name, target_node),
5151 bep = self.cfg.GetClusterInfo().FillBE(instance)
5153 for idx, dsk in enumerate(instance.disks):
5154 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5155 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5156 " cannot copy" % idx, errors.ECODE_STATE)
5158 _CheckNodeOnline(self, target_node)
5159 _CheckNodeNotDrained(self, target_node)
5161 if instance.admin_up:
5162 # check memory requirements on the secondary node
5163 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5164 instance.name, bep[constants.BE_MEMORY],
5165 instance.hypervisor)
5167 self.LogInfo("Not checking memory on the secondary node as"
5168 " instance will not be started")
5170 # check bridge existence
5171 _CheckInstanceBridgesExist(self, instance, node=target_node)
5173 def Exec(self, feedback_fn):
5174 """Move an instance.
5176 The move is done by shutting it down on its present node, copying
5177 the data over (slow) and starting it on the new node.
5180 instance = self.instance
5182 source_node = instance.primary_node
5183 target_node = self.target_node
5185 self.LogInfo("Shutting down instance %s on source node %s",
5186 instance.name, source_node)
5188 result = self.rpc.call_instance_shutdown(source_node, instance,
5189 self.shutdown_timeout)
5190 msg = result.fail_msg
5192 if self.op.ignore_consistency:
5193 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5194 " Proceeding anyway. Please make sure node"
5195 " %s is down. Error details: %s",
5196 instance.name, source_node, source_node, msg)
5198 raise errors.OpExecError("Could not shutdown instance %s on"
5200 (instance.name, source_node, msg))
5202 # create the target disks
5204 _CreateDisks(self, instance, target_node=target_node)
5205 except errors.OpExecError:
5206 self.LogWarning("Device creation failed, reverting...")
5208 _RemoveDisks(self, instance, target_node=target_node)
5210 self.cfg.ReleaseDRBDMinors(instance.name)
5213 cluster_name = self.cfg.GetClusterInfo().cluster_name
5216 # activate, get path, copy the data over
5217 for idx, disk in enumerate(instance.disks):
5218 self.LogInfo("Copying data for disk %d", idx)
5219 result = self.rpc.call_blockdev_assemble(target_node, disk,
5220 instance.name, True)
5222 self.LogWarning("Can't assemble newly created disk %d: %s",
5223 idx, result.fail_msg)
5224 errs.append(result.fail_msg)
5226 dev_path = result.payload
5227 result = self.rpc.call_blockdev_export(source_node, disk,
5228 target_node, dev_path,
5231 self.LogWarning("Can't copy data over for disk %d: %s",
5232 idx, result.fail_msg)
5233 errs.append(result.fail_msg)
5237 self.LogWarning("Some disks failed to copy, aborting")
5239 _RemoveDisks(self, instance, target_node=target_node)
5241 self.cfg.ReleaseDRBDMinors(instance.name)
5242 raise errors.OpExecError("Errors during disk copy: %s" %
5245 instance.primary_node = target_node
5246 self.cfg.Update(instance, feedback_fn)
5248 self.LogInfo("Removing the disks on the original node")
5249 _RemoveDisks(self, instance, target_node=source_node)
5251 # Only start the instance if it's marked as up
5252 if instance.admin_up:
5253 self.LogInfo("Starting instance %s on node %s",
5254 instance.name, target_node)
5256 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5257 ignore_secondaries=True)
5259 _ShutdownInstanceDisks(self, instance)
5260 raise errors.OpExecError("Can't activate the instance's disks")
5262 result = self.rpc.call_instance_start(target_node, instance, None, None)
5263 msg = result.fail_msg
5265 _ShutdownInstanceDisks(self, instance)
5266 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5267 (instance.name, target_node, msg))
5270 class LUMigrateNode(LogicalUnit):
5271 """Migrate all instances from a node.
5274 HPATH = "node-migrate"
5275 HTYPE = constants.HTYPE_NODE
5276 _OP_REQP = ["node_name", "live"]
5279 def ExpandNames(self):
5280 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5282 self.needed_locks = {
5283 locking.LEVEL_NODE: [self.op.node_name],
5286 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5288 # Create tasklets for migrating instances for all instances on this node
5292 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5293 logging.debug("Migrating instance %s", inst.name)
5294 names.append(inst.name)
5296 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5298 self.tasklets = tasklets
5300 # Declare instance locks
5301 self.needed_locks[locking.LEVEL_INSTANCE] = names
5303 def DeclareLocks(self, level):
5304 if level == locking.LEVEL_NODE:
5305 self._LockInstancesNodes()
5307 def BuildHooksEnv(self):
5310 This runs on the master, the primary and all the secondaries.
5314 "NODE_NAME": self.op.node_name,
5317 nl = [self.cfg.GetMasterNode()]
5319 return (env, nl, nl)
5322 class TLMigrateInstance(Tasklet):
5323 def __init__(self, lu, instance_name, live, cleanup):
5324 """Initializes this class.
5327 Tasklet.__init__(self, lu)
5330 self.instance_name = instance_name
5332 self.cleanup = cleanup
5334 def CheckPrereq(self):
5335 """Check prerequisites.
5337 This checks that the instance is in the cluster.
5340 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5341 instance = self.cfg.GetInstanceInfo(instance_name)
5342 assert instance is not None
5344 if instance.disk_template != constants.DT_DRBD8:
5345 raise errors.OpPrereqError("Instance's disk layout is not"
5346 " drbd8, cannot migrate.", errors.ECODE_STATE)
5348 secondary_nodes = instance.secondary_nodes
5349 if not secondary_nodes:
5350 raise errors.ConfigurationError("No secondary node but using"
5351 " drbd8 disk template")
5353 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5355 target_node = secondary_nodes[0]
5356 # check memory requirements on the secondary node
5357 _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5358 instance.name, i_be[constants.BE_MEMORY],
5359 instance.hypervisor)
5361 # check bridge existence
5362 _CheckInstanceBridgesExist(self, instance, node=target_node)
5364 if not self.cleanup:
5365 _CheckNodeNotDrained(self, target_node)
5366 result = self.rpc.call_instance_migratable(instance.primary_node,
5368 result.Raise("Can't migrate, please use failover",
5369 prereq=True, ecode=errors.ECODE_STATE)
5371 self.instance = instance
5373 def _WaitUntilSync(self):
5374 """Poll with custom rpc for disk sync.
5376 This uses our own step-based rpc call.
5379 self.feedback_fn("* wait until resync is done")
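# The loop below polls both nodes for their DRBD resync status, considers
# this step done only once every node reports completion, and meanwhile
# reports the minimum completion percentage seen across the nodes as the
# overall progress.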
5383 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5385 self.instance.disks)
5387 for node, nres in result.items():
5388 nres.Raise("Cannot resync disks on node %s" % node)
5389 node_done, node_percent = nres.payload
5390 all_done = all_done and node_done
5391 if node_percent is not None:
5392 min_percent = min(min_percent, node_percent)
5394 if min_percent < 100:
5395 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5398 def _EnsureSecondary(self, node):
5399 """Demote a node to secondary.
5402 self.feedback_fn("* switching node %s to secondary mode" % node)
5404 for dev in self.instance.disks:
5405 self.cfg.SetDiskID(dev, node)
5407 result = self.rpc.call_blockdev_close(node, self.instance.name,
5408 self.instance.disks)
5409 result.Raise("Cannot change disk to secondary on node %s" % node)
5411 def _GoStandalone(self):
5412 """Disconnect from the network.
5415 self.feedback_fn("* changing into standalone mode")
5416 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5417 self.instance.disks)
5418 for node, nres in result.items():
5419 nres.Raise("Cannot disconnect disks node %s" % node)
5421 def _GoReconnect(self, multimaster):
5422 """Reconnect to the network.
5428 msg = "single-master"
5429 self.feedback_fn("* changing disks into %s mode" % msg)
5430 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5431 self.instance.disks,
5432 self.instance.name, multimaster)
5433 for node, nres in result.items():
5434 nres.Raise("Cannot change disks config on node %s" % node)
5436 def _ExecCleanup(self):
5437 """Try to cleanup after a failed migration.
5439 The cleanup is done by:
5440 - check that the instance is running only on one node
5441 (and update the config if needed)
5442 - change disks on its secondary node to secondary
5443 - wait until disks are fully synchronized
5444 - disconnect from the network
5445 - change disks into single-master mode
5446 - wait again until disks are fully synchronized
5449 instance = self.instance
5450 target_node = self.target_node
5451 source_node = self.source_node
5453 # check running on only one node
5454 self.feedback_fn("* checking where the instance actually runs"
5455 " (if this hangs, the hypervisor might be in"
5457 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5458 for node, result in ins_l.items():
5459 result.Raise("Can't contact node %s" % node)
5461 runningon_source = instance.name in ins_l[source_node].payload
5462 runningon_target = instance.name in ins_l[target_node].payload
5464 if runningon_source and runningon_target:
5465 raise errors.OpExecError("Instance seems to be running on two nodes,"
5466 " or the hypervisor is confused. You will have"
5467 " to ensure manually that it runs only on one"
5468 " and restart this operation.")
5470 if not (runningon_source or runningon_target):
5471 raise errors.OpExecError("Instance does not seem to be running at all."
5472 " In this case, it's safer to repair by"
5473 " running 'gnt-instance stop' to ensure disk"
5474 " shutdown, and then restarting it.")
5476 if runningon_target:
5477 # the migration has actually succeeded, we need to update the config
5478 self.feedback_fn("* instance running on secondary node (%s),"
5479 " updating config" % target_node)
5480 instance.primary_node = target_node
5481 self.cfg.Update(instance, self.feedback_fn)
5482 demoted_node = source_node
5484 self.feedback_fn("* instance confirmed to be running on its"
5485 " primary node (%s)" % source_node)
5486 demoted_node = target_node
5488 self._EnsureSecondary(demoted_node)
5490 self._WaitUntilSync()
5491 except errors.OpExecError:
5492 # we ignore here errors, since if the device is standalone, it
5493 # won't be able to sync
5495 self._GoStandalone()
5496 self._GoReconnect(False)
5497 self._WaitUntilSync()
5499 self.feedback_fn("* done")
5501 def _RevertDiskStatus(self):
5502 """Try to revert the disk status after a failed migration.
5505 target_node = self.target_node
5507 self._EnsureSecondary(target_node)
5508 self._GoStandalone()
5509 self._GoReconnect(False)
5510 self._WaitUntilSync()
5511 except errors.OpExecError, err:
5512 self.lu.LogWarning("Migration failed and I can't reconnect the"
5513 " drives: error '%s'\n"
5514 "Please look and recover the instance status" %
5517 def _AbortMigration(self):
5518 """Call the hypervisor code to abort a started migration.
5521 instance = self.instance
5522 target_node = self.target_node
5523 migration_info = self.migration_info
5525 abort_result = self.rpc.call_finalize_migration(target_node,
5529 abort_msg = abort_result.fail_msg
5531 logging.error("Aborting migration failed on target node %s: %s",
5532 target_node, abort_msg)
5533 # Don't raise an exception here, as we still have to try to revert the
5534 # disk status, even if this step failed.
5536 def _ExecMigration(self):
5537 """Migrate an instance.
5539 The migration is done by:
5540 - change the disks into dual-master mode
5541 - wait until disks are fully synchronized again
5542 - migrate the instance
5543 - change disks on the new secondary node (the old primary) to secondary
5544 - wait until disks are fully synchronized
5545 - change disks into single-master mode
5548 instance = self.instance
5549 target_node = self.target_node
5550 source_node = self.source_node
5552 self.feedback_fn("* checking disk consistency between source and target")
5553 for dev in instance.disks:
5554 if not _CheckDiskConsistency(self, dev, target_node, False):
5555 raise errors.OpExecError("Disk %s is degraded or not fully"
5556 " synchronized on target node,"
5557 " aborting migrate." % dev.iv_name)
5559 # First get the migration information from the remote node
5560 result = self.rpc.call_migration_info(source_node, instance)
5561 msg = result.fail_msg
5563 log_err = ("Failed fetching source migration information from %s: %s" %
5565 logging.error(log_err)
5566 raise errors.OpExecError(log_err)
5568 self.migration_info = migration_info = result.payload
5570 # Then switch the disks to master/master mode
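# The target node is first demoted (its devices are closed), then both
# sides drop their network configuration and reconnect with multimaster
# allowed, so the actual migration runs with the disks in dual-master mode.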
5571 self._EnsureSecondary(target_node)
5572 self._GoStandalone()
5573 self._GoReconnect(True)
5574 self._WaitUntilSync()
5576 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5577 result = self.rpc.call_accept_instance(target_node,
5580 self.nodes_ip[target_node])
5582 msg = result.fail_msg
5584 logging.error("Instance pre-migration failed, trying to revert"
5585 " disk status: %s", msg)
5586 self.feedback_fn("Pre-migration failed, aborting")
5587 self._AbortMigration()
5588 self._RevertDiskStatus()
5589 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5590 (instance.name, msg))
5592 self.feedback_fn("* migrating instance to %s" % target_node)
5594 result = self.rpc.call_instance_migrate(source_node, instance,
5595 self.nodes_ip[target_node],
5597 msg = result.fail_msg
5599 logging.error("Instance migration failed, trying to revert"
5600 " disk status: %s", msg)
5601 self.feedback_fn("Migration failed, aborting")
5602 self._AbortMigration()
5603 self._RevertDiskStatus()
5604 raise errors.OpExecError("Could not migrate instance %s: %s" %
5605 (instance.name, msg))
5608 instance.primary_node = target_node
5609 # distribute new instance config to the other nodes
5610 self.cfg.Update(instance, self.feedback_fn)
5612 result = self.rpc.call_finalize_migration(target_node,
5616 msg = result.fail_msg
5618 logging.error("Instance migration succeeded, but finalization failed:"
5620 raise errors.OpExecError("Could not finalize instance migration: %s" %
5623 self._EnsureSecondary(source_node)
5624 self._WaitUntilSync()
5625 self._GoStandalone()
5626 self._GoReconnect(False)
5627 self._WaitUntilSync()
5629 self.feedback_fn("* done")
5631 def Exec(self, feedback_fn):
5632 """Perform the migration.
5635 feedback_fn("Migrating instance %s" % self.instance.name)
5637 self.feedback_fn = feedback_fn
5639 self.source_node = self.instance.primary_node
5640 self.target_node = self.instance.secondary_nodes[0]
5641 self.all_nodes = [self.source_node, self.target_node]
5643 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5644 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5648 return self._ExecCleanup()
5650 return self._ExecMigration()
5653 def _CreateBlockDev(lu, node, instance, device, force_create,
5655 """Create a tree of block devices on a given node.
5657 If this device type has to be created on secondaries, create it and
5660 If not, just recurse to children keeping the same 'force' value.
5662 @param lu: the lu on whose behalf we execute
5663 @param node: the node on which to create the device
5664 @type instance: L{objects.Instance}
5665 @param instance: the instance which owns the device
5666 @type device: L{objects.Disk}
5667 @param device: the device to create
5668 @type force_create: boolean
5669 @param force_create: whether to force creation of this device; this
5670 will be changed to True whenever we find a device which has
5671 the CreateOnSecondary() attribute
5672 @param info: the extra 'metadata' we should attach to the device
5673 (this will be represented as a LVM tag)
5674 @type force_open: boolean
5675 @param force_open: this parameter will be passed to the
5676 L{backend.BlockdevCreate} function where it specifies
5677 whether we run on primary or not, and it affects both
5678 the child assembly and the device's own Open() execution
5681 if device.CreateOnSecondary():
5685 for child in device.children:
5686 _CreateBlockDev(lu, node, instance, child, force_create,
5689 if not force_create:
5692 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5695 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5696 """Create a single block device on a given node.
5698 This will not recurse over children of the device, so they must be
5701 @param lu: the lu on whose behalf we execute
5702 @param node: the node on which to create the device
5703 @type instance: L{objects.Instance}
5704 @param instance: the instance which owns the device
5705 @type device: L{objects.Disk}
5706 @param device: the device to create
5707 @param info: the extra 'metadata' we should attach to the device
5708 (this will be represented as a LVM tag)
5709 @type force_open: boolean
5710 @param force_open: this parameter will be passed to the
5711 L{backend.BlockdevCreate} function where it specifies
5712 whether we run on primary or not, and it affects both
5713 the child assembly and the device's own Open() execution
5716 lu.cfg.SetDiskID(device, node)
5717 result = lu.rpc.call_blockdev_create(node, device, device.size,
5718 instance.name, force_open, info)
5719 result.Raise("Can't create block device %s on"
5720 " node %s for instance %s" % (device, node, instance.name))
5721 if device.physical_id is None:
5722 device.physical_id = result.payload
5725 def _GenerateUniqueNames(lu, exts):
5726 """Generate a suitable LV name.
5728 This will generate a logical volume name for the given instance.
5733 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5734 results.append("%s%s" % (new_id, val))
5738 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5740 """Generate a drbd8 device complete with its children.
5743 port = lu.cfg.AllocatePort()
5744 vgname = lu.cfg.GetVGName()
5745 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5746 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5747 logical_id=(vgname, names[0]))
5748 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5749 logical_id=(vgname, names[1]))
5750 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5751 logical_id=(primary, secondary, port,
5754 children=[dev_data, dev_meta],
5759 def _GenerateDiskTemplate(lu, template_name,
5760 instance_name, primary_node,
5761 secondary_nodes, disk_info,
5762 file_storage_dir, file_driver,
5764 """Generate the entire disk layout for a given template type.
5767 #TODO: compute space requirements
5769 vgname = lu.cfg.GetVGName()
5770 disk_count = len(disk_info)
5772 if template_name == constants.DT_DISKLESS:
5774 elif template_name == constants.DT_PLAIN:
5775 if len(secondary_nodes) != 0:
5776 raise errors.ProgrammerError("Wrong template configuration")
5778 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5779 for i in range(disk_count)])
5780 for idx, disk in enumerate(disk_info):
5781 disk_index = idx + base_index
5782 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5783 logical_id=(vgname, names[idx]),
5784 iv_name="disk/%d" % disk_index,
5786 disks.append(disk_dev)
5787 elif template_name == constants.DT_DRBD8:
5788 if len(secondary_nodes) != 1:
5789 raise errors.ProgrammerError("Wrong template configuration")
5790 remote_node = secondary_nodes[0]
5791 minors = lu.cfg.AllocateDRBDMinor(
5792 [primary_node, remote_node] * len(disk_info), instance_name)
5795 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5796 for i in range(disk_count)]):
5797 names.append(lv_prefix + "_data")
5798 names.append(lv_prefix + "_meta")
5799 for idx, disk in enumerate(disk_info):
5800 disk_index = idx + base_index
5801 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5802 disk["size"], names[idx*2:idx*2+2],
5803 "disk/%d" % disk_index,
5804 minors[idx*2], minors[idx*2+1])
5805 disk_dev.mode = disk["mode"]
5806 disks.append(disk_dev)
5807 elif template_name == constants.DT_FILE:
5808 if len(secondary_nodes) != 0:
5809 raise errors.ProgrammerError("Wrong template configuration")
5811 for idx, disk in enumerate(disk_info):
5812 disk_index = idx + base_index
5813 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5814 iv_name="disk/%d" % disk_index,
5815 logical_id=(file_driver,
5816 "%s/disk%d" % (file_storage_dir,
5819 disks.append(disk_dev)
5821 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
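# Illustrative result (hypothetical names/sizes): for DT_DRBD8 with one
# 10240 MiB disk, the returned list holds a single LD_DRBD8 Disk whose two
# children are the "<unique id>.disk0_data" (10240 MiB) and
# "<unique id>.disk0_meta" (128 MiB) LVs, and whose logical_id carries the
# primary/secondary nodes, port, minors and shared secret.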
5825 def _GetInstanceInfoText(instance):
5826 """Compute that text that should be added to the disk's metadata.
5829 return "originstname+%s" % instance.name
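# For illustration: an instance named "inst1.example.com" (hypothetical name)
# gets the LVM tag "originstname+inst1.example.com" attached to its volumes.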
5832 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5833 """Create all disks for an instance.
5835 This abstracts away some work from AddInstance.
5837 @type lu: L{LogicalUnit}
5838 @param lu: the logical unit on whose behalf we execute
5839 @type instance: L{objects.Instance}
5840 @param instance: the instance whose disks we should create
5842 @param to_skip: list of indices to skip
5843 @type target_node: string
5844 @param target_node: if passed, overrides the target node for creation
5846 @return: the success of the creation
5849 info = _GetInstanceInfoText(instance)
5850 if target_node is None:
5851 pnode = instance.primary_node
5852 all_nodes = instance.all_nodes
5857 if instance.disk_template == constants.DT_FILE:
5858 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5859 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5861 result.Raise("Failed to create directory '%s' on"
5862 " node %s" % (file_storage_dir, pnode))
5864 # Note: this needs to be kept in sync with adding of disks in
5865 # LUSetInstanceParams
5866 for idx, device in enumerate(instance.disks):
5867 if to_skip and idx in to_skip:
5869 logging.info("Creating volume %s for instance %s",
5870 device.iv_name, instance.name)
5872 for node in all_nodes:
5873 f_create = node == pnode
5874 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
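# Note on the call above: f_create is True only on the primary node, so on
# secondary nodes devices are created only if they (or a parent device) report
# CreateOnSecondary(), and they are not force-opened there either.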
5877 def _RemoveDisks(lu, instance, target_node=None):
5878 """Remove all disks for an instance.
5880 This abstracts away some work from `AddInstance()` and
5881 `RemoveInstance()`. Note that in case some of the devices couldn't
5882 be removed, the removal will continue with the other ones (compare
5883 with `_CreateDisks()`).
5885 @type lu: L{LogicalUnit}
5886 @param lu: the logical unit on whose behalf we execute
5887 @type instance: L{objects.Instance}
5888 @param instance: the instance whose disks we should remove
5889 @type target_node: string
5890 @param target_node: used to override the node on which to remove the disks
5892 @return: the success of the removal
5895 logging.info("Removing block devices for instance %s", instance.name)
5898 for device in instance.disks:
5900 edata = [(target_node, device)]
5902 edata = device.ComputeNodeTree(instance.primary_node)
5903 for node, disk in edata:
5904 lu.cfg.SetDiskID(disk, node)
5905 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5907 lu.LogWarning("Could not remove block device %s on node %s,"
5908 " continuing anyway: %s", device.iv_name, node, msg)
5911 if instance.disk_template == constants.DT_FILE:
5912 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5916 tgt = instance.primary_node
5917 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5919 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5920 file_storage_dir, instance.primary_node, result.fail_msg)
5926 def _ComputeDiskSize(disk_template, disks):
5927 """Compute disk size requirements in the volume group
5930 # Required free disk space as a function of disk and swap space
5932 constants.DT_DISKLESS: None,
5933 constants.DT_PLAIN: sum(d["size"] for d in disks),
5934 # 128 MB are added for drbd metadata for each disk
5935 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
5936 constants.DT_FILE: None,
5939 if disk_template not in req_size_dict:
5940 raise errors.ProgrammerError("Disk template '%s' size requirement"
5941 " is unknown" % disk_template)
5943 return req_size_dict[disk_template]
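# Illustrative arithmetic (hypothetical sizes): for disks of 10240 and 2048 MiB,
# DT_PLAIN needs 10240 + 2048 = 12288 MiB of free VG space, while DT_DRBD8 needs
# (10240 + 128) + (2048 + 128) = 12544 MiB because of the per-disk DRBD metadata;
# DT_DISKLESS and DT_FILE have no volume group requirement (None).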
5946 def _CheckHVParams(lu, nodenames, hvname, hvparams):
5947 """Hypervisor parameter validation.
5949 This function abstracts the hypervisor parameter validation to be
5950 used in both instance create and instance modify.
5952 @type lu: L{LogicalUnit}
5953 @param lu: the logical unit for which we check
5954 @type nodenames: list
5955 @param nodenames: the list of nodes on which we should check
5956 @type hvname: string
5957 @param hvname: the name of the hypervisor we should use
5958 @type hvparams: dict
5959 @param hvparams: the parameters which we need to check
5960 @raise errors.OpPrereqError: if the parameters are not valid
5963 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5966 for node in nodenames:
5970 info.Raise("Hypervisor parameter validation failed on node %s" % node)
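# Typical usage sketch (mirrors the call in LUCreateInstance.CheckPrereq):
#   _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
# which asks every node in nodenames to validate the filled-in parameters and
# raises on the first node that reports them as invalid.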
5973 class LUCreateInstance(LogicalUnit):
5974 """Create an instance.
5977 HPATH = "instance-add"
5978 HTYPE = constants.HTYPE_INSTANCE
5979 _OP_REQP = ["instance_name", "disks",
5981 "wait_for_sync", "ip_check", "nics",
5982 "hvparams", "beparams"]
5985 def CheckArguments(self):
5989 # set optional parameters to none if they don't exist
5990 for attr in ["pnode", "snode", "iallocator", "hypervisor",
5991 "disk_template", "identify_defaults"]:
5992 if not hasattr(self.op, attr):
5993 setattr(self.op, attr, None)
5995 # do not require name_check to ease forward/backward compatibility
5997 if not hasattr(self.op, "name_check"):
5998 self.op.name_check = True
5999 if not hasattr(self.op, "no_install"):
6000 self.op.no_install = False
6001 if self.op.no_install and self.op.start:
6002 self.LogInfo("No-installation mode selected, disabling startup")
6003 self.op.start = False
6004 # validate/normalize the instance name
6005 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6006 if self.op.ip_check and not self.op.name_check:
6007 # TODO: make the ip check more flexible and not depend on the name check
6008 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6010 # check disk information: either all adopt, or no adopt
6011 has_adopt = has_no_adopt = False
6012 for disk in self.op.disks:
6017 if has_adopt and has_no_adopt:
6018 raise errors.OpPrereqError("Either all disks are adopted or none is",
6021 if self.op.disk_template != constants.DT_PLAIN:
6022 raise errors.OpPrereqError("Disk adoption is only supported for the"
6023 " 'plain' disk template",
6025 if self.op.iallocator is not None:
6026 raise errors.OpPrereqError("Disk adoption not allowed with an"
6027 " iallocator script", errors.ECODE_INVAL)
6028 if self.op.mode == constants.INSTANCE_IMPORT:
6029 raise errors.OpPrereqError("Disk adoption not allowed for"
6030 " instance import", errors.ECODE_INVAL)
6032 self.adopt_disks = has_adopt
6034 # verify creation mode
6035 if self.op.mode not in (constants.INSTANCE_CREATE,
6036 constants.INSTANCE_IMPORT):
6037 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6038 self.op.mode, errors.ECODE_INVAL)
6040 # instance name verification
6041 if self.op.name_check:
6042 self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6043 self.op.instance_name = self.hostname1.name
6044 # used in CheckPrereq for ip ping check
6045 self.check_ip = self.hostname1.ip
6047 self.check_ip = None
6049 # file storage checks
6050 if (self.op.file_driver and
6051 not self.op.file_driver in constants.FILE_DRIVER):
6052 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6053 self.op.file_driver, errors.ECODE_INVAL)
6055 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6056 raise errors.OpPrereqError("File storage directory path not absolute",
6059 ### Node/iallocator related checks
6060 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6061 raise errors.OpPrereqError("One and only one of iallocator and primary"
6062 " node must be given",
6065 if self.op.mode == constants.INSTANCE_IMPORT:
6066 # On import force_variant must be True, because if we forced it at
6067 # initial install, our only chance when importing it back is that it works again.
6069 self.op.force_variant = True
6071 if self.op.no_install:
6072 self.LogInfo("No-installation mode has no effect during import")
6074 else: # INSTANCE_CREATE
6075 if getattr(self.op, "os_type", None) is None:
6076 raise errors.OpPrereqError("No guest OS specified",
6078 self.op.force_variant = getattr(self.op, "force_variant", False)
6079 if self.op.disk_template is None:
6080 raise errors.OpPrereqError("No disk template specified",
6083 def ExpandNames(self):
6084 """ExpandNames for CreateInstance.
6086 Figure out the right locks for instance creation.
6089 self.needed_locks = {}
6091 instance_name = self.op.instance_name
6092 # this is just a preventive check, but someone might still add this
6093 # instance in the meantime, and creation will fail at lock-add time
6094 if instance_name in self.cfg.GetInstanceList():
6095 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6096 instance_name, errors.ECODE_EXISTS)
6098 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6100 if self.op.iallocator:
6101 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6103 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6104 nodelist = [self.op.pnode]
6105 if self.op.snode is not None:
6106 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6107 nodelist.append(self.op.snode)
6108 self.needed_locks[locking.LEVEL_NODE] = nodelist
6110 # in case of import lock the source node too
6111 if self.op.mode == constants.INSTANCE_IMPORT:
6112 src_node = getattr(self.op, "src_node", None)
6113 src_path = getattr(self.op, "src_path", None)
6115 if src_path is None:
6116 self.op.src_path = src_path = self.op.instance_name
6118 if src_node is None:
6119 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6120 self.op.src_node = None
6121 if os.path.isabs(src_path):
6122 raise errors.OpPrereqError("Importing an instance from an absolute"
6123 " path requires a source node option.",
6126 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6127 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6128 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6129 if not os.path.isabs(src_path):
6130 self.op.src_path = src_path = \
6131 utils.PathJoin(constants.EXPORT_DIR, src_path)
6133 def _RunAllocator(self):
6134 """Run the allocator based on input opcode.
6137 nics = [n.ToDict() for n in self.nics]
6138 ial = IAllocator(self.cfg, self.rpc,
6139 mode=constants.IALLOCATOR_MODE_ALLOC,
6140 name=self.op.instance_name,
6141 disk_template=self.op.disk_template,
6144 vcpus=self.be_full[constants.BE_VCPUS],
6145 mem_size=self.be_full[constants.BE_MEMORY],
6148 hypervisor=self.op.hypervisor,
6151 ial.Run(self.op.iallocator)
6154 raise errors.OpPrereqError("Can't compute nodes using"
6155 " iallocator '%s': %s" %
6156 (self.op.iallocator, ial.info),
6158 if len(ial.result) != ial.required_nodes:
6159 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6160 " of nodes (%s), required %s" %
6161 (self.op.iallocator, len(ial.result),
6162 ial.required_nodes), errors.ECODE_FAULT)
6163 self.op.pnode = ial.result[0]
6164 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6165 self.op.instance_name, self.op.iallocator,
6166 utils.CommaJoin(ial.result))
6167 if ial.required_nodes == 2:
6168 self.op.snode = ial.result[1]
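# In other words (illustrative): ial.result is the list of node names chosen by
# the allocator; result[0] becomes the primary node and, when the disk template
# needs a mirror (required_nodes == 2), result[1] becomes the secondary.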
6170 def BuildHooksEnv(self):
6173 This runs on master, primary and secondary nodes of the instance.
6177 "ADD_MODE": self.op.mode,
6179 if self.op.mode == constants.INSTANCE_IMPORT:
6180 env["SRC_NODE"] = self.op.src_node
6181 env["SRC_PATH"] = self.op.src_path
6182 env["SRC_IMAGES"] = self.src_images
6184 env.update(_BuildInstanceHookEnv(
6185 name=self.op.instance_name,
6186 primary_node=self.op.pnode,
6187 secondary_nodes=self.secondaries,
6188 status=self.op.start,
6189 os_type=self.op.os_type,
6190 memory=self.be_full[constants.BE_MEMORY],
6191 vcpus=self.be_full[constants.BE_VCPUS],
6192 nics=_NICListToTuple(self, self.nics),
6193 disk_template=self.op.disk_template,
6194 disks=[(d["size"], d["mode"]) for d in self.disks],
6197 hypervisor_name=self.op.hypervisor,
6200 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6204 def _ReadExportInfo(self):
6205 """Reads the export information from disk.
6207 It will override the opcode source node and path with the actual
6208 information, if these two were not specified before.
6210 @return: the export information
6213 assert self.op.mode == constants.INSTANCE_IMPORT
6215 src_node = self.op.src_node
6216 src_path = self.op.src_path
6218 if src_node is None:
6219 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6220 exp_list = self.rpc.call_export_list(locked_nodes)
6222 for node in exp_list:
6223 if exp_list[node].fail_msg:
6225 if src_path in exp_list[node].payload:
6227 self.op.src_node = src_node = node
6228 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6232 raise errors.OpPrereqError("No export found for relative path %s" %
6233 src_path, errors.ECODE_INVAL)
6235 _CheckNodeOnline(self, src_node)
6236 result = self.rpc.call_export_info(src_node, src_path)
6237 result.Raise("No export or invalid export found in dir %s" % src_path)
6239 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6240 if not export_info.has_section(constants.INISECT_EXP):
6241 raise errors.ProgrammerError("Corrupted export config",
6242 errors.ECODE_ENVIRON)
6244 ei_version = export_info.get(constants.INISECT_EXP, "version")
6245 if (int(ei_version) != constants.EXPORT_VERSION):
6246 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6247 (ei_version, constants.EXPORT_VERSION),
6248 errors.ECODE_ENVIRON)
6251 def _ReadExportParams(self, einfo):
6252 """Use export parameters as defaults.
6254 In case the opcode doesn't specify (as in override) some instance
6255 parameters, then try to use them from the export information, if that declares them.
6259 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6261 if self.op.disk_template is None:
6262 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6263 self.op.disk_template = einfo.get(constants.INISECT_INS,
6266 raise errors.OpPrereqError("No disk template specified and the export"
6267 " is missing the disk_template information",
6270 if not self.op.disks:
6271 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6273 # TODO: import the disk iv_name too
6274 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6275 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6276 disks.append({"size": disk_sz})
6277 self.op.disks = disks
6279 raise errors.OpPrereqError("No disk info specified and the export"
6280 " is missing the disk information",
6283 if (not self.op.nics and
6284 einfo.has_option(constants.INISECT_INS, "nic_count")):
6286 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6288 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6289 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6294 if (self.op.hypervisor is None and
6295 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6296 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6297 if einfo.has_section(constants.INISECT_HYP):
6298 # use the export parameters but do not override the ones
6299 # specified by the user
6300 for name, value in einfo.items(constants.INISECT_HYP):
6301 if name not in self.op.hvparams:
6302 self.op.hvparams[name] = value
6304 if einfo.has_section(constants.INISECT_BEP):
6305 # use the parameters, without overriding
6306 for name, value in einfo.items(constants.INISECT_BEP):
6307 if name not in self.op.beparams:
6308 self.op.beparams[name] = value
6310 # try to read the parameters old style, from the main section
6311 for name in constants.BES_PARAMETERS:
6312 if (name not in self.op.beparams and
6313 einfo.has_option(constants.INISECT_INS, name)):
6314 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6316 def _RevertToDefaults(self, cluster):
6317 """Revert the instance parameters to the default values.
6321 hv_defs = cluster.GetHVDefaults(self.op.hypervisor, self.op.os_type)
6322 for name in self.op.hvparams.keys():
6323 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6324 del self.op.hvparams[name]
6326 be_defs = cluster.beparams.get(constants.PP_DEFAULT, {})
6327 for name in self.op.beparams.keys():
6328 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6329 del self.op.beparams[name]
6331 nic_defs = cluster.nicparams.get(constants.PP_DEFAULT, {})
6332 for nic in self.op.nics:
6333 for name in constants.NICS_PARAMETERS:
6334 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6337 def CheckPrereq(self):
6338 """Check prerequisites.
6341 if self.op.mode == constants.INSTANCE_IMPORT:
6342 export_info = self._ReadExportInfo()
6343 self._ReadExportParams(export_info)
6345 _CheckDiskTemplate(self.op.disk_template)
6347 if (not self.cfg.GetVGName() and
6348 self.op.disk_template not in constants.DTS_NOT_LVM):
6349 raise errors.OpPrereqError("Cluster does not support lvm-based"
6350 " instances", errors.ECODE_STATE)
6352 if self.op.hypervisor is None:
6353 self.op.hypervisor = self.cfg.GetHypervisorType()
6355 cluster = self.cfg.GetClusterInfo()
6356 enabled_hvs = cluster.enabled_hypervisors
6357 if self.op.hypervisor not in enabled_hvs:
6358 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6359 " cluster (%s)" % (self.op.hypervisor,
6360 ",".join(enabled_hvs)),
6363 # check hypervisor parameter syntax (locally)
6364 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6365 filled_hvp = objects.FillDict(cluster.GetHVDefaults(self.op.hypervisor,
6368 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6369 hv_type.CheckParameterSyntax(filled_hvp)
6370 self.hv_full = filled_hvp
6371 # check that we don't specify global parameters on an instance
6372 _CheckGlobalHvParams(self.op.hvparams)
6374 # fill and remember the beparams dict
6375 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6376 self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6379 # now that hvp/bep are in final format, let's reset to defaults,
6381 if self.op.identify_defaults:
6382 self._RevertToDefaults(cluster)
6386 for idx, nic in enumerate(self.op.nics):
6387 nic_mode_req = nic.get("mode", None)
6388 nic_mode = nic_mode_req
6389 if nic_mode is None:
6390 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6392 # in routed mode, for the first nic, the default ip is 'auto'
6393 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6394 default_ip_mode = constants.VALUE_AUTO
6396 default_ip_mode = constants.VALUE_NONE
6398 # ip validity checks
6399 ip = nic.get("ip", default_ip_mode)
6400 if ip is None or ip.lower() == constants.VALUE_NONE:
6402 elif ip.lower() == constants.VALUE_AUTO:
6403 if not self.op.name_check:
6404 raise errors.OpPrereqError("IP address set to auto but name checks"
6405 " have been skipped. Aborting.",
6407 nic_ip = self.hostname1.ip
6409 if not utils.IsValidIP(ip):
6410 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6411 " like a valid IP" % ip,
6415 # TODO: check the ip address for uniqueness
6416 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6417 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6420 # MAC address verification
6421 mac = nic.get("mac", constants.VALUE_AUTO)
6422 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6423 mac = utils.NormalizeAndValidateMac(mac)
6426 self.cfg.ReserveMAC(mac, self.proc.GetECId())
6427 except errors.ReservationError:
6428 raise errors.OpPrereqError("MAC address %s already in use"
6429 " in cluster" % mac,
6430 errors.ECODE_NOTUNIQUE)
6432 # bridge verification
6433 bridge = nic.get("bridge", None)
6434 link = nic.get("link", None)
6436 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6437 " at the same time", errors.ECODE_INVAL)
6438 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6439 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6446 nicparams[constants.NIC_MODE] = nic_mode_req
6448 nicparams[constants.NIC_LINK] = link
6450 check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6452 objects.NIC.CheckParameterSyntax(check_params)
6453 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6455 # disk checks/pre-build
6457 for disk in self.op.disks:
6458 mode = disk.get("mode", constants.DISK_RDWR)
6459 if mode not in constants.DISK_ACCESS_SET:
6460 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6461 mode, errors.ECODE_INVAL)
6462 size = disk.get("size", None)
6464 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6467 except (TypeError, ValueError):
6468 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6470 new_disk = {"size": size, "mode": mode}
6472 new_disk["adopt"] = disk["adopt"]
6473 self.disks.append(new_disk)
6475 if self.op.mode == constants.INSTANCE_IMPORT:
6477 # Check that the new instance doesn't have fewer disks than the export
6478 instance_disks = len(self.disks)
6479 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6480 if instance_disks < export_disks:
6481 raise errors.OpPrereqError("Not enough disks to import."
6482 " (instance: %d, export: %d)" %
6483 (instance_disks, export_disks),
6487 for idx in range(export_disks):
6488 option = 'disk%d_dump' % idx
6489 if export_info.has_option(constants.INISECT_INS, option):
6490 # FIXME: are the old os-es, disk sizes, etc. useful?
6491 export_name = export_info.get(constants.INISECT_INS, option)
6492 image = utils.PathJoin(self.op.src_path, export_name)
6493 disk_images.append(image)
6495 disk_images.append(False)
6497 self.src_images = disk_images
6499 old_name = export_info.get(constants.INISECT_INS, 'name')
6501 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6502 except (TypeError, ValueError), err:
6503 raise errors.OpPrereqError("Invalid export file, nic_count is not"
6504 " an integer: %s" % str(err),
6506 if self.op.instance_name == old_name:
6507 for idx, nic in enumerate(self.nics):
6508 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6509 nic_mac_ini = 'nic%d_mac' % idx
6510 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6512 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6514 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6515 if self.op.ip_check:
6516 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6517 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6518 (self.check_ip, self.op.instance_name),
6519 errors.ECODE_NOTUNIQUE)
6521 #### mac address generation
6522 # By generating here the mac address both the allocator and the hooks get
6523 # the real final mac address rather than the 'auto' or 'generate' value.
6524 # There is a race condition between the generation and the instance object
6525 # creation, which means that we know the mac is valid now, but we're not
6526 # sure it will be when we actually add the instance. If things go bad
6527 # adding the instance will abort because of a duplicate mac, and the
6528 # creation job will fail.
6529 for nic in self.nics:
6530 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6531 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6535 if self.op.iallocator is not None:
6536 self._RunAllocator()
6538 #### node related checks
6540 # check primary node
6541 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6542 assert self.pnode is not None, \
6543 "Cannot retrieve locked node %s" % self.op.pnode
6545 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6546 pnode.name, errors.ECODE_STATE)
6548 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6549 pnode.name, errors.ECODE_STATE)
6551 self.secondaries = []
6553 # mirror node verification
6554 if self.op.disk_template in constants.DTS_NET_MIRROR:
6555 if self.op.snode is None:
6556 raise errors.OpPrereqError("The networked disk templates need"
6557 " a mirror node", errors.ECODE_INVAL)
6558 if self.op.snode == pnode.name:
6559 raise errors.OpPrereqError("The secondary node cannot be the"
6560 " primary node.", errors.ECODE_INVAL)
6561 _CheckNodeOnline(self, self.op.snode)
6562 _CheckNodeNotDrained(self, self.op.snode)
6563 self.secondaries.append(self.op.snode)
6565 nodenames = [pnode.name] + self.secondaries
6567 req_size = _ComputeDiskSize(self.op.disk_template,
6570 # Check lv size requirements, if not adopting
6571 if req_size is not None and not self.adopt_disks:
6572 _CheckNodesFreeDisk(self, nodenames, req_size)
6574 if self.adopt_disks: # instead, we must check the adoption data
6575 all_lvs = set([i["adopt"] for i in self.disks])
6576 if len(all_lvs) != len(self.disks):
6577 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6579 for lv_name in all_lvs:
6581 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6582 except errors.ReservationError:
6583 raise errors.OpPrereqError("LV named %s used by another instance" %
6584 lv_name, errors.ECODE_NOTUNIQUE)
6586 node_lvs = self.rpc.call_lv_list([pnode.name],
6587 self.cfg.GetVGName())[pnode.name]
6588 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6589 node_lvs = node_lvs.payload
6590 delta = all_lvs.difference(node_lvs.keys())
6592 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6593 utils.CommaJoin(delta),
6595 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6597 raise errors.OpPrereqError("Online logical volumes found, cannot"
6598 " adopt: %s" % utils.CommaJoin(online_lvs),
6600 # update the size of each disk based on what is found
6601 for dsk in self.disks:
6602 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6604 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6606 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6608 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6610 # memory check on primary node
6612 _CheckNodeFreeMemory(self, self.pnode.name,
6613 "creating instance %s" % self.op.instance_name,
6614 self.be_full[constants.BE_MEMORY],
6617 self.dry_run_result = list(nodenames)
6619 def Exec(self, feedback_fn):
6620 """Create and add the instance to the cluster.
6623 instance = self.op.instance_name
6624 pnode_name = self.pnode.name
6626 ht_kind = self.op.hypervisor
6627 if ht_kind in constants.HTS_REQ_PORT:
6628 network_port = self.cfg.AllocatePort()
6632 ##if self.op.vnc_bind_address is None:
6633 ## self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
6635 # this is needed because os.path.join does not accept None arguments
6636 if self.op.file_storage_dir is None:
6637 string_file_storage_dir = ""
6639 string_file_storage_dir = self.op.file_storage_dir
6641 # build the full file storage dir path
6642 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6643 string_file_storage_dir, instance)
6646 disks = _GenerateDiskTemplate(self,
6647 self.op.disk_template,
6648 instance, pnode_name,
6652 self.op.file_driver,
6655 iobj = objects.Instance(name=instance, os=self.op.os_type,
6656 primary_node=pnode_name,
6657 nics=self.nics, disks=disks,
6658 disk_template=self.op.disk_template,
6660 network_port=network_port,
6661 beparams=self.op.beparams,
6662 hvparams=self.op.hvparams,
6663 hypervisor=self.op.hypervisor,
6666 if self.adopt_disks:
6667 # rename LVs to the newly-generated names; we need to construct
6668 # 'fake' LV disks with the old data, plus the new unique_id
6669 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6671 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
6672 rename_to.append(t_dsk.logical_id)
6673 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6674 self.cfg.SetDiskID(t_dsk, pnode_name)
6675 result = self.rpc.call_blockdev_rename(pnode_name,
6676 zip(tmp_disks, rename_to))
6677 result.Raise("Failed to rename adopted LVs")
6679 feedback_fn("* creating instance disks...")
6681 _CreateDisks(self, iobj)
6682 except errors.OpExecError:
6683 self.LogWarning("Device creation failed, reverting...")
6685 _RemoveDisks(self, iobj)
6687 self.cfg.ReleaseDRBDMinors(instance)
6690 feedback_fn("adding instance %s to cluster config" % instance)
6692 self.cfg.AddInstance(iobj, self.proc.GetECId())
6694 # Declare that we don't want to remove the instance lock anymore, as we've
6695 # added the instance to the config
6696 del self.remove_locks[locking.LEVEL_INSTANCE]
6697 # Unlock all the nodes
6698 if self.op.mode == constants.INSTANCE_IMPORT:
6699 nodes_keep = [self.op.src_node]
6700 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6701 if node != self.op.src_node]
6702 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6703 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6705 self.context.glm.release(locking.LEVEL_NODE)
6706 del self.acquired_locks[locking.LEVEL_NODE]
6708 if self.op.wait_for_sync:
6709 disk_abort = not _WaitForSync(self, iobj)
6710 elif iobj.disk_template in constants.DTS_NET_MIRROR:
6711 # make sure the disks are not degraded (still sync-ing is ok)
6713 feedback_fn("* checking mirrors status")
6714 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6719 _RemoveDisks(self, iobj)
6720 self.cfg.RemoveInstance(iobj.name)
6721 # Make sure the instance lock gets removed
6722 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6723 raise errors.OpExecError("There are some degraded disks for"
6726 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6727 if self.op.mode == constants.INSTANCE_CREATE:
6728 if not self.op.no_install:
6729 feedback_fn("* running the instance OS create scripts...")
6730 # FIXME: pass debug option from opcode to backend
6731 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6732 self.op.debug_level)
6733 result.Raise("Could not add os for instance %s"
6734 " on node %s" % (instance, pnode_name))
6736 elif self.op.mode == constants.INSTANCE_IMPORT:
6737 feedback_fn("* running the instance OS import scripts...")
6738 src_node = self.op.src_node
6739 src_images = self.src_images
6740 cluster_name = self.cfg.GetClusterName()
6741 # FIXME: pass debug option from opcode to backend
6742 import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6743 src_node, src_images,
6745 self.op.debug_level)
6746 msg = import_result.fail_msg
6748 self.LogWarning("Error while importing the disk images for instance"
6749 " %s on node %s: %s" % (instance, pnode_name, msg))
6751 # also checked in the prereq part
6752 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6756 iobj.admin_up = True
6757 self.cfg.Update(iobj, feedback_fn)
6758 logging.info("Starting instance %s on node %s", instance, pnode_name)
6759 feedback_fn("* starting instance...")
6760 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6761 result.Raise("Could not start instance")
6763 return list(iobj.all_nodes)
6766 class LUConnectConsole(NoHooksLU):
6767 """Connect to an instance's console.
6769 This is somewhat special in that it returns the command line that
6770 you need to run on the master node in order to connect to the console.
6774 _OP_REQP = ["instance_name"]
6777 def ExpandNames(self):
6778 self._ExpandAndLockInstance()
6780 def CheckPrereq(self):
6781 """Check prerequisites.
6783 This checks that the instance is in the cluster.
6786 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6787 assert self.instance is not None, \
6788 "Cannot retrieve locked instance %s" % self.op.instance_name
6789 _CheckNodeOnline(self, self.instance.primary_node)
6791 def Exec(self, feedback_fn):
6792 """Connect to the console of an instance
6795 instance = self.instance
6796 node = instance.primary_node
6798 node_insts = self.rpc.call_instance_list([node],
6799 [instance.hypervisor])[node]
6800 node_insts.Raise("Can't get node information from %s" % node)
6802 if instance.name not in node_insts.payload:
6803 raise errors.OpExecError("Instance %s is not running." % instance.name)
6805 logging.debug("Connecting to console of %s on %s", instance.name, node)
6807 hyper = hypervisor.GetHypervisor(instance.hypervisor)
6808 cluster = self.cfg.GetClusterInfo()
6809 # beparams and hvparams are passed separately, to avoid editing the
6810 # instance and then saving the defaults in the instance itself.
6811 hvparams = cluster.FillHV(instance)
6812 beparams = cluster.FillBE(instance)
6813 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6816 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
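# The caller is expected to execute the returned argument list; with the default
# SshRunner settings this is roughly an "ssh -t root@<primary node> <console_cmd>"
# invocation (the exact options come from ssh.SshRunner.BuildCmd).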
6819 class LUReplaceDisks(LogicalUnit):
6820 """Replace the disks of an instance.
6823 HPATH = "mirrors-replace"
6824 HTYPE = constants.HTYPE_INSTANCE
6825 _OP_REQP = ["instance_name", "mode", "disks"]
6828 def CheckArguments(self):
6829 if not hasattr(self.op, "remote_node"):
6830 self.op.remote_node = None
6831 if not hasattr(self.op, "iallocator"):
6832 self.op.iallocator = None
6833 if not hasattr(self.op, "early_release"):
6834 self.op.early_release = False
6836 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6839 def ExpandNames(self):
6840 self._ExpandAndLockInstance()
6842 if self.op.iallocator is not None:
6843 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6845 elif self.op.remote_node is not None:
6846 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6847 self.op.remote_node = remote_node
6849 # Warning: do not remove the locking of the new secondary here
6850 # unless DRBD8.AddChildren is changed to work in parallel;
6851 # currently it doesn't since parallel invocations of
6852 # FindUnusedMinor will conflict
6853 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6854 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6857 self.needed_locks[locking.LEVEL_NODE] = []
6858 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6860 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6861 self.op.iallocator, self.op.remote_node,
6862 self.op.disks, False, self.op.early_release)
6864 self.tasklets = [self.replacer]
6866 def DeclareLocks(self, level):
6867 # If we're not already locking all nodes in the set we have to declare the
6868 # instance's primary/secondary nodes.
6869 if (level == locking.LEVEL_NODE and
6870 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6871 self._LockInstancesNodes()
6873 def BuildHooksEnv(self):
6876 This runs on the master, the primary and all the secondaries.
6879 instance = self.replacer.instance
6881 "MODE": self.op.mode,
6882 "NEW_SECONDARY": self.op.remote_node,
6883 "OLD_SECONDARY": instance.secondary_nodes[0],
6885 env.update(_BuildInstanceHookEnvByObject(self, instance))
6887 self.cfg.GetMasterNode(),
6888 instance.primary_node,
6890 if self.op.remote_node is not None:
6891 nl.append(self.op.remote_node)
6895 class LUEvacuateNode(LogicalUnit):
6896 """Relocate the secondary instances from a node.
6899 HPATH = "node-evacuate"
6900 HTYPE = constants.HTYPE_NODE
6901 _OP_REQP = ["node_name"]
6904 def CheckArguments(self):
6905 if not hasattr(self.op, "remote_node"):
6906 self.op.remote_node = None
6907 if not hasattr(self.op, "iallocator"):
6908 self.op.iallocator = None
6909 if not hasattr(self.op, "early_release"):
6910 self.op.early_release = False
6912 TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6913 self.op.remote_node,
6916 def ExpandNames(self):
6917 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6919 self.needed_locks = {}
6921 # Declare node locks
6922 if self.op.iallocator is not None:
6923 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6925 elif self.op.remote_node is not None:
6926 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6928 # Warning: do not remove the locking of the new secondary here
6929 # unless DRBD8.AddChildren is changed to work in parallel;
6930 # currently it doesn't since parallel invocations of
6931 # FindUnusedMinor will conflict
6932 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
6933 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6936 raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
6938 # Create tasklets for replacing disks for all secondary instances on this node
6943 for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
6944 logging.debug("Replacing disks for instance %s", inst.name)
6945 names.append(inst.name)
6947 replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
6948 self.op.iallocator, self.op.remote_node, [],
6949 True, self.op.early_release)
6950 tasklets.append(replacer)
6952 self.tasklets = tasklets
6953 self.instance_names = names
6955 # Declare instance locks
6956 self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
6958 def DeclareLocks(self, level):
6959 # If we're not already locking all nodes in the set we have to declare the
6960 # instance's primary/secondary nodes.
6961 if (level == locking.LEVEL_NODE and
6962 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6963 self._LockInstancesNodes()
6965 def BuildHooksEnv(self):
6968 This runs on the master, the primary and all the secondaries.
6972 "NODE_NAME": self.op.node_name,
6975 nl = [self.cfg.GetMasterNode()]
6977 if self.op.remote_node is not None:
6978 env["NEW_SECONDARY"] = self.op.remote_node
6979 nl.append(self.op.remote_node)
6981 return (env, nl, nl)
6984 class TLReplaceDisks(Tasklet):
6985 """Replaces disks for an instance.
6987 Note: Locking is not within the scope of this class.
6990 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
6991 disks, delay_iallocator, early_release):
6992 """Initializes this class.
6995 Tasklet.__init__(self, lu)
6998 self.instance_name = instance_name
7000 self.iallocator_name = iallocator_name
7001 self.remote_node = remote_node
7003 self.delay_iallocator = delay_iallocator
7004 self.early_release = early_release
7007 self.instance = None
7008 self.new_node = None
7009 self.target_node = None
7010 self.other_node = None
7011 self.remote_node_info = None
7012 self.node_secondary_ip = None
7015 def CheckArguments(mode, remote_node, iallocator):
7016 """Helper function for users of this class.
7019 # check for valid parameter combination
7020 if mode == constants.REPLACE_DISK_CHG:
7021 if remote_node is None and iallocator is None:
7022 raise errors.OpPrereqError("When changing the secondary either an"
7023 " iallocator script must be used or the"
7024 " new node given", errors.ECODE_INVAL)
7026 if remote_node is not None and iallocator is not None:
7027 raise errors.OpPrereqError("Give either the iallocator or the new"
7028 " secondary, not both", errors.ECODE_INVAL)
7030 elif remote_node is not None or iallocator is not None:
7031 # Not replacing the secondary
7032 raise errors.OpPrereqError("The iallocator and new node options can"
7033 " only be used when changing the"
7034 " secondary node", errors.ECODE_INVAL)
7037 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7038 """Compute a new secondary node using an IAllocator.
7041 ial = IAllocator(lu.cfg, lu.rpc,
7042 mode=constants.IALLOCATOR_MODE_RELOC,
7044 relocate_from=relocate_from)
7046 ial.Run(iallocator_name)
7049 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7050 " %s" % (iallocator_name, ial.info),
7053 if len(ial.result) != ial.required_nodes:
7054 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7055 " of nodes (%s), required %s" %
7057 len(ial.result), ial.required_nodes),
7060 remote_node_name = ial.result[0]
7062 lu.LogInfo("Selected new secondary for instance '%s': %s",
7063 instance_name, remote_node_name)
7065 return remote_node_name
7067 def _FindFaultyDisks(self, node_name):
7068 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7071 def CheckPrereq(self):
7072 """Check prerequisites.
7074 This checks that the instance is in the cluster.
7077 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7078 assert instance is not None, \
7079 "Cannot retrieve locked instance %s" % self.instance_name
7081 if instance.disk_template != constants.DT_DRBD8:
7082 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7083 " instances", errors.ECODE_INVAL)
7085 if len(instance.secondary_nodes) != 1:
7086 raise errors.OpPrereqError("The instance has a strange layout,"
7087 " expected one secondary but found %d" %
7088 len(instance.secondary_nodes),
7091 if not self.delay_iallocator:
7092 self._CheckPrereq2()
7094 def _CheckPrereq2(self):
7095 """Check prerequisites, second part.
7097 This function should always be part of CheckPrereq. It was separated and is
7098 now called from Exec because during node evacuation iallocator was only
7099 called with an unmodified cluster model, not taking planned changes into account.
7103 instance = self.instance
7104 secondary_node = instance.secondary_nodes[0]
7106 if self.iallocator_name is None:
7107 remote_node = self.remote_node
7109 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7110 instance.name, instance.secondary_nodes)
7112 if remote_node is not None:
7113 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7114 assert self.remote_node_info is not None, \
7115 "Cannot retrieve locked node %s" % remote_node
7117 self.remote_node_info = None
7119 if remote_node == self.instance.primary_node:
7120 raise errors.OpPrereqError("The specified node is the primary node of"
7121 " the instance.", errors.ECODE_INVAL)
7123 if remote_node == secondary_node:
7124 raise errors.OpPrereqError("The specified node is already the"
7125 " secondary node of the instance.",
7128 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7129 constants.REPLACE_DISK_CHG):
7130 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7133 if self.mode == constants.REPLACE_DISK_AUTO:
7134 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7135 faulty_secondary = self._FindFaultyDisks(secondary_node)
7137 if faulty_primary and faulty_secondary:
7138 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7139 " one node and can not be repaired"
7140 " automatically" % self.instance_name,
7144 self.disks = faulty_primary
7145 self.target_node = instance.primary_node
7146 self.other_node = secondary_node
7147 check_nodes = [self.target_node, self.other_node]
7148 elif faulty_secondary:
7149 self.disks = faulty_secondary
7150 self.target_node = secondary_node
7151 self.other_node = instance.primary_node
7152 check_nodes = [self.target_node, self.other_node]
7158 # Non-automatic modes
7159 if self.mode == constants.REPLACE_DISK_PRI:
7160 self.target_node = instance.primary_node
7161 self.other_node = secondary_node
7162 check_nodes = [self.target_node, self.other_node]
7164 elif self.mode == constants.REPLACE_DISK_SEC:
7165 self.target_node = secondary_node
7166 self.other_node = instance.primary_node
7167 check_nodes = [self.target_node, self.other_node]
7169 elif self.mode == constants.REPLACE_DISK_CHG:
7170 self.new_node = remote_node
7171 self.other_node = instance.primary_node
7172 self.target_node = secondary_node
7173 check_nodes = [self.new_node, self.other_node]
7175 _CheckNodeNotDrained(self.lu, remote_node)
7177 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7178 assert old_node_info is not None
7179 if old_node_info.offline and not self.early_release:
7180 # doesn't make sense to delay the release
7181 self.early_release = True
7182 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7183 " early-release mode", secondary_node)
7186 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7189 # If not specified all disks should be replaced
7191 self.disks = range(len(self.instance.disks))
7193 for node in check_nodes:
7194 _CheckNodeOnline(self.lu, node)
7196 # Check whether disks are valid
7197 for disk_idx in self.disks:
7198 instance.FindDisk(disk_idx)
7200 # Get secondary node IP addresses
7203 for node_name in [self.target_node, self.other_node, self.new_node]:
7204 if node_name is not None:
7205 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7207 self.node_secondary_ip = node_2nd_ip
7209 def Exec(self, feedback_fn):
7210 """Execute disk replacement.
7212 This dispatches the disk replacement to the appropriate handler.
7215 if self.delay_iallocator:
7216 self._CheckPrereq2()
7219 feedback_fn("No disks need replacement")
7222 feedback_fn("Replacing disk(s) %s for %s" %
7223 (utils.CommaJoin(self.disks), self.instance.name))
7225 activate_disks = (not self.instance.admin_up)
7227 # Activate the instance disks if we're replacing them on a down instance
7229 _StartInstanceDisks(self.lu, self.instance, True)
7232 # Should we replace the secondary node?
7233 if self.new_node is not None:
7234 fn = self._ExecDrbd8Secondary
7236 fn = self._ExecDrbd8DiskOnly
7238 return fn(feedback_fn)
7241 # Deactivate the instance disks if we're replacing them on a down instance
7244 _SafeShutdownInstanceDisks(self.lu, self.instance)
7246 def _CheckVolumeGroup(self, nodes):
7247 self.lu.LogInfo("Checking volume groups")
7249 vgname = self.cfg.GetVGName()
7251 # Make sure volume group exists on all involved nodes
7252 results = self.rpc.call_vg_list(nodes)
7254 raise errors.OpExecError("Can't list volume groups on the nodes")
7258 res.Raise("Error checking node %s" % node)
7259 if vgname not in res.payload:
7260 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7263 def _CheckDisksExistence(self, nodes):
7264 # Check disk existence
7265 for idx, dev in enumerate(self.instance.disks):
7266 if idx not in self.disks:
7270 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7271 self.cfg.SetDiskID(dev, node)
7273 result = self.rpc.call_blockdev_find(node, dev)
7275 msg = result.fail_msg
7276 if msg or not result.payload:
7278 msg = "disk not found"
7279 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7282 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7283 for idx, dev in enumerate(self.instance.disks):
7284 if idx not in self.disks:
7287 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7290 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7292 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7293 " replace disks for instance %s" %
7294 (node_name, self.instance.name))
7296 def _CreateNewStorage(self, node_name):
7297 vgname = self.cfg.GetVGName()
7300 for idx, dev in enumerate(self.instance.disks):
7301 if idx not in self.disks:
7304 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7306 self.cfg.SetDiskID(dev, node_name)
7308 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7309 names = _GenerateUniqueNames(self.lu, lv_names)
7311 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7312 logical_id=(vgname, names[0]))
7313 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7314 logical_id=(vgname, names[1]))
7316 new_lvs = [lv_data, lv_meta]
7317 old_lvs = dev.children
7318 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7320 # we pass force_create=True to force the LVM creation
7321 for new_lv in new_lvs:
7322 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7323 _GetInstanceInfoText(self.instance), False)
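# At this point each replaced disk has a fresh pair of LVs on node_name, named
# e.g. "<unique id>.disk0_data" / "<unique id>.disk0_meta" (hypothetical id),
# recorded in iv_names so the old LVs can be swapped out and removed later.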
7327 def _CheckDevices(self, node_name, iv_names):
7328 for name, (dev, _, _) in iv_names.iteritems():
7329 self.cfg.SetDiskID(dev, node_name)
7331 result = self.rpc.call_blockdev_find(node_name, dev)
7333 msg = result.fail_msg
7334 if msg or not result.payload:
7336 msg = "disk not found"
7337 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7340 if result.payload.is_degraded:
7341 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7343 def _RemoveOldStorage(self, node_name, iv_names):
7344 for name, (_, old_lvs, _) in iv_names.iteritems():
7345 self.lu.LogInfo("Remove logical volumes for %s" % name)
7348 self.cfg.SetDiskID(lv, node_name)
7350 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7352 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7353 hint="remove unused LVs manually")
7355 def _ReleaseNodeLock(self, node_name):
7356 """Releases the lock for a given node."""
7357 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7359 def _ExecDrbd8DiskOnly(self, feedback_fn):
7360 """Replace a disk on the primary or secondary for DRBD 8.
7362 The algorithm for replace is quite complicated:
7364 1. for each disk to be replaced:
7366 1. create new LVs on the target node with unique names
7367 1. detach old LVs from the drbd device
7368 1. rename old LVs to name_replaced.<time_t>
7369 1. rename new LVs to old LVs
7370 1. attach the new LVs (with the old names now) to the drbd device
7372 1. wait for sync across all devices
7374 1. for each modified disk:
7376 1. remove old LVs (which have the name name_replaces.<time_t>)
7378 Failures are not very well handled.
7383 # Step: check device activation
7384 self.lu.LogStep(1, steps_total, "Check device existence")
7385 self._CheckDisksExistence([self.other_node, self.target_node])
7386 self._CheckVolumeGroup([self.target_node, self.other_node])
7388 # Step: check other node consistency
7389 self.lu.LogStep(2, steps_total, "Check peer consistency")
7390 self._CheckDisksConsistency(self.other_node,
7391 self.other_node == self.instance.primary_node,
7394 # Step: create new storage
7395 self.lu.LogStep(3, steps_total, "Allocate new storage")
7396 iv_names = self._CreateNewStorage(self.target_node)
7398 # Step: for each lv, detach+rename*2+attach
7399 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7400 for dev, old_lvs, new_lvs in iv_names.itervalues():
7401 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7403 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7405 result.Raise("Can't detach drbd from local storage on node"
7406 " %s for device %s" % (self.target_node, dev.iv_name))
7408 #cfg.Update(instance)
7410 # ok, we created the new LVs, so now we know we have the needed
7411 # storage; as such, we proceed on the target node to rename
7412 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7413 # using the assumption that logical_id == physical_id (which in
7414 # turn is the unique_id on that node)
7416 # FIXME(iustin): use a better name for the replaced LVs
7417 temp_suffix = int(time.time())
7418 ren_fn = lambda d, suff: (d.physical_id[0],
7419 d.physical_id[1] + "_replaced-%s" % suff)
7421 # Build the rename list based on what LVs exist on the node
7422 rename_old_to_new = []
7423 for to_ren in old_lvs:
7424 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7425 if not result.fail_msg and result.payload:
7427 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7429 self.lu.LogInfo("Renaming the old LVs on the target node")
7430 result = self.rpc.call_blockdev_rename(self.target_node,
7432 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7434 # Now we rename the new LVs to the old LVs
7435 self.lu.LogInfo("Renaming the new LVs on the target node")
7436 rename_new_to_old = [(new, old.physical_id)
7437 for old, new in zip(old_lvs, new_lvs)]
7438 result = self.rpc.call_blockdev_rename(self.target_node,
7440 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7442 for old, new in zip(old_lvs, new_lvs):
7443 new.logical_id = old.logical_id
7444 self.cfg.SetDiskID(new, self.target_node)
7446 for disk in old_lvs:
7447 disk.logical_id = ren_fn(disk, temp_suffix)
7448 self.cfg.SetDiskID(disk, self.target_node)
7450 # Now that the new lvs have the old name, we can add them to the device
7451 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7452 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7454 msg = result.fail_msg
7456 for new_lv in new_lvs:
7457 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7460 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7461 hint=("cleanup manually the unused logical"
7463 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7465 dev.children = new_lvs
7467 self.cfg.Update(self.instance, feedback_fn)
7470 if self.early_release:
7471 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7473 self._RemoveOldStorage(self.target_node, iv_names)
7474 # WARNING: we release both node locks here, do not do other RPCs
7475 # than WaitForSync to the primary node
7476 self._ReleaseNodeLock([self.target_node, self.other_node])
7479 # This can fail as the old devices are degraded and _WaitForSync
7480 # does a combined result over all disks, so we don't check its return value
7481 self.lu.LogStep(cstep, steps_total, "Sync devices")
7483 _WaitForSync(self.lu, self.instance)
7485 # Check all devices manually
7486 self._CheckDevices(self.instance.primary_node, iv_names)
7488 # Step: remove old storage
7489 if not self.early_release:
7490 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7492 self._RemoveOldStorage(self.target_node, iv_names)
7494 def _ExecDrbd8Secondary(self, feedback_fn):
7495 """Replace the secondary node for DRBD 8.
7497 The algorithm for replace is quite complicated:
7498 - for all disks of the instance:
7499 - create new LVs on the new node with same names
7500 - shutdown the drbd device on the old secondary
7501 - disconnect the drbd network on the primary
7502 - create the drbd device on the new secondary
7503 - network attach the drbd on the primary, using an artifice:
7504 the drbd code for Attach() will connect to the network if it
7505 finds a device which is connected to the good local disks but
7507 - wait for sync across all devices
7508 - remove all disks from the old secondary
7510 Failures are not very well handled.
7515 # Step: check device activation
7516 self.lu.LogStep(1, steps_total, "Check device existence")
7517 self._CheckDisksExistence([self.instance.primary_node])
7518 self._CheckVolumeGroup([self.instance.primary_node])
7520 # Step: check other node consistency
7521 self.lu.LogStep(2, steps_total, "Check peer consistency")
7522 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7524 # Step: create new storage
7525 self.lu.LogStep(3, steps_total, "Allocate new storage")
7526 for idx, dev in enumerate(self.instance.disks):
7527 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7528 (self.new_node, idx))
7529 # we pass force_create=True to force LVM creation
7530 for new_lv in dev.children:
7531 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7532 _GetInstanceInfoText(self.instance), False)
7534 # Step 4: drbd minors and drbd setup changes
7535 # after this, we must manually remove the drbd minors on both the
7536 # error and the success paths
7537 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7538 minors = self.cfg.AllocateDRBDMinor([self.new_node
7539 for dev in self.instance.disks],
7541 logging.debug("Allocated minors %r", minors)
7544 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7545 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7546 (self.new_node, idx))
7547 # create new devices on new_node; note that we create two IDs:
7548 # one without port, so the drbd will be activated without
7549 # networking information on the new node at this stage, and one
7550 # with network, for the latter activation in step 4
7551 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7552 if self.instance.primary_node == o_node1:
7555 assert self.instance.primary_node == o_node2, "Three-node instance?"
7558 new_alone_id = (self.instance.primary_node, self.new_node, None,
7559 p_minor, new_minor, o_secret)
7560 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7561 p_minor, new_minor, o_secret)
7563 iv_names[idx] = (dev, dev.children, new_net_id)
7564 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7566 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7567 logical_id=new_alone_id,
7568 children=dev.children,
7571 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7572 _GetInstanceInfoText(self.instance), False)
7573 except errors.GenericError:
7574 self.cfg.ReleaseDRBDMinors(self.instance.name)
7577 # We have new devices, shutdown the drbd on the old secondary
7578 for idx, dev in enumerate(self.instance.disks):
7579 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7580 self.cfg.SetDiskID(dev, self.target_node)
7581 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7583 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7584 "node: %s" % (idx, msg),
7585 hint=("Please cleanup this device manually as"
7586 " soon as possible"))
7588 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7589 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7590 self.node_secondary_ip,
7591 self.instance.disks)\
7592 [self.instance.primary_node]
7594 msg = result.fail_msg
7596 # detaches didn't succeed (unlikely)
7597 self.cfg.ReleaseDRBDMinors(self.instance.name)
7598 raise errors.OpExecError("Can't detach the disks from the network on"
7599 " old node: %s" % (msg,))
7601 # if we managed to detach at least one, we update all the disks of
7602 # the instance to point to the new secondary
7603 self.lu.LogInfo("Updating instance configuration")
7604 for dev, _, new_logical_id in iv_names.itervalues():
7605 dev.logical_id = new_logical_id
7606 self.cfg.SetDiskID(dev, self.instance.primary_node)
7608 self.cfg.Update(self.instance, feedback_fn)
7610 # and now perform the drbd attach
7611 self.lu.LogInfo("Attaching primary drbds to new secondary"
7612 " (standalone => connected)")
7613 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7615 self.node_secondary_ip,
7616 self.instance.disks,
7619 for to_node, to_result in result.items():
7620 msg = to_result.fail_msg
7622 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7624 hint=("please do a gnt-instance info to see the"
7625 " status of disks"))
7627 if self.early_release:
7628 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7630 self._RemoveOldStorage(self.target_node, iv_names)
7631 # WARNING: we release all node locks here, do not do other RPCs
7632 # than WaitForSync to the primary node
7633 self._ReleaseNodeLock([self.instance.primary_node,
7638 # This can fail as the old devices are degraded and _WaitForSync
7639 # does a combined result over all disks, so we don't check its return value
7640 self.lu.LogStep(cstep, steps_total, "Sync devices")
7642 _WaitForSync(self.lu, self.instance)
7644 # Check all devices manually
7645 self._CheckDevices(self.instance.primary_node, iv_names)
7647 # Step: remove old storage
7648 if not self.early_release:
7649 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7650 self._RemoveOldStorage(self.target_node, iv_names)
7653 class LURepairNodeStorage(NoHooksLU):
7654 """Repairs the volume group on a node.
7657 _OP_REQP = ["node_name"]
7660 def CheckArguments(self):
7661 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7663 def ExpandNames(self):
7664 self.needed_locks = {
7665 locking.LEVEL_NODE: [self.op.node_name],
7668 def _CheckFaultyDisks(self, instance, node_name):
7669 """Ensure faulty disks abort the opcode or at least warn."""
7671 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7673 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7674 " node '%s'" % (instance.name, node_name),
7676 except errors.OpPrereqError, err:
7677 if self.op.ignore_consistency:
7678 self.proc.LogWarning(str(err.args[0]))
7682 def CheckPrereq(self):
7683 """Check prerequisites.
7686 storage_type = self.op.storage_type
7688 if (constants.SO_FIX_CONSISTENCY not in
7689 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7690 raise errors.OpPrereqError("Storage units of type '%s' can not be"
7691 " repaired" % storage_type,
7694 # Check whether any instance on this node has faulty disks
7695 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7696 if not inst.admin_up:
7698 check_nodes = set(inst.all_nodes)
7699 check_nodes.discard(self.op.node_name)
7700 for inst_node_name in check_nodes:
7701 self._CheckFaultyDisks(inst, inst_node_name)
7703 def Exec(self, feedback_fn):
7704 feedback_fn("Repairing storage unit '%s' on %s ..." %
7705 (self.op.name, self.op.node_name))
7707 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7708 result = self.rpc.call_storage_execute(self.op.node_name,
7709 self.op.storage_type, st_args,
7711 constants.SO_FIX_CONSISTENCY)
7712 result.Raise("Failed to repair storage unit '%s' on %s" %
7713 (self.op.name, self.op.node_name))
7716 class LUNodeEvacuationStrategy(NoHooksLU):
7717 """Computes the node evacuation strategy.
7720 _OP_REQP = ["nodes"]
7723 def CheckArguments(self):
7724 if not hasattr(self.op, "remote_node"):
7725 self.op.remote_node = None
7726 if not hasattr(self.op, "iallocator"):
7727 self.op.iallocator = None
7728 if self.op.remote_node is not None and self.op.iallocator is not None:
7729 raise errors.OpPrereqError("Give either the iallocator or the new"
7730 " secondary, not both", errors.ECODE_INVAL)
7732 def ExpandNames(self):
7733 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7734 self.needed_locks = locks = {}
7735 if self.op.remote_node is None:
7736 locks[locking.LEVEL_NODE] = locking.ALL_SET
7738 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7739 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7741 def CheckPrereq(self):
7744 def Exec(self, feedback_fn):
7745 if self.op.remote_node is not None:
7747 for node in self.op.nodes:
7748 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7751 if i.primary_node == self.op.remote_node:
7752 raise errors.OpPrereqError("Node %s is the primary node of"
7753 " instance %s, cannot use it as"
7755 (self.op.remote_node, i.name),
7757 result.append([i.name, self.op.remote_node])
7759 ial = IAllocator(self.cfg, self.rpc,
7760 mode=constants.IALLOCATOR_MODE_MEVAC,
7761 evac_nodes=self.op.nodes)
7762 ial.Run(self.op.iallocator, validate=True)
7764 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
7770 class LUGrowDisk(LogicalUnit):
7771 """Grow a disk of an instance.
7775 HTYPE = constants.HTYPE_INSTANCE
7776 _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7779 def ExpandNames(self):
7780 self._ExpandAndLockInstance()
7781 self.needed_locks[locking.LEVEL_NODE] = []
7782 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7784 def DeclareLocks(self, level):
7785 if level == locking.LEVEL_NODE:
7786 self._LockInstancesNodes()
7788 def BuildHooksEnv(self):
7791 This runs on the master, the primary and all the secondaries.
7795 "DISK": self.op.disk,
7796 "AMOUNT": self.op.amount,
7798 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7799 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7802 def CheckPrereq(self):
7803 """Check prerequisites.
7805 This checks that the instance is in the cluster.
7808 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7809 assert instance is not None, \
7810 "Cannot retrieve locked instance %s" % self.op.instance_name
7811 nodenames = list(instance.all_nodes)
7812 for node in nodenames:
7813 _CheckNodeOnline(self, node)
7816 self.instance = instance
7818 if instance.disk_template not in constants.DTS_GROWABLE:
7819 raise errors.OpPrereqError("Instance's disk layout does not support"
7820 " growing.", errors.ECODE_INVAL)
7822 self.disk = instance.FindDisk(self.op.disk)
7824 if instance.disk_template != constants.DT_FILE:
7825 # TODO: check the free disk space for file, when that feature will be implemented
7827 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
7829 def Exec(self, feedback_fn):
7830 """Execute disk grow.
7833 instance = self.instance
7835 for node in instance.all_nodes:
7836 self.cfg.SetDiskID(disk, node)
7837 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7838 result.Raise("Grow request failed to node %s" % node)
7840 # TODO: Rewrite code to work properly
7841 # DRBD goes into sync mode for a short amount of time after executing the
7842 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7843 # calling "resize" in sync mode fails. Sleeping for a short amount of
7844 # time is a work-around.
7847 disk.RecordGrow(self.op.amount)
7848 self.cfg.Update(instance, feedback_fn)
7849 if self.op.wait_for_sync:
7850 disk_abort = not _WaitForSync(self, instance)
7852 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7853 " status.\nPlease check the instance.")
7856 class LUQueryInstanceData(NoHooksLU):
7857 """Query runtime instance data.
7860 _OP_REQP = ["instances", "static"]
7863 def ExpandNames(self):
7864 self.needed_locks = {}
7865 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7867 if not isinstance(self.op.instances, list):
7868 raise errors.OpPrereqError("Invalid argument type 'instances'",
7871 if self.op.instances:
7872 self.wanted_names = []
7873 for name in self.op.instances:
7874 full_name = _ExpandInstanceName(self.cfg, name)
7875 self.wanted_names.append(full_name)
7876 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7878 self.wanted_names = None
7879 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7881 self.needed_locks[locking.LEVEL_NODE] = []
7882 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7884 def DeclareLocks(self, level):
7885 if level == locking.LEVEL_NODE:
7886 self._LockInstancesNodes()
7888 def CheckPrereq(self):
7889 """Check prerequisites.
7891 This only checks the optional instance list against the existing names.
7894 if self.wanted_names is None:
7895 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7897 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7898 in self.wanted_names]
7901 def _ComputeBlockdevStatus(self, node, instance_name, dev):
7902 """Returns the status of a block device
7905 if self.op.static or not node:
7908 self.cfg.SetDiskID(dev, node)
7910 result = self.rpc.call_blockdev_find(node, dev)
7914 result.Raise("Can't compute disk status for %s" % instance_name)
7916 status = result.payload
7920 return (status.dev_path, status.major, status.minor,
7921 status.sync_percent, status.estimated_time,
7922 status.is_degraded, status.ldisk_status)
7924 def _ComputeDiskStatus(self, instance, snode, dev):
7925 """Compute block device status.
7928 if dev.dev_type in constants.LDS_DRBD:
7929 # we change the snode then (otherwise we use the one passed in)
7930 if dev.logical_id[0] == instance.primary_node:
7931 snode = dev.logical_id[1]
7933 snode = dev.logical_id[0]
7935 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
7937 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
7940 dev_children = [self._ComputeDiskStatus(instance, snode, child)
7941 for child in dev.children]
7946 "iv_name": dev.iv_name,
7947 "dev_type": dev.dev_type,
7948 "logical_id": dev.logical_id,
7949 "physical_id": dev.physical_id,
7950 "pstatus": dev_pstatus,
7951 "sstatus": dev_sstatus,
7952 "children": dev_children,
7959 def Exec(self, feedback_fn):
7960 """Gather and return data"""
7963 cluster = self.cfg.GetClusterInfo()
7965 for instance in self.wanted_instances:
7966 if not self.op.static:
7967 remote_info = self.rpc.call_instance_info(instance.primary_node,
7969 instance.hypervisor)
7970 remote_info.Raise("Error checking node %s" % instance.primary_node)
7971 remote_info = remote_info.payload
7972 if remote_info and "state" in remote_info:
7975 remote_state = "down"
7978 if instance.admin_up:
7981 config_state = "down"
7983 disks = [self._ComputeDiskStatus(instance, None, device)
7984 for device in instance.disks]
7987 "name": instance.name,
7988 "config_state": config_state,
7989 "run_state": remote_state,
7990 "pnode": instance.primary_node,
7991 "snodes": instance.secondary_nodes,
7993 # this happens to be the same format used for hooks
7994 "nics": _NICListToTuple(self, instance.nics),
7996 "hypervisor": instance.hypervisor,
7997 "network_port": instance.network_port,
7998 "hv_instance": instance.hvparams,
7999 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8000 "be_instance": instance.beparams,
8001 "be_actual": cluster.FillBE(instance),
8002 "serial_no": instance.serial_no,
8003 "mtime": instance.mtime,
8004 "ctime": instance.ctime,
8005 "uuid": instance.uuid,
8008 result[instance.name] = idict
8013 class LUSetInstanceParams(LogicalUnit):
8014 """Modifies an instances's parameters.
8017 HPATH = "instance-modify"
8018 HTYPE = constants.HTYPE_INSTANCE
8019 _OP_REQP = ["instance_name"]
8022 def CheckArguments(self):
8023 if not hasattr(self.op, 'nics'):
8025 if not hasattr(self.op, 'disks'):
8027 if not hasattr(self.op, 'beparams'):
8028 self.op.beparams = {}
8029 if not hasattr(self.op, 'hvparams'):
8030 self.op.hvparams = {}
8031 if not hasattr(self.op, "disk_template"):
8032 self.op.disk_template = None
8033 if not hasattr(self.op, "remote_node"):
8034 self.op.remote_node = None
8035 if not hasattr(self.op, "os_name"):
8036 self.op.os_name = None
8037 if not hasattr(self.op, "force_variant"):
8038 self.op.force_variant = False
8039 self.op.force = getattr(self.op, "force", False)
8040 if not (self.op.nics or self.op.disks or self.op.disk_template or
8041 self.op.hvparams or self.op.beparams or self.op.os_name):
8042 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8044 if self.op.hvparams:
8045 _CheckGlobalHvParams(self.op.hvparams)
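# Note: every entry in self.op.disks/self.op.nics below is an (op, params)
# pair, where op is constants.DDM_ADD, constants.DDM_REMOVE or the integer
# index of an existing device to modify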
8049 for disk_op, disk_dict in self.op.disks:
8050 if disk_op == constants.DDM_REMOVE:
8053 elif disk_op == constants.DDM_ADD:
8056 if not isinstance(disk_op, int):
8057 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8058 if not isinstance(disk_dict, dict):
8059 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8060 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8062 if disk_op == constants.DDM_ADD:
8063 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8064 if mode not in constants.DISK_ACCESS_SET:
8065 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8067 size = disk_dict.get('size', None)
8069 raise errors.OpPrereqError("Required disk parameter size missing",
8073 except (TypeError, ValueError), err:
8074 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8075 str(err), errors.ECODE_INVAL)
8076 disk_dict['size'] = size
8078 # modification of disk
8079 if 'size' in disk_dict:
8080 raise errors.OpPrereqError("Disk size change not possible, use"
8081 " grow-disk", errors.ECODE_INVAL)
8083 if disk_addremove > 1:
8084 raise errors.OpPrereqError("Only one disk add or remove operation"
8085 " supported at a time", errors.ECODE_INVAL)
8087 if self.op.disks and self.op.disk_template is not None:
8088 raise errors.OpPrereqError("Disk template conversion and other disk"
8089 " changes not supported at the same time",
8092 if self.op.disk_template:
8093 _CheckDiskTemplate(self.op.disk_template)
8094 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8095 self.op.remote_node is None):
8096 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8097 " one requires specifying a secondary node",
8102 for nic_op, nic_dict in self.op.nics:
8103 if nic_op == constants.DDM_REMOVE:
8106 elif nic_op == constants.DDM_ADD:
8109 if not isinstance(nic_op, int):
8110 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8111 if not isinstance(nic_dict, dict):
8112 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8113 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8115 # nic_dict should be a dict
8116 nic_ip = nic_dict.get('ip', None)
8117 if nic_ip is not None:
8118 if nic_ip.lower() == constants.VALUE_NONE:
8119 nic_dict['ip'] = None
8121 if not utils.IsValidIP(nic_ip):
8122 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8125 nic_bridge = nic_dict.get('bridge', None)
8126 nic_link = nic_dict.get('link', None)
8127 if nic_bridge and nic_link:
8128 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8129 " at the same time", errors.ECODE_INVAL)
8130 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8131 nic_dict['bridge'] = None
8132 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8133 nic_dict['link'] = None
8135 if nic_op == constants.DDM_ADD:
8136 nic_mac = nic_dict.get('mac', None)
8138 nic_dict['mac'] = constants.VALUE_AUTO
8140 if 'mac' in nic_dict:
8141 nic_mac = nic_dict['mac']
8142 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8143 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8145 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8146 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8147 " modifying an existing nic",
8150 if nic_addremove > 1:
8151 raise errors.OpPrereqError("Only one NIC add or remove operation"
8152 " supported at a time", errors.ECODE_INVAL)
8154 def ExpandNames(self):
8155 self._ExpandAndLockInstance()
8156 self.needed_locks[locking.LEVEL_NODE] = []
8157 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8159 def DeclareLocks(self, level):
8160 if level == locking.LEVEL_NODE:
8161 self._LockInstancesNodes()
8162 if self.op.disk_template and self.op.remote_node:
8163 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8164 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8166 def BuildHooksEnv(self):
8169 This runs on the master, primary and secondaries.
8173 if constants.BE_MEMORY in self.be_new:
8174 args['memory'] = self.be_new[constants.BE_MEMORY]
8175 if constants.BE_VCPUS in self.be_new:
8176 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8177 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8178 # information at all.
8181 nic_override = dict(self.op.nics)
8182 c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
8183 for idx, nic in enumerate(self.instance.nics):
8184 if idx in nic_override:
8185 this_nic_override = nic_override[idx]
8187 this_nic_override = {}
8188 if 'ip' in this_nic_override:
8189 ip = this_nic_override['ip']
8192 if 'mac' in this_nic_override:
8193 mac = this_nic_override['mac']
8196 if idx in self.nic_pnew:
8197 nicparams = self.nic_pnew[idx]
8199 nicparams = objects.FillDict(c_nicparams, nic.nicparams)
8200 mode = nicparams[constants.NIC_MODE]
8201 link = nicparams[constants.NIC_LINK]
8202 args['nics'].append((ip, mac, mode, link))
8203 if constants.DDM_ADD in nic_override:
8204 ip = nic_override[constants.DDM_ADD].get('ip', None)
8205 mac = nic_override[constants.DDM_ADD]['mac']
8206 nicparams = self.nic_pnew[constants.DDM_ADD]
8207 mode = nicparams[constants.NIC_MODE]
8208 link = nicparams[constants.NIC_LINK]
8209 args['nics'].append((ip, mac, mode, link))
8210 elif constants.DDM_REMOVE in nic_override:
8211 del args['nics'][-1]
8213 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8214 if self.op.disk_template:
8215 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8216 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8220 def _GetUpdatedParams(old_params, update_dict,
8221 default_values, parameter_types):
8222 """Return the new params dict for the given params.
8224 @type old_params: dict
8225 @param old_params: old parameters
8226 @type update_dict: dict
8227 @param update_dict: dict containing new parameter values,
8228 or constants.VALUE_DEFAULT to reset the
8229 parameter to its default value
8230 @type default_values: dict
8231 @param default_values: default values for the filled parameters
8232 @type parameter_types: dict
8233 @param parameter_types: dict mapping target dict keys to types
8234 in constants.ENFORCEABLE_TYPES
8235 @rtype: (dict, dict)
8236 @return: (new_parameters, filled_parameters)
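Illustrative example (assuming a parameter_types map that accepts these
keys): with old_params={"a": 1}, update_dict={"a": constants.VALUE_DEFAULT,
"b": 2} and default_values={"a": 10, "b": 20}, the result is
({"b": 2}, {"a": 10, "b": 2}).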
8239 params_copy = copy.deepcopy(old_params)
8240 for key, val in update_dict.iteritems():
8241 if val == constants.VALUE_DEFAULT:
8243 del params_copy[key]
8247 params_copy[key] = val
8248 utils.ForceDictType(params_copy, parameter_types)
8249 params_filled = objects.FillDict(default_values, params_copy)
8250 return (params_copy, params_filled)
8252 def CheckPrereq(self):
8253 """Check prerequisites.
8255 This only checks the instance list against the existing names.
8258 self.force = self.op.force
8260 # checking the new params on the primary/secondary nodes
8262 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8263 cluster = self.cluster = self.cfg.GetClusterInfo()
8264 assert self.instance is not None, \
8265 "Cannot retrieve locked instance %s" % self.op.instance_name
8266 pnode = instance.primary_node
8267 nodelist = list(instance.all_nodes)
8269 if self.op.disk_template:
8270 if instance.disk_template == self.op.disk_template:
8271 raise errors.OpPrereqError("Instance already has disk template %s" %
8272 instance.disk_template, errors.ECODE_INVAL)
8274 if (instance.disk_template,
8275 self.op.disk_template) not in self._DISK_CONVERSIONS:
8276 raise errors.OpPrereqError("Unsupported disk template conversion from"
8277 " %s to %s" % (instance.disk_template,
8278 self.op.disk_template),
8280 if self.op.disk_template in constants.DTS_NET_MIRROR:
8281 _CheckNodeOnline(self, self.op.remote_node)
8282 _CheckNodeNotDrained(self, self.op.remote_node)
8283 disks = [{"size": d.size} for d in instance.disks]
8284 required = _ComputeDiskSize(self.op.disk_template, disks)
8285 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8286 _CheckInstanceDown(self, instance, "cannot change disk template")
8288 # hvparams processing
8289 if self.op.hvparams:
8290 i_hvdict, hv_new = self._GetUpdatedParams(
8291 instance.hvparams, self.op.hvparams,
8292 cluster.hvparams[instance.hypervisor],
8293 constants.HVS_PARAMETER_TYPES)
8295 hypervisor.GetHypervisor(
8296 instance.hypervisor).CheckParameterSyntax(hv_new)
8297 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8298 self.hv_new = hv_new # the new actual values
8299 self.hv_inst = i_hvdict # the new dict (without defaults)
8301 self.hv_new = self.hv_inst = {}
8303 # beparams processing
8304 if self.op.beparams:
8305 i_bedict, be_new = self._GetUpdatedParams(
8306 instance.beparams, self.op.beparams,
8307 cluster.beparams[constants.PP_DEFAULT],
8308 constants.BES_PARAMETER_TYPES)
8309 self.be_new = be_new # the new actual values
8310 self.be_inst = i_bedict # the new dict (without defaults)
8312 self.be_new = self.be_inst = {}
8316 if constants.BE_MEMORY in self.op.beparams and not self.force:
8317 mem_check_list = [pnode]
8318 if be_new[constants.BE_AUTO_BALANCE]:
8319 # either we changed auto_balance to yes or it was from before
8320 mem_check_list.extend(instance.secondary_nodes)
8321 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8322 instance.hypervisor)
8323 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8324 instance.hypervisor)
8325 pninfo = nodeinfo[pnode]
8326 msg = pninfo.fail_msg
8328 # Assume the primary node is unreachable and go ahead
8329 self.warn.append("Can't get info from primary node %s: %s" %
8331 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8332 self.warn.append("Node data from primary node %s doesn't contain"
8333 " free memory information" % pnode)
8334 elif instance_info.fail_msg:
8335 self.warn.append("Can't get instance runtime information: %s" %
8336 instance_info.fail_msg)
8338 if instance_info.payload:
8339 current_mem = int(instance_info.payload['memory'])
8341 # Assume instance not running
8342 # (there is a slight race condition here, but it's not very probable,
8343 # and we have no other way to check)
8345 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8346 pninfo.payload['memory_free'])
8348 raise errors.OpPrereqError("This change will prevent the instance"
8349 " from starting, due to %d MB of memory"
8350 " missing on its primary node" % miss_mem,
8353 if be_new[constants.BE_AUTO_BALANCE]:
8354 for node, nres in nodeinfo.items():
8355 if node not in instance.secondary_nodes:
8359 self.warn.append("Can't get info from secondary node %s: %s" %
8361 elif not isinstance(nres.payload.get('memory_free', None), int):
8362 self.warn.append("Secondary node %s didn't return free"
8363 " memory information" % node)
8364 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8365 self.warn.append("Not enough memory to failover instance to"
8366 " secondary node %s" % node)
8371 for nic_op, nic_dict in self.op.nics:
8372 if nic_op == constants.DDM_REMOVE:
8373 if not instance.nics:
8374 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8377 if nic_op != constants.DDM_ADD:
8379 if not instance.nics:
8380 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8381 " no NICs" % nic_op,
8383 if nic_op < 0 or nic_op >= len(instance.nics):
8384 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8386 (nic_op, len(instance.nics) - 1),
8388 old_nic_params = instance.nics[nic_op].nicparams
8389 old_nic_ip = instance.nics[nic_op].ip
8394 update_params_dict = dict([(key, nic_dict[key])
8395 for key in constants.NICS_PARAMETERS
8396 if key in nic_dict])
8398 if 'bridge' in nic_dict:
8399 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8401 new_nic_params, new_filled_nic_params = \
8402 self._GetUpdatedParams(old_nic_params, update_params_dict,
8403 cluster.nicparams[constants.PP_DEFAULT],
8404 constants.NICS_PARAMETER_TYPES)
8405 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8406 self.nic_pinst[nic_op] = new_nic_params
8407 self.nic_pnew[nic_op] = new_filled_nic_params
8408 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8410 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8411 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8412 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8414 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8416 self.warn.append(msg)
8418 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8419 if new_nic_mode == constants.NIC_MODE_ROUTED:
8420 if 'ip' in nic_dict:
8421 nic_ip = nic_dict['ip']
8425 raise errors.OpPrereqError('Cannot set the nic ip to None'
8426 ' on a routed nic', errors.ECODE_INVAL)
8427 if 'mac' in nic_dict:
8428 nic_mac = nic_dict['mac']
8430 raise errors.OpPrereqError('Cannot set the nic mac to None',
8432 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8433 # otherwise generate the mac
8434 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8436 # or validate/reserve the current one
8438 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8439 except errors.ReservationError:
8440 raise errors.OpPrereqError("MAC address %s already in use"
8441 " in cluster" % nic_mac,
8442 errors.ECODE_NOTUNIQUE)
8445 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8446 raise errors.OpPrereqError("Disk operations not supported for"
8447 " diskless instances",
8449 for disk_op, _ in self.op.disks:
8450 if disk_op == constants.DDM_REMOVE:
8451 if len(instance.disks) == 1:
8452 raise errors.OpPrereqError("Cannot remove the last disk of"
8453 " an instance", errors.ECODE_INVAL)
8454 _CheckInstanceDown(self, instance, "cannot remove disks")
8456 if (disk_op == constants.DDM_ADD and
8457 len(instance.nics) >= constants.MAX_DISKS):
8458 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8459 " add more" % constants.MAX_DISKS,
8461 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8463 if disk_op < 0 or disk_op >= len(instance.disks):
8464 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8466 (disk_op, len(instance.disks)),
8470 if self.op.os_name and not self.op.force:
8471 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8472 self.op.force_variant)
8476 def _ConvertPlainToDrbd(self, feedback_fn):
8477 """Converts an instance from plain to drbd.
8480 feedback_fn("Converting template to drbd")
8481 instance = self.instance
8482 pnode = instance.primary_node
8483 snode = self.op.remote_node
8485 # create a fake disk info for _GenerateDiskTemplate
8486 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8487 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8488 instance.name, pnode, [snode],
8489 disk_info, None, None, 0)
8490 info = _GetInstanceInfoText(instance)
8491 feedback_fn("Creating aditional volumes...")
8492 # first, create the missing data and meta devices
8493 for disk in new_disks:
8494 # unfortunately this is... not too nice
8495 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8497 for child in disk.children:
8498 _CreateSingleBlockDev(self, snode, instance, child, info, True)
8499 # at this stage, all new LVs have been created, we can rename the
8501 feedback_fn("Renaming original volumes...")
8502 rename_list = [(o, n.children[0].logical_id)
8503 for (o, n) in zip(instance.disks, new_disks)]
8504 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8505 result.Raise("Failed to rename original LVs")
8507 feedback_fn("Initializing DRBD devices...")
8508 # all child devices are in place, we can now create the DRBD devices
8509 for disk in new_disks:
8510 for node in [pnode, snode]:
8511 f_create = node == pnode
8512 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8514 # at this point, the instance has been modified
8515 instance.disk_template = constants.DT_DRBD8
8516 instance.disks = new_disks
8517 self.cfg.Update(instance, feedback_fn)
8519 # disks are created, waiting for sync
8520 disk_abort = not _WaitForSync(self, instance)
8522 raise errors.OpExecError("There are some degraded disks for"
8523 " this instance, please cleanup manually")
8525 def _ConvertDrbdToPlain(self, feedback_fn):
8526 """Converts an instance from drbd to plain.
8529 instance = self.instance
8530 assert len(instance.secondary_nodes) == 1
8531 pnode = instance.primary_node
8532 snode = instance.secondary_nodes[0]
8533 feedback_fn("Converting template to plain")
8535 old_disks = instance.disks
8536 new_disks = [d.children[0] for d in old_disks]
8538 # copy over size and mode
8539 for parent, child in zip(old_disks, new_disks):
8540 child.size = parent.size
8541 child.mode = parent.mode
8543 # update instance structure
8544 instance.disks = new_disks
8545 instance.disk_template = constants.DT_PLAIN
8546 self.cfg.Update(instance, feedback_fn)
8548 feedback_fn("Removing volumes on the secondary node...")
8549 for disk in old_disks:
8550 self.cfg.SetDiskID(disk, snode)
8551 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8553 self.LogWarning("Could not remove block device %s on node %s,"
8554 " continuing anyway: %s", disk.iv_name, snode, msg)
8556 feedback_fn("Removing unneeded volumes on the primary node...")
8557 for idx, disk in enumerate(old_disks):
8558 meta = disk.children[1]
8559 self.cfg.SetDiskID(meta, pnode)
8560 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8562 self.LogWarning("Could not remove metadata for disk %d on node %s,"
8563 " continuing anyway: %s", idx, pnode, msg)
8566 def Exec(self, feedback_fn):
8567 """Modifies an instance.
8569 All parameters take effect only at the next restart of the instance.
8572 # Process here the warnings from CheckPrereq, as we don't have a
8573 # feedback_fn there.
8574 for warn in self.warn:
8575 feedback_fn("WARNING: %s" % warn)
8578 instance = self.instance
8580 for disk_op, disk_dict in self.op.disks:
8581 if disk_op == constants.DDM_REMOVE:
8582 # remove the last disk
8583 device = instance.disks.pop()
8584 device_idx = len(instance.disks)
8585 for node, disk in device.ComputeNodeTree(instance.primary_node):
8586 self.cfg.SetDiskID(disk, node)
8587 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8589 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8590 " continuing anyway", device_idx, node, msg)
8591 result.append(("disk/%d" % device_idx, "remove"))
8592 elif disk_op == constants.DDM_ADD:
8594 if instance.disk_template == constants.DT_FILE:
8595 file_driver, file_path = instance.disks[0].logical_id
8596 file_path = os.path.dirname(file_path)
8598 file_driver = file_path = None
8599 disk_idx_base = len(instance.disks)
8600 new_disk = _GenerateDiskTemplate(self,
8601 instance.disk_template,
8602 instance.name, instance.primary_node,
8603 instance.secondary_nodes,
8608 instance.disks.append(new_disk)
8609 info = _GetInstanceInfoText(instance)
8611 logging.info("Creating volume %s for instance %s",
8612 new_disk.iv_name, instance.name)
8613 # Note: this needs to be kept in sync with _CreateDisks
8615 for node in instance.all_nodes:
8616 f_create = node == instance.primary_node
8618 _CreateBlockDev(self, node, instance, new_disk,
8619 f_create, info, f_create)
8620 except errors.OpExecError, err:
8621 self.LogWarning("Failed to create volume %s (%s) on"
8623 new_disk.iv_name, new_disk, node, err)
8624 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8625 (new_disk.size, new_disk.mode)))
8627 # change a given disk
8628 instance.disks[disk_op].mode = disk_dict['mode']
8629 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8631 if self.op.disk_template:
8632 r_shut = _ShutdownInstanceDisks(self, instance)
8634 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8635 " proceed with disk template conversion")
8636 mode = (instance.disk_template, self.op.disk_template)
8638 self._DISK_CONVERSIONS[mode](self, feedback_fn)
8640 self.cfg.ReleaseDRBDMinors(instance.name)
8642 result.append(("disk_template", self.op.disk_template))
8645 for nic_op, nic_dict in self.op.nics:
8646 if nic_op == constants.DDM_REMOVE:
8647 # remove the last nic
8648 del instance.nics[-1]
8649 result.append(("nic.%d" % len(instance.nics), "remove"))
8650 elif nic_op == constants.DDM_ADD:
8651 # mac and bridge should be set by now
8652 mac = nic_dict['mac']
8653 ip = nic_dict.get('ip', None)
8654 nicparams = self.nic_pinst[constants.DDM_ADD]
8655 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8656 instance.nics.append(new_nic)
8657 result.append(("nic.%d" % (len(instance.nics) - 1),
8658 "add:mac=%s,ip=%s,mode=%s,link=%s" %
8659 (new_nic.mac, new_nic.ip,
8660 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8661 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8664 for key in 'mac', 'ip':
8666 setattr(instance.nics[nic_op], key, nic_dict[key])
8667 if nic_op in self.nic_pinst:
8668 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8669 for key, val in nic_dict.iteritems():
8670 result.append(("nic.%s/%d" % (key, nic_op), val))
8673 if self.op.hvparams:
8674 instance.hvparams = self.hv_inst
8675 for key, val in self.op.hvparams.iteritems():
8676 result.append(("hv/%s" % key, val))
8679 if self.op.beparams:
8680 instance.beparams = self.be_inst
8681 for key, val in self.op.beparams.iteritems():
8682 result.append(("be/%s" % key, val))
8686 instance.os = self.op.os_name
8688 self.cfg.Update(instance, feedback_fn)
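# supported disk template conversions, mapping (old, new) template pairs
# to the methods implementing them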
8692 _DISK_CONVERSIONS = {
8693 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8694 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8697 class LUQueryExports(NoHooksLU):
8698 """Query the exports list
8701 _OP_REQP = ['nodes']
8704 def ExpandNames(self):
8705 self.needed_locks = {}
8706 self.share_locks[locking.LEVEL_NODE] = 1
8707 if not self.op.nodes:
8708 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8710 self.needed_locks[locking.LEVEL_NODE] = \
8711 _GetWantedNodes(self, self.op.nodes)
8713 def CheckPrereq(self):
8714 """Check prerequisites.
8717 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8719 def Exec(self, feedback_fn):
8720 """Compute the list of all the exported system images.
8723 @return: a dictionary with the structure node->(export-list)
8724 where export-list is a list of the instances exported on
8728 rpcresult = self.rpc.call_export_list(self.nodes)
8730 for node in rpcresult:
8731 if rpcresult[node].fail_msg:
8732 result[node] = False
8734 result[node] = rpcresult[node].payload
8739 class LUExportInstance(LogicalUnit):
8740 """Export an instance to an image in the cluster.
8743 HPATH = "instance-export"
8744 HTYPE = constants.HTYPE_INSTANCE
8745 _OP_REQP = ["instance_name", "target_node", "shutdown"]
8748 def CheckArguments(self):
8749 """Check the arguments.
8752 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
8753 constants.DEFAULT_SHUTDOWN_TIMEOUT)
8755 def ExpandNames(self):
8756 self._ExpandAndLockInstance()
8757 # FIXME: lock only instance primary and destination node
8759 # Sad but true, for now we have to lock all nodes, as we don't know where
8760 # the previous export might be, and in this LU we search for it and
8761 # remove it from its current node. In the future we could fix this by:
8762 # - making a tasklet to search (share-lock all), then create the new one,
8763 # then one to remove, after
8764 # - removing the removal operation altogether
8765 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8767 def DeclareLocks(self, level):
8768 """Last minute lock declaration."""
8769 # All nodes are locked anyway, so nothing to do here.
8771 def BuildHooksEnv(self):
8774 This will run on the master, primary node and target node.
8778 "EXPORT_NODE": self.op.target_node,
8779 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
8780 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
8782 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8783 nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
8784 self.op.target_node]
8787 def CheckPrereq(self):
8788 """Check prerequisites.
8790 This checks that the instance and node names are valid.
8793 instance_name = self.op.instance_name
8794 self.instance = self.cfg.GetInstanceInfo(instance_name)
8795 assert self.instance is not None, \
8796 "Cannot retrieve locked instance %s" % self.op.instance_name
8797 _CheckNodeOnline(self, self.instance.primary_node)
8799 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8800 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
8801 assert self.dst_node is not None
8803 _CheckNodeOnline(self, self.dst_node.name)
8804 _CheckNodeNotDrained(self, self.dst_node.name)
8806 # instance disk type verification
8807 for disk in self.instance.disks:
8808 if disk.dev_type == constants.LD_FILE:
8809 raise errors.OpPrereqError("Export not supported for instances with"
8810 " file-based disks", errors.ECODE_INVAL)
8812 def Exec(self, feedback_fn):
8813 """Export an instance to an image in the cluster.
8816 instance = self.instance
8817 dst_node = self.dst_node
8818 src_node = instance.primary_node
8820 if self.op.shutdown:
8821 # shutdown the instance, but not the disks
8822 feedback_fn("Shutting down instance %s" % instance.name)
8823 result = self.rpc.call_instance_shutdown(src_node, instance,
8824 self.shutdown_timeout)
8825 result.Raise("Could not shutdown instance %s on"
8826 " node %s" % (instance.name, src_node))
8828 vgname = self.cfg.GetVGName()
8832 # set the disks ID correctly since call_instance_start needs the
8833 # correct drbd minor to create the symlinks
8834 for disk in instance.disks:
8835 self.cfg.SetDiskID(disk, src_node)
8837 activate_disks = (not instance.admin_up)
8840 # Activate the instance disks if we're exporting a stopped instance
8841 feedback_fn("Activating disks for %s" % instance.name)
8842 _StartInstanceDisks(self, instance, None)
8848 for idx, disk in enumerate(instance.disks):
8849 feedback_fn("Creating a snapshot of disk/%s on node %s" %
8852 # result.payload will be a snapshot of an lvm leaf of the one we passed
8854 result = self.rpc.call_blockdev_snapshot(src_node, disk)
8855 msg = result.fail_msg
8857 self.LogWarning("Could not snapshot disk/%s on node %s: %s",
8859 snap_disks.append(False)
8861 disk_id = (vgname, result.payload)
8862 new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
8863 logical_id=disk_id, physical_id=disk_id,
8864 iv_name=disk.iv_name)
8865 snap_disks.append(new_dev)
8868 if self.op.shutdown and instance.admin_up:
8869 feedback_fn("Starting instance %s" % instance.name)
8870 result = self.rpc.call_instance_start(src_node, instance, None, None)
8871 msg = result.fail_msg
8873 _ShutdownInstanceDisks(self, instance)
8874 raise errors.OpExecError("Could not start instance: %s" % msg)
8876 # TODO: check for size
8878 cluster_name = self.cfg.GetClusterName()
8879 for idx, dev in enumerate(snap_disks):
8880 feedback_fn("Exporting snapshot %s from %s to %s" %
8881 (idx, src_node, dst_node.name))
8883 # FIXME: pass debug from opcode to backend
8884 result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
8885 instance, cluster_name,
8886 idx, self.op.debug_level)
8887 msg = result.fail_msg
8889 self.LogWarning("Could not export disk/%s from node %s to"
8890 " node %s: %s", idx, src_node, dst_node.name, msg)
8891 dresults.append(False)
8893 dresults.append(True)
8894 msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
8896 self.LogWarning("Could not remove snapshot for disk/%d from node"
8897 " %s: %s", idx, src_node, msg)
8899 dresults.append(False)
8901 feedback_fn("Finalizing export on %s" % dst_node.name)
8902 result = self.rpc.call_finalize_export(dst_node.name, instance,
8905 msg = result.fail_msg
8907 self.LogWarning("Could not finalize export for instance %s"
8908 " on node %s: %s", instance.name, dst_node.name, msg)
8913 feedback_fn("Deactivating disks for %s" % instance.name)
8914 _ShutdownInstanceDisks(self, instance)
8916 nodelist = self.cfg.GetNodeList()
8917 nodelist.remove(dst_node.name)
8919 # on one-node clusters nodelist will be empty after the removal
8920 # if we proceed, the backup would be removed because OpQueryExports
8921 # substitutes an empty list with the full cluster node list.
8922 iname = instance.name
8924 feedback_fn("Removing old exports for instance %s" % iname)
8925 exportlist = self.rpc.call_export_list(nodelist)
8926 for node in exportlist:
8927 if exportlist[node].fail_msg:
8929 if iname in exportlist[node].payload:
8930 msg = self.rpc.call_export_remove(node, iname).fail_msg
8932 self.LogWarning("Could not remove older export for instance %s"
8933 " on node %s: %s", iname, node, msg)
8934 return fin_resu, dresults
8937 class LURemoveExport(NoHooksLU):
8938 """Remove exports related to the named instance.
8941 _OP_REQP = ["instance_name"]
8944 def ExpandNames(self):
8945 self.needed_locks = {}
8946 # We need all nodes to be locked in order for RemoveExport to work, but we
8947 # don't need to lock the instance itself, as nothing will happen to it (and
8948 # we can remove exports also for a removed instance)
8949 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8951 def CheckPrereq(self):
8952 """Check prerequisites.
8956 def Exec(self, feedback_fn):
8957 """Remove any export.
8960 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
8961 # If the instance was not found we'll try with the name that was passed in.
8962 # This will only work if it was an FQDN, though.
8964 if not instance_name:
8966 instance_name = self.op.instance_name
8968 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
8969 exportlist = self.rpc.call_export_list(locked_nodes)
8971 for node in exportlist:
8972 msg = exportlist[node].fail_msg
8974 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
8976 if instance_name in exportlist[node].payload:
8978 result = self.rpc.call_export_remove(node, instance_name)
8979 msg = result.fail_msg
8981 logging.error("Could not remove export for instance %s"
8982 " on node %s: %s", instance_name, node, msg)
8984 if fqdn_warn and not found:
8985 feedback_fn("Export not found. If trying to remove an export belonging"
8986 " to a deleted instance please use its Fully Qualified"
8990 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
8993 This is an abstract class which is the parent of all the other tags LUs.
8997 def ExpandNames(self):
8998 self.needed_locks = {}
8999 if self.op.kind == constants.TAG_NODE:
9000 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9001 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9002 elif self.op.kind == constants.TAG_INSTANCE:
9003 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9004 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9006 def CheckPrereq(self):
9007 """Check prerequisites.
9010 if self.op.kind == constants.TAG_CLUSTER:
9011 self.target = self.cfg.GetClusterInfo()
9012 elif self.op.kind == constants.TAG_NODE:
9013 self.target = self.cfg.GetNodeInfo(self.op.name)
9014 elif self.op.kind == constants.TAG_INSTANCE:
9015 self.target = self.cfg.GetInstanceInfo(self.op.name)
9017 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9018 str(self.op.kind), errors.ECODE_INVAL)
9021 class LUGetTags(TagsLU):
9022 """Returns the tags of a given object.
9025 _OP_REQP = ["kind", "name"]
9028 def Exec(self, feedback_fn):
9029 """Returns the tag list.
9032 return list(self.target.GetTags())
9035 class LUSearchTags(NoHooksLU):
9036 """Searches the tags for a given pattern.
9039 _OP_REQP = ["pattern"]
9042 def ExpandNames(self):
9043 self.needed_locks = {}
9045 def CheckPrereq(self):
9046 """Check prerequisites.
9048 This checks the pattern passed for validity by compiling it.
9052 self.re = re.compile(self.op.pattern)
9053 except re.error, err:
9054 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9055 (self.op.pattern, err), errors.ECODE_INVAL)
9057 def Exec(self, feedback_fn):
9058 """Returns the tag list.
9062 tgts = [("/cluster", cfg.GetClusterInfo())]
9063 ilist = cfg.GetAllInstancesInfo().values()
9064 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9065 nlist = cfg.GetAllNodesInfo().values()
9066 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9068 for path, target in tgts:
9069 for tag in target.GetTags():
9070 if self.re.search(tag):
9071 results.append((path, tag))
9075 class LUAddTags(TagsLU):
9076 """Sets a tag on a given object.
9079 _OP_REQP = ["kind", "name", "tags"]
9082 def CheckPrereq(self):
9083 """Check prerequisites.
9085 This checks the type and length of the tag name and value.
9088 TagsLU.CheckPrereq(self)
9089 for tag in self.op.tags:
9090 objects.TaggableObject.ValidateTag(tag)
9092 def Exec(self, feedback_fn):
9097 for tag in self.op.tags:
9098 self.target.AddTag(tag)
9099 except errors.TagError, err:
9100 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9101 self.cfg.Update(self.target, feedback_fn)
9104 class LUDelTags(TagsLU):
9105 """Delete a list of tags from a given object.
9108 _OP_REQP = ["kind", "name", "tags"]
9111 def CheckPrereq(self):
9112 """Check prerequisites.
9114 This checks that we have the given tag.
9117 TagsLU.CheckPrereq(self)
9118 for tag in self.op.tags:
9119 objects.TaggableObject.ValidateTag(tag)
9120 del_tags = frozenset(self.op.tags)
9121 cur_tags = self.target.GetTags()
9122 if not del_tags <= cur_tags:
9123 diff_tags = del_tags - cur_tags
9124 diff_names = ["'%s'" % tag for tag in diff_tags]
9126 raise errors.OpPrereqError("Tag(s) %s not found" %
9127 (",".join(diff_names)), errors.ECODE_NOENT)
9129 def Exec(self, feedback_fn):
9130 """Remove the tag from the object.
9133 for tag in self.op.tags:
9134 self.target.RemoveTag(tag)
9135 self.cfg.Update(self.target, feedback_fn)
9138 class LUTestDelay(NoHooksLU):
9139 """Sleep for a specified amount of time.
9141 This LU sleeps on the master and/or nodes for a specified amount of time.
9145 _OP_REQP = ["duration", "on_master", "on_nodes"]
9148 def ExpandNames(self):
9149 """Expand names and set required locks.
9151 This expands the node list, if any.
9154 self.needed_locks = {}
9155 if self.op.on_nodes:
9156 # _GetWantedNodes can be used here, but is not always appropriate to use
9157 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9159 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9160 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9162 def CheckPrereq(self):
9163 """Check prerequisites.
9167 def Exec(self, feedback_fn):
9168 """Do the actual sleep.
9171 if self.op.on_master:
9172 if not utils.TestDelay(self.op.duration):
9173 raise errors.OpExecError("Error during master delay test")
9174 if self.op.on_nodes:
9175 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9176 for node, node_result in result.items():
9177 node_result.Raise("Failure during rpc call to node %s" % node)
9180 class IAllocator(object):
9181 """IAllocator framework.
9183 An IAllocator instance has four sets of attributes:
9184 - cfg that is needed to query the cluster
9185 - input data (all members of the _KEYS class attribute are required)
9186 - four buffer attributes (in|out_data|text), that represent the
9187 input (to the external script) in text and data structure format,
9188 and the output from it, again in two formats
9189 - the result variables from the script (success, info, result) for easy usage
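A minimal usage sketch (see for example LUNodeEvacuationStrategy above):
ial = IAllocator(cfg, rpc, mode=constants.IALLOCATOR_MODE_MEVAC,
evac_nodes=node_names)
ial.Run(allocator_name, validate=True)
after which ial.success, ial.info and ial.result hold the outcome.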
9193 # pylint: disable-msg=R0902
9194 # lots of instance attributes
9196 "name", "mem_size", "disks", "disk_template",
9197 "os", "tags", "nics", "vcpus", "hypervisor",
9200 "name", "relocate_from",
9206 def __init__(self, cfg, rpc, mode, **kwargs):
9209 # init buffer variables
9210 self.in_text = self.out_text = self.in_data = self.out_data = None
9211 # init all input fields so that pylint is happy
9213 self.mem_size = self.disks = self.disk_template = None
9214 self.os = self.tags = self.nics = self.vcpus = None
9215 self.hypervisor = None
9216 self.relocate_from = None
9218 self.evac_nodes = None
9220 self.required_nodes = None
9221 # init result fields
9222 self.success = self.info = self.result = None
9223 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9224 keyset = self._ALLO_KEYS
9225 fn = self._AddNewInstance
9226 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9227 keyset = self._RELO_KEYS
9228 fn = self._AddRelocateInstance
9229 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9230 keyset = self._EVAC_KEYS
9231 fn = self._AddEvacuateNodes
9233 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9234 " IAllocator" % self.mode)
9236 if key not in keyset:
9237 raise errors.ProgrammerError("Invalid input parameter '%s' to"
9238 " IAllocator" % key)
9239 setattr(self, key, kwargs[key])
9242 if key not in kwargs:
9243 raise errors.ProgrammerError("Missing input parameter '%s' to"
9244 " IAllocator" % key)
9245 self._BuildInputData(fn)
9247 def _ComputeClusterData(self):
9248 """Compute the generic allocator input data.
9250 This is the data that is independent of the actual operation.
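Among other keys, the gathered structure contains "cluster_name",
"cluster_tags" and "enabled_hypervisors" at the top level, plus "nodes"
and "instances" dictionaries with the per-node and per-instance details
built below.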
9254 cluster_info = cfg.GetClusterInfo()
9257 "version": constants.IALLOCATOR_VERSION,
9258 "cluster_name": cfg.GetClusterName(),
9259 "cluster_tags": list(cluster_info.GetTags()),
9260 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9261 # we don't have job IDs
9263 iinfo = cfg.GetAllInstancesInfo().values()
9264 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9268 node_list = cfg.GetNodeList()
9270 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9271 hypervisor_name = self.hypervisor
9272 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9273 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9274 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9275 hypervisor_name = cluster_info.enabled_hypervisors[0]
9277 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9280 self.rpc.call_all_instances_info(node_list,
9281 cluster_info.enabled_hypervisors)
9282 for nname, nresult in node_data.items():
9283 # first fill in static (config-based) values
9284 ninfo = cfg.GetNodeInfo(nname)
9286 "tags": list(ninfo.GetTags()),
9287 "primary_ip": ninfo.primary_ip,
9288 "secondary_ip": ninfo.secondary_ip,
9289 "offline": ninfo.offline,
9290 "drained": ninfo.drained,
9291 "master_candidate": ninfo.master_candidate,
9294 if not (ninfo.offline or ninfo.drained):
9295 nresult.Raise("Can't get data for node %s" % nname)
9296 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9298 remote_info = nresult.payload
9300 for attr in ['memory_total', 'memory_free', 'memory_dom0',
9301 'vg_size', 'vg_free', 'cpu_total']:
9302 if attr not in remote_info:
9303 raise errors.OpExecError("Node '%s' didn't return attribute"
9304 " '%s'" % (nname, attr))
9305 if not isinstance(remote_info[attr], int):
9306 raise errors.OpExecError("Node '%s' returned invalid value"
9308 (nname, attr, remote_info[attr]))
9309 # compute memory used by primary instances
9310 i_p_mem = i_p_up_mem = 0
9311 for iinfo, beinfo in i_list:
9312 if iinfo.primary_node == nname:
9313 i_p_mem += beinfo[constants.BE_MEMORY]
9314 if iinfo.name not in node_iinfo[nname].payload:
9317 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9318 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9319 remote_info['memory_free'] -= max(0, i_mem_diff)
9322 i_p_up_mem += beinfo[constants.BE_MEMORY]
9324 # compute memory used by instances
9326 "total_memory": remote_info['memory_total'],
9327 "reserved_memory": remote_info['memory_dom0'],
9328 "free_memory": remote_info['memory_free'],
9329 "total_disk": remote_info['vg_size'],
9330 "free_disk": remote_info['vg_free'],
9331 "total_cpus": remote_info['cpu_total'],
9332 "i_pri_memory": i_p_mem,
9333 "i_pri_up_memory": i_p_up_mem,
9337 node_results[nname] = pnr
9338 data["nodes"] = node_results
9342 for iinfo, beinfo in i_list:
9344 for nic in iinfo.nics:
9345 filled_params = objects.FillDict(
9346 cluster_info.nicparams[constants.PP_DEFAULT],
9348 nic_dict = {"mac": nic.mac,
9350 "mode": filled_params[constants.NIC_MODE],
9351 "link": filled_params[constants.NIC_LINK],
9353 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9354 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9355 nic_data.append(nic_dict)
9357 "tags": list(iinfo.GetTags()),
9358 "admin_up": iinfo.admin_up,
9359 "vcpus": beinfo[constants.BE_VCPUS],
9360 "memory": beinfo[constants.BE_MEMORY],
9362 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9364 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9365 "disk_template": iinfo.disk_template,
9366 "hypervisor": iinfo.hypervisor,
9368 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9370 instance_data[iinfo.name] = pir
9372 data["instances"] = instance_data
9376 def _AddNewInstance(self):
9377 """Add new instance data to allocator structure.
9379 This in combination with _ComputeClusterData will create the
9380 correct structure needed as input for the allocator.
9382 The checks for the completeness of the opcode must have already been
9386 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9388 if self.disk_template in constants.DTS_NET_MIRROR:
9389 self.required_nodes = 2
9391 self.required_nodes = 1
9394 "disk_template": self.disk_template,
9397 "vcpus": self.vcpus,
9398 "memory": self.mem_size,
9399 "disks": self.disks,
9400 "disk_space_total": disk_space,
9402 "required_nodes": self.required_nodes,
9406 def _AddRelocateInstance(self):
9407 """Add relocate instance data to allocator structure.
9409 This in combination with _ComputeClusterData will create the
9410 correct structure needed as input for the allocator.
9412 The checks for the completeness of the opcode must have already been
9416 instance = self.cfg.GetInstanceInfo(self.name)
9417 if instance is None:
9418 raise errors.ProgrammerError("Unknown instance '%s' passed to"
9419 " IAllocator" % self.name)
9421 if instance.disk_template not in constants.DTS_NET_MIRROR:
9422 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9425 if len(instance.secondary_nodes) != 1:
9426 raise errors.OpPrereqError("Instance has not exactly one secondary node",
9429 self.required_nodes = 1
9430 disk_sizes = [{'size': disk.size} for disk in instance.disks]
9431 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9435 "disk_space_total": disk_space,
9436 "required_nodes": self.required_nodes,
9437 "relocate_from": self.relocate_from,

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
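
    # Sketch of the final structure (illustrative): the per-mode request
    # returned by fn() is tagged with the mode and nested under "request",
    # e.g. self.in_data["request"] might end up as
    #   {"type": "relocate", "name": "inst1.example.com",
    #    "disk_space_total": 10368, "required_nodes": 1,
    #    "relocate_from": ["node2.example.com"]}
    # (assuming constants.IALLOCATOR_MODE_RELOC is the "relocate" string).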

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and store the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
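
    # Typical use (sketch): after _BuildInputData has populated self.in_text,
    # something like
    #   ial.Run("hail")
    # ships the serialized input to the master node's iallocator runner and
    # validates the reply; passing call_fn is mainly useful for tests, e.g.
    #   ial.Run("dummy", call_fn=my_fake_runner)
    # where my_fake_runner is a hypothetical stand-in with the same signature
    # as rpc.call_iallocator_runner (names invented for illustration).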

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
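
    # Illustrative example of a reply this method accepts (values invented):
    #   {"success": true, "info": "allocation successful",
    #    "result": ["node1.example.com", "node2.example.com"]}
    # i.e. a serialized object with at least the "success", "info" and
    # "result" keys, where "result" must be a list; older scripts returning
    # "nodes" instead of "result" are still accepted above.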


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the test direction
    and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
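
    # For illustration only: in allocation mode the opcode is expected to
    # carry roughly these attributes (names taken from the checks above,
    # values invented):
    #   direction="in" or "out", mode="allocate", name="inst1.example.com",
    #   mem_size=512, disks=[{"size": 10240, "mode": "w"}],
    #   disk_template="plain", os="debootstrap", tags=[], vcpus=1,
    #   nics=[{"mac": "auto", "ip": None, "bridge": None}],
    #   and an allocator name when direction is "out".
    # (The "in"/"out"/"allocate" strings stand for the respective
    # IALLOCATOR_* constants and are assumptions of this sketch.)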

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result