4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
38 from ganeti import ssh
39 from ganeti import utils
40 from ganeti import errors
41 from ganeti import hypervisor
42 from ganeti import locking
43 from ganeti import constants
44 from ganeti import objects
45 from ganeti import serializer
46 from ganeti import ssconf
49 class LogicalUnit(object):
50 """Logical Unit base class.
52 Subclasses must follow these rules:
53 - implement ExpandNames
54 - implement CheckPrereq (except when tasklets are used)
55 - implement Exec (except when tasklets are used)
56 - implement BuildHooksEnv
57 - redefine HPATH and HTYPE
58 - optionally redefine their run requirements:
59 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
61 Note that all commands require root permissions.
63 @ivar dry_run_result: the value (if any) that will be returned to the caller
64 in dry-run mode (signalled by opcode dry_run parameter)
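# Illustrative sketch (not part of the original module): a minimal LU
# following the rules above could look roughly like this, assuming a
# hypothetical OpFrobnicate opcode with a "target" parameter:
#
#   class LUFrobnicate(LogicalUnit):
#     HPATH = "frobnicate"
#     HTYPE = constants.HTYPE_CLUSTER
#     _OP_REQP = ["target"]
#
#     def ExpandNames(self):
#       self.needed_locks = {}
#
#     def BuildHooksEnv(self):
#       return {"OP_TARGET": self.op.target}, [], [self.cfg.GetMasterNode()]
#
#     def CheckPrereq(self):
#       pass
#
#     def Exec(self, feedback_fn):
#       feedback_fn("frobnicating %s" % self.op.target)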
72 def __init__(self, processor, op, context, rpc):
73 """Constructor for LogicalUnit.
75 This needs to be overridden in derived classes in order to check op
81 self.cfg = context.cfg
82 self.context = context
84 # Dicts used to declare locking needs to mcpu
85 self.needed_locks = None
86 self.acquired_locks = {}
87 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
89 self.remove_locks = {}
90 # Used to force good behavior when calling helper functions
91 self.recalculate_locks = {}
94 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
95 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
96 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
98 self.dry_run_result = None
99 # support for generic debug attribute
100 if (not hasattr(self.op, "debug_level") or
101 not isinstance(self.op.debug_level, int)):
102 self.op.debug_level = 0
107 for attr_name in self._OP_REQP:
108 attr_val = getattr(op, attr_name, None)
110 raise errors.OpPrereqError("Required parameter '%s' missing" %
111 attr_name, errors.ECODE_INVAL)
113 self.CheckArguments()
116 """Returns the SshRunner object
120 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
123 ssh = property(fget=__GetSSH)
125 def CheckArguments(self):
126 """Check syntactic validity for the opcode arguments.
128 This method is for doing a simple syntactic check and ensuring the
129 validity of opcode parameters, without any cluster-related
130 checks. While the same can be accomplished in ExpandNames and/or
131 CheckPrereq, doing these separately is better because:
133 - ExpandNames is left purely as a lock-related function
134 - CheckPrereq is run after we have acquired locks (and possible
137 The function is allowed to change the self.op attribute so that
138 later methods no longer need to worry about missing parameters.
143 def ExpandNames(self):
144 """Expand names for this LU.
146 This method is called before starting to execute the opcode, and it should
147 update all the parameters of the opcode to their canonical form (e.g. a
148 short node name must be fully expanded after this method has successfully
149 completed). This way locking, hooks, logging, etc. can work correctly.
151 LUs which implement this method must also populate the self.needed_locks
152 member, as a dict with lock levels as keys, and a list of needed lock names
155 - use an empty dict if you don't need any lock
156 - if you don't need any lock at a particular level omit that level
157 - don't put anything for the BGL level
158 - if you want all locks at a level use locking.ALL_SET as a value
160 If you need to share locks (rather than acquire them exclusively) at one
161 level you can modify self.share_locks, setting a true value (usually 1) for
162 that level. By default locks are not shared.
164 This function can also define a list of tasklets, which then will be
165 executed in order instead of the usual LU-level CheckPrereq and Exec
166 functions, if those are not defined by the LU.
170 # Acquire all nodes and one instance
171 self.needed_locks = {
172 locking.LEVEL_NODE: locking.ALL_SET,
173 locking.LEVEL_INSTANCE: ['instance1.example.tld'],
175 # Acquire just two nodes
176 self.needed_locks = {
177 locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
180 self.needed_locks = {} # No, you can't leave it to the default value None
183 # The implementation of this method is mandatory only if the new LU is
184 # concurrent, so that old LUs don't need to be changed all at the same time.
187 self.needed_locks = {} # Exclusive LUs don't need locks.
189 raise NotImplementedError
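# Illustrative sketch (not from the original file): an LU that only reads
# node data could acquire all node locks in shared mode by combining
# needed_locks with share_locks, e.g.:
#
#   def ExpandNames(self):
#     self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
#     self.share_locks[locking.LEVEL_NODE] = 1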
191 def DeclareLocks(self, level):
192 """Declare LU locking needs for a level
194 While most LUs can just declare their locking needs at ExpandNames time,
195 sometimes there's the need to calculate some locks after having acquired
196 the ones before. This function is called just before acquiring locks at a
197 particular level, but after acquiring the ones at lower levels, and permits
198 such calculations. It can be used to modify self.needed_locks, and by
199 default it does nothing.
201 This function is only called if you have something already set in
202 self.needed_locks for the level.
204 @param level: Locking level which is going to be locked
205 @type level: member of ganeti.locking.LEVELS
209 def CheckPrereq(self):
210 """Check prerequisites for this LU.
212 This method should check that the prerequisites for the execution
213 of this LU are fulfilled. It can do internode communication, but
214 it should be idempotent - no cluster or system changes are allowed.
217 The method should raise errors.OpPrereqError in case something is
218 not fulfilled. Its return value is ignored.
220 This method should also update all the parameters of the opcode to
221 their canonical form if it hasn't been done by ExpandNames before.
224 if self.tasklets is not None:
225 for (idx, tl) in enumerate(self.tasklets):
226 logging.debug("Checking prerequisites for tasklet %s/%s",
227 idx + 1, len(self.tasklets))
230 raise NotImplementedError
232 def Exec(self, feedback_fn):
235 This method should implement the actual work. It should raise
236 errors.OpExecError for failures that are somewhat dealt with in
240 if self.tasklets is not None:
241 for (idx, tl) in enumerate(self.tasklets):
242 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
245 raise NotImplementedError
247 def BuildHooksEnv(self):
248 """Build hooks environment for this LU.
250 This method should return a three-element tuple consisting of: a dict
251 containing the environment that will be used for running the
252 specific hook for this LU, a list of node names on which the hook
253 should run before the execution, and a list of node names on which
254 the hook should run after the execution.
256 The keys of the dict must not be prefixed with 'GANETI_', as this will
257 be handled by the hooks runner. Also note that additional keys will be
258 added by the hooks runner. If the LU doesn't define any
259 environment, an empty dict (and not None) should be returned.
261 "No nodes" should be signalled with an empty list (and not None).
263 Note that if the HPATH for a LU class is None, this function will not be called.
267 raise NotImplementedError
269 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
270 """Notify the LU about the results of its hooks.
272 This method is called every time a hooks phase is executed, and notifies
273 the Logical Unit about the hooks' result. The LU can then use it to alter
274 its result based on the hooks. By default the method does nothing and the
275 previous result is passed back unchanged but any LU can define it if it
276 wants to use the local cluster hook-scripts somehow.
278 @param phase: one of L{constants.HOOKS_PHASE_POST} or
279 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
280 @param hook_results: the results of the multi-node hooks rpc call
281 @param feedback_fn: function used to send feedback back to the caller
282 @param lu_result: the previous Exec result this LU had, or None
284 @return: the new Exec result, based on the previous result
288 # API must be kept, thus we ignore the "unused argument" and "could
289 # be a function" warnings
290 # pylint: disable-msg=W0613,R0201
293 def _ExpandAndLockInstance(self):
294 """Helper function to expand and lock an instance.
296 Many LUs that work on an instance take its name in self.op.instance_name
297 and need to expand it and then declare the expanded name for locking. This
298 function does it, and then updates self.op.instance_name to the expanded
299 name. It also initializes needed_locks as a dict, if this hasn't been done before.
303 if self.needed_locks is None:
304 self.needed_locks = {}
306 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
307 "_ExpandAndLockInstance called with instance-level locks set"
308 self.op.instance_name = _ExpandInstanceName(self.cfg,
309 self.op.instance_name)
310 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
312 def _LockInstancesNodes(self, primary_only=False):
313 """Helper function to declare instances' nodes for locking.
315 This function should be called after locking one or more instances to lock
316 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
317 with all primary or secondary nodes for instances already locked and
318 present in self.needed_locks[locking.LEVEL_INSTANCE].
320 It should be called from DeclareLocks, and for safety only works if
321 self.recalculate_locks[locking.LEVEL_NODE] is set.
323 In the future it may grow parameters to just lock some instances' nodes, or
324 to just lock primary or secondary nodes, if needed.
326 It should be called from DeclareLocks in a way similar to::
328 if level == locking.LEVEL_NODE:
329 self._LockInstancesNodes()
331 @type primary_only: boolean
332 @param primary_only: only lock primary nodes of locked instances
335 assert locking.LEVEL_NODE in self.recalculate_locks, \
336 "_LockInstancesNodes helper function called with no nodes to recalculate"
338 # TODO: check if we've really been called with the instance locks held
340 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
341 # future we might want to have different behaviors depending on the value
342 # of self.recalculate_locks[locking.LEVEL_NODE]
344 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
345 instance = self.context.cfg.GetInstanceInfo(instance_name)
346 wanted_nodes.append(instance.primary_node)
348 wanted_nodes.extend(instance.secondary_nodes)
350 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
351 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
352 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
353 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
355 del self.recalculate_locks[locking.LEVEL_NODE]
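# Illustrative sketch (not from the original file): the typical pairing of
# _ExpandAndLockInstance, recalculate_locks and _LockInstancesNodes in an
# instance-level LU would be:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()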
358 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
359 """Simple LU which runs no hooks.
361 This LU is intended as a parent for other LogicalUnits which will
362 run no hooks, in order to reduce duplicate code.
368 def BuildHooksEnv(self):
369 """Empty BuildHooksEnv for NoHooksLu.
371 This just raises an error.
374 assert False, "BuildHooksEnv called for NoHooksLUs"
378 """Tasklet base class.
380 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
381 they can mix legacy code with tasklets. Locking needs to be done in the LU,
382 tasklets know nothing about locks.
384 Subclasses must follow these rules:
385 - Implement CheckPrereq
389 def __init__(self, lu):
396 def CheckPrereq(self):
397 """Check prerequisites for this tasklets.
399 This method should check whether the prerequisites for the execution of
400 this tasklet are fulfilled. It can do internode communication, but it
401 should be idempotent - no cluster or system changes are allowed.
403 The method should raise errors.OpPrereqError in case something is not
404 fulfilled. Its return value is ignored.
406 This method should also update all parameters to their canonical form if it
407 hasn't been done before.
410 raise NotImplementedError
412 def Exec(self, feedback_fn):
413 """Execute the tasklet.
415 This method should implement the actual work. It should raise
416 errors.OpExecError for failures that are somewhat dealt with in code, or
420 raise NotImplementedError
423 def _GetWantedNodes(lu, nodes):
424 """Returns list of checked and expanded node names.
426 @type lu: L{LogicalUnit}
427 @param lu: the logical unit on whose behalf we execute
429 @param nodes: list of node names or None for all nodes
431 @return: the list of nodes, sorted
432 @raise errors.ProgrammerError: if the nodes parameter is wrong type
435 if not isinstance(nodes, list):
436 raise errors.OpPrereqError("Invalid argument type 'nodes'",
440 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
441 " non-empty list of nodes whose name is to be expanded.")
443 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
444 return utils.NiceSort(wanted)
447 def _GetWantedInstances(lu, instances):
448 """Returns list of checked and expanded instance names.
450 @type lu: L{LogicalUnit}
451 @param lu: the logical unit on whose behalf we execute
452 @type instances: list
453 @param instances: list of instance names or None for all instances
455 @return: the list of instances, sorted
456 @raise errors.OpPrereqError: if the instances parameter is wrong type
457 @raise errors.OpPrereqError: if any of the passed instances is not found
460 if not isinstance(instances, list):
461 raise errors.OpPrereqError("Invalid argument type 'instances'",
465 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
467 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
471 def _CheckOutputFields(static, dynamic, selected):
472 """Checks whether all selected fields are valid.
474 @type static: L{utils.FieldSet}
475 @param static: static fields set
476 @type dynamic: L{utils.FieldSet}
477 @param dynamic: dynamic fields set
484 delta = f.NonMatching(selected)
486 raise errors.OpPrereqError("Unknown output fields selected: %s"
487 % ",".join(delta), errors.ECODE_INVAL)
490 def _CheckBooleanOpField(op, name):
491 """Validates boolean opcode parameters.
493 This will ensure that an opcode parameter is either a boolean value,
494 or None (but that it always exists).
497 val = getattr(op, name, None)
498 if not (val is None or isinstance(val, bool)):
499 raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
500 (name, str(val)), errors.ECODE_INVAL)
501 setattr(op, name, val)
504 def _CheckGlobalHvParams(params):
505 """Validates that given hypervisor params are not global ones.
507 This will ensure that instances don't get customised versions of global parameters.
511 used_globals = constants.HVC_GLOBALS.intersection(params)
513 msg = ("The following hypervisor parameters are global and cannot"
514 " be customized at instance level, please modify them at"
515 " cluster level: %s" % utils.CommaJoin(used_globals))
516 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
519 def _CheckNodeOnline(lu, node):
520 """Ensure that a given node is online.
522 @param lu: the LU on behalf of which we make the check
523 @param node: the node to check
524 @raise errors.OpPrereqError: if the node is offline
527 if lu.cfg.GetNodeInfo(node).offline:
528 raise errors.OpPrereqError("Can't use offline node %s" % node,
532 def _CheckNodeNotDrained(lu, node):
533 """Ensure that a given node is not drained.
535 @param lu: the LU on behalf of which we make the check
536 @param node: the node to check
537 @raise errors.OpPrereqError: if the node is drained
540 if lu.cfg.GetNodeInfo(node).drained:
541 raise errors.OpPrereqError("Can't use drained node %s" % node,
545 def _CheckNodeHasOS(lu, node, os_name, force_variant):
546 """Ensure that a node supports a given OS.
548 @param lu: the LU on behalf of which we make the check
549 @param node: the node to check
550 @param os_name: the OS to query about
551 @param force_variant: whether to ignore variant errors
552 @raise errors.OpPrereqError: if the node is not supporting the OS
555 result = lu.rpc.call_os_get(node, os_name)
556 result.Raise("OS '%s' not in supported OS list for node %s" %
558 prereq=True, ecode=errors.ECODE_INVAL)
559 if not force_variant:
560 _CheckOSVariant(result.payload, os_name)
563 def _RequireFileStorage():
564 """Checks that file storage is enabled.
566 @raise errors.OpPrereqError: when file storage is disabled
569 if not constants.ENABLE_FILE_STORAGE:
570 raise errors.OpPrereqError("File storage disabled at configure time",
574 def _CheckDiskTemplate(template):
575 """Ensure a given disk template is valid.
578 if template not in constants.DISK_TEMPLATES:
579 msg = ("Invalid disk template name '%s', valid templates are: %s" %
580 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
581 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
582 if template == constants.DT_FILE:
583 _RequireFileStorage()
586 def _CheckStorageType(storage_type):
587 """Ensure a given storage type is valid.
590 if storage_type not in constants.VALID_STORAGE_TYPES:
591 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
593 if storage_type == constants.ST_FILE:
594 _RequireFileStorage()
598 def _CheckInstanceDown(lu, instance, reason):
599 """Ensure that an instance is not running."""
600 if instance.admin_up:
601 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
602 (instance.name, reason), errors.ECODE_STATE)
604 pnode = instance.primary_node
605 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
606 ins_l.Raise("Can't contact node %s for instance information" % pnode,
607 prereq=True, ecode=errors.ECODE_ENVIRON)
609 if instance.name in ins_l.payload:
610 raise errors.OpPrereqError("Instance %s is running, %s" %
611 (instance.name, reason), errors.ECODE_STATE)
614 def _ExpandItemName(fn, name, kind):
615 """Expand an item name.
617 @param fn: the function to use for expansion
618 @param name: requested item name
619 @param kind: text description ('Node' or 'Instance')
620 @return: the resolved (full) name
621 @raise errors.OpPrereqError: if the item is not found
625 if full_name is None:
626 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
631 def _ExpandNodeName(cfg, name):
632 """Wrapper over L{_ExpandItemName} for nodes."""
633 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
636 def _ExpandInstanceName(cfg, name):
637 """Wrapper over L{_ExpandItemName} for instance."""
638 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
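# Illustrative example (the hostnames are made up): these wrappers turn a
# short name into the canonical form stored in the configuration, raising
# errors.OpPrereqError if the item is unknown:
#
#   _ExpandNodeName(self.cfg, "node1")
#   # -> "node1.example.com", or OpPrereqError("Node 'node1' not known")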
641 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
642 memory, vcpus, nics, disk_template, disks,
643 bep, hvp, hypervisor_name):
644 """Builds instance related env variables for hooks
646 This builds the hook environment from individual variables.
649 @param name: the name of the instance
650 @type primary_node: string
651 @param primary_node: the name of the instance's primary node
652 @type secondary_nodes: list
653 @param secondary_nodes: list of secondary nodes as strings
654 @type os_type: string
655 @param os_type: the name of the instance's OS
656 @type status: boolean
657 @param status: the should_run status of the instance
659 @param memory: the memory size of the instance
661 @param vcpus: the count of VCPUs the instance has
663 @param nics: list of tuples (ip, mac, mode, link) representing
664 the NICs the instance has
665 @type disk_template: string
666 @param disk_template: the disk template of the instance
668 @param disks: the list of (size, mode) pairs
670 @param bep: the backend parameters for the instance
672 @param hvp: the hypervisor parameters for the instance
673 @type hypervisor_name: string
674 @param hypervisor_name: the hypervisor for the instance
676 @return: the hook environment for this instance
685 "INSTANCE_NAME": name,
686 "INSTANCE_PRIMARY": primary_node,
687 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
688 "INSTANCE_OS_TYPE": os_type,
689 "INSTANCE_STATUS": str_status,
690 "INSTANCE_MEMORY": memory,
691 "INSTANCE_VCPUS": vcpus,
692 "INSTANCE_DISK_TEMPLATE": disk_template,
693 "INSTANCE_HYPERVISOR": hypervisor_name,
697 nic_count = len(nics)
698 for idx, (ip, mac, mode, link) in enumerate(nics):
701 env["INSTANCE_NIC%d_IP" % idx] = ip
702 env["INSTANCE_NIC%d_MAC" % idx] = mac
703 env["INSTANCE_NIC%d_MODE" % idx] = mode
704 env["INSTANCE_NIC%d_LINK" % idx] = link
705 if mode == constants.NIC_MODE_BRIDGED:
706 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
710 env["INSTANCE_NIC_COUNT"] = nic_count
713 disk_count = len(disks)
714 for idx, (size, mode) in enumerate(disks):
715 env["INSTANCE_DISK%d_SIZE" % idx] = size
716 env["INSTANCE_DISK%d_MODE" % idx] = mode
720 env["INSTANCE_DISK_COUNT"] = disk_count
722 for source, kind in [(bep, "BE"), (hvp, "HV")]:
723 for key, value in source.items():
724 env["INSTANCE_%s_%s" % (kind, key)] = value
729 def _NICListToTuple(lu, nics):
730 """Build a list of nic information tuples.
732 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
733 value in LUQueryInstanceData.
735 @type lu: L{LogicalUnit}
736 @param lu: the logical unit on whose behalf we execute
737 @type nics: list of L{objects.NIC}
738 @param nics: list of nics to convert to hooks tuples
742 c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
746 filled_params = objects.FillDict(c_nicparams, nic.nicparams)
747 mode = filled_params[constants.NIC_MODE]
748 link = filled_params[constants.NIC_LINK]
749 hooks_nics.append((ip, mac, mode, link))
753 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
754 """Builds instance related env variables for hooks from an object.
756 @type lu: L{LogicalUnit}
757 @param lu: the logical unit on whose behalf we execute
758 @type instance: L{objects.Instance}
759 @param instance: the instance for which we should build the
762 @param override: dictionary with key/values that will override
765 @return: the hook environment dictionary
768 cluster = lu.cfg.GetClusterInfo()
769 bep = cluster.FillBE(instance)
770 hvp = cluster.FillHV(instance)
772 'name': instance.name,
773 'primary_node': instance.primary_node,
774 'secondary_nodes': instance.secondary_nodes,
775 'os_type': instance.os,
776 'status': instance.admin_up,
777 'memory': bep[constants.BE_MEMORY],
778 'vcpus': bep[constants.BE_VCPUS],
779 'nics': _NICListToTuple(lu, instance.nics),
780 'disk_template': instance.disk_template,
781 'disks': [(disk.size, disk.mode) for disk in instance.disks],
784 'hypervisor_name': instance.hypervisor,
787 args.update(override)
788 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
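# Illustrative sketch (assumption, new_name is a hypothetical variable): a
# caller that wants to describe a state the instance is about to be in,
# rather than its current one, can use the override argument, e.g.:
#
#   env = _BuildInstanceHookEnvByObject(self, self.instance,
#                                       override={"name": new_name})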
791 def _AdjustCandidatePool(lu, exceptions):
792 """Adjust the candidate pool after node operations.
795 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
797 lu.LogInfo("Promoted nodes to master candidate role: %s",
798 utils.CommaJoin(node.name for node in mod_list))
799 for name in mod_list:
800 lu.context.ReaddNode(name)
801 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
803 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
807 def _DecideSelfPromotion(lu, exceptions=None):
808 """Decide whether I should promote myself as a master candidate.
811 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
812 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
813 # the new node will increase mc_max by one, so:
814 mc_should = min(mc_should + 1, cp_size)
815 return mc_now < mc_should
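# Worked example (numbers made up): with candidate_pool_size = 10, three
# current master candidates and GetMasterCandidateStats reporting that three
# are needed, the new node raises mc_should to min(3 + 1, 10) = 4, so
# 3 < 4 holds and the node should promote itself.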
818 def _CheckNicsBridgesExist(lu, target_nics, target_node,
819 profile=constants.PP_DEFAULT):
820 """Check that the brigdes needed by a list of nics exist.
823 c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
824 paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
825 for nic in target_nics]
826 brlist = [params[constants.NIC_LINK] for params in paramslist
827 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
829 result = lu.rpc.call_bridges_exist(target_node, brlist)
830 result.Raise("Error checking bridges on destination node '%s'" %
831 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
834 def _CheckInstanceBridgesExist(lu, instance, node=None):
835 """Check that the brigdes needed by an instance exist.
839 node = instance.primary_node
840 _CheckNicsBridgesExist(lu, instance.nics, node)
843 def _CheckOSVariant(os_obj, name):
844 """Check whether an OS name conforms to the os variants specification.
846 @type os_obj: L{objects.OS}
847 @param os_obj: OS object to check
849 @param name: OS name passed by the user, to check for validity
852 if not os_obj.supported_variants:
855 variant = name.split("+", 1)[1]
857 raise errors.OpPrereqError("OS name must include a variant",
860 if variant not in os_obj.supported_variants:
861 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
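# Illustrative example (OS names made up): for an OS that declares supported
# variants, users must pass "name+variant", e.g. "debootstrap+default";
# "debootstrap" alone is rejected with "OS name must include a variant", and
# "debootstrap+nosuch" with "Unsupported OS variant".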
864 def _GetNodeInstancesInner(cfg, fn):
865 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
868 def _GetNodeInstances(cfg, node_name):
869 """Returns a list of all primary and secondary instances on a node.
873 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
876 def _GetNodePrimaryInstances(cfg, node_name):
877 """Returns primary instances on a node.
880 return _GetNodeInstancesInner(cfg,
881 lambda inst: node_name == inst.primary_node)
884 def _GetNodeSecondaryInstances(cfg, node_name):
885 """Returns secondary instances on a node.
888 return _GetNodeInstancesInner(cfg,
889 lambda inst: node_name in inst.secondary_nodes)
892 def _GetStorageTypeArgs(cfg, storage_type):
893 """Returns the arguments for a storage type.
896 # Special case for file storage
897 if storage_type == constants.ST_FILE:
898 # storage.FileStorage wants a list of storage directories
899 return [[cfg.GetFileStorageDir()]]
904 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
907 for dev in instance.disks:
908 cfg.SetDiskID(dev, node_name)
910 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
911 result.Raise("Failed to get disk status from node %s" % node_name,
912 prereq=prereq, ecode=errors.ECODE_ENVIRON)
914 for idx, bdev_status in enumerate(result.payload):
915 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
921 def _FormatTimestamp(secs):
922 """Formats a Unix timestamp with the local timezone.
925 return time.strftime("%F %T %Z", time.gmtime(secs))
928 class LUPostInitCluster(LogicalUnit):
929 """Logical unit for running hooks after cluster initialization.
932 HPATH = "cluster-init"
933 HTYPE = constants.HTYPE_CLUSTER
936 def BuildHooksEnv(self):
940 env = {"OP_TARGET": self.cfg.GetClusterName()}
941 mn = self.cfg.GetMasterNode()
944 def CheckPrereq(self):
945 """No prerequisites to check.
950 def Exec(self, feedback_fn):
957 class LUDestroyCluster(LogicalUnit):
958 """Logical unit for destroying the cluster.
961 HPATH = "cluster-destroy"
962 HTYPE = constants.HTYPE_CLUSTER
965 def BuildHooksEnv(self):
969 env = {"OP_TARGET": self.cfg.GetClusterName()}
972 def CheckPrereq(self):
973 """Check prerequisites.
975 This checks whether the cluster is empty.
977 Any errors are signaled by raising errors.OpPrereqError.
980 master = self.cfg.GetMasterNode()
982 nodelist = self.cfg.GetNodeList()
983 if len(nodelist) != 1 or nodelist[0] != master:
984 raise errors.OpPrereqError("There are still %d node(s) in"
985 " this cluster." % (len(nodelist) - 1),
987 instancelist = self.cfg.GetInstanceList()
989 raise errors.OpPrereqError("There are still %d instance(s) in"
990 " this cluster." % len(instancelist),
993 def Exec(self, feedback_fn):
994 """Destroys the cluster.
997 master = self.cfg.GetMasterNode()
998 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1000 # Run post hooks on master node before it's removed
1001 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1003 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1005 # pylint: disable-msg=W0702
1006 self.LogWarning("Errors occurred running hooks on %s" % master)
1008 result = self.rpc.call_node_stop_master(master, False)
1009 result.Raise("Could not disable the master role")
1011 if modify_ssh_setup:
1012 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1013 utils.CreateBackup(priv_key)
1014 utils.CreateBackup(pub_key)
1019 def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
1020 warn_days=constants.SSL_CERT_EXPIRATION_WARN,
1021 error_days=constants.SSL_CERT_EXPIRATION_ERROR):
1022 """Verifies certificate details for LUVerifyCluster.
1026 msg = "Certificate %s is expired" % filename
1028 if not_before is not None and not_after is not None:
1029 msg += (" (valid from %s to %s)" %
1030 (_FormatTimestamp(not_before),
1031 _FormatTimestamp(not_after)))
1032 elif not_before is not None:
1033 msg += " (valid from %s)" % _FormatTimestamp(not_before)
1034 elif not_after is not None:
1035 msg += " (valid until %s)" % _FormatTimestamp(not_after)
1037 return (LUVerifyCluster.ETYPE_ERROR, msg)
1039 elif not_before is not None and not_before > now:
1040 return (LUVerifyCluster.ETYPE_WARNING,
1041 "Certificate %s not yet valid (valid from %s)" %
1042 (filename, _FormatTimestamp(not_before)))
1044 elif not_after is not None:
1045 remaining_days = int((not_after - now) / (24 * 3600))
1047 msg = ("Certificate %s expires in %d days" % (filename, remaining_days))
1049 if remaining_days <= error_days:
1050 return (LUVerifyCluster.ETYPE_ERROR, msg)
1052 if remaining_days <= warn_days:
1053 return (LUVerifyCluster.ETYPE_WARNING, msg)
1058 def _VerifyCertificate(filename):
1059 """Verifies a certificate for LUVerifyCluster.
1061 @type filename: string
1062 @param filename: Path to PEM file
1066 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1067 utils.ReadFile(filename))
1068 except Exception, err: # pylint: disable-msg=W0703
1069 return (LUVerifyCluster.ETYPE_ERROR,
1070 "Failed to load X509 certificate %s: %s" % (filename, err))
1072 # Depending on the pyOpenSSL version, this can just return (None, None)
1073 (not_before, not_after) = utils.GetX509CertValidity(cert)
1075 return _VerifyCertificateInner(filename, cert.has_expired(),
1076 not_before, not_after, time.time())
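# Summary of the verification above: a certificate expiring within
# constants.SSL_CERT_EXPIRATION_ERROR days yields an ETYPE_ERROR, one
# expiring within SSL_CERT_EXPIRATION_WARN days an ETYPE_WARNING; an already
# expired certificate is reported as an error together with its validity
# window, and a not-yet-valid one as a warning.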
1079 class LUVerifyCluster(LogicalUnit):
1080 """Verifies the cluster status.
1083 HPATH = "cluster-verify"
1084 HTYPE = constants.HTYPE_CLUSTER
1085 _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1088 TCLUSTER = "cluster"
1090 TINSTANCE = "instance"
1092 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1093 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1094 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1095 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1096 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1097 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1099 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1100 ENODEDRBD = (TNODE, "ENODEDRBD")
1101 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1102 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1103 ENODEHV = (TNODE, "ENODEHV")
1104 ENODELVM = (TNODE, "ENODELVM")
1105 ENODEN1 = (TNODE, "ENODEN1")
1106 ENODENET = (TNODE, "ENODENET")
1107 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1108 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1109 ENODERPC = (TNODE, "ENODERPC")
1110 ENODESSH = (TNODE, "ENODESSH")
1111 ENODEVERSION = (TNODE, "ENODEVERSION")
1112 ENODESETUP = (TNODE, "ENODESETUP")
1113 ENODETIME = (TNODE, "ENODETIME")
1115 ETYPE_FIELD = "code"
1116 ETYPE_ERROR = "ERROR"
1117 ETYPE_WARNING = "WARNING"
1119 class NodeImage(object):
1120 """A class representing the logical and physical status of a node.
1122 @ivar volumes: a structure as returned from
1123 L{ganeti.backend.GetVolumeList} (runtime)
1124 @ivar instances: a list of running instances (runtime)
1125 @ivar pinst: list of configured primary instances (config)
1126 @ivar sinst: list of configured secondary instances (config)
1127 @ivar sbp: dictionary of {primary-node: list of instances} for all
1128 instances for which this node is secondary (config)
1129 @ivar mfree: free memory, as reported by hypervisor (runtime)
1130 @ivar dfree: free disk, as reported by the node (runtime)
1131 @ivar offline: the offline status (config)
1132 @type rpc_fail: boolean
1133 @ivar rpc_fail: whether the RPC verify call failed (overall,
1134 not whether the individual keys were correct) (runtime)
1135 @type lvm_fail: boolean
1136 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1137 @type hyp_fail: boolean
1138 @ivar hyp_fail: whether the RPC call didn't return the instance list
1139 @type ghost: boolean
1140 @ivar ghost: whether this is a known node or not (config)
1143 def __init__(self, offline=False):
1151 self.offline = offline
1152 self.rpc_fail = False
1153 self.lvm_fail = False
1154 self.hyp_fail = False
1157 def ExpandNames(self):
1158 self.needed_locks = {
1159 locking.LEVEL_NODE: locking.ALL_SET,
1160 locking.LEVEL_INSTANCE: locking.ALL_SET,
1162 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1164 def _Error(self, ecode, item, msg, *args, **kwargs):
1165 """Format an error message.
1167 Based on the opcode's error_codes parameter, either format a
1168 parseable error code, or a simpler error string.
1170 This must be called only from Exec and functions called from Exec.
1173 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1175 # first complete the msg
1178 # then format the whole message
1179 if self.op.error_codes:
1180 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1186 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1187 # and finally report it via the feedback_fn
1188 self._feedback_fn(" - %s" % msg)
1190 def _ErrorIf(self, cond, *args, **kwargs):
1191 """Log an error message if the passed condition is True.
1194 cond = bool(cond) or self.op.debug_simulate_errors
1196 self._Error(*args, **kwargs)
1197 # do not mark the operation as failed for WARN cases only
1198 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1199 self.bad = self.bad or cond
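# Illustrative example (item name made up): with op.error_codes enabled the
# same problem is reported in the machine-parseable form
#   ERROR:ENODELVM:node:node1.example.com:unable to check volume groups
# while without it the human-readable form is used:
#   ERROR: node node1.example.com: unable to check volume groups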
1201 def _VerifyNode(self, ninfo, nresult):
1202 """Run multiple tests against a node.
1206 - compares ganeti version
1207 - checks vg existence and size > 20G
1208 - checks config file checksum
1209 - checks ssh to other nodes
1211 @type ninfo: L{objects.Node}
1212 @param ninfo: the node to check
1213 @param nresult: the results from the node
1215 @return: whether overall this call was successful (and we can expect
1216 reasonable values in the response)
1220 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1222 # main result, nresult should be a non-empty dict
1223 test = not nresult or not isinstance(nresult, dict)
1224 _ErrorIf(test, self.ENODERPC, node,
1225 "unable to verify node: no data returned")
1229 # compares ganeti version
1230 local_version = constants.PROTOCOL_VERSION
1231 remote_version = nresult.get("version", None)
1232 test = not (remote_version and
1233 isinstance(remote_version, (list, tuple)) and
1234 len(remote_version) == 2)
1235 _ErrorIf(test, self.ENODERPC, node,
1236 "connection to node returned invalid data")
1240 test = local_version != remote_version[0]
1241 _ErrorIf(test, self.ENODEVERSION, node,
1242 "incompatible protocol versions: master %s,"
1243 " node %s", local_version, remote_version[0])
1247 # node seems compatible, we can actually try to look into its results
1249 # full package version
1250 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1251 self.ENODEVERSION, node,
1252 "software version mismatch: master %s, node %s",
1253 constants.RELEASE_VERSION, remote_version[1],
1254 code=self.ETYPE_WARNING)
1256 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1257 if isinstance(hyp_result, dict):
1258 for hv_name, hv_result in hyp_result.iteritems():
1259 test = hv_result is not None
1260 _ErrorIf(test, self.ENODEHV, node,
1261 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1264 test = nresult.get(constants.NV_NODESETUP,
1265 ["Missing NODESETUP results"])
1266 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1271 def _VerifyNodeTime(self, ninfo, nresult,
1272 nvinfo_starttime, nvinfo_endtime):
1273 """Check the node time.
1275 @type ninfo: L{objects.Node}
1276 @param ninfo: the node to check
1277 @param nresult: the remote results for the node
1278 @param nvinfo_starttime: the start time of the RPC call
1279 @param nvinfo_endtime: the end time of the RPC call
1283 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1285 ntime = nresult.get(constants.NV_TIME, None)
1287 ntime_merged = utils.MergeTime(ntime)
1288 except (ValueError, TypeError):
1289 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1292 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1293 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1294 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1295 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1299 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1300 "Node time diverges by at least %s from master node time",
1303 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1304 """Check the node time.
1306 @type ninfo: L{objects.Node}
1307 @param ninfo: the node to check
1308 @param nresult: the remote results for the node
1309 @param vg_name: the configured VG name
1316 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1318 # checks vg existence and size > 20G
1319 vglist = nresult.get(constants.NV_VGLIST, None)
1321 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1323 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1324 constants.MIN_VG_SIZE)
1325 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1328 pvlist = nresult.get(constants.NV_PVLIST, None)
1329 test = pvlist is None
1330 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1332 # check that ':' is not present in PV names, since it's a
1333 # special character for lvcreate (denotes the range of PEs to
1335 for _, pvname, owner_vg in pvlist:
1336 test = ":" in pvname
1337 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1338 " '%s' of VG '%s'", pvname, owner_vg)
1340 def _VerifyNodeNetwork(self, ninfo, nresult):
1341 """Check the node time.
1343 @type ninfo: L{objects.Node}
1344 @param ninfo: the node to check
1345 @param nresult: the remote results for the node
1349 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1351 test = constants.NV_NODELIST not in nresult
1352 _ErrorIf(test, self.ENODESSH, node,
1353 "node hasn't returned node ssh connectivity data")
1355 if nresult[constants.NV_NODELIST]:
1356 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1357 _ErrorIf(True, self.ENODESSH, node,
1358 "ssh communication with node '%s': %s", a_node, a_msg)
1360 test = constants.NV_NODENETTEST not in nresult
1361 _ErrorIf(test, self.ENODENET, node,
1362 "node hasn't returned node tcp connectivity data")
1364 if nresult[constants.NV_NODENETTEST]:
1365 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1367 _ErrorIf(True, self.ENODENET, node,
1368 "tcp communication with node '%s': %s",
1369 anode, nresult[constants.NV_NODENETTEST][anode])
1371 def _VerifyInstance(self, instance, instanceconfig, node_image):
1372 """Verify an instance.
1374 This function checks to see if the required block devices are
1375 available on the instance's node.
1378 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1379 node_current = instanceconfig.primary_node
1381 node_vol_should = {}
1382 instanceconfig.MapLVsByNode(node_vol_should)
1384 for node in node_vol_should:
1385 n_img = node_image[node]
1386 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1387 # ignore missing volumes on offline or broken nodes
1389 for volume in node_vol_should[node]:
1390 test = volume not in n_img.volumes
1391 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1392 "volume %s missing on node %s", volume, node)
1394 if instanceconfig.admin_up:
1395 pri_img = node_image[node_current]
1396 test = instance not in pri_img.instances and not pri_img.offline
1397 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1398 "instance not running on its primary node %s",
1401 for node, n_img in node_image.items():
1402 if node != node_current:
1403 test = instance in n_img.instances
1404 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1405 "instance should not run on node %s", node)
1407 def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1408 """Verify if there are any unknown volumes in the cluster.
1410 The .os, .swap and backup volumes are ignored. All other volumes are
1411 reported as unknown.
1414 for node, n_img in node_image.items():
1415 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1416 # skip non-healthy nodes
1418 for volume in n_img.volumes:
1419 test = (node not in node_vol_should or
1420 volume not in node_vol_should[node])
1421 self._ErrorIf(test, self.ENODEORPHANLV, node,
1422 "volume %s is unknown", volume)
1424 def _VerifyOrphanInstances(self, instancelist, node_image):
1425 """Verify the list of running instances.
1427 This checks what instances are running but unknown to the cluster.
1430 for node, n_img in node_image.items():
1431 for o_inst in n_img.instances:
1432 test = o_inst not in instancelist
1433 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1434 "instance %s on node %s should not exist", o_inst, node)
1436 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1437 """Verify N+1 Memory Resilience.
1439 Check that if one single node dies we can still start all the
1440 instances it was primary for.
1443 for node, n_img in node_image.items():
1444 # This code checks that every node which is now listed as
1445 # secondary has enough memory to host all instances it is
1446 # supposed to, should a single other node in the cluster fail.
1447 # FIXME: not ready for failover to an arbitrary node
1448 # FIXME: does not support file-backed instances
1449 # WARNING: we currently take into account down instances as well
1450 # as up ones, considering that even if they're down someone
1451 # might want to start them even in the event of a node failure.
1452 for prinode, instances in n_img.sbp.items():
1454 for instance in instances:
1455 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1456 if bep[constants.BE_AUTO_BALANCE]:
1457 needed_mem += bep[constants.BE_MEMORY]
1458 test = n_img.mfree < needed_mem
1459 self._ErrorIf(test, self.ENODEN1, node,
1460 "not enough memory on to accommodate"
1461 " failovers should peer node %s fail", prinode)
1463 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1465 """Verifies and computes the node required file checksums.
1467 @type ninfo: L{objects.Node}
1468 @param ninfo: the node to check
1469 @param nresult: the remote results for the node
1470 @param file_list: required list of files
1471 @param local_cksum: dictionary of local files and their checksums
1472 @param master_files: list of files that only masters should have
1476 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1478 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1479 test = not isinstance(remote_cksum, dict)
1480 _ErrorIf(test, self.ENODEFILECHECK, node,
1481 "node hasn't returned file checksum data")
1485 for file_name in file_list:
1486 node_is_mc = ninfo.master_candidate
1487 must_have = (file_name not in master_files) or node_is_mc
1489 test1 = file_name not in remote_cksum
1491 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1493 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1494 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1495 "file '%s' missing", file_name)
1496 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1497 "file '%s' has wrong checksum", file_name)
1498 # not candidate and this is not a must-have file
1499 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1500 "file '%s' should not exist on non master"
1501 " candidates (and the file is outdated)", file_name)
1502 # all good, except non-master/non-must have combination
1503 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1504 "file '%s' should not exist"
1505 " on non master candidates", file_name)
1507 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1508 """Verifies and the node DRBD status.
1510 @type ninfo: L{objects.Node}
1511 @param ninfo: the node to check
1512 @param nresult: the remote results for the node
1513 @param instanceinfo: the dict of instances
1514 @param drbd_map: the DRBD map as returned by
1515 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1519 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1521 # compute the DRBD minors
1523 for minor, instance in drbd_map[node].items():
1524 test = instance not in instanceinfo
1525 _ErrorIf(test, self.ECLUSTERCFG, None,
1526 "ghost instance '%s' in temporary DRBD map", instance)
1527 # ghost instance should not be running, but otherwise we
1528 # don't give double warnings (both ghost instance and
1529 # unallocated minor in use)
1531 node_drbd[minor] = (instance, False)
1533 instance = instanceinfo[instance]
1534 node_drbd[minor] = (instance.name, instance.admin_up)
1536 # and now check them
1537 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1538 test = not isinstance(used_minors, (tuple, list))
1539 _ErrorIf(test, self.ENODEDRBD, node,
1540 "cannot parse drbd status file: %s", str(used_minors))
1542 # we cannot check drbd status
1545 for minor, (iname, must_exist) in node_drbd.items():
1546 test = minor not in used_minors and must_exist
1547 _ErrorIf(test, self.ENODEDRBD, node,
1548 "drbd minor %d of instance %s is not active", minor, iname)
1549 for minor in used_minors:
1550 test = minor not in node_drbd
1551 _ErrorIf(test, self.ENODEDRBD, node,
1552 "unallocated drbd minor %d is in use", minor)
1554 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1555 """Verifies and updates the node volume data.
1557 This function will update a L{NodeImage}'s internal structures
1558 with data from the remote call.
1560 @type ninfo: L{objects.Node}
1561 @param ninfo: the node to check
1562 @param nresult: the remote results for the node
1563 @param nimg: the node image object
1564 @param vg_name: the configured VG name
1568 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1570 nimg.lvm_fail = True
1571 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1574 elif isinstance(lvdata, basestring):
1575 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1576 utils.SafeEncode(lvdata))
1577 elif not isinstance(lvdata, dict):
1578 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1580 nimg.volumes = lvdata
1581 nimg.lvm_fail = False
1583 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1584 """Verifies and updates the node instance list.
1586 If the listing was successful, then updates this node's instance
1587 list. Otherwise, it marks the RPC call as failed for the instance
1590 @type ninfo: L{objects.Node}
1591 @param ninfo: the node to check
1592 @param nresult: the remote results for the node
1593 @param nimg: the node image object
1596 idata = nresult.get(constants.NV_INSTANCELIST, None)
1597 test = not isinstance(idata, list)
1598 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1599 " (instancelist): %s", utils.SafeEncode(str(idata)))
1601 nimg.hyp_fail = True
1603 nimg.instances = idata
1605 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1606 """Verifies and computes a node information map
1608 @type ninfo: L{objects.Node}
1609 @param ninfo: the node to check
1610 @param nresult: the remote results for the node
1611 @param nimg: the node image object
1612 @param vg_name: the configured VG name
1616 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1618 # try to read free memory (from the hypervisor)
1619 hv_info = nresult.get(constants.NV_HVINFO, None)
1620 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1621 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1624 nimg.mfree = int(hv_info["memory_free"])
1625 except (ValueError, TypeError):
1626 _ErrorIf(True, self.ENODERPC, node,
1627 "node returned invalid nodeinfo, check hypervisor")
1629 # FIXME: devise a free space model for file based instances as well
1630 if vg_name is not None:
1631 test = (constants.NV_VGLIST not in nresult or
1632 vg_name not in nresult[constants.NV_VGLIST])
1633 _ErrorIf(test, self.ENODELVM, node,
1634 "node didn't return data for the volume group '%s'"
1635 " - it is either missing or broken", vg_name)
1638 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1639 except (ValueError, TypeError):
1640 _ErrorIf(True, self.ENODERPC, node,
1641 "node returned invalid LVM info, check LVM status")
1643 def CheckPrereq(self):
1644 """Check prerequisites.
1646 Transform the list of checks we're going to skip into a set and check that
1647 all its members are valid.
1650 self.skip_set = frozenset(self.op.skip_checks)
1651 if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1652 raise errors.OpPrereqError("Invalid checks to be skipped specified",
1655 def BuildHooksEnv(self):
1658 Cluster-Verify hooks are run only in the post phase; their failure is
1659 logged in the verify output and makes the verification fail.
1662 all_nodes = self.cfg.GetNodeList()
1664 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1666 for node in self.cfg.GetAllNodesInfo().values():
1667 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1669 return env, [], all_nodes
1671 def Exec(self, feedback_fn):
1672 """Verify integrity of cluster, performing various test on nodes.
1676 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1677 verbose = self.op.verbose
1678 self._feedback_fn = feedback_fn
1679 feedback_fn("* Verifying global settings")
1680 for msg in self.cfg.VerifyConfig():
1681 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1683 # Check the cluster certificates
1684 for cert_filename in constants.ALL_CERT_FILES:
1685 (errcode, msg) = _VerifyCertificate(cert_filename)
1686 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1688 vg_name = self.cfg.GetVGName()
1689 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1690 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1691 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1692 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1693 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1694 for iname in instancelist)
1695 i_non_redundant = [] # Non redundant instances
1696 i_non_a_balanced = [] # Non auto-balanced instances
1697 n_offline = 0 # Count of offline nodes
1698 n_drained = 0 # Count of nodes being drained
1699 node_vol_should = {}
1701 # FIXME: verify OS list
1702 # do local checksums
1703 master_files = [constants.CLUSTER_CONF_FILE]
1705 file_names = ssconf.SimpleStore().GetFileList()
1706 file_names.extend(constants.ALL_CERT_FILES)
1707 file_names.extend(master_files)
1709 local_checksums = utils.FingerprintFiles(file_names)
1711 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1712 node_verify_param = {
1713 constants.NV_FILELIST: file_names,
1714 constants.NV_NODELIST: [node.name for node in nodeinfo
1715 if not node.offline],
1716 constants.NV_HYPERVISOR: hypervisors,
1717 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1718 node.secondary_ip) for node in nodeinfo
1719 if not node.offline],
1720 constants.NV_INSTANCELIST: hypervisors,
1721 constants.NV_VERSION: None,
1722 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1723 constants.NV_NODESETUP: None,
1724 constants.NV_TIME: None,
1727 if vg_name is not None:
1728 node_verify_param[constants.NV_VGLIST] = None
1729 node_verify_param[constants.NV_LVLIST] = vg_name
1730 node_verify_param[constants.NV_PVLIST] = [vg_name]
1731 node_verify_param[constants.NV_DRBDLIST] = None
1733 # Build our expected cluster state
1734 node_image = dict((node.name, self.NodeImage(offline=node.offline))
1735 for node in nodeinfo)
1737 for instance in instancelist:
1738 inst_config = instanceinfo[instance]
1740 for nname in inst_config.all_nodes:
1741 if nname not in node_image:
1743 gnode = self.NodeImage()
1745 node_image[nname] = gnode
1747 inst_config.MapLVsByNode(node_vol_should)
1749 pnode = inst_config.primary_node
1750 node_image[pnode].pinst.append(instance)
1752 for snode in inst_config.secondary_nodes:
1753 nimg = node_image[snode]
1754 nimg.sinst.append(instance)
1755 if pnode not in nimg.sbp:
1756 nimg.sbp[pnode] = []
1757 nimg.sbp[pnode].append(instance)
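# Illustrative example (names made up): after this loop, for a DRBD instance
# "inst1" with primary "nodeA" and secondary "nodeB",
# node_image["nodeB"].sbp would contain {"nodeA": ["inst1"]}, i.e. the
# instances nodeB would have to host should nodeA fail.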
1759 # At this point, we have the in-memory data structures complete,
1760 # except for the runtime information, which we'll gather next
1762 # Due to the way our RPC system works, exact response times cannot be
1763 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1764 # time before and after executing the request, we can at least have a time
1766 nvinfo_starttime = time.time()
1767 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1768 self.cfg.GetClusterName())
1769 nvinfo_endtime = time.time()
1771 cluster = self.cfg.GetClusterInfo()
1772 master_node = self.cfg.GetMasterNode()
1773 all_drbd_map = self.cfg.ComputeDRBDMap()
1775 feedback_fn("* Verifying node status")
1776 for node_i in nodeinfo:
1778 nimg = node_image[node]
1782 feedback_fn("* Skipping offline node %s" % (node,))
1786 if node == master_node:
1788 elif node_i.master_candidate:
1789 ntype = "master candidate"
1790 elif node_i.drained:
1796 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1798 msg = all_nvinfo[node].fail_msg
1799 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1801 nimg.rpc_fail = True
1804 nresult = all_nvinfo[node].payload
1806 nimg.call_ok = self._VerifyNode(node_i, nresult)
1807 self._VerifyNodeNetwork(node_i, nresult)
1808 self._VerifyNodeLVM(node_i, nresult, vg_name)
1809 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1811 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1812 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1814 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1815 self._UpdateNodeInstances(node_i, nresult, nimg)
1816 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1818 feedback_fn("* Verifying instance status")
1819 for instance in instancelist:
1821 feedback_fn("* Verifying instance %s" % instance)
1822 inst_config = instanceinfo[instance]
1823 self._VerifyInstance(instance, inst_config, node_image)
1824 inst_nodes_offline = []
1826 pnode = inst_config.primary_node
1827 pnode_img = node_image[pnode]
1828 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1829 self.ENODERPC, pnode, "instance %s, connection to"
1830 " primary node failed", instance)
1832 if pnode_img.offline:
1833 inst_nodes_offline.append(pnode)
1835 # If the instance is non-redundant we cannot survive losing its primary
1836 # node, so we are not N+1 compliant. On the other hand we have no disk
1837 # templates with more than one secondary so that situation is not well handled.
1839 # FIXME: does not support file-backed instances
1840 if not inst_config.secondary_nodes:
1841 i_non_redundant.append(instance)
1842 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
1843 instance, "instance has multiple secondary nodes: %s",
1844 utils.CommaJoin(inst_config.secondary_nodes),
1845 code=self.ETYPE_WARNING)
1847 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1848 i_non_a_balanced.append(instance)
1850 for snode in inst_config.secondary_nodes:
1851 s_img = node_image[snode]
1852 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
1853 "instance %s, connection to secondary node failed", instance)
1856 inst_nodes_offline.append(snode)
1858 # warn that the instance lives on offline nodes
1859 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1860 "instance lives on offline node(s) %s",
1861 utils.CommaJoin(inst_nodes_offline))
1862 # ... or ghost nodes
1863 for node in inst_config.all_nodes:
1864 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
1865 "instance lives on ghost node %s", node)
1867 feedback_fn("* Verifying orphan volumes")
1868 self._VerifyOrphanVolumes(node_vol_should, node_image)
1870 feedback_fn("* Verifying oprhan instances")
1871 self._VerifyOrphanInstances(instancelist, node_image)
1873 if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1874 feedback_fn("* Verifying N+1 Memory redundancy")
1875 self._VerifyNPlusOneMemory(node_image, instanceinfo)
1877 feedback_fn("* Other Notes")
1879 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
1880 % len(i_non_redundant))
1882 if i_non_a_balanced:
1883 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
1884 % len(i_non_a_balanced))
1887 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
1890 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
1894 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1895 """Analyze the post-hooks' result
1897 This method analyses the hook result, handles it, and sends some
1898 nicely-formatted feedback back to the user.
1900 @param phase: one of L{constants.HOOKS_PHASE_POST} or
1901 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1902 @param hooks_results: the results of the multi-node hooks rpc call
1903 @param feedback_fn: function used to send feedback back to the caller
1904 @param lu_result: previous Exec result
1905 @return: the new Exec result, based on the previous result
1909 # We only really run POST phase hooks, and are only interested in their results
1911 if phase == constants.HOOKS_PHASE_POST:
1912 # Used to change hooks' output to proper indentation
1913 indent_re = re.compile('^', re.M)
1914 feedback_fn("* Hooks Results")
1915 assert hooks_results, "invalid result from hooks"
1917 for node_name in hooks_results:
1918 res = hooks_results[node_name]
1920 test = msg and not res.offline
1921 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1922 "Communication failure in hooks execution: %s", msg)
1923 if res.offline or msg:
1924 # No need to investigate payload if node is offline or gave an error.
1925 # manually override lu_result here, as _ErrorIf only
1926 # overrides self.bad
1929 for script, hkr, output in res.payload:
1930 test = hkr == constants.HKR_FAIL
1931 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1932 "Script %s failed, output:", script)
1934 output = indent_re.sub(' ', output)
1935 feedback_fn("%s" % output)
1941 class LUVerifyDisks(NoHooksLU):
1942 """Verifies the cluster disks status.
1948 def ExpandNames(self):
1949 self.needed_locks = {
1950 locking.LEVEL_NODE: locking.ALL_SET,
1951 locking.LEVEL_INSTANCE: locking.ALL_SET,
1953 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1955 def CheckPrereq(self):
1956 """Check prerequisites.
1958 This has no prerequisites.
1963 def Exec(self, feedback_fn):
1964 """Verify integrity of cluster disks.
1966 @rtype: tuple of three items
1967 @return: a tuple of (dict of node-to-node_error, list of instances
1968 which need activate-disks, dict of instance: (node, volume) for missing volumes)
1972 result = res_nodes, res_instances, res_missing = {}, [], {}
1974 vg_name = self.cfg.GetVGName()
1975 nodes = utils.NiceSort(self.cfg.GetNodeList())
1976 instances = [self.cfg.GetInstanceInfo(name)
1977 for name in self.cfg.GetInstanceList()]
1980 for inst in instances:
1982 if (not inst.admin_up or
1983 inst.disk_template not in constants.DTS_NET_MIRROR):
1985 inst.MapLVsByNode(inst_lvs)
1986 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1987 for node, vol_list in inst_lvs.iteritems():
1988 for vol in vol_list:
1989 nv_dict[(node, vol)] = inst
1994 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
1998 node_res = node_lvs[node]
1999 if node_res.offline:
2001 msg = node_res.fail_msg
2003 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2004 res_nodes[node] = msg
2007 lvs = node_res.payload
2008 for lv_name, (_, _, lv_online) in lvs.items():
2009 inst = nv_dict.pop((node, lv_name), None)
2010 if (not lv_online and inst is not None
2011 and inst.name not in res_instances):
2012 res_instances.append(inst.name)
2014 # any leftover items in nv_dict are missing LVs, let's arrange the data better
2016 for key, inst in nv_dict.iteritems():
2017 if inst.name not in res_missing:
2018 res_missing[inst.name] = []
2019 res_missing[inst.name].append(key)
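# Illustrative shape of the assembled triple (made-up names, not from the
# original source):
#   res_nodes     = {"node3.example.com": "rpc error message"}
#   res_instances = ["instance1.example.com"]          # needs activate-disks
#   res_missing   = {"instance2.example.com": [("node1.example.com",
#                                               "xenvg/lv-disk0")]}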
2024 class LURepairDiskSizes(NoHooksLU):
2025 """Verifies the cluster disks sizes.
2028 _OP_REQP = ["instances"]
2031 def ExpandNames(self):
2032 if not isinstance(self.op.instances, list):
2033 raise errors.OpPrereqError("Invalid argument type 'instances'",
2036 if self.op.instances:
2037 self.wanted_names = []
2038 for name in self.op.instances:
2039 full_name = _ExpandInstanceName(self.cfg, name)
2040 self.wanted_names.append(full_name)
2041 self.needed_locks = {
2042 locking.LEVEL_NODE: [],
2043 locking.LEVEL_INSTANCE: self.wanted_names,
2045 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2047 self.wanted_names = None
2048 self.needed_locks = {
2049 locking.LEVEL_NODE: locking.ALL_SET,
2050 locking.LEVEL_INSTANCE: locking.ALL_SET,
2052 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2054 def DeclareLocks(self, level):
2055 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2056 self._LockInstancesNodes(primary_only=True)
2058 def CheckPrereq(self):
2059 """Check prerequisites.
2061 This only checks the optional instance list against the existing names.
2064 if self.wanted_names is None:
2065 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2067 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2068 in self.wanted_names]
2070 def _EnsureChildSizes(self, disk):
2071 """Ensure children of the disk have the needed disk size.
2073 This is valid mainly for DRBD8 and fixes an issue where the
2074 children have a smaller disk size.
2076 @param disk: an L{ganeti.objects.Disk} object
2079 if disk.dev_type == constants.LD_DRBD8:
2080 assert disk.children, "Empty children for DRBD8?"
2081 fchild = disk.children[0]
2082 mismatch = fchild.size < disk.size
2084 self.LogInfo("Child disk has size %d, parent %d, fixing",
2085 fchild.size, disk.size)
2086 fchild.size = disk.size
2088 # and we recurse on this child only, not on the metadev
2089 return self._EnsureChildSizes(fchild) or mismatch
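# Hypothetical example (not from the original source): for a DRBD8 disk of
# size 10240 whose data child was recorded with size 10200,
#   self._EnsureChildSizes(drbd_disk)
# bumps the child to 10240 and returns True, signalling that the instance
# configuration needs to be written back.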
2093 def Exec(self, feedback_fn):
2094 """Verify the size of cluster disks.
2097 # TODO: check child disks too
2098 # TODO: check differences in size between primary/secondary nodes
2100 for instance in self.wanted_instances:
2101 pnode = instance.primary_node
2102 if pnode not in per_node_disks:
2103 per_node_disks[pnode] = []
2104 for idx, disk in enumerate(instance.disks):
2105 per_node_disks[pnode].append((instance, idx, disk))
2108 for node, dskl in per_node_disks.items():
2109 newl = [v[2].Copy() for v in dskl]
2111 self.cfg.SetDiskID(dsk, node)
2112 result = self.rpc.call_blockdev_getsizes(node, newl)
2114 self.LogWarning("Failure in blockdev_getsizes call to node"
2115 " %s, ignoring", node)
2117 if len(result.data) != len(dskl):
2118 self.LogWarning("Invalid result from node %s, ignoring node results",
2121 for ((instance, idx, disk), size) in zip(dskl, result.data):
2123 self.LogWarning("Disk %d of instance %s did not return size"
2124 " information, ignoring", idx, instance.name)
2126 if not isinstance(size, (int, long)):
2127 self.LogWarning("Disk %d of instance %s did not return valid"
2128 " size information, ignoring", idx, instance.name)
2131 if size != disk.size:
2132 self.LogInfo("Disk %d of instance %s has mismatched size,"
2133 " correcting: recorded %d, actual %d", idx,
2134 instance.name, disk.size, size)
2136 self.cfg.Update(instance, feedback_fn)
2137 changed.append((instance.name, idx, size))
2138 if self._EnsureChildSizes(disk):
2139 self.cfg.Update(instance, feedback_fn)
2140 changed.append((instance.name, idx, disk.size))
2144 class LURenameCluster(LogicalUnit):
2145 """Rename the cluster.
2148 HPATH = "cluster-rename"
2149 HTYPE = constants.HTYPE_CLUSTER
2152 def BuildHooksEnv(self):
2157 "OP_TARGET": self.cfg.GetClusterName(),
2158 "NEW_NAME": self.op.name,
2160 mn = self.cfg.GetMasterNode()
2161 all_nodes = self.cfg.GetNodeList()
2162 return env, [mn], all_nodes
2164 def CheckPrereq(self):
2165 """Verify that the passed name is a valid one.
2168 hostname = utils.GetHostInfo(self.op.name)
2170 new_name = hostname.name
2171 self.ip = new_ip = hostname.ip
2172 old_name = self.cfg.GetClusterName()
2173 old_ip = self.cfg.GetMasterIP()
2174 if new_name == old_name and new_ip == old_ip:
2175 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2176 " cluster has changed",
2178 if new_ip != old_ip:
2179 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2180 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2181 " reachable on the network. Aborting." %
2182 new_ip, errors.ECODE_NOTUNIQUE)
2184 self.op.name = new_name
2186 def Exec(self, feedback_fn):
2187 """Rename the cluster.
2190 clustername = self.op.name
2193 # shutdown the master IP
2194 master = self.cfg.GetMasterNode()
2195 result = self.rpc.call_node_stop_master(master, False)
2196 result.Raise("Could not disable the master role")
2199 cluster = self.cfg.GetClusterInfo()
2200 cluster.cluster_name = clustername
2201 cluster.master_ip = ip
2202 self.cfg.Update(cluster, feedback_fn)
2204 # update the known hosts file
2205 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2206 node_list = self.cfg.GetNodeList()
2208 node_list.remove(master)
2211 result = self.rpc.call_upload_file(node_list,
2212 constants.SSH_KNOWN_HOSTS_FILE)
2213 for to_node, to_result in result.iteritems():
2214 msg = to_result.fail_msg
2216 msg = ("Copy of file %s to node %s failed: %s" %
2217 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2218 self.proc.LogWarning(msg)
2221 result = self.rpc.call_node_start_master(master, False, False)
2222 msg = result.fail_msg
2224 self.LogWarning("Could not re-enable the master role on"
2225 " the master, please restart manually: %s", msg)
2228 def _RecursiveCheckIfLVMBased(disk):
2229 """Check if the given disk or its children are lvm-based.
2231 @type disk: L{objects.Disk}
2232 @param disk: the disk to check
2234 @return: boolean indicating whether a LD_LV dev_type was found or not
2238 for chdisk in disk.children:
2239 if _RecursiveCheckIfLVMBased(chdisk): return True
2241 return disk.dev_type == constants.LD_LV
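# Illustrative behaviour (assumption, not from the original source): a DRBD8
# disk whose children are LD_LV volumes yields True, while a disk tree with
# no LD_LV device anywhere yields False:
#   _RecursiveCheckIfLVMBased(drbd_over_lvm_disk)  # -> True
#   _RecursiveCheckIfLVMBased(file_based_disk)     # -> False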
2244 class LUSetClusterParams(LogicalUnit):
2245 """Change the parameters of the cluster.
2248 HPATH = "cluster-modify"
2249 HTYPE = constants.HTYPE_CLUSTER
2253 def CheckArguments(self):
2257 if not hasattr(self.op, "candidate_pool_size"):
2258 self.op.candidate_pool_size = None
2259 if self.op.candidate_pool_size is not None:
2261 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2262 except (ValueError, TypeError), err:
2263 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2264 str(err), errors.ECODE_INVAL)
2265 if self.op.candidate_pool_size < 1:
2266 raise errors.OpPrereqError("At least one master candidate needed",
2268 _CheckBooleanOpField(self.op, "maintain_node_health")
2270 def ExpandNames(self):
2271 # FIXME: in the future maybe other cluster params won't require checking on
2272 # all nodes to be modified.
2273 self.needed_locks = {
2274 locking.LEVEL_NODE: locking.ALL_SET,
2276 self.share_locks[locking.LEVEL_NODE] = 1
2278 def BuildHooksEnv(self):
2283 "OP_TARGET": self.cfg.GetClusterName(),
2284 "NEW_VG_NAME": self.op.vg_name,
2286 mn = self.cfg.GetMasterNode()
2287 return env, [mn], [mn]
2289 def CheckPrereq(self):
2290 """Check prerequisites.
2292 This checks whether the given params don't conflict and
2293 if the given volume group is valid.
2296 if self.op.vg_name is not None and not self.op.vg_name:
2297 instances = self.cfg.GetAllInstancesInfo().values()
2298 for inst in instances:
2299 for disk in inst.disks:
2300 if _RecursiveCheckIfLVMBased(disk):
2301 raise errors.OpPrereqError("Cannot disable lvm storage while"
2302 " lvm-based instances exist",
2305 node_list = self.acquired_locks[locking.LEVEL_NODE]
2307 # if vg_name not None, checks given volume group on all nodes
2309 vglist = self.rpc.call_vg_list(node_list)
2310 for node in node_list:
2311 msg = vglist[node].fail_msg
2313 # ignoring down node
2314 self.LogWarning("Error while gathering data on node %s"
2315 " (ignoring node): %s", node, msg)
2317 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2319 constants.MIN_VG_SIZE)
2321 raise errors.OpPrereqError("Error on node '%s': %s" %
2322 (node, vgstatus), errors.ECODE_ENVIRON)
2324 self.cluster = cluster = self.cfg.GetClusterInfo()
2325 # validate params changes
2326 if self.op.beparams:
2327 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2328 self.new_beparams = objects.FillDict(
2329 cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2331 if self.op.nicparams:
2332 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2333 self.new_nicparams = objects.FillDict(
2334 cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2335 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2338 # check all instances for consistency
2339 for instance in self.cfg.GetAllInstancesInfo().values():
2340 for nic_idx, nic in enumerate(instance.nics):
2341 params_copy = copy.deepcopy(nic.nicparams)
2342 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2344 # check parameter syntax
2346 objects.NIC.CheckParameterSyntax(params_filled)
2347 except errors.ConfigurationError, err:
2348 nic_errors.append("Instance %s, nic/%d: %s" %
2349 (instance.name, nic_idx, err))
2351 # if we're moving instances to routed, check that they have an ip
2352 target_mode = params_filled[constants.NIC_MODE]
2353 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2354 nic_errors.append("Instance %s, nic/%d: routed nic with no ip" %
2355 (instance.name, nic_idx))
2357 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2358 "\n".join(nic_errors))
2360 # hypervisor list/parameters
2361 self.new_hvparams = objects.FillDict(cluster.hvparams, {})
2362 if self.op.hvparams:
2363 if not isinstance(self.op.hvparams, dict):
2364 raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2366 for hv_name, hv_dict in self.op.hvparams.items():
2367 if hv_name not in self.new_hvparams:
2368 self.new_hvparams[hv_name] = hv_dict
2370 self.new_hvparams[hv_name].update(hv_dict)
2372 # os hypervisor parameters
2373 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2375 if not isinstance(self.op.os_hvp, dict):
2376 raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2378 for os_name, hvs in self.op.os_hvp.items():
2379 if not isinstance(hvs, dict):
2380 raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2381 " input"), errors.ECODE_INVAL)
2382 if os_name not in self.new_os_hvp:
2383 self.new_os_hvp[os_name] = hvs
2385 for hv_name, hv_dict in hvs.items():
2386 if hv_name not in self.new_os_hvp[os_name]:
2387 self.new_os_hvp[os_name][hv_name] = hv_dict
2389 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2391 if self.op.enabled_hypervisors is not None:
2392 self.hv_list = self.op.enabled_hypervisors
2393 if not self.hv_list:
2394 raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2395 " least one member",
2397 invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2399 raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2401 utils.CommaJoin(invalid_hvs),
2404 self.hv_list = cluster.enabled_hypervisors
2406 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2407 # either the enabled list has changed, or the parameters have, validate
2408 for hv_name, hv_params in self.new_hvparams.items():
2409 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2410 (self.op.enabled_hypervisors and
2411 hv_name in self.op.enabled_hypervisors)):
2412 # either this is a new hypervisor, or its parameters have changed
2413 hv_class = hypervisor.GetHypervisor(hv_name)
2414 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2415 hv_class.CheckParameterSyntax(hv_params)
2416 _CheckHVParams(self, node_list, hv_name, hv_params)
2419 # no need to check any newly-enabled hypervisors, since the
2420 # defaults have already been checked in the above code-block
2421 for os_name, os_hvp in self.new_os_hvp.items():
2422 for hv_name, hv_params in os_hvp.items():
2423 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2424 # we need to fill in the new os_hvp on top of the actual hv_p
2425 cluster_defaults = self.new_hvparams.get(hv_name, {})
2426 new_osp = objects.FillDict(cluster_defaults, hv_params)
2427 hv_class = hypervisor.GetHypervisor(hv_name)
2428 hv_class.CheckParameterSyntax(new_osp)
2429 _CheckHVParams(self, node_list, hv_name, new_osp)
2432 def Exec(self, feedback_fn):
2433 """Change the parameters of the cluster.
2436 if self.op.vg_name is not None:
2437 new_volume = self.op.vg_name
2440 if new_volume != self.cfg.GetVGName():
2441 self.cfg.SetVGName(new_volume)
2443 feedback_fn("Cluster LVM configuration already in desired"
2444 " state, not changing")
2445 if self.op.hvparams:
2446 self.cluster.hvparams = self.new_hvparams
2448 self.cluster.os_hvp = self.new_os_hvp
2449 if self.op.enabled_hypervisors is not None:
2450 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2451 if self.op.beparams:
2452 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2453 if self.op.nicparams:
2454 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2456 if self.op.candidate_pool_size is not None:
2457 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2458 # we need to update the pool size here, otherwise the save will fail
2459 _AdjustCandidatePool(self, [])
2461 if self.op.maintain_node_health is not None:
2462 self.cluster.maintain_node_health = self.op.maintain_node_health
2464 self.cfg.Update(self.cluster, feedback_fn)
2467 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2468 """Distribute additional files which are part of the cluster configuration.
2470 ConfigWriter takes care of distributing the config and ssconf files, but
2471 there are more files which should be distributed to all nodes. This function
2472 makes sure those are copied.
2474 @param lu: calling logical unit
2475 @param additional_nodes: list of nodes not in the config to distribute to
2478 # 1. Gather target nodes
2479 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2480 dist_nodes = lu.cfg.GetOnlineNodeList()
2481 if additional_nodes is not None:
2482 dist_nodes.extend(additional_nodes)
2483 if myself.name in dist_nodes:
2484 dist_nodes.remove(myself.name)
2486 # 2. Gather files to distribute
2487 dist_files = set([constants.ETC_HOSTS,
2488 constants.SSH_KNOWN_HOSTS_FILE,
2489 constants.RAPI_CERT_FILE,
2490 constants.RAPI_USERS_FILE,
2491 constants.CONFD_HMAC_KEY,
2494 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2495 for hv_name in enabled_hypervisors:
2496 hv_class = hypervisor.GetHypervisor(hv_name)
2497 dist_files.update(hv_class.GetAncillaryFiles())
2499 # 3. Perform the files upload
2500 for fname in dist_files:
2501 if os.path.exists(fname):
2502 result = lu.rpc.call_upload_file(dist_nodes, fname)
2503 for to_node, to_result in result.items():
2504 msg = to_result.fail_msg
2506 msg = ("Copy of file %s to node %s failed: %s" %
2507 (fname, to_node, msg))
2508 lu.proc.LogWarning(msg)
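# Typical invocations (compare LURedistributeConfig and LUAddNode below):
#   _RedistributeAncillaryFiles(lu)
#   _RedistributeAncillaryFiles(lu, additional_nodes=[new_node_name])
# where new_node_name is a node that is not yet part of the configuration.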
2511 class LURedistributeConfig(NoHooksLU):
2512 """Force the redistribution of cluster configuration.
2514 This is a very simple LU.
2520 def ExpandNames(self):
2521 self.needed_locks = {
2522 locking.LEVEL_NODE: locking.ALL_SET,
2524 self.share_locks[locking.LEVEL_NODE] = 1
2526 def CheckPrereq(self):
2527 """Check prerequisites.
2531 def Exec(self, feedback_fn):
2532 """Redistribute the configuration.
2535 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2536 _RedistributeAncillaryFiles(self)
2539 def _WaitForSync(lu, instance, oneshot=False):
2540 """Sleep and poll for an instance's disk to sync.
2543 if not instance.disks:
2547 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2549 node = instance.primary_node
2551 for dev in instance.disks:
2552 lu.cfg.SetDiskID(dev, node)
2554 # TODO: Convert to utils.Retry
2557 degr_retries = 10 # in seconds, as we sleep 1 second each time
2561 cumul_degraded = False
2562 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2563 msg = rstats.fail_msg
2565 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2568 raise errors.RemoteError("Can't contact node %s for mirror data,"
2569 " aborting." % node)
2572 rstats = rstats.payload
2574 for i, mstat in enumerate(rstats):
2576 lu.LogWarning("Can't compute data for node %s/%s",
2577 node, instance.disks[i].iv_name)
2580 cumul_degraded = (cumul_degraded or
2581 (mstat.is_degraded and mstat.sync_percent is None))
2582 if mstat.sync_percent is not None:
2584 if mstat.estimated_time is not None:
2585 rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2586 max_time = mstat.estimated_time
2588 rem_time = "no time estimate"
2589 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2590 (instance.disks[i].iv_name, mstat.sync_percent,
2593 # if we're done but degraded, let's do a few small retries, to
2594 # make sure we see a stable and not transient situation; therefore
2595 # we force restart of the loop
2596 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2597 logging.info("Degraded disks found, %d retries left", degr_retries)
2605 time.sleep(min(60, max_time))
2608 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2609 return not cumul_degraded
2612 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2613 """Check that mirrors are not degraded.
2615 The ldisk parameter, if True, will change the test from the
2616 is_degraded attribute (which represents overall non-ok status for
2617 the device(s)) to the ldisk (representing the local storage status).
2620 lu.cfg.SetDiskID(dev, node)
2624 if on_primary or dev.AssembleOnSecondary():
2625 rstats = lu.rpc.call_blockdev_find(node, dev)
2626 msg = rstats.fail_msg
2628 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2630 elif not rstats.payload:
2631 lu.LogWarning("Can't find disk on node %s", node)
2635 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2637 result = result and not rstats.payload.is_degraded
2640 for child in dev.children:
2641 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
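# Usage sketch (illustrative, not from the original source): with the default
# ldisk=False the overall is_degraded flag is checked, with ldisk=True only
# the local storage status:
#   _CheckDiskConsistency(lu, dev, node, on_primary=True)              # overall
#   _CheckDiskConsistency(lu, dev, node, on_primary=True, ldisk=True)  # local only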
2646 class LUDiagnoseOS(NoHooksLU):
2647 """Logical unit for OS diagnose/query.
2650 _OP_REQP = ["output_fields", "names"]
2652 _FIELDS_STATIC = utils.FieldSet()
2653 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2654 # Fields that need calculation of global os validity
2655 _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2657 def ExpandNames(self):
2659 raise errors.OpPrereqError("Selective OS query not supported",
2662 _CheckOutputFields(static=self._FIELDS_STATIC,
2663 dynamic=self._FIELDS_DYNAMIC,
2664 selected=self.op.output_fields)
2666 # Lock all nodes, in shared mode
2667 # Temporary removal of locks, should be reverted later
2668 # TODO: reintroduce locks when they are lighter-weight
2669 self.needed_locks = {}
2670 #self.share_locks[locking.LEVEL_NODE] = 1
2671 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2673 def CheckPrereq(self):
2674 """Check prerequisites.
2679 def _DiagnoseByOS(rlist):
2680 """Remaps a per-node return list into an a per-os per-node dictionary
2682 @param rlist: a map with node names as keys and OS objects as values
2685 @return: a dictionary with osnames as keys and as value another map, with
2686 nodes as keys and tuples of (path, status, diagnose) as values, eg::
2688 {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2689 (/srv/..., False, "invalid api")],
2690 "node2": [(/srv/..., True, "")]}
2695 # we build here the list of nodes that didn't fail the RPC (at RPC
2696 # level), so that nodes with a non-responding node daemon don't
2697 # make all OSes invalid
2698 good_nodes = [node_name for node_name in rlist
2699 if not rlist[node_name].fail_msg]
2700 for node_name, nr in rlist.items():
2701 if nr.fail_msg or not nr.payload:
2703 for name, path, status, diagnose, variants in nr.payload:
2704 if name not in all_os:
2705 # build a list of nodes for this os containing empty lists
2706 # for each node in node_list
2708 for nname in good_nodes:
2709 all_os[name][nname] = []
2710 all_os[name][node_name].append((path, status, diagnose, variants))
2713 def Exec(self, feedback_fn):
2714 """Compute the list of OSes.
2717 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2718 node_data = self.rpc.call_os_diagnose(valid_nodes)
2719 pol = self._DiagnoseByOS(node_data)
2721 calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2722 calc_variants = "variants" in self.op.output_fields
2724 for os_name, os_data in pol.items():
2729 for osl in os_data.values():
2730 valid = valid and osl and osl[0][1]
2735 node_variants = osl[0][3]
2736 if variants is None:
2737 variants = node_variants
2739 variants = [v for v in variants if v in node_variants]
2741 for field in self.op.output_fields:
2744 elif field == "valid":
2746 elif field == "node_status":
2747 # this is just a copy of the dict
2749 for node_name, nos_list in os_data.items():
2750 val[node_name] = nos_list
2751 elif field == "variants":
2754 raise errors.ParameterError(field)
2761 class LURemoveNode(LogicalUnit):
2762 """Logical unit for removing a node.
2765 HPATH = "node-remove"
2766 HTYPE = constants.HTYPE_NODE
2767 _OP_REQP = ["node_name"]
2769 def BuildHooksEnv(self):
2772 This doesn't run on the target node in the pre phase as a failed
2773 node would then be impossible to remove.
2777 "OP_TARGET": self.op.node_name,
2778 "NODE_NAME": self.op.node_name,
2780 all_nodes = self.cfg.GetNodeList()
2782 all_nodes.remove(self.op.node_name)
2784 logging.warning("Node %s which is about to be removed not found"
2785 " in the all nodes list", self.op.node_name)
2786 return env, all_nodes, all_nodes
2788 def CheckPrereq(self):
2789 """Check prerequisites.
2792 - the node exists in the configuration
2793 - it does not have primary or secondary instances
2794 - it's not the master
2796 Any errors are signaled by raising errors.OpPrereqError.
2799 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2800 node = self.cfg.GetNodeInfo(self.op.node_name)
2801 assert node is not None
2803 instance_list = self.cfg.GetInstanceList()
2805 masternode = self.cfg.GetMasterNode()
2806 if node.name == masternode:
2807 raise errors.OpPrereqError("Node is the master node,"
2808 " you need to failover first.",
2811 for instance_name in instance_list:
2812 instance = self.cfg.GetInstanceInfo(instance_name)
2813 if node.name in instance.all_nodes:
2814 raise errors.OpPrereqError("Instance %s is still running on the node,"
2815 " please remove first." % instance_name,
2817 self.op.node_name = node.name
2820 def Exec(self, feedback_fn):
2821 """Removes the node from the cluster.
2825 logging.info("Stopping the node daemon and removing configs from node %s",
2828 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2830 # Promote nodes to master candidate as needed
2831 _AdjustCandidatePool(self, exceptions=[node.name])
2832 self.context.RemoveNode(node.name)
2834 # Run post hooks on the node before it's removed
2835 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2837 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2839 # pylint: disable-msg=W0702
2840 self.LogWarning("Errors occurred running hooks on %s" % node.name)
2842 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2843 msg = result.fail_msg
2845 self.LogWarning("Errors encountered on the remote node while leaving"
2846 " the cluster: %s", msg)
2849 class LUQueryNodes(NoHooksLU):
2850 """Logical unit for querying nodes.
2853 # pylint: disable-msg=W0142
2854 _OP_REQP = ["output_fields", "names", "use_locking"]
2857 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2858 "master_candidate", "offline", "drained"]
2860 _FIELDS_DYNAMIC = utils.FieldSet(
2862 "mtotal", "mnode", "mfree",
2864 "ctotal", "cnodes", "csockets",
2867 _FIELDS_STATIC = utils.FieldSet(*[
2868 "pinst_cnt", "sinst_cnt",
2869 "pinst_list", "sinst_list",
2870 "pip", "sip", "tags",
2872 "role"] + _SIMPLE_FIELDS
2875 def ExpandNames(self):
2876 _CheckOutputFields(static=self._FIELDS_STATIC,
2877 dynamic=self._FIELDS_DYNAMIC,
2878 selected=self.op.output_fields)
2880 self.needed_locks = {}
2881 self.share_locks[locking.LEVEL_NODE] = 1
2884 self.wanted = _GetWantedNodes(self, self.op.names)
2886 self.wanted = locking.ALL_SET
2888 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2889 self.do_locking = self.do_node_query and self.op.use_locking
2891 # if we don't request only static fields, we need to lock the nodes
2892 self.needed_locks[locking.LEVEL_NODE] = self.wanted
2894 def CheckPrereq(self):
2895 """Check prerequisites.
2898 # The validation of the node list is done in _GetWantedNodes, if it is
2899 # non-empty; if it is empty, there's no validation to do
2902 def Exec(self, feedback_fn):
2903 """Computes the list of nodes and their attributes.
2906 all_info = self.cfg.GetAllNodesInfo()
2908 nodenames = self.acquired_locks[locking.LEVEL_NODE]
2909 elif self.wanted != locking.ALL_SET:
2910 nodenames = self.wanted
2911 missing = set(nodenames).difference(all_info.keys())
2913 raise errors.OpExecError(
2914 "Some nodes were removed before retrieving their data: %s" % missing)
2916 nodenames = all_info.keys()
2918 nodenames = utils.NiceSort(nodenames)
2919 nodelist = [all_info[name] for name in nodenames]
2921 # begin data gathering
2923 if self.do_node_query:
2925 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2926 self.cfg.GetHypervisorType())
2927 for name in nodenames:
2928 nodeinfo = node_data[name]
2929 if not nodeinfo.fail_msg and nodeinfo.payload:
2930 nodeinfo = nodeinfo.payload
2931 fn = utils.TryConvert
2933 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2934 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2935 "mfree": fn(int, nodeinfo.get('memory_free', None)),
2936 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2937 "dfree": fn(int, nodeinfo.get('vg_free', None)),
2938 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2939 "bootid": nodeinfo.get('bootid', None),
2940 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2941 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2944 live_data[name] = {}
2946 live_data = dict.fromkeys(nodenames, {})
2948 node_to_primary = dict([(name, set()) for name in nodenames])
2949 node_to_secondary = dict([(name, set()) for name in nodenames])
2951 inst_fields = frozenset(("pinst_cnt", "pinst_list",
2952 "sinst_cnt", "sinst_list"))
2953 if inst_fields & frozenset(self.op.output_fields):
2954 inst_data = self.cfg.GetAllInstancesInfo()
2956 for inst in inst_data.values():
2957 if inst.primary_node in node_to_primary:
2958 node_to_primary[inst.primary_node].add(inst.name)
2959 for secnode in inst.secondary_nodes:
2960 if secnode in node_to_secondary:
2961 node_to_secondary[secnode].add(inst.name)
2963 master_node = self.cfg.GetMasterNode()
2965 # end data gathering
2968 for node in nodelist:
2970 for field in self.op.output_fields:
2971 if field in self._SIMPLE_FIELDS:
2972 val = getattr(node, field)
2973 elif field == "pinst_list":
2974 val = list(node_to_primary[node.name])
2975 elif field == "sinst_list":
2976 val = list(node_to_secondary[node.name])
2977 elif field == "pinst_cnt":
2978 val = len(node_to_primary[node.name])
2979 elif field == "sinst_cnt":
2980 val = len(node_to_secondary[node.name])
2981 elif field == "pip":
2982 val = node.primary_ip
2983 elif field == "sip":
2984 val = node.secondary_ip
2985 elif field == "tags":
2986 val = list(node.GetTags())
2987 elif field == "master":
2988 val = node.name == master_node
2989 elif self._FIELDS_DYNAMIC.Matches(field):
2990 val = live_data[node.name].get(field, None)
2991 elif field == "role":
2992 if node.name == master_node:
2994 elif node.master_candidate:
3003 raise errors.ParameterError(field)
3004 node_output.append(val)
3005 output.append(node_output)
3010 class LUQueryNodeVolumes(NoHooksLU):
3011 """Logical unit for getting volumes on node(s).
3014 _OP_REQP = ["nodes", "output_fields"]
3016 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3017 _FIELDS_STATIC = utils.FieldSet("node")
3019 def ExpandNames(self):
3020 _CheckOutputFields(static=self._FIELDS_STATIC,
3021 dynamic=self._FIELDS_DYNAMIC,
3022 selected=self.op.output_fields)
3024 self.needed_locks = {}
3025 self.share_locks[locking.LEVEL_NODE] = 1
3026 if not self.op.nodes:
3027 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3029 self.needed_locks[locking.LEVEL_NODE] = \
3030 _GetWantedNodes(self, self.op.nodes)
3032 def CheckPrereq(self):
3033 """Check prerequisites.
3035 This checks that the fields required are valid output fields.
3038 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3040 def Exec(self, feedback_fn):
3041 """Computes the list of nodes and their attributes.
3044 nodenames = self.nodes
3045 volumes = self.rpc.call_node_volumes(nodenames)
3047 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3048 in self.cfg.GetInstanceList()]
3050 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3053 for node in nodenames:
3054 nresult = volumes[node]
3057 msg = nresult.fail_msg
3059 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3062 node_vols = nresult.payload[:]
3063 node_vols.sort(key=lambda vol: vol['dev'])
3065 for vol in node_vols:
3067 for field in self.op.output_fields:
3070 elif field == "phys":
3074 elif field == "name":
3076 elif field == "size":
3077 val = int(float(vol['size']))
3078 elif field == "instance":
3080 if node not in lv_by_node[inst]:
3082 if vol['name'] in lv_by_node[inst][node]:
3088 raise errors.ParameterError(field)
3089 node_output.append(str(val))
3091 output.append(node_output)
3096 class LUQueryNodeStorage(NoHooksLU):
3097 """Logical unit for getting information on storage units on node(s).
3100 _OP_REQP = ["nodes", "storage_type", "output_fields"]
3102 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3104 def CheckArguments(self):
3105 _CheckStorageType(self.op.storage_type)
3107 _CheckOutputFields(static=self._FIELDS_STATIC,
3108 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3109 selected=self.op.output_fields)
3111 def ExpandNames(self):
3112 self.needed_locks = {}
3113 self.share_locks[locking.LEVEL_NODE] = 1
3116 self.needed_locks[locking.LEVEL_NODE] = \
3117 _GetWantedNodes(self, self.op.nodes)
3119 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3121 def CheckPrereq(self):
3122 """Check prerequisites.
3124 This checks that the fields required are valid output fields.
3127 self.op.name = getattr(self.op, "name", None)
3129 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3131 def Exec(self, feedback_fn):
3132 """Computes the list of nodes and their attributes.
3135 # Always get name to sort by
3136 if constants.SF_NAME in self.op.output_fields:
3137 fields = self.op.output_fields[:]
3139 fields = [constants.SF_NAME] + self.op.output_fields
3141 # Never ask for node or type as it's only known to the LU
3142 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3143 while extra in fields:
3144 fields.remove(extra)
3146 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3147 name_idx = field_idx[constants.SF_NAME]
3149 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3150 data = self.rpc.call_storage_list(self.nodes,
3151 self.op.storage_type, st_args,
3152 self.op.name, fields)
3156 for node in utils.NiceSort(self.nodes):
3157 nresult = data[node]
3161 msg = nresult.fail_msg
3163 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3166 rows = dict([(row[name_idx], row) for row in nresult.payload])
3168 for name in utils.NiceSort(rows.keys()):
3173 for field in self.op.output_fields:
3174 if field == constants.SF_NODE:
3176 elif field == constants.SF_TYPE:
3177 val = self.op.storage_type
3178 elif field in field_idx:
3179 val = row[field_idx[field]]
3181 raise errors.ParameterError(field)
3190 class LUModifyNodeStorage(NoHooksLU):
3191 """Logical unit for modifying a storage volume on a node.
3194 _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3197 def CheckArguments(self):
3198 self.opnode_name = _ExpandNodeName(self.cfg, self.op.node_name)
3200 _CheckStorageType(self.op.storage_type)
3202 def ExpandNames(self):
3203 self.needed_locks = {
3204 locking.LEVEL_NODE: self.op.node_name,
3207 def CheckPrereq(self):
3208 """Check prerequisites.
3211 storage_type = self.op.storage_type
3214 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3216 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3217 " modified" % storage_type,
3220 diff = set(self.op.changes.keys()) - modifiable
3222 raise errors.OpPrereqError("The following fields can not be modified for"
3223 " storage units of type '%s': %r" %
3224 (storage_type, list(diff)),
3227 def Exec(self, feedback_fn):
3228 """Computes the list of nodes and their attributes.
3231 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3232 result = self.rpc.call_storage_modify(self.op.node_name,
3233 self.op.storage_type, st_args,
3234 self.op.name, self.op.changes)
3235 result.Raise("Failed to modify storage unit '%s' on %s" %
3236 (self.op.name, self.op.node_name))
3239 class LUAddNode(LogicalUnit):
3240 """Logical unit for adding node to the cluster.
3244 HTYPE = constants.HTYPE_NODE
3245 _OP_REQP = ["node_name"]
3247 def CheckArguments(self):
3248 # validate/normalize the node name
3249 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3251 def BuildHooksEnv(self):
3254 This will run on all nodes before, and on all nodes + the new node after.
3258 "OP_TARGET": self.op.node_name,
3259 "NODE_NAME": self.op.node_name,
3260 "NODE_PIP": self.op.primary_ip,
3261 "NODE_SIP": self.op.secondary_ip,
3263 nodes_0 = self.cfg.GetNodeList()
3264 nodes_1 = nodes_0 + [self.op.node_name, ]
3265 return env, nodes_0, nodes_1
3267 def CheckPrereq(self):
3268 """Check prerequisites.
3271 - the new node is not already in the config
3273 - its parameters (single/dual homed) match the cluster
3275 Any errors are signaled by raising errors.OpPrereqError.
3278 node_name = self.op.node_name
3281 dns_data = utils.GetHostInfo(node_name)
3283 node = dns_data.name
3284 primary_ip = self.op.primary_ip = dns_data.ip
3285 secondary_ip = getattr(self.op, "secondary_ip", None)
3286 if secondary_ip is None:
3287 secondary_ip = primary_ip
3288 if not utils.IsValidIP(secondary_ip):
3289 raise errors.OpPrereqError("Invalid secondary IP given",
3291 self.op.secondary_ip = secondary_ip
3293 node_list = cfg.GetNodeList()
3294 if not self.op.readd and node in node_list:
3295 raise errors.OpPrereqError("Node %s is already in the configuration" %
3296 node, errors.ECODE_EXISTS)
3297 elif self.op.readd and node not in node_list:
3298 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3301 for existing_node_name in node_list:
3302 existing_node = cfg.GetNodeInfo(existing_node_name)
3304 if self.op.readd and node == existing_node_name:
3305 if (existing_node.primary_ip != primary_ip or
3306 existing_node.secondary_ip != secondary_ip):
3307 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3308 " address configuration as before",
3312 if (existing_node.primary_ip == primary_ip or
3313 existing_node.secondary_ip == primary_ip or
3314 existing_node.primary_ip == secondary_ip or
3315 existing_node.secondary_ip == secondary_ip):
3316 raise errors.OpPrereqError("New node ip address(es) conflict with"
3317 " existing node %s" % existing_node.name,
3318 errors.ECODE_NOTUNIQUE)
3320 # check that the type of the node (single versus dual homed) is the
3321 # same as for the master
3322 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3323 master_singlehomed = myself.secondary_ip == myself.primary_ip
3324 newbie_singlehomed = secondary_ip == primary_ip
3325 if master_singlehomed != newbie_singlehomed:
3326 if master_singlehomed:
3327 raise errors.OpPrereqError("The master has no private ip but the"
3328 " new node has one",
3331 raise errors.OpPrereqError("The master has a private ip but the"
3332 " new node doesn't have one",
3335 # checks reachability
3336 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3337 raise errors.OpPrereqError("Node not reachable by ping",
3338 errors.ECODE_ENVIRON)
3340 if not newbie_singlehomed:
3341 # check reachability from my secondary ip to newbie's secondary ip
3342 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3343 source=myself.secondary_ip):
3344 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3345 " based ping to noded port",
3346 errors.ECODE_ENVIRON)
3353 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3356 self.new_node = self.cfg.GetNodeInfo(node)
3357 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3359 self.new_node = objects.Node(name=node,
3360 primary_ip=primary_ip,
3361 secondary_ip=secondary_ip,
3362 master_candidate=self.master_candidate,
3363 offline=False, drained=False)
3365 def Exec(self, feedback_fn):
3366 """Adds the new node to the cluster.
3369 new_node = self.new_node
3370 node = new_node.name
3372 # for re-adds, reset the offline/drained/master-candidate flags;
3373 # we need to reset here, otherwise offline would prevent RPC calls
3374 # later in the procedure; this also means that if the re-add
3375 # fails, we are left with a non-offlined, broken node
3377 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3378 self.LogInfo("Readding a node, the offline/drained flags were reset")
3379 # if we demote the node, we do cleanup later in the procedure
3380 new_node.master_candidate = self.master_candidate
3382 # notify the user about any possible mc promotion
3383 if new_node.master_candidate:
3384 self.LogInfo("Node will be a master candidate")
3386 # check connectivity
3387 result = self.rpc.call_version([node])[node]
3388 result.Raise("Can't get version information from node %s" % node)
3389 if constants.PROTOCOL_VERSION == result.payload:
3390 logging.info("Communication to node %s fine, sw version %s match",
3391 node, result.payload)
3393 raise errors.OpExecError("Version mismatch master version %s,"
3394 " node version %s" %
3395 (constants.PROTOCOL_VERSION, result.payload))
3398 if self.cfg.GetClusterInfo().modify_ssh_setup:
3399 logging.info("Copy ssh key to node %s", node)
3400 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3402 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3403 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3407 keyarray.append(utils.ReadFile(i))
3409 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3410 keyarray[2], keyarray[3], keyarray[4],
3412 result.Raise("Cannot transfer ssh keys to the new node")
3414 # Add node to our /etc/hosts, and add key to known_hosts
3415 if self.cfg.GetClusterInfo().modify_etc_hosts:
3416 utils.AddHostToEtcHosts(new_node.name)
3418 if new_node.secondary_ip != new_node.primary_ip:
3419 result = self.rpc.call_node_has_ip_address(new_node.name,
3420 new_node.secondary_ip)
3421 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3422 prereq=True, ecode=errors.ECODE_ENVIRON)
3423 if not result.payload:
3424 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3425 " you gave (%s). Please fix and re-run this"
3426 " command." % new_node.secondary_ip)
3428 node_verify_list = [self.cfg.GetMasterNode()]
3429 node_verify_param = {
3430 constants.NV_NODELIST: [node],
3431 # TODO: do a node-net-test as well?
3434 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3435 self.cfg.GetClusterName())
3436 for verifier in node_verify_list:
3437 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3438 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3440 for failed in nl_payload:
3441 feedback_fn("ssh/hostname verification failed"
3442 " (checking from %s): %s" %
3443 (verifier, nl_payload[failed]))
3444 raise errors.OpExecError("ssh/hostname verification failed.")
3447 _RedistributeAncillaryFiles(self)
3448 self.context.ReaddNode(new_node)
3449 # make sure we redistribute the config
3450 self.cfg.Update(new_node, feedback_fn)
3451 # and make sure the new node will not have old files around
3452 if not new_node.master_candidate:
3453 result = self.rpc.call_node_demote_from_mc(new_node.name)
3454 msg = result.fail_msg
3456 self.LogWarning("Node failed to demote itself from master"
3457 " candidate status: %s" % msg)
3459 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3460 self.context.AddNode(new_node, self.proc.GetECId())
3463 class LUSetNodeParams(LogicalUnit):
3464 """Modifies the parameters of a node.
3467 HPATH = "node-modify"
3468 HTYPE = constants.HTYPE_NODE
3469 _OP_REQP = ["node_name"]
3472 def CheckArguments(self):
3473 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3474 _CheckBooleanOpField(self.op, 'master_candidate')
3475 _CheckBooleanOpField(self.op, 'offline')
3476 _CheckBooleanOpField(self.op, 'drained')
3477 _CheckBooleanOpField(self.op, 'auto_promote')
3478 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3479 if all_mods.count(None) == 3:
3480 raise errors.OpPrereqError("Please pass at least one modification",
3482 if all_mods.count(True) > 1:
3483 raise errors.OpPrereqError("Can't set the node into more than one"
3484 " state at the same time",
3487 # Boolean value that tells us whether we're offlining or draining the node
3488 self.offline_or_drain = (self.op.offline == True or
3489 self.op.drained == True)
3490 self.deoffline_or_drain = (self.op.offline == False or
3491 self.op.drained == False)
3492 self.might_demote = (self.op.master_candidate == False or
3493 self.offline_or_drain)
3495 self.lock_all = self.op.auto_promote and self.might_demote
3498 def ExpandNames(self):
3500 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3502 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3504 def BuildHooksEnv(self):
3507 This runs on the master node.
3511 "OP_TARGET": self.op.node_name,
3512 "MASTER_CANDIDATE": str(self.op.master_candidate),
3513 "OFFLINE": str(self.op.offline),
3514 "DRAINED": str(self.op.drained),
3516 nl = [self.cfg.GetMasterNode(),
3520 def CheckPrereq(self):
3521 """Check prerequisites.
3523 This only checks the instance list against the existing names.
3526 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3528 if (self.op.master_candidate is not None or
3529 self.op.drained is not None or
3530 self.op.offline is not None):
3531 # we can't change the master's node flags
3532 if self.op.node_name == self.cfg.GetMasterNode():
3533 raise errors.OpPrereqError("The master role can be changed"
3534 " only via masterfailover",
3538 if node.master_candidate and self.might_demote and not self.lock_all:
3539 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3540 # check if after removing the current node, we're missing master candidates
3542 (mc_remaining, mc_should, _) = \
3543 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3544 if mc_remaining < mc_should:
3545 raise errors.OpPrereqError("Not enough master candidates, please"
3546 " pass auto_promote to allow promotion",
3549 if (self.op.master_candidate == True and
3550 ((node.offline and not self.op.offline == False) or
3551 (node.drained and not self.op.drained == False))):
3552 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3553 " to master_candidate" % node.name,
3556 # If we're being deofflined/drained, we'll MC ourself if needed
3557 if (self.deoffline_or_drain and not self.offline_or_drain and not
3558 self.op.master_candidate == True and not node.master_candidate):
3559 self.op.master_candidate = _DecideSelfPromotion(self)
3560 if self.op.master_candidate:
3561 self.LogInfo("Autopromoting node to master candidate")
3565 def Exec(self, feedback_fn):
3574 if self.op.offline is not None:
3575 node.offline = self.op.offline
3576 result.append(("offline", str(self.op.offline)))
3577 if self.op.offline == True:
3578 if node.master_candidate:
3579 node.master_candidate = False
3581 result.append(("master_candidate", "auto-demotion due to offline"))
3583 node.drained = False
3584 result.append(("drained", "clear drained status due to offline"))
3586 if self.op.master_candidate is not None:
3587 node.master_candidate = self.op.master_candidate
3589 result.append(("master_candidate", str(self.op.master_candidate)))
3590 if self.op.master_candidate == False:
3591 rrc = self.rpc.call_node_demote_from_mc(node.name)
3594 self.LogWarning("Node failed to demote itself: %s" % msg)
3596 if self.op.drained is not None:
3597 node.drained = self.op.drained
3598 result.append(("drained", str(self.op.drained)))
3599 if self.op.drained == True:
3600 if node.master_candidate:
3601 node.master_candidate = False
3603 result.append(("master_candidate", "auto-demotion due to drain"))
3604 rrc = self.rpc.call_node_demote_from_mc(node.name)
3607 self.LogWarning("Node failed to demote itself: %s" % msg)
3609 node.offline = False
3610 result.append(("offline", "clear offline status due to drain"))
3612 # we locked all nodes, we adjust the CP before updating this node
3614 _AdjustCandidatePool(self, [node.name])
3616 # this will trigger configuration file update, if needed
3617 self.cfg.Update(node, feedback_fn)
3619 # this will trigger job queue propagation or cleanup
3621 self.context.ReaddNode(node)
3626 class LUPowercycleNode(NoHooksLU):
3627 """Powercycles a node.
3630 _OP_REQP = ["node_name", "force"]
3633 def CheckArguments(self):
3634 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3635 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3636 raise errors.OpPrereqError("The node is the master and the force"
3637 " parameter was not set",
3640 def ExpandNames(self):
3641 """Locking for PowercycleNode.
3643 This is a last-resort option and shouldn't block on other
3644 jobs. Therefore, we grab no locks.
3647 self.needed_locks = {}
3649 def CheckPrereq(self):
3650 """Check prerequisites.
3652 This LU has no prereqs.
3657 def Exec(self, feedback_fn):
3661 result = self.rpc.call_node_powercycle(self.op.node_name,
3662 self.cfg.GetHypervisorType())
3663 result.Raise("Failed to schedule the reboot")
3664 return result.payload
3667 class LUQueryClusterInfo(NoHooksLU):
3668 """Query cluster configuration.
3674 def ExpandNames(self):
3675 self.needed_locks = {}
3677 def CheckPrereq(self):
3678 """No prerequsites needed for this LU.
3683 def Exec(self, feedback_fn):
3684 """Return cluster config.
3687 cluster = self.cfg.GetClusterInfo()
3690 # Filter just for enabled hypervisors
3691 for os_name, hv_dict in cluster.os_hvp.items():
3692 os_hvp[os_name] = {}
3693 for hv_name, hv_params in hv_dict.items():
3694 if hv_name in cluster.enabled_hypervisors:
3695 os_hvp[os_name][hv_name] = hv_params
3698 "software_version": constants.RELEASE_VERSION,
3699 "protocol_version": constants.PROTOCOL_VERSION,
3700 "config_version": constants.CONFIG_VERSION,
3701 "os_api_version": max(constants.OS_API_VERSIONS),
3702 "export_version": constants.EXPORT_VERSION,
3703 "architecture": (platform.architecture()[0], platform.machine()),
3704 "name": cluster.cluster_name,
3705 "master": cluster.master_node,
3706 "default_hypervisor": cluster.enabled_hypervisors[0],
3707 "enabled_hypervisors": cluster.enabled_hypervisors,
3708 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3709 for hypervisor_name in cluster.enabled_hypervisors]),
3711 "beparams": cluster.beparams,
3712 "nicparams": cluster.nicparams,
3713 "candidate_pool_size": cluster.candidate_pool_size,
3714 "master_netdev": cluster.master_netdev,
3715 "volume_group_name": cluster.volume_group_name,
3716 "file_storage_dir": cluster.file_storage_dir,
3717 "maintain_node_health": cluster.maintain_node_health,
3718 "ctime": cluster.ctime,
3719 "mtime": cluster.mtime,
3720 "uuid": cluster.uuid,
3721 "tags": list(cluster.GetTags()),
3727 class LUQueryConfigValues(NoHooksLU):
3728 """Return configuration values.
3733 _FIELDS_DYNAMIC = utils.FieldSet()
3734 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3737 def ExpandNames(self):
3738 self.needed_locks = {}
3740 _CheckOutputFields(static=self._FIELDS_STATIC,
3741 dynamic=self._FIELDS_DYNAMIC,
3742 selected=self.op.output_fields)
3744 def CheckPrereq(self):
3745 """No prerequisites.
3750 def Exec(self, feedback_fn):
3751 """Dump a representation of the cluster config to the standard output.
3755 for field in self.op.output_fields:
3756 if field == "cluster_name":
3757 entry = self.cfg.GetClusterName()
3758 elif field == "master_node":
3759 entry = self.cfg.GetMasterNode()
3760 elif field == "drain_flag":
3761 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3762 elif field == "watcher_pause":
3763 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3765 raise errors.ParameterError(field)
3766 values.append(entry)
3770 class LUActivateInstanceDisks(NoHooksLU):
3771 """Bring up an instance's disks.
3774 _OP_REQP = ["instance_name"]
3777 def ExpandNames(self):
3778 self._ExpandAndLockInstance()
3779 self.needed_locks[locking.LEVEL_NODE] = []
3780 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3782 def DeclareLocks(self, level):
3783 if level == locking.LEVEL_NODE:
3784 self._LockInstancesNodes()
3786 def CheckPrereq(self):
3787 """Check prerequisites.
3789 This checks that the instance is in the cluster.
3792 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3793 assert self.instance is not None, \
3794 "Cannot retrieve locked instance %s" % self.op.instance_name
3795 _CheckNodeOnline(self, self.instance.primary_node)
3796 if not hasattr(self.op, "ignore_size"):
3797 self.op.ignore_size = False
3799 def Exec(self, feedback_fn):
3800 """Activate the disks.
3803 disks_ok, disks_info = \
3804 _AssembleInstanceDisks(self, self.instance,
3805 ignore_size=self.op.ignore_size)
3807 raise errors.OpExecError("Cannot activate block devices")
3812 def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3814 """Prepare the block devices for an instance.
3816 This sets up the block devices on all nodes.
3818 @type lu: L{LogicalUnit}
3819 @param lu: the logical unit on whose behalf we execute
3820 @type instance: L{objects.Instance}
3821 @param instance: the instance for whose disks we assemble
3822 @type ignore_secondaries: boolean
3823 @param ignore_secondaries: if true, errors on secondary nodes
3824 won't result in an error return from the function
3825 @type ignore_size: boolean
3826 @param ignore_size: if true, the current known size of the disk
3827 will not be used during the disk activation, useful for cases
3828 when the size is wrong
3829 @return: False if the operation failed, otherwise a list of
3830 (host, instance_visible_name, node_visible_name)
3831 with the mapping from node devices to instance devices
3836 iname = instance.name
3837 # With the two passes mechanism we try to reduce the window of
3838 # opportunity for the race condition of switching DRBD to primary
3839 # before handshaking occurred, but we do not eliminate it
3841 # The proper fix would be to wait (with some limits) until the
3842 # connection has been made and drbd transitions from WFConnection
3843 into any other network-connected state (Connected, SyncTarget, SyncSource, etc.)
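# Rough outline of the two passes below (summary only, not from the original
# source):
#   pass 1: assemble every disk on all of its nodes with is_primary=False
#   pass 2: re-assemble only on instance.primary_node with is_primary=True,
#           recording the resulting device path for the caller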
3846 # 1st pass, assemble on all nodes in secondary mode
3847 for inst_disk in instance.disks:
3848 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3850 node_disk = node_disk.Copy()
3851 node_disk.UnsetSize()
3852 lu.cfg.SetDiskID(node_disk, node)
3853 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3854 msg = result.fail_msg
3856 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3857 " (is_primary=False, pass=1): %s",
3858 inst_disk.iv_name, node, msg)
3859 if not ignore_secondaries:
3862 # FIXME: race condition on drbd migration to primary
3864 # 2nd pass, do only the primary node
3865 for inst_disk in instance.disks:
3868 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3869 if node != instance.primary_node:
3872 node_disk = node_disk.Copy()
3873 node_disk.UnsetSize()
3874 lu.cfg.SetDiskID(node_disk, node)
3875 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3876 msg = result.fail_msg
3878 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3879 " (is_primary=True, pass=2): %s",
3880 inst_disk.iv_name, node, msg)
3883 dev_path = result.payload
3885 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3887 # leave the disks configured for the primary node
3888 # this is a workaround that would be fixed better by
3889 # improving the logical/physical id handling
3890 for disk in instance.disks:
3891 lu.cfg.SetDiskID(disk, instance.primary_node)
3893 return disks_ok, device_info
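# A minimal usage sketch, mirroring LUActivateInstanceDisks.Exec above: the
# boolean status must be checked before the device list is trusted.
#
#   disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")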
3896 def _StartInstanceDisks(lu, instance, force):
3897 """Start the disks of an instance.
3900 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3901 ignore_secondaries=force)
3903 _ShutdownInstanceDisks(lu, instance)
3904 if force is not None and not force:
3905 lu.proc.LogWarning("", hint="If the message above refers to a"
3907 " you can retry the operation using '--force'.")
3908 raise errors.OpExecError("Disk consistency error")
3911 class LUDeactivateInstanceDisks(NoHooksLU):
3912 """Shutdown an instance's disks.
3915 _OP_REQP = ["instance_name"]
3918 def ExpandNames(self):
3919 self._ExpandAndLockInstance()
3920 self.needed_locks[locking.LEVEL_NODE] = []
3921 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3923 def DeclareLocks(self, level):
3924 if level == locking.LEVEL_NODE:
3925 self._LockInstancesNodes()
3927 def CheckPrereq(self):
3928 """Check prerequisites.
3930 This checks that the instance is in the cluster.
3933 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3934 assert self.instance is not None, \
3935 "Cannot retrieve locked instance %s" % self.op.instance_name
3937 def Exec(self, feedback_fn):
3938 """Deactivate the disks
3941 instance = self.instance
3942 _SafeShutdownInstanceDisks(self, instance)
3945 def _SafeShutdownInstanceDisks(lu, instance):
3946 """Shutdown block devices of an instance.
3948 This function checks if an instance is running, before calling
3949 _ShutdownInstanceDisks.
3952 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
3953 _ShutdownInstanceDisks(lu, instance)
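# The "safe" variant first verifies via _CheckInstanceDown that the instance
# is stopped, so LUDeactivateInstanceDisks cannot tear the disks out from
# under a running instance.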
3956 def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3957 """Shutdown block devices of an instance.
3959 This does the shutdown on all nodes of the instance.
3961 If ignore_primary is false, errors on the primary node are
3966 for disk in instance.disks:
3967 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3968 lu.cfg.SetDiskID(top_disk, node)
3969 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3970 msg = result.fail_msg
3972 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3973 disk.iv_name, node, msg)
3974 if not ignore_primary or node != instance.primary_node:
3979 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3980 """Checks if a node has enough free memory.
3982 This function checks whether a given node has the needed amount of free
3983 memory. In case the node has less memory or we cannot get the
3984 information from the node, this function raises an OpPrereqError
3987 @type lu: C{LogicalUnit}
3988 @param lu: a logical unit from which we get configuration data
3990 @param node: the node to check
3991 @type reason: C{str}
3992 @param reason: string to use in the error message
3993 @type requested: C{int}
3994 @param requested: the amount of memory in MiB to check for
3995 @type hypervisor_name: C{str}
3996 @param hypervisor_name: the hypervisor to ask for memory stats
3997 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3998 we cannot check the node
4001 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4002 nodeinfo[node].Raise("Can't get data from node %s" % node,
4003 prereq=True, ecode=errors.ECODE_ENVIRON)
4004 free_mem = nodeinfo[node].payload.get('memory_free', None)
4005 if not isinstance(free_mem, int):
4006 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4007 " was '%s'" % (node, free_mem),
4008 errors.ECODE_ENVIRON)
4009 if requested > free_mem:
4010 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4011 " needed %s MiB, available %s MiB" %
4012 (node, reason, requested, free_mem),
4016 def _CheckNodesFreeDisk(lu, nodenames, requested):
4017 """Checks if nodes have enough free disk space in the default VG.
4019 This function checks whether all given nodes have the needed amount of
4020 free disk space. In case any node has less disk or we cannot get the
4021 information from the node, this function raises an OpPrereqError
4024 @type lu: C{LogicalUnit}
4025 @param lu: a logical unit from which we get configuration data
4026 @type nodenames: C{list}
4027 @param nodenames: the list of node names to check
4028 @type requested: C{int}
4029 @param requested: the amount of disk in MiB to check for
4030 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4031 we cannot check the node
4034 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4035 lu.cfg.GetHypervisorType())
4036 for node in nodenames:
4037 info = nodeinfo[node]
4038 info.Raise("Cannot get current information from node %s" % node,
4039 prereq=True, ecode=errors.ECODE_ENVIRON)
4040 vg_free = info.payload.get("vg_free", None)
4041 if not isinstance(vg_free, int):
4042 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4043 " result was '%s'" % (node, vg_free),
4044 errors.ECODE_ENVIRON)
4045 if requested > vg_free:
4046 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4047 " required %d MiB, available %d MiB" %
4048 (node, requested, vg_free),
4052 class LUStartupInstance(LogicalUnit):
4053 """Starts an instance.
4056 HPATH = "instance-start"
4057 HTYPE = constants.HTYPE_INSTANCE
4058 _OP_REQP = ["instance_name", "force"]
4061 def ExpandNames(self):
4062 self._ExpandAndLockInstance()
4064 def BuildHooksEnv(self):
4067 This runs on master, primary and secondary nodes of the instance.
4071 "FORCE": self.op.force,
4073 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4074 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4077 def CheckPrereq(self):
4078 """Check prerequisites.
4080 This checks that the instance is in the cluster.
4083 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4084 assert self.instance is not None, \
4085 "Cannot retrieve locked instance %s" % self.op.instance_name
4088 self.beparams = getattr(self.op, "beparams", {})
4090 if not isinstance(self.beparams, dict):
4091 raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4092 " dict" % (type(self.beparams), ),
4094 # fill the beparams dict
4095 utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
4096 self.op.beparams = self.beparams
4099 self.hvparams = getattr(self.op, "hvparams", {})
4101 if not isinstance(self.hvparams, dict):
4102 raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4103 " dict" % (type(self.hvparams), ),
4106 # check hypervisor parameter syntax (locally)
4107 cluster = self.cfg.GetClusterInfo()
4108 utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
4109 filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
4111 filled_hvp.update(self.hvparams)
4112 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4113 hv_type.CheckParameterSyntax(filled_hvp)
4114 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4115 self.op.hvparams = self.hvparams
4117 _CheckNodeOnline(self, instance.primary_node)
4119 bep = self.cfg.GetClusterInfo().FillBE(instance)
4120 # check bridges existence
4121 _CheckInstanceBridgesExist(self, instance)
4123 remote_info = self.rpc.call_instance_info(instance.primary_node,
4125 instance.hypervisor)
4126 remote_info.Raise("Error checking node %s" % instance.primary_node,
4127 prereq=True, ecode=errors.ECODE_ENVIRON)
4128 if not remote_info.payload: # not running already
4129 _CheckNodeFreeMemory(self, instance.primary_node,
4130 "starting instance %s" % instance.name,
4131 bep[constants.BE_MEMORY], instance.hypervisor)
4133 def Exec(self, feedback_fn):
4134 """Start the instance.
4137 instance = self.instance
4138 force = self.op.force
4140 self.cfg.MarkInstanceUp(instance.name)
4142 node_current = instance.primary_node
4144 _StartInstanceDisks(self, instance, force)
4146 result = self.rpc.call_instance_start(node_current, instance,
4147 self.hvparams, self.beparams)
4148 msg = result.fail_msg
4150 _ShutdownInstanceDisks(self, instance)
4151 raise errors.OpExecError("Could not start instance: %s" % msg)
4154 class LURebootInstance(LogicalUnit):
4155 """Reboot an instance.
4158 HPATH = "instance-reboot"
4159 HTYPE = constants.HTYPE_INSTANCE
4160 _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
4163 def CheckArguments(self):
4164 """Check the arguments.
4167 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4168 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4170 def ExpandNames(self):
4171 if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
4172 constants.INSTANCE_REBOOT_HARD,
4173 constants.INSTANCE_REBOOT_FULL]:
4174 raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
4175 (constants.INSTANCE_REBOOT_SOFT,
4176 constants.INSTANCE_REBOOT_HARD,
4177 constants.INSTANCE_REBOOT_FULL))
4178 self._ExpandAndLockInstance()
4180 def BuildHooksEnv(self):
4183 This runs on master, primary and secondary nodes of the instance.
4187 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4188 "REBOOT_TYPE": self.op.reboot_type,
4189 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4191 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4192 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4195 def CheckPrereq(self):
4196 """Check prerequisites.
4198 This checks that the instance is in the cluster.
4201 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4202 assert self.instance is not None, \
4203 "Cannot retrieve locked instance %s" % self.op.instance_name
4205 _CheckNodeOnline(self, instance.primary_node)
4207 # check bridges existence
4208 _CheckInstanceBridgesExist(self, instance)
4210 def Exec(self, feedback_fn):
4211 """Reboot the instance.
4214 instance = self.instance
4215 ignore_secondaries = self.op.ignore_secondaries
4216 reboot_type = self.op.reboot_type
4218 node_current = instance.primary_node
4220 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4221 constants.INSTANCE_REBOOT_HARD]:
4222 for disk in instance.disks:
4223 self.cfg.SetDiskID(disk, node_current)
4224 result = self.rpc.call_instance_reboot(node_current, instance,
4226 self.shutdown_timeout)
4227 result.Raise("Could not reboot instance")
4229 result = self.rpc.call_instance_shutdown(node_current, instance,
4230 self.shutdown_timeout)
4231 result.Raise("Could not shutdown instance for full reboot")
4232 _ShutdownInstanceDisks(self, instance)
4233 _StartInstanceDisks(self, instance, ignore_secondaries)
4234 result = self.rpc.call_instance_start(node_current, instance, None, None)
4235 msg = result.fail_msg
4237 _ShutdownInstanceDisks(self, instance)
4238 raise errors.OpExecError("Could not start instance for"
4239 " full reboot: %s" % msg)
4241 self.cfg.MarkInstanceUp(instance.name)
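# Soft and hard reboots are delegated to the hypervisor through
# call_instance_reboot; a full reboot is emulated by shutting the instance
# down, cycling its disks and starting it again, after which the instance is
# marked up in the configuration.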
4244 class LUShutdownInstance(LogicalUnit):
4245 """Shutdown an instance.
4248 HPATH = "instance-stop"
4249 HTYPE = constants.HTYPE_INSTANCE
4250 _OP_REQP = ["instance_name"]
4253 def CheckArguments(self):
4254 """Check the arguments.
4257 self.timeout = getattr(self.op, "timeout",
4258 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4260 def ExpandNames(self):
4261 self._ExpandAndLockInstance()
4263 def BuildHooksEnv(self):
4266 This runs on master, primary and secondary nodes of the instance.
4269 env = _BuildInstanceHookEnvByObject(self, self.instance)
4270 env["TIMEOUT"] = self.timeout
4271 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4274 def CheckPrereq(self):
4275 """Check prerequisites.
4277 This checks that the instance is in the cluster.
4280 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4281 assert self.instance is not None, \
4282 "Cannot retrieve locked instance %s" % self.op.instance_name
4283 _CheckNodeOnline(self, self.instance.primary_node)
4285 def Exec(self, feedback_fn):
4286 """Shutdown the instance.
4289 instance = self.instance
4290 node_current = instance.primary_node
4291 timeout = self.timeout
4292 self.cfg.MarkInstanceDown(instance.name)
4293 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4294 msg = result.fail_msg
4296 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4298 _ShutdownInstanceDisks(self, instance)
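# A failing shutdown RPC is only logged as a warning here; the instance is
# marked down in the configuration beforehand and its disks are deactivated
# regardless.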
4301 class LUReinstallInstance(LogicalUnit):
4302 """Reinstall an instance.
4305 HPATH = "instance-reinstall"
4306 HTYPE = constants.HTYPE_INSTANCE
4307 _OP_REQP = ["instance_name"]
4310 def ExpandNames(self):
4311 self._ExpandAndLockInstance()
4313 def BuildHooksEnv(self):
4316 This runs on master, primary and secondary nodes of the instance.
4319 env = _BuildInstanceHookEnvByObject(self, self.instance)
4320 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4323 def CheckPrereq(self):
4324 """Check prerequisites.
4326 This checks that the instance is in the cluster and is not running.
4329 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4330 assert instance is not None, \
4331 "Cannot retrieve locked instance %s" % self.op.instance_name
4332 _CheckNodeOnline(self, instance.primary_node)
4334 if instance.disk_template == constants.DT_DISKLESS:
4335 raise errors.OpPrereqError("Instance '%s' has no disks" %
4336 self.op.instance_name,
4338 _CheckInstanceDown(self, instance, "cannot reinstall")
4340 self.op.os_type = getattr(self.op, "os_type", None)
4341 self.op.force_variant = getattr(self.op, "force_variant", False)
4342 if self.op.os_type is not None:
4344 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4345 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4347 self.instance = instance
4349 def Exec(self, feedback_fn):
4350 """Reinstall the instance.
4353 inst = self.instance
4355 if self.op.os_type is not None:
4356 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4357 inst.os = self.op.os_type
4358 self.cfg.Update(inst, feedback_fn)
4360 _StartInstanceDisks(self, inst, None)
4362 feedback_fn("Running the instance OS create scripts...")
4363 # FIXME: pass debug option from opcode to backend
4364 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4365 self.op.debug_level)
4366 result.Raise("Could not install OS for instance %s on node %s" %
4367 (inst.name, inst.primary_node))
4369 _ShutdownInstanceDisks(self, inst)
4372 class LURecreateInstanceDisks(LogicalUnit):
4373 """Recreate an instance's missing disks.
4376 HPATH = "instance-recreate-disks"
4377 HTYPE = constants.HTYPE_INSTANCE
4378 _OP_REQP = ["instance_name", "disks"]
4381 def CheckArguments(self):
4382 """Check the arguments.
4385 if not isinstance(self.op.disks, list):
4386 raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4387 for item in self.op.disks:
4388 if (not isinstance(item, int) or
4390 raise errors.OpPrereqError("Invalid disk specification '%s'" %
4391 str(item), errors.ECODE_INVAL)
4393 def ExpandNames(self):
4394 self._ExpandAndLockInstance()
4396 def BuildHooksEnv(self):
4399 This runs on master, primary and secondary nodes of the instance.
4402 env = _BuildInstanceHookEnvByObject(self, self.instance)
4403 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4406 def CheckPrereq(self):
4407 """Check prerequisites.
4409 This checks that the instance is in the cluster and is not running.
4412 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4413 assert instance is not None, \
4414 "Cannot retrieve locked instance %s" % self.op.instance_name
4415 _CheckNodeOnline(self, instance.primary_node)
4417 if instance.disk_template == constants.DT_DISKLESS:
4418 raise errors.OpPrereqError("Instance '%s' has no disks" %
4419 self.op.instance_name, errors.ECODE_INVAL)
4420 _CheckInstanceDown(self, instance, "cannot recreate disks")
4422 if not self.op.disks:
4423 self.op.disks = range(len(instance.disks))
4425 for idx in self.op.disks:
4426 if idx >= len(instance.disks):
4427 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4430 self.instance = instance
4432 def Exec(self, feedback_fn):
4433 """Recreate the disks.
4437 for idx, _ in enumerate(self.instance.disks):
4438 if idx not in self.op.disks: # disk idx has not been passed in
4442 _CreateDisks(self, self.instance, to_skip=to_skip)
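# Disk indices that were not requested in the opcode end up in to_skip, so
# _CreateDisks only recreates the disks explicitly named (or all of them when
# no list was given, see CheckPrereq above).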
4445 class LURenameInstance(LogicalUnit):
4446 """Rename an instance.
4449 HPATH = "instance-rename"
4450 HTYPE = constants.HTYPE_INSTANCE
4451 _OP_REQP = ["instance_name", "new_name"]
4453 def BuildHooksEnv(self):
4456 This runs on master, primary and secondary nodes of the instance.
4459 env = _BuildInstanceHookEnvByObject(self, self.instance)
4460 env["INSTANCE_NEW_NAME"] = self.op.new_name
4461 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4464 def CheckPrereq(self):
4465 """Check prerequisites.
4467 This checks that the instance is in the cluster and is not running.
4470 self.op.instance_name = _ExpandInstanceName(self.cfg,
4471 self.op.instance_name)
4472 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4473 assert instance is not None
4474 _CheckNodeOnline(self, instance.primary_node)
4475 _CheckInstanceDown(self, instance, "cannot rename")
4476 self.instance = instance
4478 # new name verification
4479 name_info = utils.GetHostInfo(self.op.new_name)
4481 self.op.new_name = new_name = name_info.name
4482 instance_list = self.cfg.GetInstanceList()
4483 if new_name in instance_list:
4484 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4485 new_name, errors.ECODE_EXISTS)
4487 if not getattr(self.op, "ignore_ip", False):
4488 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4489 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4490 (name_info.ip, new_name),
4491 errors.ECODE_NOTUNIQUE)
4494 def Exec(self, feedback_fn):
4495 """Reinstall the instance.
4498 inst = self.instance
4499 old_name = inst.name
4501 if inst.disk_template == constants.DT_FILE:
4502 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4504 self.cfg.RenameInstance(inst.name, self.op.new_name)
4505 # Change the instance lock. This is definitely safe while we hold the BGL
4506 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4507 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4509 # re-read the instance from the configuration after rename
4510 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4512 if inst.disk_template == constants.DT_FILE:
4513 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4514 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4515 old_file_storage_dir,
4516 new_file_storage_dir)
4517 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4518 " (but the instance has been renamed in Ganeti)" %
4519 (inst.primary_node, old_file_storage_dir,
4520 new_file_storage_dir))
4522 _StartInstanceDisks(self, inst, None)
4524 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4525 old_name, self.op.debug_level)
4526 msg = result.fail_msg
4528 msg = ("Could not run OS rename script for instance %s on node %s"
4529 " (but the instance has been renamed in Ganeti): %s" %
4530 (inst.name, inst.primary_node, msg))
4531 self.proc.LogWarning(msg)
4533 _ShutdownInstanceDisks(self, inst)
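# For file-based instances the storage directory is renamed on the primary
# node as well; a failure there is fatal, whereas a failed OS rename script
# only produces a warning, since the configuration rename has already been
# committed at that point.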
4536 class LURemoveInstance(LogicalUnit):
4537 """Remove an instance.
4540 HPATH = "instance-remove"
4541 HTYPE = constants.HTYPE_INSTANCE
4542 _OP_REQP = ["instance_name", "ignore_failures"]
4545 def CheckArguments(self):
4546 """Check the arguments.
4549 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4550 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4552 def ExpandNames(self):
4553 self._ExpandAndLockInstance()
4554 self.needed_locks[locking.LEVEL_NODE] = []
4555 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4557 def DeclareLocks(self, level):
4558 if level == locking.LEVEL_NODE:
4559 self._LockInstancesNodes()
4561 def BuildHooksEnv(self):
4564 This runs on master, primary and secondary nodes of the instance.
4567 env = _BuildInstanceHookEnvByObject(self, self.instance)
4568 env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4569 nl = [self.cfg.GetMasterNode()]
4570 nl_post = list(self.instance.all_nodes) + nl
4571 return env, nl, nl_post
4573 def CheckPrereq(self):
4574 """Check prerequisites.
4576 This checks that the instance is in the cluster.
4579 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4580 assert self.instance is not None, \
4581 "Cannot retrieve locked instance %s" % self.op.instance_name
4583 def Exec(self, feedback_fn):
4584 """Remove the instance.
4587 instance = self.instance
4588 logging.info("Shutting down instance %s on node %s",
4589 instance.name, instance.primary_node)
4591 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4592 self.shutdown_timeout)
4593 msg = result.fail_msg
4595 if self.op.ignore_failures:
4596 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4598 raise errors.OpExecError("Could not shutdown instance %s on"
4600 (instance.name, instance.primary_node, msg))
4602 logging.info("Removing block devices for instance %s", instance.name)
4604 if not _RemoveDisks(self, instance):
4605 if self.op.ignore_failures:
4606 feedback_fn("Warning: can't remove instance's disks")
4608 raise errors.OpExecError("Can't remove instance's disks")
4610 logging.info("Removing instance %s out of cluster config", instance.name)
4612 self.cfg.RemoveInstance(instance.name)
4613 self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4616 class LUQueryInstances(NoHooksLU):
4617 """Logical unit for querying instances.
4620 # pylint: disable-msg=W0142
4621 _OP_REQP = ["output_fields", "names", "use_locking"]
4623 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4624 "serial_no", "ctime", "mtime", "uuid"]
4625 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4627 "disk_template", "ip", "mac", "bridge",
4628 "nic_mode", "nic_link",
4629 "sda_size", "sdb_size", "vcpus", "tags",
4630 "network_port", "beparams",
4631 r"(disk)\.(size)/([0-9]+)",
4632 r"(disk)\.(sizes)", "disk_usage",
4633 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4634 r"(nic)\.(bridge)/([0-9]+)",
4635 r"(nic)\.(macs|ips|modes|links|bridges)",
4636 r"(disk|nic)\.(count)",
4638 ] + _SIMPLE_FIELDS +
4640 for name in constants.HVS_PARAMETERS
4641 if name not in constants.HVC_GLOBALS] +
4643 for name in constants.BES_PARAMETERS])
4644 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4647 def ExpandNames(self):
4648 _CheckOutputFields(static=self._FIELDS_STATIC,
4649 dynamic=self._FIELDS_DYNAMIC,
4650 selected=self.op.output_fields)
4652 self.needed_locks = {}
4653 self.share_locks[locking.LEVEL_INSTANCE] = 1
4654 self.share_locks[locking.LEVEL_NODE] = 1
4657 self.wanted = _GetWantedInstances(self, self.op.names)
4659 self.wanted = locking.ALL_SET
4661 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4662 self.do_locking = self.do_node_query and self.op.use_locking
4664 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4665 self.needed_locks[locking.LEVEL_NODE] = []
4666 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4668 def DeclareLocks(self, level):
4669 if level == locking.LEVEL_NODE and self.do_locking:
4670 self._LockInstancesNodes()
4672 def CheckPrereq(self):
4673 """Check prerequisites.
4678 def Exec(self, feedback_fn):
4679 """Computes the list of nodes and their attributes.
4682 # pylint: disable-msg=R0912
4683 # way too many branches here
4684 all_info = self.cfg.GetAllInstancesInfo()
4685 if self.wanted == locking.ALL_SET:
4686 # caller didn't specify instance names, so ordering is not important
4688 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4690 instance_names = all_info.keys()
4691 instance_names = utils.NiceSort(instance_names)
4693 # caller did specify names, so we must keep the ordering
4695 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4697 tgt_set = all_info.keys()
4698 missing = set(self.wanted).difference(tgt_set)
4700 raise errors.OpExecError("Some instances were removed before"
4701 " retrieving their data: %s" % missing)
4702 instance_names = self.wanted
4704 instance_list = [all_info[iname] for iname in instance_names]
4706 # begin data gathering
4708 nodes = frozenset([inst.primary_node for inst in instance_list])
4709 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4713 if self.do_node_query:
4715 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4717 result = node_data[name]
4719 # offline nodes will be in both lists
4720 off_nodes.append(name)
4722 bad_nodes.append(name)
4725 live_data.update(result.payload)
4726 # else no instance is alive
4728 live_data = dict([(name, {}) for name in instance_names])
4730 # end data gathering
4735 cluster = self.cfg.GetClusterInfo()
4736 for instance in instance_list:
4738 i_hv = cluster.FillHV(instance, skip_globals=True)
4739 i_be = cluster.FillBE(instance)
4740 i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4741 nic.nicparams) for nic in instance.nics]
4742 for field in self.op.output_fields:
4743 st_match = self._FIELDS_STATIC.Matches(field)
4744 if field in self._SIMPLE_FIELDS:
4745 val = getattr(instance, field)
4746 elif field == "pnode":
4747 val = instance.primary_node
4748 elif field == "snodes":
4749 val = list(instance.secondary_nodes)
4750 elif field == "admin_state":
4751 val = instance.admin_up
4752 elif field == "oper_state":
4753 if instance.primary_node in bad_nodes:
4756 val = bool(live_data.get(instance.name))
4757 elif field == "status":
4758 if instance.primary_node in off_nodes:
4759 val = "ERROR_nodeoffline"
4760 elif instance.primary_node in bad_nodes:
4761 val = "ERROR_nodedown"
4763 running = bool(live_data.get(instance.name))
4765 if instance.admin_up:
4770 if instance.admin_up:
4774 elif field == "oper_ram":
4775 if instance.primary_node in bad_nodes:
4777 elif instance.name in live_data:
4778 val = live_data[instance.name].get("memory", "?")
4781 elif field == "vcpus":
4782 val = i_be[constants.BE_VCPUS]
4783 elif field == "disk_template":
4784 val = instance.disk_template
4787 val = instance.nics[0].ip
4790 elif field == "nic_mode":
4792 val = i_nicp[0][constants.NIC_MODE]
4795 elif field == "nic_link":
4797 val = i_nicp[0][constants.NIC_LINK]
4800 elif field == "bridge":
4801 if (instance.nics and
4802 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4803 val = i_nicp[0][constants.NIC_LINK]
4806 elif field == "mac":
4808 val = instance.nics[0].mac
4811 elif field == "sda_size" or field == "sdb_size":
4812 idx = ord(field[2]) - ord('a')
4814 val = instance.FindDisk(idx).size
4815 except errors.OpPrereqError:
4817 elif field == "disk_usage": # total disk usage per node
4818 disk_sizes = [{'size': disk.size} for disk in instance.disks]
4819 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4820 elif field == "tags":
4821 val = list(instance.GetTags())
4822 elif field == "hvparams":
4824 elif (field.startswith(HVPREFIX) and
4825 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4826 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4827 val = i_hv.get(field[len(HVPREFIX):], None)
4828 elif field == "beparams":
4830 elif (field.startswith(BEPREFIX) and
4831 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4832 val = i_be.get(field[len(BEPREFIX):], None)
4833 elif st_match and st_match.groups():
4834 # matches a variable list
4835 st_groups = st_match.groups()
4836 if st_groups and st_groups[0] == "disk":
4837 if st_groups[1] == "count":
4838 val = len(instance.disks)
4839 elif st_groups[1] == "sizes":
4840 val = [disk.size for disk in instance.disks]
4841 elif st_groups[1] == "size":
4843 val = instance.FindDisk(st_groups[2]).size
4844 except errors.OpPrereqError:
4847 assert False, "Unhandled disk parameter"
4848 elif st_groups[0] == "nic":
4849 if st_groups[1] == "count":
4850 val = len(instance.nics)
4851 elif st_groups[1] == "macs":
4852 val = [nic.mac for nic in instance.nics]
4853 elif st_groups[1] == "ips":
4854 val = [nic.ip for nic in instance.nics]
4855 elif st_groups[1] == "modes":
4856 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4857 elif st_groups[1] == "links":
4858 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4859 elif st_groups[1] == "bridges":
4862 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4863 val.append(nicp[constants.NIC_LINK])
4868 nic_idx = int(st_groups[2])
4869 if nic_idx >= len(instance.nics):
4872 if st_groups[1] == "mac":
4873 val = instance.nics[nic_idx].mac
4874 elif st_groups[1] == "ip":
4875 val = instance.nics[nic_idx].ip
4876 elif st_groups[1] == "mode":
4877 val = i_nicp[nic_idx][constants.NIC_MODE]
4878 elif st_groups[1] == "link":
4879 val = i_nicp[nic_idx][constants.NIC_LINK]
4880 elif st_groups[1] == "bridge":
4881 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4882 if nic_mode == constants.NIC_MODE_BRIDGED:
4883 val = i_nicp[nic_idx][constants.NIC_LINK]
4887 assert False, "Unhandled NIC parameter"
4889 assert False, ("Declared but unhandled variable parameter '%s'" %
4892 assert False, "Declared but unhandled parameter '%s'" % field
4899 class LUFailoverInstance(LogicalUnit):
4900 """Failover an instance.
4903 HPATH = "instance-failover"
4904 HTYPE = constants.HTYPE_INSTANCE
4905 _OP_REQP = ["instance_name", "ignore_consistency"]
4908 def CheckArguments(self):
4909 """Check the arguments.
4912 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4913 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4915 def ExpandNames(self):
4916 self._ExpandAndLockInstance()
4917 self.needed_locks[locking.LEVEL_NODE] = []
4918 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4920 def DeclareLocks(self, level):
4921 if level == locking.LEVEL_NODE:
4922 self._LockInstancesNodes()
4924 def BuildHooksEnv(self):
4927 This runs on master, primary and secondary nodes of the instance.
4930 instance = self.instance
4931 source_node = instance.primary_node
4932 target_node = instance.secondary_nodes[0]
4934 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4935 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4936 "OLD_PRIMARY": source_node,
4937 "OLD_SECONDARY": target_node,
4938 "NEW_PRIMARY": target_node,
4939 "NEW_SECONDARY": source_node,
4941 env.update(_BuildInstanceHookEnvByObject(self, instance))
4942 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4944 nl_post.append(source_node)
4945 return env, nl, nl_post
4947 def CheckPrereq(self):
4948 """Check prerequisites.
4950 This checks that the instance is in the cluster.
4953 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4954 assert self.instance is not None, \
4955 "Cannot retrieve locked instance %s" % self.op.instance_name
4957 bep = self.cfg.GetClusterInfo().FillBE(instance)
4958 if instance.disk_template not in constants.DTS_NET_MIRROR:
4959 raise errors.OpPrereqError("Instance's disk layout is not"
4960 " network mirrored, cannot failover.",
4963 secondary_nodes = instance.secondary_nodes
4964 if not secondary_nodes:
4965 raise errors.ProgrammerError("no secondary node but using "
4966 "a mirrored disk template")
4968 target_node = secondary_nodes[0]
4969 _CheckNodeOnline(self, target_node)
4970 _CheckNodeNotDrained(self, target_node)
4971 if instance.admin_up:
4972 # check memory requirements on the secondary node
4973 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4974 instance.name, bep[constants.BE_MEMORY],
4975 instance.hypervisor)
4977 self.LogInfo("Not checking memory on the secondary node as"
4978 " instance will not be started")
4980 # check bridge existence
4981 _CheckInstanceBridgesExist(self, instance, node=target_node)
4983 def Exec(self, feedback_fn):
4984 """Failover an instance.
4986 The failover is done by shutting it down on its present node and
4987 starting it on the secondary.
4990 instance = self.instance
4992 source_node = instance.primary_node
4993 target_node = instance.secondary_nodes[0]
4995 if instance.admin_up:
4996 feedback_fn("* checking disk consistency between source and target")
4997 for dev in instance.disks:
4998 # for drbd, these are drbd over lvm
4999 if not _CheckDiskConsistency(self, dev, target_node, False):
5000 if not self.op.ignore_consistency:
5001 raise errors.OpExecError("Disk %s is degraded on target node,"
5002 " aborting failover." % dev.iv_name)
5004 feedback_fn("* not checking disk consistency as instance is not running")
5006 feedback_fn("* shutting down instance on source node")
5007 logging.info("Shutting down instance %s on node %s",
5008 instance.name, source_node)
5010 result = self.rpc.call_instance_shutdown(source_node, instance,
5011 self.shutdown_timeout)
5012 msg = result.fail_msg
5014 if self.op.ignore_consistency:
5015 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5016 " Proceeding anyway. Please make sure node"
5017 " %s is down. Error details: %s",
5018 instance.name, source_node, source_node, msg)
5020 raise errors.OpExecError("Could not shutdown instance %s on"
5022 (instance.name, source_node, msg))
5024 feedback_fn("* deactivating the instance's disks on source node")
5025 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5026 raise errors.OpExecError("Can't shut down the instance's disks.")
5028 instance.primary_node = target_node
5029 # distribute new instance config to the other nodes
5030 self.cfg.Update(instance, feedback_fn)
5032 # Only start the instance if it's marked as up
5033 if instance.admin_up:
5034 feedback_fn("* activating the instance's disks on target node")
5035 logging.info("Starting instance %s on node %s",
5036 instance.name, target_node)
5038 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5039 ignore_secondaries=True)
5041 _ShutdownInstanceDisks(self, instance)
5042 raise errors.OpExecError("Can't activate the instance's disks")
5044 feedback_fn("* starting the instance on the target node")
5045 result = self.rpc.call_instance_start(target_node, instance, None, None)
5046 msg = result.fail_msg
5048 _ShutdownInstanceDisks(self, instance)
5049 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5050 (instance.name, target_node, msg))
5053 class LUMigrateInstance(LogicalUnit):
5054 """Migrate an instance.
5056 This is migration without shutting down, compared to the failover,
5057 which is done with shutdown.
5060 HPATH = "instance-migrate"
5061 HTYPE = constants.HTYPE_INSTANCE
5062 _OP_REQP = ["instance_name", "live", "cleanup"]
5066 def ExpandNames(self):
5067 self._ExpandAndLockInstance()
5069 self.needed_locks[locking.LEVEL_NODE] = []
5070 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5072 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5073 self.op.live, self.op.cleanup)
5074 self.tasklets = [self._migrater]
5076 def DeclareLocks(self, level):
5077 if level == locking.LEVEL_NODE:
5078 self._LockInstancesNodes()
5080 def BuildHooksEnv(self):
5083 This runs on master, primary and secondary nodes of the instance.
5086 instance = self._migrater.instance
5087 source_node = instance.primary_node
5088 target_node = instance.secondary_nodes[0]
5089 env = _BuildInstanceHookEnvByObject(self, instance)
5090 env["MIGRATE_LIVE"] = self.op.live
5091 env["MIGRATE_CLEANUP"] = self.op.cleanup
5093 "OLD_PRIMARY": source_node,
5094 "OLD_SECONDARY": target_node,
5095 "NEW_PRIMARY": target_node,
5096 "NEW_SECONDARY": source_node,
5098 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5100 nl_post.append(source_node)
5101 return env, nl, nl_post
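# The actual migration work is delegated to the TLMigrateInstance tasklet
# created in ExpandNames; this LU itself only handles locking and the hook
# environment.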
5104 class LUMoveInstance(LogicalUnit):
5105 """Move an instance by data-copying.
5108 HPATH = "instance-move"
5109 HTYPE = constants.HTYPE_INSTANCE
5110 _OP_REQP = ["instance_name", "target_node"]
5113 def CheckArguments(self):
5114 """Check the arguments.
5117 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5118 constants.DEFAULT_SHUTDOWN_TIMEOUT)
5120 def ExpandNames(self):
5121 self._ExpandAndLockInstance()
5122 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5123 self.op.target_node = target_node
5124 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5125 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5127 def DeclareLocks(self, level):
5128 if level == locking.LEVEL_NODE:
5129 self._LockInstancesNodes(primary_only=True)
5131 def BuildHooksEnv(self):
5134 This runs on master, primary and secondary nodes of the instance.
5138 "TARGET_NODE": self.op.target_node,
5139 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5141 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5142 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5143 self.op.target_node]
5146 def CheckPrereq(self):
5147 """Check prerequisites.
5149 This checks that the instance is in the cluster.
5152 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5153 assert self.instance is not None, \
5154 "Cannot retrieve locked instance %s" % self.op.instance_name
5156 node = self.cfg.GetNodeInfo(self.op.target_node)
5157 assert node is not None, \
5158 "Cannot retrieve locked node %s" % self.op.target_node
5160 self.target_node = target_node = node.name
5162 if target_node == instance.primary_node:
5163 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5164 (instance.name, target_node),
5167 bep = self.cfg.GetClusterInfo().FillBE(instance)
5169 for idx, dsk in enumerate(instance.disks):
5170 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5171 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5172 " cannot copy" % idx, errors.ECODE_STATE)
5174 _CheckNodeOnline(self, target_node)
5175 _CheckNodeNotDrained(self, target_node)
5177 if instance.admin_up:
5178 # check memory requirements on the secondary node
5179 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5180 instance.name, bep[constants.BE_MEMORY],
5181 instance.hypervisor)
5183 self.LogInfo("Not checking memory on the secondary node as"
5184 " instance will not be started")
5186 # check bridge existence
5187 _CheckInstanceBridgesExist(self, instance, node=target_node)
5189 def Exec(self, feedback_fn):
5190 """Move an instance.
5192 The move is done by shutting it down on its present node, copying
5193 the data over (slow) and starting it on the new node.
5196 instance = self.instance
5198 source_node = instance.primary_node
5199 target_node = self.target_node
5201 self.LogInfo("Shutting down instance %s on source node %s",
5202 instance.name, source_node)
5204 result = self.rpc.call_instance_shutdown(source_node, instance,
5205 self.shutdown_timeout)
5206 msg = result.fail_msg
5208 if self.op.ignore_consistency:
5209 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5210 " Proceeding anyway. Please make sure node"
5211 " %s is down. Error details: %s",
5212 instance.name, source_node, source_node, msg)
5214 raise errors.OpExecError("Could not shutdown instance %s on"
5216 (instance.name, source_node, msg))
5218 # create the target disks
5220 _CreateDisks(self, instance, target_node=target_node)
5221 except errors.OpExecError:
5222 self.LogWarning("Device creation failed, reverting...")
5224 _RemoveDisks(self, instance, target_node=target_node)
5226 self.cfg.ReleaseDRBDMinors(instance.name)
5229 cluster_name = self.cfg.GetClusterInfo().cluster_name
5232 # activate, get path, copy the data over
5233 for idx, disk in enumerate(instance.disks):
5234 self.LogInfo("Copying data for disk %d", idx)
5235 result = self.rpc.call_blockdev_assemble(target_node, disk,
5236 instance.name, True)
5238 self.LogWarning("Can't assemble newly created disk %d: %s",
5239 idx, result.fail_msg)
5240 errs.append(result.fail_msg)
5242 dev_path = result.payload
5243 result = self.rpc.call_blockdev_export(source_node, disk,
5244 target_node, dev_path,
5247 self.LogWarning("Can't copy data over for disk %d: %s",
5248 idx, result.fail_msg)
5249 errs.append(result.fail_msg)
5253 self.LogWarning("Some disks failed to copy, aborting")
5255 _RemoveDisks(self, instance, target_node=target_node)
5257 self.cfg.ReleaseDRBDMinors(instance.name)
5258 raise errors.OpExecError("Errors during disk copy: %s" %
5261 instance.primary_node = target_node
5262 self.cfg.Update(instance, feedback_fn)
5264 self.LogInfo("Removing the disks on the original node")
5265 _RemoveDisks(self, instance, target_node=source_node)
5267 # Only start the instance if it's marked as up
5268 if instance.admin_up:
5269 self.LogInfo("Starting instance %s on node %s",
5270 instance.name, target_node)
5272 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5273 ignore_secondaries=True)
5275 _ShutdownInstanceDisks(self, instance)
5276 raise errors.OpExecError("Can't activate the instance's disks")
5278 result = self.rpc.call_instance_start(target_node, instance, None, None)
5279 msg = result.fail_msg
5281 _ShutdownInstanceDisks(self, instance)
5282 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5283 (instance.name, target_node, msg))
5286 class LUMigrateNode(LogicalUnit):
5287 """Migrate all instances from a node.
5290 HPATH = "node-migrate"
5291 HTYPE = constants.HTYPE_NODE
5292 _OP_REQP = ["node_name", "live"]
5295 def ExpandNames(self):
5296 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5298 self.needed_locks = {
5299 locking.LEVEL_NODE: [self.op.node_name],
5302 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5304 # Create tasklets for migrating all primary instances on this node
5308 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5309 logging.debug("Migrating instance %s", inst.name)
5310 names.append(inst.name)
5312 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5314 self.tasklets = tasklets
5316 # Declare instance locks
5317 self.needed_locks[locking.LEVEL_INSTANCE] = names
5319 def DeclareLocks(self, level):
5320 if level == locking.LEVEL_NODE:
5321 self._LockInstancesNodes()
5323 def BuildHooksEnv(self):
5326 This runs on the master, the primary and all the secondaries.
5330 "NODE_NAME": self.op.node_name,
5333 nl = [self.cfg.GetMasterNode()]
5335 return (env, nl, nl)
5338 class TLMigrateInstance(Tasklet):
5339 def __init__(self, lu, instance_name, live, cleanup):
5340 """Initializes this class.
5343 Tasklet.__init__(self, lu)
5346 self.instance_name = instance_name
5348 self.cleanup = cleanup
5350 def CheckPrereq(self):
5351 """Check prerequisites.
5353 This checks that the instance is in the cluster.
5356 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5357 instance = self.cfg.GetInstanceInfo(instance_name)
5358 assert instance is not None
5360 if instance.disk_template != constants.DT_DRBD8:
5361 raise errors.OpPrereqError("Instance's disk layout is not"
5362 " drbd8, cannot migrate.", errors.ECODE_STATE)
5364 secondary_nodes = instance.secondary_nodes
5365 if not secondary_nodes:
5366 raise errors.ConfigurationError("No secondary node but using"
5367 " drbd8 disk template")
5369 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5371 target_node = secondary_nodes[0]
5372 # check memory requirements on the secondary node
5373 _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5374 instance.name, i_be[constants.BE_MEMORY],
5375 instance.hypervisor)
5377 # check bridge existence
5378 _CheckInstanceBridgesExist(self, instance, node=target_node)
5380 if not self.cleanup:
5381 _CheckNodeNotDrained(self, target_node)
5382 result = self.rpc.call_instance_migratable(instance.primary_node,
5384 result.Raise("Can't migrate, please use failover",
5385 prereq=True, ecode=errors.ECODE_STATE)
5387 self.instance = instance
5389 def _WaitUntilSync(self):
5390 """Poll with custom rpc for disk sync.
5392 This uses our own step-based rpc call.
5395 self.feedback_fn("* wait until resync is done")
5399 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5401 self.instance.disks)
5403 for node, nres in result.items():
5404 nres.Raise("Cannot resync disks on node %s" % node)
5405 node_done, node_percent = nres.payload
5406 all_done = all_done and node_done
5407 if node_percent is not None:
5408 min_percent = min(min_percent, node_percent)
5410 if min_percent < 100:
5411 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5414 def _EnsureSecondary(self, node):
5415 """Demote a node to secondary.
5418 self.feedback_fn("* switching node %s to secondary mode" % node)
5420 for dev in self.instance.disks:
5421 self.cfg.SetDiskID(dev, node)
5423 result = self.rpc.call_blockdev_close(node, self.instance.name,
5424 self.instance.disks)
5425 result.Raise("Cannot change disk to secondary on node %s" % node)
5427 def _GoStandalone(self):
5428 """Disconnect from the network.
5431 self.feedback_fn("* changing into standalone mode")
5432 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5433 self.instance.disks)
5434 for node, nres in result.items():
5435 nres.Raise("Cannot disconnect disks node %s" % node)
5437 def _GoReconnect(self, multimaster):
5438 """Reconnect to the network.
5444 msg = "single-master"
5445 self.feedback_fn("* changing disks into %s mode" % msg)
5446 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5447 self.instance.disks,
5448 self.instance.name, multimaster)
5449 for node, nres in result.items():
5450 nres.Raise("Cannot change disks config on node %s" % node)
5452 def _ExecCleanup(self):
5453 """Try to cleanup after a failed migration.
5455 The cleanup is done by:
5456 - check that the instance is running only on one node
5457 (and update the config if needed)
5458 - change disks on its secondary node to secondary
5459 - wait until disks are fully synchronized
5460 - disconnect from the network
5461 - change disks into single-master mode
5462 - wait again until disks are fully synchronized
5465 instance = self.instance
5466 target_node = self.target_node
5467 source_node = self.source_node
5469 # check running on only one node
5470 self.feedback_fn("* checking where the instance actually runs"
5471 " (if this hangs, the hypervisor might be in"
5473 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5474 for node, result in ins_l.items():
5475 result.Raise("Can't contact node %s" % node)
5477 runningon_source = instance.name in ins_l[source_node].payload
5478 runningon_target = instance.name in ins_l[target_node].payload
5480 if runningon_source and runningon_target:
5481 raise errors.OpExecError("Instance seems to be running on two nodes,"
5482 " or the hypervisor is confused. You will have"
5483 " to ensure manually that it runs only on one"
5484 " and restart this operation.")
5486 if not (runningon_source or runningon_target):
5487 raise errors.OpExecError("Instance does not seem to be running at all."
5488 " In this case, it's safer to repair by"
5489 " running 'gnt-instance stop' to ensure disk"
5490 " shutdown, and then restarting it.")
5492 if runningon_target:
5493 # the migration has actually succeeded, we need to update the config
5494 self.feedback_fn("* instance running on secondary node (%s),"
5495 " updating config" % target_node)
5496 instance.primary_node = target_node
5497 self.cfg.Update(instance, self.feedback_fn)
5498 demoted_node = source_node
5500 self.feedback_fn("* instance confirmed to be running on its"
5501 " primary node (%s)" % source_node)
5502 demoted_node = target_node
5504 self._EnsureSecondary(demoted_node)
5506 self._WaitUntilSync()
5507 except errors.OpExecError:
5508 # we ignore errors here, since if the device is standalone, it
5509 # won't be able to sync
5511 self._GoStandalone()
5512 self._GoReconnect(False)
5513 self._WaitUntilSync()
5515 self.feedback_fn("* done")
5517 def _RevertDiskStatus(self):
5518 """Try to revert the disk status after a failed migration.
5521 target_node = self.target_node
5523 self._EnsureSecondary(target_node)
5524 self._GoStandalone()
5525 self._GoReconnect(False)
5526 self._WaitUntilSync()
5527 except errors.OpExecError, err:
5528 self.lu.LogWarning("Migration failed and I can't reconnect the"
5529 " drives: error '%s'\n"
5530 "Please look and recover the instance status" %
5533 def _AbortMigration(self):
5534 """Call the hypervisor code to abort a started migration.
5537 instance = self.instance
5538 target_node = self.target_node
5539 migration_info = self.migration_info
5541 abort_result = self.rpc.call_finalize_migration(target_node,
5545 abort_msg = abort_result.fail_msg
5547 logging.error("Aborting migration failed on target node %s: %s",
5548 target_node, abort_msg)
5549 # Don't raise an exception here, as we still have to try to revert the
5550 # disk status, even if this step failed.
5552 def _ExecMigration(self):
5553 """Migrate an instance.
5555 The migration is done by:
5556 - change the disks into dual-master mode
5557 - wait until disks are fully synchronized again
5558 - migrate the instance
5559 - change disks on the new secondary node (the old primary) to secondary
5560 - wait until disks are fully synchronized
5561 - change disks into single-master mode
5564 instance = self.instance
5565 target_node = self.target_node
5566 source_node = self.source_node
5568 self.feedback_fn("* checking disk consistency between source and target")
5569 for dev in instance.disks:
5570 if not _CheckDiskConsistency(self, dev, target_node, False):
5571 raise errors.OpExecError("Disk %s is degraded or not fully"
5572 " synchronized on target node,"
5573 " aborting migrate." % dev.iv_name)
5575 # First get the migration information from the remote node
5576 result = self.rpc.call_migration_info(source_node, instance)
5577 msg = result.fail_msg
5579 log_err = ("Failed fetching source migration information from %s: %s" %
5581 logging.error(log_err)
5582 raise errors.OpExecError(log_err)
5584 self.migration_info = migration_info = result.payload
5586 # Then switch the disks to master/master mode
5587 self._EnsureSecondary(target_node)
5588 self._GoStandalone()
5589 self._GoReconnect(True)
5590 self._WaitUntilSync()
5592 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5593 result = self.rpc.call_accept_instance(target_node,
5596 self.nodes_ip[target_node])
5598 msg = result.fail_msg
5600 logging.error("Instance pre-migration failed, trying to revert"
5601 " disk status: %s", msg)
5602 self.feedback_fn("Pre-migration failed, aborting")
5603 self._AbortMigration()
5604 self._RevertDiskStatus()
5605 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5606 (instance.name, msg))
5608 self.feedback_fn("* migrating instance to %s" % target_node)
5610 result = self.rpc.call_instance_migrate(source_node, instance,
5611 self.nodes_ip[target_node],
5613 msg = result.fail_msg
5615 logging.error("Instance migration failed, trying to revert"
5616 " disk status: %s", msg)
5617 self.feedback_fn("Migration failed, aborting")
5618 self._AbortMigration()
5619 self._RevertDiskStatus()
5620 raise errors.OpExecError("Could not migrate instance %s: %s" %
5621 (instance.name, msg))
5624 instance.primary_node = target_node
5625 # distribute new instance config to the other nodes
5626 self.cfg.Update(instance, self.feedback_fn)
5628 result = self.rpc.call_finalize_migration(target_node,
5632 msg = result.fail_msg
5634 logging.error("Instance migration succeeded, but finalization failed:"
5636 raise errors.OpExecError("Could not finalize instance migration: %s" %
5639 self._EnsureSecondary(source_node)
5640 self._WaitUntilSync()
5641 self._GoStandalone()
5642 self._GoReconnect(False)
5643 self._WaitUntilSync()
5645 self.feedback_fn("* done")
5647 def Exec(self, feedback_fn):
5648 """Perform the migration.
5651 feedback_fn("Migrating instance %s" % self.instance.name)
5653 self.feedback_fn = feedback_fn
5655 self.source_node = self.instance.primary_node
5656 self.target_node = self.instance.secondary_nodes[0]
5657 self.all_nodes = [self.source_node, self.target_node]
5659 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5660 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5664 return self._ExecCleanup()
5666 return self._ExecMigration()
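# Exec is a simple dispatcher: with cleanup requested it repairs the state
# left behind by a previously failed migration, otherwise it runs the full
# migration sequence documented in _ExecMigration.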
5669 def _CreateBlockDev(lu, node, instance, device, force_create,
5671 """Create a tree of block devices on a given node.
5673 If this device type has to be created on secondaries, create it and
5676 If not, just recurse to children keeping the same 'force' value.
5678 @param lu: the lu on whose behalf we execute
5679 @param node: the node on which to create the device
5680 @type instance: L{objects.Instance}
5681 @param instance: the instance which owns the device
5682 @type device: L{objects.Disk}
5683 @param device: the device to create
5684 @type force_create: boolean
5685 @param force_create: whether to force creation of this device; this
5686 will be changed to True whenever we find a device for which
5687 CreateOnSecondary() returns true
5688 @param info: the extra 'metadata' we should attach to the device
5689 (this will be represented as a LVM tag)
5690 @type force_open: boolean
5691 @param force_open: this parameter will be passed to the
5692 L{backend.BlockdevCreate} function where it specifies
5693 whether we run on primary or not, and it affects both
5694 the child assembly and the device's own Open() execution
5697 if device.CreateOnSecondary():
5701 for child in device.children:
5702 _CreateBlockDev(lu, node, instance, child, force_create,
5705 if not force_create:
5708 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5711 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5712 """Create a single block device on a given node.
5714 This will not recurse over children of the device, so they must be
5717 @param lu: the lu on whose behalf we execute
5718 @param node: the node on which to create the device
5719 @type instance: L{objects.Instance}
5720 @param instance: the instance which owns the device
5721 @type device: L{objects.Disk}
5722 @param device: the device to create
5723 @param info: the extra 'metadata' we should attach to the device
5724 (this will be represented as a LVM tag)
5725 @type force_open: boolean
5726 @param force_open: this parameter will be passed to the
5727 L{backend.BlockdevCreate} function where it specifies
5728 whether we run on primary or not, and it affects both
5729 the child assembly and the device's own Open() execution
5732 lu.cfg.SetDiskID(device, node)
5733 result = lu.rpc.call_blockdev_create(node, device, device.size,
5734 instance.name, force_open, info)
5735 result.Raise("Can't create block device %s on"
5736 " node %s for instance %s" % (device, node, instance.name))
5737 if device.physical_id is None:
5738 device.physical_id = result.payload
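# When the configuration does not yet know the device's physical id, the
# value returned by the create RPC (result.payload) is recorded on the Disk
# object for later use.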
5741 def _GenerateUniqueNames(lu, exts):
5742 """Generate a suitable LV name.
5744 This will generate a logical volume name for the given instance.
5749 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5750 results.append("%s%s" % (new_id, val))
5754 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5756 """Generate a drbd8 device complete with its children.
5759 port = lu.cfg.AllocatePort()
5760 vgname = lu.cfg.GetVGName()
5761 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5762 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5763 logical_id=(vgname, names[0]))
5764 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5765 logical_id=(vgname, names[1]))
5766 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5767 logical_id=(primary, secondary, port,
5770 children=[dev_data, dev_meta],
5775 def _GenerateDiskTemplate(lu, template_name,
5776 instance_name, primary_node,
5777 secondary_nodes, disk_info,
5778 file_storage_dir, file_driver,
5780 """Generate the entire disk layout for a given template type.
5783 #TODO: compute space requirements
5785 vgname = lu.cfg.GetVGName()
5786 disk_count = len(disk_info)
5788 if template_name == constants.DT_DISKLESS:
5790 elif template_name == constants.DT_PLAIN:
5791 if len(secondary_nodes) != 0:
5792 raise errors.ProgrammerError("Wrong template configuration")
5794 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5795 for i in range(disk_count)])
5796 for idx, disk in enumerate(disk_info):
5797 disk_index = idx + base_index
5798 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5799 logical_id=(vgname, names[idx]),
5800 iv_name="disk/%d" % disk_index,
5802 disks.append(disk_dev)
5803 elif template_name == constants.DT_DRBD8:
5804 if len(secondary_nodes) != 1:
5805 raise errors.ProgrammerError("Wrong template configuration")
5806 remote_node = secondary_nodes[0]
5807 minors = lu.cfg.AllocateDRBDMinor(
5808 [primary_node, remote_node] * len(disk_info), instance_name)
5811 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5812 for i in range(disk_count)]):
5813 names.append(lv_prefix + "_data")
5814 names.append(lv_prefix + "_meta")
5815 for idx, disk in enumerate(disk_info):
5816 disk_index = idx + base_index
5817 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5818 disk["size"], names[idx*2:idx*2+2],
5819 "disk/%d" % disk_index,
5820 minors[idx*2], minors[idx*2+1])
5821 disk_dev.mode = disk["mode"]
5822 disks.append(disk_dev)
5823 elif template_name == constants.DT_FILE:
5824 if len(secondary_nodes) != 0:
5825 raise errors.ProgrammerError("Wrong template configuration")
5827 _RequireFileStorage()
5829 for idx, disk in enumerate(disk_info):
5830 disk_index = idx + base_index
5831 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5832 iv_name="disk/%d" % disk_index,
5833 logical_id=(file_driver,
5834 "%s/disk%d" % (file_storage_dir,
5837 disks.append(disk_dev)
5839 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5843 def _GetInstanceInfoText(instance):
5844 """Compute that text that should be added to the disk's metadata.
5847 return "originstname+%s" % instance.name
5850 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5851 """Create all disks for an instance.
5853 This abstracts away some work from AddInstance.
5855 @type lu: L{LogicalUnit}
5856 @param lu: the logical unit on whose behalf we execute
5857 @type instance: L{objects.Instance}
5858 @param instance: the instance whose disks we should create
5860 @param to_skip: list of indices to skip
5861 @type target_node: string
5862 @param target_node: if passed, overrides the target node for creation
5864 @return: the success of the creation
5867 info = _GetInstanceInfoText(instance)
5868 if target_node is None:
5869 pnode = instance.primary_node
5870 all_nodes = instance.all_nodes
5875 if instance.disk_template == constants.DT_FILE:
5876 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5877 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5879 result.Raise("Failed to create directory '%s' on"
5880 " node %s" % (file_storage_dir, pnode))
5882 # Note: this needs to be kept in sync with adding of disks in
5883 # LUSetInstanceParams
5884 for idx, device in enumerate(instance.disks):
5885 if to_skip and idx in to_skip:
5887 logging.info("Creating volume %s for instance %s",
5888 device.iv_name, instance.name)
5890 for node in all_nodes:
5891 f_create = node == pnode
5892 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5895 def _RemoveDisks(lu, instance, target_node=None):
5896 """Remove all disks for an instance.
5898 This abstracts away some work from `AddInstance()` and
5899 `RemoveInstance()`. Note that in case some of the devices couldn't
5900 be removed, the removal will continue with the other ones (compare
5901 with `_CreateDisks()`).
5903 @type lu: L{LogicalUnit}
5904 @param lu: the logical unit on whose behalf we execute
5905 @type instance: L{objects.Instance}
5906 @param instance: the instance whose disks we should remove
5907 @type target_node: string
5908 @param target_node: used to override the node on which to remove the disks
5910 @return: the success of the removal
5913 logging.info("Removing block devices for instance %s", instance.name)
5916 for device in instance.disks:
5918 edata = [(target_node, device)]
5920 edata = device.ComputeNodeTree(instance.primary_node)
5921 for node, disk in edata:
5922 lu.cfg.SetDiskID(disk, node)
5923 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5925 lu.LogWarning("Could not remove block device %s on node %s,"
5926 " continuing anyway: %s", device.iv_name, node, msg)
5929 if instance.disk_template == constants.DT_FILE:
5930 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5934 tgt = instance.primary_node
5935 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5937 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5938 file_storage_dir, instance.primary_node, result.fail_msg)
5944 def _ComputeDiskSize(disk_template, disks):
5945 """Compute disk size requirements in the volume group
5948 # Required free disk space as a function of disk template and disk sizes
5950 constants.DT_DISKLESS: None,
5951 constants.DT_PLAIN: sum(d["size"] for d in disks),
5952 # 128 MB are added for drbd metadata for each disk
5953 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
5954 constants.DT_FILE: None,
5957 if disk_template not in req_size_dict:
5958 raise errors.ProgrammerError("Disk template '%s' size requirement"
5959 " is unknown" % disk_template)
5961 return req_size_dict[disk_template]
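# Worked example (illustrative, not part of the original code), for
# disks = [{"size": 10240}, {"size": 10240}]:
#   _ComputeDiskSize(constants.DT_PLAIN, disks)    -> 10240 + 10240 = 20480
#   _ComputeDiskSize(constants.DT_DRBD8, disks)    -> (10240 + 128) * 2 = 20736
#   _ComputeDiskSize(constants.DT_DISKLESS, disks) -> None (no VG space needed)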
5964 def _CheckHVParams(lu, nodenames, hvname, hvparams):
5965 """Hypervisor parameter validation.
5967 This function abstracts the hypervisor parameter validation to be
5968 used in both instance create and instance modify.
5970 @type lu: L{LogicalUnit}
5971 @param lu: the logical unit for which we check
5972 @type nodenames: list
5973 @param nodenames: the list of nodes on which we should check
5974 @type hvname: string
5975 @param hvname: the name of the hypervisor we should use
5976 @type hvparams: dict
5977 @param hvparams: the parameters which we need to check
5978 @raise errors.OpPrereqError: if the parameters are not valid
5981 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5984 for node in nodenames:
5988 info.Raise("Hypervisor parameter validation failed on node %s" % node)
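# Typical call site, as used further down in LUCreateInstance.CheckPrereq:
#   _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
# where nodenames is the instance's primary node plus its secondaries.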
5991 class LUCreateInstance(LogicalUnit):
5992 """Create an instance.
5995 HPATH = "instance-add"
5996 HTYPE = constants.HTYPE_INSTANCE
5997 _OP_REQP = ["instance_name", "disks",
5999 "wait_for_sync", "ip_check", "nics",
6000 "hvparams", "beparams"]
6003 def CheckArguments(self):
6007 # set optional parameters to None if they don't exist
6008 for attr in ["pnode", "snode", "iallocator", "hypervisor",
6009 "disk_template", "identify_defaults"]:
6010 if not hasattr(self.op, attr):
6011 setattr(self.op, attr, None)
6013 # do not require name_check to ease forward/backward compatibility
6015 if not hasattr(self.op, "name_check"):
6016 self.op.name_check = True
6017 if not hasattr(self.op, "no_install"):
6018 self.op.no_install = False
6019 if self.op.no_install and self.op.start:
6020 self.LogInfo("No-installation mode selected, disabling startup")
6021 self.op.start = False
6022 # validate/normalize the instance name
6023 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6024 if self.op.ip_check and not self.op.name_check:
6025 # TODO: make the ip check more flexible and not depend on the name check
6026 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6028 # check disk information: either all adopt, or no adopt
6029 has_adopt = has_no_adopt = False
6030 for disk in self.op.disks:
6035 if has_adopt and has_no_adopt:
6036 raise errors.OpPrereqError("Either all disks are adopted or none is",
6039 if self.op.disk_template != constants.DT_PLAIN:
6040 raise errors.OpPrereqError("Disk adoption is only supported for the"
6041 " 'plain' disk template",
6043 if self.op.iallocator is not None:
6044 raise errors.OpPrereqError("Disk adoption not allowed with an"
6045 " iallocator script", errors.ECODE_INVAL)
6046 if self.op.mode == constants.INSTANCE_IMPORT:
6047 raise errors.OpPrereqError("Disk adoption not allowed for"
6048 " instance import", errors.ECODE_INVAL)
6050 self.adopt_disks = has_adopt
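# Illustrative op.disks values (keys as consumed by CheckPrereq below; the
# names and sizes are made up):
#   regular creation:  [{"size": 10240, "mode": constants.DISK_RDWR}]
#   LV adoption:       [{"size": 10240, "adopt": "existing-lv-name"}]
# For adoption the size is later replaced by the real size of the adopted
# logical volume; mixing adopted and non-adopted disks is rejected above.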
6052 # verify creation mode
6053 if self.op.mode not in (constants.INSTANCE_CREATE,
6054 constants.INSTANCE_IMPORT):
6055 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6056 self.op.mode, errors.ECODE_INVAL)
6058 # instance name verification
6059 if self.op.name_check:
6060 self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6061 self.op.instance_name = self.hostname1.name
6062 # used in CheckPrereq for ip ping check
6063 self.check_ip = self.hostname1.ip
6065 self.check_ip = None
6067 # file storage checks
6068 if (self.op.file_driver and
6069 not self.op.file_driver in constants.FILE_DRIVER):
6070 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6071 self.op.file_driver, errors.ECODE_INVAL)
6073 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6074 raise errors.OpPrereqError("File storage directory path not absolute",
6077 ### Node/iallocator related checks
6078 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6079 raise errors.OpPrereqError("One and only one of iallocator and primary"
6080 " node must be given",
6083 if self.op.mode == constants.INSTANCE_IMPORT:
6084 # On import force_variant must be True, because if we forced it at
6085 # initial install, our only chance when importing it back is that it
6087 self.op.force_variant = True
6089 if self.op.no_install:
6090 self.LogInfo("No-installation mode has no effect during import")
6092 else: # INSTANCE_CREATE
6093 if getattr(self.op, "os_type", None) is None:
6094 raise errors.OpPrereqError("No guest OS specified",
6096 self.op.force_variant = getattr(self.op, "force_variant", False)
6097 if self.op.disk_template is None:
6098 raise errors.OpPrereqError("No disk template specified",
6101 def ExpandNames(self):
6102 """ExpandNames for CreateInstance.
6104 Figure out the right locks for instance creation.
6107 self.needed_locks = {}
6109 instance_name = self.op.instance_name
6110 # this is just a preventive check, but someone might still add this
6111 # instance in the meantime, and creation will fail at lock-add time
6112 if instance_name in self.cfg.GetInstanceList():
6113 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6114 instance_name, errors.ECODE_EXISTS)
6116 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6118 if self.op.iallocator:
6119 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6121 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6122 nodelist = [self.op.pnode]
6123 if self.op.snode is not None:
6124 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6125 nodelist.append(self.op.snode)
6126 self.needed_locks[locking.LEVEL_NODE] = nodelist
6128 # in case of import lock the source node too
6129 if self.op.mode == constants.INSTANCE_IMPORT:
6130 src_node = getattr(self.op, "src_node", None)
6131 src_path = getattr(self.op, "src_path", None)
6133 if src_path is None:
6134 self.op.src_path = src_path = self.op.instance_name
6136 if src_node is None:
6137 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6138 self.op.src_node = None
6139 if os.path.isabs(src_path):
6140 raise errors.OpPrereqError("Importing an instance from an absolute"
6141 " path requires a source node option.",
6144 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6145 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6146 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6147 if not os.path.isabs(src_path):
6148 self.op.src_path = src_path = \
6149 utils.PathJoin(constants.EXPORT_DIR, src_path)
6151 def _RunAllocator(self):
6152 """Run the allocator based on input opcode.
6155 nics = [n.ToDict() for n in self.nics]
6156 ial = IAllocator(self.cfg, self.rpc,
6157 mode=constants.IALLOCATOR_MODE_ALLOC,
6158 name=self.op.instance_name,
6159 disk_template=self.op.disk_template,
6162 vcpus=self.be_full[constants.BE_VCPUS],
6163 mem_size=self.be_full[constants.BE_MEMORY],
6166 hypervisor=self.op.hypervisor,
6169 ial.Run(self.op.iallocator)
6172 raise errors.OpPrereqError("Can't compute nodes using"
6173 " iallocator '%s': %s" %
6174 (self.op.iallocator, ial.info),
6176 if len(ial.result) != ial.required_nodes:
6177 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6178 " of nodes (%s), required %s" %
6179 (self.op.iallocator, len(ial.result),
6180 ial.required_nodes), errors.ECODE_FAULT)
6181 self.op.pnode = ial.result[0]
6182 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6183 self.op.instance_name, self.op.iallocator,
6184 utils.CommaJoin(ial.result))
6185 if ial.required_nodes == 2:
6186 self.op.snode = ial.result[1]
6188 def BuildHooksEnv(self):
6191 This runs on master, primary and secondary nodes of the instance.
6195 "ADD_MODE": self.op.mode,
6197 if self.op.mode == constants.INSTANCE_IMPORT:
6198 env["SRC_NODE"] = self.op.src_node
6199 env["SRC_PATH"] = self.op.src_path
6200 env["SRC_IMAGES"] = self.src_images
6202 env.update(_BuildInstanceHookEnv(
6203 name=self.op.instance_name,
6204 primary_node=self.op.pnode,
6205 secondary_nodes=self.secondaries,
6206 status=self.op.start,
6207 os_type=self.op.os_type,
6208 memory=self.be_full[constants.BE_MEMORY],
6209 vcpus=self.be_full[constants.BE_VCPUS],
6210 nics=_NICListToTuple(self, self.nics),
6211 disk_template=self.op.disk_template,
6212 disks=[(d["size"], d["mode"]) for d in self.disks],
6215 hypervisor_name=self.op.hypervisor,
6218 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6222 def _ReadExportInfo(self):
6223 """Reads the export information from disk.
6225 It will override the opcode source node and path with the actual
6226 information, if these two were not specified before.
6228 @return: the export information
6231 assert self.op.mode == constants.INSTANCE_IMPORT
6233 src_node = self.op.src_node
6234 src_path = self.op.src_path
6236 if src_node is None:
6237 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6238 exp_list = self.rpc.call_export_list(locked_nodes)
6240 for node in exp_list:
6241 if exp_list[node].fail_msg:
6243 if src_path in exp_list[node].payload:
6245 self.op.src_node = src_node = node
6246 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6250 raise errors.OpPrereqError("No export found for relative path %s" %
6251 src_path, errors.ECODE_INVAL)
6253 _CheckNodeOnline(self, src_node)
6254 result = self.rpc.call_export_info(src_node, src_path)
6255 result.Raise("No export or invalid export found in dir %s" % src_path)
6257 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6258 if not export_info.has_section(constants.INISECT_EXP):
6259 raise errors.ProgrammerError("Corrupted export config",
6260 errors.ECODE_ENVIRON)
6262 ei_version = export_info.get(constants.INISECT_EXP, "version")
6263 if (int(ei_version) != constants.EXPORT_VERSION):
6264 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6265 (ei_version, constants.EXPORT_VERSION),
6266 errors.ECODE_ENVIRON)
6269 def _ReadExportParams(self, einfo):
6270 """Use export parameters as defaults.
6272 In case the opcode doesn't specify (i.e. override) some instance
6273 parameters, try to use them from the export information, if
6277 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6279 if self.op.disk_template is None:
6280 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6281 self.op.disk_template = einfo.get(constants.INISECT_INS,
6284 raise errors.OpPrereqError("No disk template specified and the export"
6285 " is missing the disk_template information",
6288 if not self.op.disks:
6289 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6291 # TODO: import the disk iv_name too
6292 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6293 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6294 disks.append({"size": disk_sz})
6295 self.op.disks = disks
6297 raise errors.OpPrereqError("No disk info specified and the export"
6298 " is missing the disk information",
6301 if (not self.op.nics and
6302 einfo.has_option(constants.INISECT_INS, "nic_count")):
6304 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6306 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6307 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6312 if (self.op.hypervisor is None and
6313 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6314 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6315 if einfo.has_section(constants.INISECT_HYP):
6316 # use the export parameters but do not override the ones
6317 # specified by the user
6318 for name, value in einfo.items(constants.INISECT_HYP):
6319 if name not in self.op.hvparams:
6320 self.op.hvparams[name] = value
6322 if einfo.has_section(constants.INISECT_BEP):
6323 # use the parameters, without overriding
6324 for name, value in einfo.items(constants.INISECT_BEP):
6325 if name not in self.op.beparams:
6326 self.op.beparams[name] = value
6328 # try to read the parameters old style, from the main section
6329 for name in constants.BES_PARAMETERS:
6330 if (name not in self.op.beparams and
6331 einfo.has_option(constants.INISECT_INS, name)):
6332 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
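# Sketch of the export file layout read above (section names shown
# symbolically, keys taken from the accesses in this class):
#   [constants.INISECT_EXP]  version, os
#   [constants.INISECT_INS]  disk_template, disk_count, disk0_size, ...,
#                            nic_count, nic0_mac, nic0_ip, ..., hypervisor
#   [constants.INISECT_HYP]  hypervisor parameters used as defaults
#   [constants.INISECT_BEP]  backend parameters used as defaults
# Values supplied in the opcode always take precedence over the export.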
6334 def _RevertToDefaults(self, cluster):
6335 """Revert the instance parameters to the default values.
6339 hv_defs = cluster.GetHVDefaults(self.op.hypervisor, self.op.os_type)
6340 for name in self.op.hvparams.keys():
6341 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6342 del self.op.hvparams[name]
6344 be_defs = cluster.beparams.get(constants.PP_DEFAULT, {})
6345 for name in self.op.beparams.keys():
6346 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6347 del self.op.beparams[name]
6349 nic_defs = cluster.nicparams.get(constants.PP_DEFAULT, {})
6350 for nic in self.op.nics:
6351 for name in constants.NICS_PARAMETERS:
6352 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6355 def CheckPrereq(self):
6356 """Check prerequisites.
6359 if self.op.mode == constants.INSTANCE_IMPORT:
6360 export_info = self._ReadExportInfo()
6361 self._ReadExportParams(export_info)
6363 _CheckDiskTemplate(self.op.disk_template)
6365 if (not self.cfg.GetVGName() and
6366 self.op.disk_template not in constants.DTS_NOT_LVM):
6367 raise errors.OpPrereqError("Cluster does not support lvm-based"
6368 " instances", errors.ECODE_STATE)
6370 if self.op.hypervisor is None:
6371 self.op.hypervisor = self.cfg.GetHypervisorType()
6373 cluster = self.cfg.GetClusterInfo()
6374 enabled_hvs = cluster.enabled_hypervisors
6375 if self.op.hypervisor not in enabled_hvs:
6376 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6377 " cluster (%s)" % (self.op.hypervisor,
6378 ",".join(enabled_hvs)),
6381 # check hypervisor parameter syntax (locally)
6382 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6383 filled_hvp = objects.FillDict(cluster.GetHVDefaults(self.op.hypervisor,
6386 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6387 hv_type.CheckParameterSyntax(filled_hvp)
6388 self.hv_full = filled_hvp
6389 # check that we don't specify global parameters on an instance
6390 _CheckGlobalHvParams(self.op.hvparams)
6392 # fill and remember the beparams dict
6393 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6394 self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6397 # now that hvp/bep are in final format, let's reset to defaults,
6399 if self.op.identify_defaults:
6400 self._RevertToDefaults(cluster)
6404 for idx, nic in enumerate(self.op.nics):
6405 nic_mode_req = nic.get("mode", None)
6406 nic_mode = nic_mode_req
6407 if nic_mode is None:
6408 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6410 # in routed mode, for the first nic, the default ip is 'auto'
6411 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6412 default_ip_mode = constants.VALUE_AUTO
6414 default_ip_mode = constants.VALUE_NONE
6416 # ip validity checks
6417 ip = nic.get("ip", default_ip_mode)
6418 if ip is None or ip.lower() == constants.VALUE_NONE:
6420 elif ip.lower() == constants.VALUE_AUTO:
6421 if not self.op.name_check:
6422 raise errors.OpPrereqError("IP address set to auto but name checks"
6423 " have been skipped. Aborting.",
6425 nic_ip = self.hostname1.ip
6427 if not utils.IsValidIP(ip):
6428 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6429 " like a valid IP" % ip,
6433 # TODO: check the ip address for uniqueness
6434 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6435 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6438 # MAC address verification
6439 mac = nic.get("mac", constants.VALUE_AUTO)
6440 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6441 mac = utils.NormalizeAndValidateMac(mac)
6444 self.cfg.ReserveMAC(mac, self.proc.GetECId())
6445 except errors.ReservationError:
6446 raise errors.OpPrereqError("MAC address %s already in use"
6447 " in cluster" % mac,
6448 errors.ECODE_NOTUNIQUE)
6450 # bridge verification
6451 bridge = nic.get("bridge", None)
6452 link = nic.get("link", None)
6454 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6455 " at the same time", errors.ECODE_INVAL)
6456 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6457 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6464 nicparams[constants.NIC_MODE] = nic_mode_req
6466 nicparams[constants.NIC_LINK] = link
6468 check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6470 objects.NIC.CheckParameterSyntax(check_params)
6471 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
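# Illustrative op.nics entries (keys as consumed above; the link name and
# IP address are made up):
#   [{"mode": constants.NIC_MODE_BRIDGED, "link": "br0", "mac": "auto"},
#    {"mode": constants.NIC_MODE_ROUTED, "ip": "192.0.2.10"}]
# Omitted parameters fall back to the cluster NIC defaults; "auto" values
# are resolved in this method so the allocator and hooks see final values.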
6473 # disk checks/pre-build
6475 for disk in self.op.disks:
6476 mode = disk.get("mode", constants.DISK_RDWR)
6477 if mode not in constants.DISK_ACCESS_SET:
6478 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6479 mode, errors.ECODE_INVAL)
6480 size = disk.get("size", None)
6482 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6485 except (TypeError, ValueError):
6486 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6488 new_disk = {"size": size, "mode": mode}
6490 new_disk["adopt"] = disk["adopt"]
6491 self.disks.append(new_disk)
6493 if self.op.mode == constants.INSTANCE_IMPORT:
6495 # Check that the new instance doesn't have fewer disks than the export
6496 instance_disks = len(self.disks)
6497 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6498 if instance_disks < export_disks:
6499 raise errors.OpPrereqError("Not enough disks to import."
6500 " (instance: %d, export: %d)" %
6501 (instance_disks, export_disks),
6505 for idx in range(export_disks):
6506 option = 'disk%d_dump' % idx
6507 if export_info.has_option(constants.INISECT_INS, option):
6508 # FIXME: are the old os-es, disk sizes, etc. useful?
6509 export_name = export_info.get(constants.INISECT_INS, option)
6510 image = utils.PathJoin(self.op.src_path, export_name)
6511 disk_images.append(image)
6513 disk_images.append(False)
6515 self.src_images = disk_images
6517 old_name = export_info.get(constants.INISECT_INS, 'name')
6519 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6520 except (TypeError, ValueError), err:
6521 raise errors.OpPrereqError("Invalid export file, nic_count is not"
6522 " an integer: %s" % str(err),
6524 if self.op.instance_name == old_name:
6525 for idx, nic in enumerate(self.nics):
6526 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6527 nic_mac_ini = 'nic%d_mac' % idx
6528 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6530 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6532 # ip ping checks (we use the same ip that was resolved in CheckArguments)
6533 if self.op.ip_check:
6534 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6535 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6536 (self.check_ip, self.op.instance_name),
6537 errors.ECODE_NOTUNIQUE)
6539 #### mac address generation
6540 # By generating here the mac address both the allocator and the hooks get
6541 # the real final mac address rather than the 'auto' or 'generate' value.
6542 # There is a race condition between the generation and the instance object
6543 # creation, which means that we know the mac is valid now, but we're not
6544 # sure it will be when we actually add the instance. If things go bad
6545 # adding the instance will abort because of a duplicate mac, and the
6546 # creation job will fail.
6547 for nic in self.nics:
6548 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6549 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6553 if self.op.iallocator is not None:
6554 self._RunAllocator()
6556 #### node related checks
6558 # check primary node
6559 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6560 assert self.pnode is not None, \
6561 "Cannot retrieve locked node %s" % self.op.pnode
6563 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6564 pnode.name, errors.ECODE_STATE)
6566 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6567 pnode.name, errors.ECODE_STATE)
6569 self.secondaries = []
6571 # mirror node verification
6572 if self.op.disk_template in constants.DTS_NET_MIRROR:
6573 if self.op.snode is None:
6574 raise errors.OpPrereqError("The networked disk templates need"
6575 " a mirror node", errors.ECODE_INVAL)
6576 if self.op.snode == pnode.name:
6577 raise errors.OpPrereqError("The secondary node cannot be the"
6578 " primary node.", errors.ECODE_INVAL)
6579 _CheckNodeOnline(self, self.op.snode)
6580 _CheckNodeNotDrained(self, self.op.snode)
6581 self.secondaries.append(self.op.snode)
6583 nodenames = [pnode.name] + self.secondaries
6585 req_size = _ComputeDiskSize(self.op.disk_template,
6588 # Check lv size requirements, if not adopting
6589 if req_size is not None and not self.adopt_disks:
6590 _CheckNodesFreeDisk(self, nodenames, req_size)
6592 if self.adopt_disks: # instead, we must check the adoption data
6593 all_lvs = set([i["adopt"] for i in self.disks])
6594 if len(all_lvs) != len(self.disks):
6595 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6597 for lv_name in all_lvs:
6599 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6600 except errors.ReservationError:
6601 raise errors.OpPrereqError("LV named %s used by another instance" %
6602 lv_name, errors.ECODE_NOTUNIQUE)
6604 node_lvs = self.rpc.call_lv_list([pnode.name],
6605 self.cfg.GetVGName())[pnode.name]
6606 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6607 node_lvs = node_lvs.payload
6608 delta = all_lvs.difference(node_lvs.keys())
6610 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6611 utils.CommaJoin(delta),
6613 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6615 raise errors.OpPrereqError("Online logical volumes found, cannot"
6616 " adopt: %s" % utils.CommaJoin(online_lvs),
6618 # update the size of disk based on what is found
6619 for dsk in self.disks:
6620 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6622 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6624 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6626 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6628 # memory check on primary node
6630 _CheckNodeFreeMemory(self, self.pnode.name,
6631 "creating instance %s" % self.op.instance_name,
6632 self.be_full[constants.BE_MEMORY],
6635 self.dry_run_result = list(nodenames)
6637 def Exec(self, feedback_fn):
6638 """Create and add the instance to the cluster.
6641 instance = self.op.instance_name
6642 pnode_name = self.pnode.name
6644 ht_kind = self.op.hypervisor
6645 if ht_kind in constants.HTS_REQ_PORT:
6646 network_port = self.cfg.AllocatePort()
6650 if constants.ENABLE_FILE_STORAGE:
6651 # this is needed because os.path.join does not accept None arguments
6652 if self.op.file_storage_dir is None:
6653 string_file_storage_dir = ""
6655 string_file_storage_dir = self.op.file_storage_dir
6657 # build the full file storage dir path
6658 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6659 string_file_storage_dir, instance)
6661 file_storage_dir = ""
6664 disks = _GenerateDiskTemplate(self,
6665 self.op.disk_template,
6666 instance, pnode_name,
6670 self.op.file_driver,
6673 iobj = objects.Instance(name=instance, os=self.op.os_type,
6674 primary_node=pnode_name,
6675 nics=self.nics, disks=disks,
6676 disk_template=self.op.disk_template,
6678 network_port=network_port,
6679 beparams=self.op.beparams,
6680 hvparams=self.op.hvparams,
6681 hypervisor=self.op.hypervisor,
6684 if self.adopt_disks:
6685 # rename LVs to the newly-generated names; we need to construct
6686 # 'fake' LV disks with the old data, plus the new unique_id
6687 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6689 for t_dsk, a_dsk in zip (tmp_disks, self.disks):
6690 rename_to.append(t_dsk.logical_id)
6691 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6692 self.cfg.SetDiskID(t_dsk, pnode_name)
6693 result = self.rpc.call_blockdev_rename(pnode_name,
6694 zip(tmp_disks, rename_to))
6695 result.Raise("Failed to rename adoped LVs")
6697 feedback_fn("* creating instance disks...")
6699 _CreateDisks(self, iobj)
6700 except errors.OpExecError:
6701 self.LogWarning("Device creation failed, reverting...")
6703 _RemoveDisks(self, iobj)
6705 self.cfg.ReleaseDRBDMinors(instance)
6708 feedback_fn("adding instance %s to cluster config" % instance)
6710 self.cfg.AddInstance(iobj, self.proc.GetECId())
6712 # Declare that we don't want to remove the instance lock anymore, as we've
6713 # added the instance to the config
6714 del self.remove_locks[locking.LEVEL_INSTANCE]
6715 # Unlock all the nodes
6716 if self.op.mode == constants.INSTANCE_IMPORT:
6717 nodes_keep = [self.op.src_node]
6718 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6719 if node != self.op.src_node]
6720 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6721 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6723 self.context.glm.release(locking.LEVEL_NODE)
6724 del self.acquired_locks[locking.LEVEL_NODE]
6726 if self.op.wait_for_sync:
6727 disk_abort = not _WaitForSync(self, iobj)
6728 elif iobj.disk_template in constants.DTS_NET_MIRROR:
6729 # make sure the disks are not degraded (still sync-ing is ok)
6731 feedback_fn("* checking mirrors status")
6732 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6737 _RemoveDisks(self, iobj)
6738 self.cfg.RemoveInstance(iobj.name)
6739 # Make sure the instance lock gets removed
6740 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6741 raise errors.OpExecError("There are some degraded disks for"
6744 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6745 if self.op.mode == constants.INSTANCE_CREATE:
6746 if not self.op.no_install:
6747 feedback_fn("* running the instance OS create scripts...")
6748 # FIXME: pass debug option from opcode to backend
6749 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6750 self.op.debug_level)
6751 result.Raise("Could not add os for instance %s"
6752 " on node %s" % (instance, pnode_name))
6754 elif self.op.mode == constants.INSTANCE_IMPORT:
6755 feedback_fn("* running the instance OS import scripts...")
6756 src_node = self.op.src_node
6757 src_images = self.src_images
6758 cluster_name = self.cfg.GetClusterName()
6759 # FIXME: pass debug option from opcode to backend
6760 import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6761 src_node, src_images,
6763 self.op.debug_level)
6764 msg = import_result.fail_msg
6766 self.LogWarning("Error while importing the disk images for instance"
6767 " %s on node %s: %s" % (instance, pnode_name, msg))
6769 # also checked in the prereq part
6770 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6774 iobj.admin_up = True
6775 self.cfg.Update(iobj, feedback_fn)
6776 logging.info("Starting instance %s on node %s", instance, pnode_name)
6777 feedback_fn("* starting instance...")
6778 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6779 result.Raise("Could not start instance")
6781 return list(iobj.all_nodes)
6784 class LUConnectConsole(NoHooksLU):
6785 """Connect to an instance's console.
6787 This is somewhat special in that it returns the command line that
6788 you need to run on the master node in order to connect to the
6792 _OP_REQP = ["instance_name"]
6795 def ExpandNames(self):
6796 self._ExpandAndLockInstance()
6798 def CheckPrereq(self):
6799 """Check prerequisites.
6801 This checks that the instance is in the cluster.
6804 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6805 assert self.instance is not None, \
6806 "Cannot retrieve locked instance %s" % self.op.instance_name
6807 _CheckNodeOnline(self, self.instance.primary_node)
6809 def Exec(self, feedback_fn):
6810 """Connect to the console of an instance
6813 instance = self.instance
6814 node = instance.primary_node
6816 node_insts = self.rpc.call_instance_list([node],
6817 [instance.hypervisor])[node]
6818 node_insts.Raise("Can't get node information from %s" % node)
6820 if instance.name not in node_insts.payload:
6821 raise errors.OpExecError("Instance %s is not running." % instance.name)
6823 logging.debug("Connecting to console of %s on %s", instance.name, node)
6825 hyper = hypervisor.GetHypervisor(instance.hypervisor)
6826 cluster = self.cfg.GetClusterInfo()
6827 # beparams and hvparams are passed separately, to avoid editing the
6828 # instance and then saving the defaults in the instance itself.
6829 hvparams = cluster.FillHV(instance)
6830 beparams = cluster.FillBE(instance)
6831 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6834 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
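# Illustrative result (hypervisor-dependent; command shown as an assumed
# example for Xen): an ssh invocation to the primary node, roughly
#   ssh -t root@<primary-node> 'xm console <instance-name>'
# which the caller is expected to run itself on the master node.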
6837 class LUReplaceDisks(LogicalUnit):
6838 """Replace the disks of an instance.
6841 HPATH = "mirrors-replace"
6842 HTYPE = constants.HTYPE_INSTANCE
6843 _OP_REQP = ["instance_name", "mode", "disks"]
6846 def CheckArguments(self):
6847 if not hasattr(self.op, "remote_node"):
6848 self.op.remote_node = None
6849 if not hasattr(self.op, "iallocator"):
6850 self.op.iallocator = None
6851 if not hasattr(self.op, "early_release"):
6852 self.op.early_release = False
6854 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6857 def ExpandNames(self):
6858 self._ExpandAndLockInstance()
6860 if self.op.iallocator is not None:
6861 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6863 elif self.op.remote_node is not None:
6864 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6865 self.op.remote_node = remote_node
6867 # Warning: do not remove the locking of the new secondary here
6868 # unless DRBD8.AddChildren is changed to work in parallel;
6869 # currently it doesn't since parallel invocations of
6870 # FindUnusedMinor will conflict
6871 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6872 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6875 self.needed_locks[locking.LEVEL_NODE] = []
6876 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6878 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6879 self.op.iallocator, self.op.remote_node,
6880 self.op.disks, False, self.op.early_release)
6882 self.tasklets = [self.replacer]
6884 def DeclareLocks(self, level):
6885 # If we're not already locking all nodes in the set we have to declare the
6886 # instance's primary/secondary nodes.
6887 if (level == locking.LEVEL_NODE and
6888 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6889 self._LockInstancesNodes()
6891 def BuildHooksEnv(self):
6894 This runs on the master, the primary and all the secondaries.
6897 instance = self.replacer.instance
6899 "MODE": self.op.mode,
6900 "NEW_SECONDARY": self.op.remote_node,
6901 "OLD_SECONDARY": instance.secondary_nodes[0],
6903 env.update(_BuildInstanceHookEnvByObject(self, instance))
6905 self.cfg.GetMasterNode(),
6906 instance.primary_node,
6908 if self.op.remote_node is not None:
6909 nl.append(self.op.remote_node)
6913 class LUEvacuateNode(LogicalUnit):
6914 """Relocate the secondary instances from a node.
6917 HPATH = "node-evacuate"
6918 HTYPE = constants.HTYPE_NODE
6919 _OP_REQP = ["node_name"]
6922 def CheckArguments(self):
6923 if not hasattr(self.op, "remote_node"):
6924 self.op.remote_node = None
6925 if not hasattr(self.op, "iallocator"):
6926 self.op.iallocator = None
6927 if not hasattr(self.op, "early_release"):
6928 self.op.early_release = False
6930 TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6931 self.op.remote_node,
6934 def ExpandNames(self):
6935 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6937 self.needed_locks = {}
6939 # Declare node locks
6940 if self.op.iallocator is not None:
6941 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6943 elif self.op.remote_node is not None:
6944 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6946 # Warning: do not remove the locking of the new secondary here
6947 # unless DRBD8.AddChildren is changed to work in parallel;
6948 # currently it doesn't since parallel invocations of
6949 # FindUnusedMinor will conflict
6950 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
6951 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6954 raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
6956 # Create tasklets for replacing disks for all secondary instances on this
6961 for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
6962 logging.debug("Replacing disks for instance %s", inst.name)
6963 names.append(inst.name)
6965 replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
6966 self.op.iallocator, self.op.remote_node, [],
6967 True, self.op.early_release)
6968 tasklets.append(replacer)
6970 self.tasklets = tasklets
6971 self.instance_names = names
6973 # Declare instance locks
6974 self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
6976 def DeclareLocks(self, level):
6977 # If we're not already locking all nodes in the set we have to declare the
6978 # instance's primary/secondary nodes.
6979 if (level == locking.LEVEL_NODE and
6980 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6981 self._LockInstancesNodes()
6983 def BuildHooksEnv(self):
6986 This runs on the master, the primary and all the secondaries.
6990 "NODE_NAME": self.op.node_name,
6993 nl = [self.cfg.GetMasterNode()]
6995 if self.op.remote_node is not None:
6996 env["NEW_SECONDARY"] = self.op.remote_node
6997 nl.append(self.op.remote_node)
6999 return (env, nl, nl)
7002 class TLReplaceDisks(Tasklet):
7003 """Replaces disks for an instance.
7005 Note: Locking is not within the scope of this class.
7008 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7009 disks, delay_iallocator, early_release):
7010 """Initializes this class.
7013 Tasklet.__init__(self, lu)
7016 self.instance_name = instance_name
7018 self.iallocator_name = iallocator_name
7019 self.remote_node = remote_node
7021 self.delay_iallocator = delay_iallocator
7022 self.early_release = early_release
7025 self.instance = None
7026 self.new_node = None
7027 self.target_node = None
7028 self.other_node = None
7029 self.remote_node_info = None
7030 self.node_secondary_ip = None
7033 def CheckArguments(mode, remote_node, iallocator):
7034 """Helper function for users of this class.
7037 # check for valid parameter combination
7038 if mode == constants.REPLACE_DISK_CHG:
7039 if remote_node is None and iallocator is None:
7040 raise errors.OpPrereqError("When changing the secondary either an"
7041 " iallocator script must be used or the"
7042 " new node given", errors.ECODE_INVAL)
7044 if remote_node is not None and iallocator is not None:
7045 raise errors.OpPrereqError("Give either the iallocator or the new"
7046 " secondary, not both", errors.ECODE_INVAL)
7048 elif remote_node is not None or iallocator is not None:
7049 # Not replacing the secondary
7050 raise errors.OpPrereqError("The iallocator and new node options can"
7051 " only be used when changing the"
7052 " secondary node", errors.ECODE_INVAL)
7055 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7056 """Compute a new secondary node using an IAllocator.
7059 ial = IAllocator(lu.cfg, lu.rpc,
7060 mode=constants.IALLOCATOR_MODE_RELOC,
7062 relocate_from=relocate_from)
7064 ial.Run(iallocator_name)
7067 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7068 " %s" % (iallocator_name, ial.info),
7071 if len(ial.result) != ial.required_nodes:
7072 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7073 " of nodes (%s), required %s" %
7075 len(ial.result), ial.required_nodes),
7078 remote_node_name = ial.result[0]
7080 lu.LogInfo("Selected new secondary for instance '%s': %s",
7081 instance_name, remote_node_name)
7083 return remote_node_name
7085 def _FindFaultyDisks(self, node_name):
7086 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7089 def CheckPrereq(self):
7090 """Check prerequisites.
7092 This checks that the instance is in the cluster.
7095 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7096 assert instance is not None, \
7097 "Cannot retrieve locked instance %s" % self.instance_name
7099 if instance.disk_template != constants.DT_DRBD8:
7100 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7101 " instances", errors.ECODE_INVAL)
7103 if len(instance.secondary_nodes) != 1:
7104 raise errors.OpPrereqError("The instance has a strange layout,"
7105 " expected one secondary but found %d" %
7106 len(instance.secondary_nodes),
7109 if not self.delay_iallocator:
7110 self._CheckPrereq2()
7112 def _CheckPrereq2(self):
7113 """Check prerequisites, second part.
7115 This function should always be part of CheckPrereq. It was separated and is
7116 now called from Exec because during node evacuation iallocator was only
7117 called with an unmodified cluster model, not taking planned changes into
7121 instance = self.instance
7122 secondary_node = instance.secondary_nodes[0]
7124 if self.iallocator_name is None:
7125 remote_node = self.remote_node
7127 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7128 instance.name, instance.secondary_nodes)
7130 if remote_node is not None:
7131 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7132 assert self.remote_node_info is not None, \
7133 "Cannot retrieve locked node %s" % remote_node
7135 self.remote_node_info = None
7137 if remote_node == self.instance.primary_node:
7138 raise errors.OpPrereqError("The specified node is the primary node of"
7139 " the instance.", errors.ECODE_INVAL)
7141 if remote_node == secondary_node:
7142 raise errors.OpPrereqError("The specified node is already the"
7143 " secondary node of the instance.",
7146 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7147 constants.REPLACE_DISK_CHG):
7148 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7151 if self.mode == constants.REPLACE_DISK_AUTO:
7152 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7153 faulty_secondary = self._FindFaultyDisks(secondary_node)
7155 if faulty_primary and faulty_secondary:
7156 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7157 " one node and can not be repaired"
7158 " automatically" % self.instance_name,
7162 self.disks = faulty_primary
7163 self.target_node = instance.primary_node
7164 self.other_node = secondary_node
7165 check_nodes = [self.target_node, self.other_node]
7166 elif faulty_secondary:
7167 self.disks = faulty_secondary
7168 self.target_node = secondary_node
7169 self.other_node = instance.primary_node
7170 check_nodes = [self.target_node, self.other_node]
7176 # Non-automatic modes
7177 if self.mode == constants.REPLACE_DISK_PRI:
7178 self.target_node = instance.primary_node
7179 self.other_node = secondary_node
7180 check_nodes = [self.target_node, self.other_node]
7182 elif self.mode == constants.REPLACE_DISK_SEC:
7183 self.target_node = secondary_node
7184 self.other_node = instance.primary_node
7185 check_nodes = [self.target_node, self.other_node]
7187 elif self.mode == constants.REPLACE_DISK_CHG:
7188 self.new_node = remote_node
7189 self.other_node = instance.primary_node
7190 self.target_node = secondary_node
7191 check_nodes = [self.new_node, self.other_node]
7193 _CheckNodeNotDrained(self.lu, remote_node)
7195 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7196 assert old_node_info is not None
7197 if old_node_info.offline and not self.early_release:
7198 # doesn't make sense to delay the release
7199 self.early_release = True
7200 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7201 " early-release mode", secondary_node)
7204 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7207 # If not specified all disks should be replaced
7209 self.disks = range(len(self.instance.disks))
7211 for node in check_nodes:
7212 _CheckNodeOnline(self.lu, node)
7214 # Check whether disks are valid
7215 for disk_idx in self.disks:
7216 instance.FindDisk(disk_idx)
7218 # Get secondary node IP addresses
7221 for node_name in [self.target_node, self.other_node, self.new_node]:
7222 if node_name is not None:
7223 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7225 self.node_secondary_ip = node_2nd_ip
7227 def Exec(self, feedback_fn):
7228 """Execute disk replacement.
7230 This dispatches the disk replacement to the appropriate handler.
7233 if self.delay_iallocator:
7234 self._CheckPrereq2()
7237 feedback_fn("No disks need replacement")
7240 feedback_fn("Replacing disk(s) %s for %s" %
7241 (utils.CommaJoin(self.disks), self.instance.name))
7243 activate_disks = (not self.instance.admin_up)
7245 # Activate the instance disks if we're replacing them on a down instance
7247 _StartInstanceDisks(self.lu, self.instance, True)
7250 # Should we replace the secondary node?
7251 if self.new_node is not None:
7252 fn = self._ExecDrbd8Secondary
7254 fn = self._ExecDrbd8DiskOnly
7256 return fn(feedback_fn)
7259 # Deactivate the instance disks if we're replacing them on a
7262 _SafeShutdownInstanceDisks(self.lu, self.instance)
7264 def _CheckVolumeGroup(self, nodes):
7265 self.lu.LogInfo("Checking volume groups")
7267 vgname = self.cfg.GetVGName()
7269 # Make sure volume group exists on all involved nodes
7270 results = self.rpc.call_vg_list(nodes)
7272 raise errors.OpExecError("Can't list volume groups on the nodes")
7276 res.Raise("Error checking node %s" % node)
7277 if vgname not in res.payload:
7278 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7281 def _CheckDisksExistence(self, nodes):
7282 # Check disk existence
7283 for idx, dev in enumerate(self.instance.disks):
7284 if idx not in self.disks:
7288 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7289 self.cfg.SetDiskID(dev, node)
7291 result = self.rpc.call_blockdev_find(node, dev)
7293 msg = result.fail_msg
7294 if msg or not result.payload:
7296 msg = "disk not found"
7297 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7300 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7301 for idx, dev in enumerate(self.instance.disks):
7302 if idx not in self.disks:
7305 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7308 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7310 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7311 " replace disks for instance %s" %
7312 (node_name, self.instance.name))
7314 def _CreateNewStorage(self, node_name):
7315 vgname = self.cfg.GetVGName()
7318 for idx, dev in enumerate(self.instance.disks):
7319 if idx not in self.disks:
7322 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7324 self.cfg.SetDiskID(dev, node_name)
7326 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7327 names = _GenerateUniqueNames(self.lu, lv_names)
7329 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7330 logical_id=(vgname, names[0]))
7331 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7332 logical_id=(vgname, names[1]))
7334 new_lvs = [lv_data, lv_meta]
7335 old_lvs = dev.children
7336 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7338 # we pass force_create=True to force the LVM creation
7339 for new_lv in new_lvs:
7340 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7341 _GetInstanceInfoText(self.instance), False)
7345 def _CheckDevices(self, node_name, iv_names):
7346 for name, (dev, _, _) in iv_names.iteritems():
7347 self.cfg.SetDiskID(dev, node_name)
7349 result = self.rpc.call_blockdev_find(node_name, dev)
7351 msg = result.fail_msg
7352 if msg or not result.payload:
7354 msg = "disk not found"
7355 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7358 if result.payload.is_degraded:
7359 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7361 def _RemoveOldStorage(self, node_name, iv_names):
7362 for name, (_, old_lvs, _) in iv_names.iteritems():
7363 self.lu.LogInfo("Remove logical volumes for %s" % name)
7366 self.cfg.SetDiskID(lv, node_name)
7368 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7370 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7371 hint="remove unused LVs manually")
7373 def _ReleaseNodeLock(self, node_name):
7374 """Releases the lock for a given node."""
7375 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7377 def _ExecDrbd8DiskOnly(self, feedback_fn):
7378 """Replace a disk on the primary or secondary for DRBD 8.
7380 The algorithm for replace is quite complicated:
7382 1. for each disk to be replaced:
7384 1. create new LVs on the target node with unique names
7385 1. detach old LVs from the drbd device
7386 1. rename old LVs to name_replaced.<time_t>
7387 1. rename new LVs to old LVs
7388 1. attach the new LVs (with the old names now) to the drbd device
7390 1. wait for sync across all devices
7392 1. for each modified disk:
7394 1. remove old LVs (which have the name name_replaced.<time_t>)
7396 Failures are not very well handled.
7401 # Step: check device activation
7402 self.lu.LogStep(1, steps_total, "Check device existence")
7403 self._CheckDisksExistence([self.other_node, self.target_node])
7404 self._CheckVolumeGroup([self.target_node, self.other_node])
7406 # Step: check other node consistency
7407 self.lu.LogStep(2, steps_total, "Check peer consistency")
7408 self._CheckDisksConsistency(self.other_node,
7409 self.other_node == self.instance.primary_node,
7412 # Step: create new storage
7413 self.lu.LogStep(3, steps_total, "Allocate new storage")
7414 iv_names = self._CreateNewStorage(self.target_node)
7416 # Step: for each lv, detach+rename*2+attach
7417 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7418 for dev, old_lvs, new_lvs in iv_names.itervalues():
7419 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7421 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7423 result.Raise("Can't detach drbd from local storage on node"
7424 " %s for device %s" % (self.target_node, dev.iv_name))
7426 #cfg.Update(instance)
7428 # ok, we created the new LVs, so now we know we have the needed
7429 # storage; as such, we proceed on the target node to rename
7430 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7431 # using the assumption that logical_id == physical_id (which in
7432 # turn is the unique_id on that node)
7434 # FIXME(iustin): use a better name for the replaced LVs
7435 temp_suffix = int(time.time())
7436 ren_fn = lambda d, suff: (d.physical_id[0],
7437 d.physical_id[1] + "_replaced-%s" % suff)
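# Illustrative effect of ren_fn (VG and LV names made up): a data LV with
# physical_id ("xenvg", "<unique-id>.disk0_data") becomes
# ("xenvg", "<unique-id>.disk0_data_replaced-1300000000"), i.e. the old
# volume is parked under a timestamped name until the final removal step.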
7439 # Build the rename list based on what LVs exist on the node
7440 rename_old_to_new = []
7441 for to_ren in old_lvs:
7442 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7443 if not result.fail_msg and result.payload:
7445 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7447 self.lu.LogInfo("Renaming the old LVs on the target node")
7448 result = self.rpc.call_blockdev_rename(self.target_node,
7450 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7452 # Now we rename the new LVs to the old names
7453 self.lu.LogInfo("Renaming the new LVs on the target node")
7454 rename_new_to_old = [(new, old.physical_id)
7455 for old, new in zip(old_lvs, new_lvs)]
7456 result = self.rpc.call_blockdev_rename(self.target_node,
7458 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7460 for old, new in zip(old_lvs, new_lvs):
7461 new.logical_id = old.logical_id
7462 self.cfg.SetDiskID(new, self.target_node)
7464 for disk in old_lvs:
7465 disk.logical_id = ren_fn(disk, temp_suffix)
7466 self.cfg.SetDiskID(disk, self.target_node)
7468 # Now that the new lvs have the old name, we can add them to the device
7469 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7470 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7472 msg = result.fail_msg
7474 for new_lv in new_lvs:
7475 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7478 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7479 hint=("cleanup manually the unused logical"
7481 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7483 dev.children = new_lvs
7485 self.cfg.Update(self.instance, feedback_fn)
7488 if self.early_release:
7489 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7491 self._RemoveOldStorage(self.target_node, iv_names)
7492 # WARNING: we release both node locks here, do not do other RPCs
7493 # than WaitForSync to the primary node
7494 self._ReleaseNodeLock([self.target_node, self.other_node])
7497 # This can fail as the old devices are degraded and _WaitForSync
7498 # does a combined result over all disks, so we don't check its return value
7499 self.lu.LogStep(cstep, steps_total, "Sync devices")
7501 _WaitForSync(self.lu, self.instance)
7503 # Check all devices manually
7504 self._CheckDevices(self.instance.primary_node, iv_names)
7506 # Step: remove old storage
7507 if not self.early_release:
7508 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7510 self._RemoveOldStorage(self.target_node, iv_names)
7512 def _ExecDrbd8Secondary(self, feedback_fn):
7513 """Replace the secondary node for DRBD 8.
7515 The algorithm for replace is quite complicated:
7516 - for all disks of the instance:
7517 - create new LVs on the new node with same names
7518 - shutdown the drbd device on the old secondary
7519 - disconnect the drbd network on the primary
7520 - create the drbd device on the new secondary
7521 - network attach the drbd on the primary, using an artifice:
7522 the drbd code for Attach() will connect to the network if it
7523 finds a device which is connected to the good local disks but
7525 - wait for sync across all devices
7526 - remove all disks from the old secondary
7528 Failures are not very well handled.
7533 # Step: check device activation
7534 self.lu.LogStep(1, steps_total, "Check device existence")
7535 self._CheckDisksExistence([self.instance.primary_node])
7536 self._CheckVolumeGroup([self.instance.primary_node])
7538 # Step: check other node consistency
7539 self.lu.LogStep(2, steps_total, "Check peer consistency")
7540 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7542 # Step: create new storage
7543 self.lu.LogStep(3, steps_total, "Allocate new storage")
7544 for idx, dev in enumerate(self.instance.disks):
7545 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7546 (self.new_node, idx))
7547 # we pass force_create=True to force LVM creation
7548 for new_lv in dev.children:
7549 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7550 _GetInstanceInfoText(self.instance), False)
7552 # Step 4: drbd minors and drbd setup changes
7553 # after this, we must manually remove the drbd minors on both the
7554 # error and the success paths
7555 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7556 minors = self.cfg.AllocateDRBDMinor([self.new_node
7557 for dev in self.instance.disks],
7559 logging.debug("Allocated minors %r", minors)
7562 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7563 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7564 (self.new_node, idx))
7565 # create new devices on new_node; note that we create two IDs:
7566 # one without port, so the drbd will be activated without
7567 # networking information on the new node at this stage, and one
7568 # with network, for the latter activation in step 4
7569 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7570 if self.instance.primary_node == o_node1:
7573 assert self.instance.primary_node == o_node2, "Three-node instance?"
7576 new_alone_id = (self.instance.primary_node, self.new_node, None,
7577 p_minor, new_minor, o_secret)
7578 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7579 p_minor, new_minor, o_secret)
7581 iv_names[idx] = (dev, dev.children, new_net_id)
7582 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7584 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7585 logical_id=new_alone_id,
7586 children=dev.children,
7589 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7590 _GetInstanceInfoText(self.instance), False)
7591 except errors.GenericError:
7592 self.cfg.ReleaseDRBDMinors(self.instance.name)
7595 # We have new devices, shutdown the drbd on the old secondary
7596 for idx, dev in enumerate(self.instance.disks):
7597 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7598 self.cfg.SetDiskID(dev, self.target_node)
7599 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7601 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7602 "node: %s" % (idx, msg),
7603 hint=("Please cleanup this device manually as"
7604 " soon as possible"))
7606 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7607 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7608 self.node_secondary_ip,
7609 self.instance.disks)\
7610 [self.instance.primary_node]
7612 msg = result.fail_msg
7614 # detaches didn't succeed (unlikely)
7615 self.cfg.ReleaseDRBDMinors(self.instance.name)
7616 raise errors.OpExecError("Can't detach the disks from the network on"
7617 " old node: %s" % (msg,))
7619 # if we managed to detach at least one, we update all the disks of
7620 # the instance to point to the new secondary
7621 self.lu.LogInfo("Updating instance configuration")
7622 for dev, _, new_logical_id in iv_names.itervalues():
7623 dev.logical_id = new_logical_id
7624 self.cfg.SetDiskID(dev, self.instance.primary_node)
7626 self.cfg.Update(self.instance, feedback_fn)
7628 # and now perform the drbd attach
7629 self.lu.LogInfo("Attaching primary drbds to new secondary"
7630 " (standalone => connected)")
7631 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7633 self.node_secondary_ip,
7634 self.instance.disks,
7637 for to_node, to_result in result.items():
7638 msg = to_result.fail_msg
7640 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7642 hint=("please do a gnt-instance info to see the"
7643 " status of disks"))
7645 if self.early_release:
7646 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7648 self._RemoveOldStorage(self.target_node, iv_names)
7649 # WARNING: we release all node locks here, do not do other RPCs
7650 # than WaitForSync to the primary node
7651 self._ReleaseNodeLock([self.instance.primary_node,
7656 # This can fail as the old devices are degraded and _WaitForSync
7657 # does a combined result over all disks, so we don't check its return value
7658 self.lu.LogStep(cstep, steps_total, "Sync devices")
7660 _WaitForSync(self.lu, self.instance)
7662 # Check all devices manually
7663 self._CheckDevices(self.instance.primary_node, iv_names)
7665 # Step: remove old storage
7666 if not self.early_release:
7667 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7668 self._RemoveOldStorage(self.target_node, iv_names)
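
# Illustrative sketch (not part of the original LU code; names are
# hypothetical): how _ExecDrbd8Secondary above derives the temporary
# "alone" and final "net" DRBD logical_ids from an existing disk when the
# secondary node is replaced.  The tuple layout (node1, node2, port,
# minor1, minor2, secret) mirrors the one unpacked in that method.
def _SketchNewDrbdLogicalIds(primary_node, new_node, new_minor, old_logical_id):
  """Return (alone_id, net_id) for a DRBD disk moved to a new secondary.

  The "alone" id carries no port, so the device can first be activated on
  the new node without networking; the "net" id keeps the original port for
  the later attach step.

  """
  (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = old_logical_id
  if primary_node == o_node1:
    p_minor = o_minor1
  else:
    p_minor = o_minor2
  alone_id = (primary_node, new_node, None, p_minor, new_minor, o_secret)
  net_id = (primary_node, new_node, o_port, p_minor, new_minor, o_secret)
  return (alone_id, net_id)
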
7671 class LURepairNodeStorage(NoHooksLU):
7672 """Repairs the volume group on a node.
7675 _OP_REQP = ["node_name"]
7678 def CheckArguments(self):
7679 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7681 _CheckStorageType(self.op.storage_type)
7683 def ExpandNames(self):
7684 self.needed_locks = {
7685 locking.LEVEL_NODE: [self.op.node_name],
7688 def _CheckFaultyDisks(self, instance, node_name):
7689 """Ensure faulty disks abort the opcode or at least warn."""
7691 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7693 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7694 " node '%s'" % (instance.name, node_name),
7696 except errors.OpPrereqError, err:
7697 if self.op.ignore_consistency:
7698 self.proc.LogWarning(str(err.args[0]))
7702 def CheckPrereq(self):
7703 """Check prerequisites.
7706 storage_type = self.op.storage_type
7708 if (constants.SO_FIX_CONSISTENCY not in
7709 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7710 raise errors.OpPrereqError("Storage units of type '%s' can not be"
7711 " repaired" % storage_type,
7714 # Check whether any instance on this node has faulty disks
7715 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7716 if not inst.admin_up:
7718 check_nodes = set(inst.all_nodes)
7719 check_nodes.discard(self.op.node_name)
7720 for inst_node_name in check_nodes:
7721 self._CheckFaultyDisks(inst, inst_node_name)
7723 def Exec(self, feedback_fn):
7724 feedback_fn("Repairing storage unit '%s' on %s ..." %
7725 (self.op.name, self.op.node_name))
7727 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7728 result = self.rpc.call_storage_execute(self.op.node_name,
7729 self.op.storage_type, st_args,
7731 constants.SO_FIX_CONSISTENCY)
7732 result.Raise("Failed to repair storage unit '%s' on %s" %
7733 (self.op.name, self.op.node_name))
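
# Illustrative sketch (hypothetical mapping, stdlib only): the prerequisite
# check in LURepairNodeStorage above only allows a repair when the storage
# type lists the "fix consistency" operation among its valid operations.
def _SketchCanRepairStorage(storage_type, valid_operations, fix_op):
  """Return True if 'fix_op' is a valid operation for the storage type."""
  return fix_op in valid_operations.get(storage_type, [])
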
7736 class LUNodeEvacuationStrategy(NoHooksLU):
7737 """Computes the node evacuation strategy.
7740 _OP_REQP = ["nodes"]
7743 def CheckArguments(self):
7744 if not hasattr(self.op, "remote_node"):
7745 self.op.remote_node = None
7746 if not hasattr(self.op, "iallocator"):
7747 self.op.iallocator = None
7748 if self.op.remote_node is not None and self.op.iallocator is not None:
7749 raise errors.OpPrereqError("Give either the iallocator or the new"
7750 " secondary, not both", errors.ECODE_INVAL)
7752 def ExpandNames(self):
7753 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7754 self.needed_locks = locks = {}
7755 if self.op.remote_node is None:
7756 locks[locking.LEVEL_NODE] = locking.ALL_SET
7758 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7759 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7761 def CheckPrereq(self):
7764 def Exec(self, feedback_fn):
7765 if self.op.remote_node is not None:
7767 for node in self.op.nodes:
7768 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7771 if i.primary_node == self.op.remote_node:
7772 raise errors.OpPrereqError("Node %s is the primary node of"
7773 " instance %s, cannot use it as"
7775 (self.op.remote_node, i.name),
7777 result.append([i.name, self.op.remote_node])
7779 ial = IAllocator(self.cfg, self.rpc,
7780 mode=constants.IALLOCATOR_MODE_MEVAC,
7781 evac_nodes=self.op.nodes)
7782 ial.Run(self.op.iallocator, validate=True)
7784 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
7790 class LUGrowDisk(LogicalUnit):
7791 """Grow a disk of an instance.
7795 HTYPE = constants.HTYPE_INSTANCE
7796 _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7799 def ExpandNames(self):
7800 self._ExpandAndLockInstance()
7801 self.needed_locks[locking.LEVEL_NODE] = []
7802 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7804 def DeclareLocks(self, level):
7805 if level == locking.LEVEL_NODE:
7806 self._LockInstancesNodes()
7808 def BuildHooksEnv(self):
7811 This runs on the master, the primary and all the secondaries.
7815 "DISK": self.op.disk,
7816 "AMOUNT": self.op.amount,
7818 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7819 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7822 def CheckPrereq(self):
7823 """Check prerequisites.
7825 This checks that the instance is in the cluster.
7828 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7829 assert instance is not None, \
7830 "Cannot retrieve locked instance %s" % self.op.instance_name
7831 nodenames = list(instance.all_nodes)
7832 for node in nodenames:
7833 _CheckNodeOnline(self, node)
7836 self.instance = instance
7838 if instance.disk_template not in constants.DTS_GROWABLE:
7839 raise errors.OpPrereqError("Instance's disk layout does not support"
7840 " growing.", errors.ECODE_INVAL)
7842 self.disk = instance.FindDisk(self.op.disk)
7844 if instance.disk_template != constants.DT_FILE:
7845 # TODO: check the free disk space for file, when that feature will be
7847 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
7849 def Exec(self, feedback_fn):
7850 """Execute disk grow.
7853 instance = self.instance
7855 for node in instance.all_nodes:
7856 self.cfg.SetDiskID(disk, node)
7857 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7858 result.Raise("Grow request failed to node %s" % node)
7860 # TODO: Rewrite code to work properly
7861 # DRBD goes into sync mode for a short amount of time after executing the
7862 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7863 # calling "resize" in sync mode fails. Sleeping for a short amount of
7864 # time is a work-around.
7867 disk.RecordGrow(self.op.amount)
7868 self.cfg.Update(instance, feedback_fn)
7869 if self.op.wait_for_sync:
7870 disk_abort = not _WaitForSync(self, instance)
7872 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7873 " status.\nPlease check the instance.")
7876 class LUQueryInstanceData(NoHooksLU):
7877 """Query runtime instance data.
7880 _OP_REQP = ["instances", "static"]
7883 def ExpandNames(self):
7884 self.needed_locks = {}
7885 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7887 if not isinstance(self.op.instances, list):
7888 raise errors.OpPrereqError("Invalid argument type 'instances'",
7891 if self.op.instances:
7892 self.wanted_names = []
7893 for name in self.op.instances:
7894 full_name = _ExpandInstanceName(self.cfg, name)
7895 self.wanted_names.append(full_name)
7896 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7898 self.wanted_names = None
7899 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7901 self.needed_locks[locking.LEVEL_NODE] = []
7902 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7904 def DeclareLocks(self, level):
7905 if level == locking.LEVEL_NODE:
7906 self._LockInstancesNodes()
7908 def CheckPrereq(self):
7909 """Check prerequisites.
7911 This only checks the optional instance list against the existing names.
7914 if self.wanted_names is None:
7915 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7917 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7918 in self.wanted_names]
7921 def _ComputeBlockdevStatus(self, node, instance_name, dev):
7922 """Returns the status of a block device
7925 if self.op.static or not node:
7928 self.cfg.SetDiskID(dev, node)
7930 result = self.rpc.call_blockdev_find(node, dev)
7934 result.Raise("Can't compute disk status for %s" % instance_name)
7936 status = result.payload
7940 return (status.dev_path, status.major, status.minor,
7941 status.sync_percent, status.estimated_time,
7942 status.is_degraded, status.ldisk_status)
7944 def _ComputeDiskStatus(self, instance, snode, dev):
7945 """Compute block device status.
7948 if dev.dev_type in constants.LDS_DRBD:
7949 # we change the snode then (otherwise we use the one passed in)
7950 if dev.logical_id[0] == instance.primary_node:
7951 snode = dev.logical_id[1]
7953 snode = dev.logical_id[0]
7955 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
7957 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
7960 dev_children = [self._ComputeDiskStatus(instance, snode, child)
7961 for child in dev.children]
7966 "iv_name": dev.iv_name,
7967 "dev_type": dev.dev_type,
7968 "logical_id": dev.logical_id,
7969 "physical_id": dev.physical_id,
7970 "pstatus": dev_pstatus,
7971 "sstatus": dev_sstatus,
7972 "children": dev_children,
7979 def Exec(self, feedback_fn):
7980 """Gather and return data"""
7983 cluster = self.cfg.GetClusterInfo()
7985 for instance in self.wanted_instances:
7986 if not self.op.static:
7987 remote_info = self.rpc.call_instance_info(instance.primary_node,
7989 instance.hypervisor)
7990 remote_info.Raise("Error checking node %s" % instance.primary_node)
7991 remote_info = remote_info.payload
7992 if remote_info and "state" in remote_info:
7995 remote_state = "down"
7998 if instance.admin_up:
8001 config_state = "down"
8003 disks = [self._ComputeDiskStatus(instance, None, device)
8004 for device in instance.disks]
8007 "name": instance.name,
8008 "config_state": config_state,
8009 "run_state": remote_state,
8010 "pnode": instance.primary_node,
8011 "snodes": instance.secondary_nodes,
8013 # this happens to be the same format used for hooks
8014 "nics": _NICListToTuple(self, instance.nics),
8016 "hypervisor": instance.hypervisor,
8017 "network_port": instance.network_port,
8018 "hv_instance": instance.hvparams,
8019 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8020 "be_instance": instance.beparams,
8021 "be_actual": cluster.FillBE(instance),
8022 "serial_no": instance.serial_no,
8023 "mtime": instance.mtime,
8024 "ctime": instance.ctime,
8025 "uuid": instance.uuid,
8028 result[instance.name] = idict
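
# Illustrative sketch (hypothetical helper): how _ComputeDiskStatus above
# picks the secondary node of a DRBD disk -- the logical_id names both ends,
# so the "other" end is whichever node is not the instance's primary.
def _SketchDrbdOtherNode(primary_node, logical_id):
  """Return the peer node encoded in a DRBD logical_id tuple."""
  node1, node2 = logical_id[0], logical_id[1]
  if node1 == primary_node:
    return node2
  return node1
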
8033 class LUSetInstanceParams(LogicalUnit):
8034 """Modifies an instances's parameters.
8037 HPATH = "instance-modify"
8038 HTYPE = constants.HTYPE_INSTANCE
8039 _OP_REQP = ["instance_name"]
8042 def CheckArguments(self):
8043 if not hasattr(self.op, 'nics'):
8045 if not hasattr(self.op, 'disks'):
8047 if not hasattr(self.op, 'beparams'):
8048 self.op.beparams = {}
8049 if not hasattr(self.op, 'hvparams'):
8050 self.op.hvparams = {}
8051 if not hasattr(self.op, "disk_template"):
8052 self.op.disk_template = None
8053 if not hasattr(self.op, "remote_node"):
8054 self.op.remote_node = None
8055 if not hasattr(self.op, "os_name"):
8056 self.op.os_name = None
8057 if not hasattr(self.op, "force_variant"):
8058 self.op.force_variant = False
8059 self.op.force = getattr(self.op, "force", False)
8060 if not (self.op.nics or self.op.disks or self.op.disk_template or
8061 self.op.hvparams or self.op.beparams or self.op.os_name):
8062 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8064 if self.op.hvparams:
8065 _CheckGlobalHvParams(self.op.hvparams)
8069 for disk_op, disk_dict in self.op.disks:
8070 if disk_op == constants.DDM_REMOVE:
8073 elif disk_op == constants.DDM_ADD:
8076 if not isinstance(disk_op, int):
8077 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8078 if not isinstance(disk_dict, dict):
8079 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8080 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8082 if disk_op == constants.DDM_ADD:
8083 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8084 if mode not in constants.DISK_ACCESS_SET:
8085 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8087 size = disk_dict.get('size', None)
8089 raise errors.OpPrereqError("Required disk parameter size missing",
8093 except (TypeError, ValueError), err:
8094 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8095 str(err), errors.ECODE_INVAL)
8096 disk_dict['size'] = size
8098 # modification of disk
8099 if 'size' in disk_dict:
8100 raise errors.OpPrereqError("Disk size change not possible, use"
8101 " grow-disk", errors.ECODE_INVAL)
8103 if disk_addremove > 1:
8104 raise errors.OpPrereqError("Only one disk add or remove operation"
8105 " supported at a time", errors.ECODE_INVAL)
8107 if self.op.disks and self.op.disk_template is not None:
8108 raise errors.OpPrereqError("Disk template conversion and other disk"
8109 " changes not supported at the same time",
8112 if self.op.disk_template:
8113 _CheckDiskTemplate(self.op.disk_template)
8114 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8115 self.op.remote_node is None):
8116 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8117 " one requires specifying a secondary node",
8122 for nic_op, nic_dict in self.op.nics:
8123 if nic_op == constants.DDM_REMOVE:
8126 elif nic_op == constants.DDM_ADD:
8129 if not isinstance(nic_op, int):
8130 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8131 if not isinstance(nic_dict, dict):
8132 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8133 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8135 # nic_dict should be a dict
8136 nic_ip = nic_dict.get('ip', None)
8137 if nic_ip is not None:
8138 if nic_ip.lower() == constants.VALUE_NONE:
8139 nic_dict['ip'] = None
8141 if not utils.IsValidIP(nic_ip):
8142 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8145 nic_bridge = nic_dict.get('bridge', None)
8146 nic_link = nic_dict.get('link', None)
8147 if nic_bridge and nic_link:
8148 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8149 " at the same time", errors.ECODE_INVAL)
8150 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8151 nic_dict['bridge'] = None
8152 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8153 nic_dict['link'] = None
8155 if nic_op == constants.DDM_ADD:
8156 nic_mac = nic_dict.get('mac', None)
8158 nic_dict['mac'] = constants.VALUE_AUTO
8160 if 'mac' in nic_dict:
8161 nic_mac = nic_dict['mac']
8162 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8163 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8165 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8166 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8167 " modifying an existing nic",
8170 if nic_addremove > 1:
8171 raise errors.OpPrereqError("Only one NIC add or remove operation"
8172 " supported at a time", errors.ECODE_INVAL)
8174 def ExpandNames(self):
8175 self._ExpandAndLockInstance()
8176 self.needed_locks[locking.LEVEL_NODE] = []
8177 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8179 def DeclareLocks(self, level):
8180 if level == locking.LEVEL_NODE:
8181 self._LockInstancesNodes()
8182 if self.op.disk_template and self.op.remote_node:
8183 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8184 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8186 def BuildHooksEnv(self):
8189 This runs on the master, primary and secondaries.
8193 if constants.BE_MEMORY in self.be_new:
8194 args['memory'] = self.be_new[constants.BE_MEMORY]
8195 if constants.BE_VCPUS in self.be_new:
8196 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8197 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8198 # information at all.
8201 nic_override = dict(self.op.nics)
8202 c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
8203 for idx, nic in enumerate(self.instance.nics):
8204 if idx in nic_override:
8205 this_nic_override = nic_override[idx]
8207 this_nic_override = {}
8208 if 'ip' in this_nic_override:
8209 ip = this_nic_override['ip']
8212 if 'mac' in this_nic_override:
8213 mac = this_nic_override['mac']
8216 if idx in self.nic_pnew:
8217 nicparams = self.nic_pnew[idx]
8219 nicparams = objects.FillDict(c_nicparams, nic.nicparams)
8220 mode = nicparams[constants.NIC_MODE]
8221 link = nicparams[constants.NIC_LINK]
8222 args['nics'].append((ip, mac, mode, link))
8223 if constants.DDM_ADD in nic_override:
8224 ip = nic_override[constants.DDM_ADD].get('ip', None)
8225 mac = nic_override[constants.DDM_ADD]['mac']
8226 nicparams = self.nic_pnew[constants.DDM_ADD]
8227 mode = nicparams[constants.NIC_MODE]
8228 link = nicparams[constants.NIC_LINK]
8229 args['nics'].append((ip, mac, mode, link))
8230 elif constants.DDM_REMOVE in nic_override:
8231 del args['nics'][-1]
8233 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8234 if self.op.disk_template:
8235 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8236 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8240 def _GetUpdatedParams(old_params, update_dict,
8241 default_values, parameter_types):
8242 """Return the new params dict for the given params.
8244 @type old_params: dict
8245 @param old_params: old parameters
8246 @type update_dict: dict
8247 @param update_dict: dict containing new parameter values,
8248 or constants.VALUE_DEFAULT to reset the
8249 parameter to its default value
8250 @type default_values: dict
8251 @param default_values: default values for the filled parameters
8252 @type parameter_types: dict
8253 @param parameter_types: dict mapping target dict keys to types
8254 in constants.ENFORCEABLE_TYPES
8255 @rtype: (dict, dict)
8256 @return: (new_parameters, filled_parameters)
8259 params_copy = copy.deepcopy(old_params)
8260 for key, val in update_dict.iteritems():
8261 if val == constants.VALUE_DEFAULT:
8263 del params_copy[key]
8267 params_copy[key] = val
8268 utils.ForceDictType(params_copy, parameter_types)
8269 params_filled = objects.FillDict(default_values, params_copy)
8270 return (params_copy, params_filled)
8272 def CheckPrereq(self):
8273 """Check prerequisites.
8275 This only checks the instance list against the existing names.
8278 self.force = self.op.force
8280 # checking the new params on the primary/secondary nodes
8282 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8283 cluster = self.cluster = self.cfg.GetClusterInfo()
8284 assert self.instance is not None, \
8285 "Cannot retrieve locked instance %s" % self.op.instance_name
8286 pnode = instance.primary_node
8287 nodelist = list(instance.all_nodes)
8289 if self.op.disk_template:
8290 if instance.disk_template == self.op.disk_template:
8291 raise errors.OpPrereqError("Instance already has disk template %s" %
8292 instance.disk_template, errors.ECODE_INVAL)
8294 if (instance.disk_template,
8295 self.op.disk_template) not in self._DISK_CONVERSIONS:
8296 raise errors.OpPrereqError("Unsupported disk template conversion from"
8297 " %s to %s" % (instance.disk_template,
8298 self.op.disk_template),
8300 if self.op.disk_template in constants.DTS_NET_MIRROR:
8301 _CheckNodeOnline(self, self.op.remote_node)
8302 _CheckNodeNotDrained(self, self.op.remote_node)
8303 disks = [{"size": d.size} for d in instance.disks]
8304 required = _ComputeDiskSize(self.op.disk_template, disks)
8305 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8306 _CheckInstanceDown(self, instance, "cannot change disk template")
8308 # hvparams processing
8309 if self.op.hvparams:
8310 i_hvdict, hv_new = self._GetUpdatedParams(
8311 instance.hvparams, self.op.hvparams,
8312 cluster.hvparams[instance.hypervisor],
8313 constants.HVS_PARAMETER_TYPES)
8315 hypervisor.GetHypervisor(
8316 instance.hypervisor).CheckParameterSyntax(hv_new)
8317 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8318 self.hv_new = hv_new # the new actual values
8319 self.hv_inst = i_hvdict # the new dict (without defaults)
8321 self.hv_new = self.hv_inst = {}
8323 # beparams processing
8324 if self.op.beparams:
8325 i_bedict, be_new = self._GetUpdatedParams(
8326 instance.beparams, self.op.beparams,
8327 cluster.beparams[constants.PP_DEFAULT],
8328 constants.BES_PARAMETER_TYPES)
8329 self.be_new = be_new # the new actual values
8330 self.be_inst = i_bedict # the new dict (without defaults)
8332 self.be_new = self.be_inst = {}
8336 if constants.BE_MEMORY in self.op.beparams and not self.force:
8337 mem_check_list = [pnode]
8338 if be_new[constants.BE_AUTO_BALANCE]:
8339 # either we changed auto_balance to yes or it was enabled before
8340 mem_check_list.extend(instance.secondary_nodes)
8341 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8342 instance.hypervisor)
8343 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8344 instance.hypervisor)
8345 pninfo = nodeinfo[pnode]
8346 msg = pninfo.fail_msg
8348 # Assume the primary node is unreachable and go ahead
8349 self.warn.append("Can't get info from primary node %s: %s" %
8351 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8352 self.warn.append("Node data from primary node %s doesn't contain"
8353 " free memory information" % pnode)
8354 elif instance_info.fail_msg:
8355 self.warn.append("Can't get instance runtime information: %s" %
8356 instance_info.fail_msg)
8358 if instance_info.payload:
8359 current_mem = int(instance_info.payload['memory'])
8361 # Assume instance not running
8362 # (there is a slight race condition here, but it's not very probable,
8363 # and we have no other way to check)
8365 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8366 pninfo.payload['memory_free'])
8368 raise errors.OpPrereqError("This change will prevent the instance"
8369 " from starting, due to %d MB of memory"
8370 " missing on its primary node" % miss_mem,
8373 if be_new[constants.BE_AUTO_BALANCE]:
8374 for node, nres in nodeinfo.items():
8375 if node not in instance.secondary_nodes:
8379 self.warn.append("Can't get info from secondary node %s: %s" %
8381 elif not isinstance(nres.payload.get('memory_free', None), int):
8382 self.warn.append("Secondary node %s didn't return free"
8383 " memory information" % node)
8384 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8385 self.warn.append("Not enough memory to failover instance to"
8386 " secondary node %s" % node)
8391 for nic_op, nic_dict in self.op.nics:
8392 if nic_op == constants.DDM_REMOVE:
8393 if not instance.nics:
8394 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8397 if nic_op != constants.DDM_ADD:
8399 if not instance.nics:
8400 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8401 " no NICs" % nic_op,
8403 if nic_op < 0 or nic_op >= len(instance.nics):
8404 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8406 (nic_op, len(instance.nics) - 1),
8408 old_nic_params = instance.nics[nic_op].nicparams
8409 old_nic_ip = instance.nics[nic_op].ip
8414 update_params_dict = dict([(key, nic_dict[key])
8415 for key in constants.NICS_PARAMETERS
8416 if key in nic_dict])
8418 if 'bridge' in nic_dict:
8419 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8421 new_nic_params, new_filled_nic_params = \
8422 self._GetUpdatedParams(old_nic_params, update_params_dict,
8423 cluster.nicparams[constants.PP_DEFAULT],
8424 constants.NICS_PARAMETER_TYPES)
8425 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8426 self.nic_pinst[nic_op] = new_nic_params
8427 self.nic_pnew[nic_op] = new_filled_nic_params
8428 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8430 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8431 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8432 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8434 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8436 self.warn.append(msg)
8438 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8439 if new_nic_mode == constants.NIC_MODE_ROUTED:
8440 if 'ip' in nic_dict:
8441 nic_ip = nic_dict['ip']
8445 raise errors.OpPrereqError('Cannot set the nic ip to None'
8446 ' on a routed nic', errors.ECODE_INVAL)
8447 if 'mac' in nic_dict:
8448 nic_mac = nic_dict['mac']
8450 raise errors.OpPrereqError('Cannot set the nic mac to None',
8452 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8453 # otherwise generate the mac
8454 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8456 # or validate/reserve the current one
8458 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8459 except errors.ReservationError:
8460 raise errors.OpPrereqError("MAC address %s already in use"
8461 " in cluster" % nic_mac,
8462 errors.ECODE_NOTUNIQUE)
8465 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8466 raise errors.OpPrereqError("Disk operations not supported for"
8467 " diskless instances",
8469 for disk_op, _ in self.op.disks:
8470 if disk_op == constants.DDM_REMOVE:
8471 if len(instance.disks) == 1:
8472 raise errors.OpPrereqError("Cannot remove the last disk of"
8473 " an instance", errors.ECODE_INVAL)
8474 _CheckInstanceDown(self, instance, "cannot remove disks")
8476 if (disk_op == constants.DDM_ADD and
8477 len(instance.nics) >= constants.MAX_DISKS):
8478 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8479 " add more" % constants.MAX_DISKS,
8481 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8483 if disk_op < 0 or disk_op >= len(instance.disks):
8484 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8486 (disk_op, len(instance.disks)),
8490 if self.op.os_name and not self.op.force:
8491 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8492 self.op.force_variant)
8496 def _ConvertPlainToDrbd(self, feedback_fn):
8497 """Converts an instance from plain to drbd.
8500 feedback_fn("Converting template to drbd")
8501 instance = self.instance
8502 pnode = instance.primary_node
8503 snode = self.op.remote_node
8505 # create a fake disk info for _GenerateDiskTemplate
8506 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8507 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8508 instance.name, pnode, [snode],
8509 disk_info, None, None, 0)
8510 info = _GetInstanceInfoText(instance)
8511 feedback_fn("Creating aditional volumes...")
8512 # first, create the missing data and meta devices
8513 for disk in new_disks:
8514 # unfortunately this is... not too nice
8515 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8517 for child in disk.children:
8518 _CreateSingleBlockDev(self, snode, instance, child, info, True)
8519 # at this stage, all new LVs have been created, we can rename the old ones
8521 feedback_fn("Renaming original volumes...")
8522 rename_list = [(o, n.children[0].logical_id)
8523 for (o, n) in zip(instance.disks, new_disks)]
8524 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8525 result.Raise("Failed to rename original LVs")
8527 feedback_fn("Initializing DRBD devices...")
8528 # all child devices are in place, we can now create the DRBD devices
8529 for disk in new_disks:
8530 for node in [pnode, snode]:
8531 f_create = node == pnode
8532 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8534 # at this point, the instance has been modified
8535 instance.disk_template = constants.DT_DRBD8
8536 instance.disks = new_disks
8537 self.cfg.Update(instance, feedback_fn)
8539 # disks are created, waiting for sync
8540 disk_abort = not _WaitForSync(self, instance)
8542 raise errors.OpExecError("There are some degraded disks for"
8543 " this instance, please cleanup manually")
8545 def _ConvertDrbdToPlain(self, feedback_fn):
8546 """Converts an instance from drbd to plain.
8549 instance = self.instance
8550 assert len(instance.secondary_nodes) == 1
8551 pnode = instance.primary_node
8552 snode = instance.secondary_nodes[0]
8553 feedback_fn("Converting template to plain")
8555 old_disks = instance.disks
8556 new_disks = [d.children[0] for d in old_disks]
8558 # copy over size and mode
8559 for parent, child in zip(old_disks, new_disks):
8560 child.size = parent.size
8561 child.mode = parent.mode
8563 # update instance structure
8564 instance.disks = new_disks
8565 instance.disk_template = constants.DT_PLAIN
8566 self.cfg.Update(instance, feedback_fn)
8568 feedback_fn("Removing volumes on the secondary node...")
8569 for disk in old_disks:
8570 self.cfg.SetDiskID(disk, snode)
8571 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8573 self.LogWarning("Could not remove block device %s on node %s,"
8574 " continuing anyway: %s", disk.iv_name, snode, msg)
8576 feedback_fn("Removing unneeded volumes on the primary node...")
8577 for idx, disk in enumerate(old_disks):
8578 meta = disk.children[1]
8579 self.cfg.SetDiskID(meta, pnode)
8580 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8582 self.LogWarning("Could not remove metadata for disk %d on node %s,"
8583 " continuing anyway: %s", idx, pnode, msg)
8586 def Exec(self, feedback_fn):
8587 """Modifies an instance.
8589 All parameters take effect only at the next restart of the instance.
8592 # Process here the warnings from CheckPrereq, as we don't have a
8593 # feedback_fn there.
8594 for warn in self.warn:
8595 feedback_fn("WARNING: %s" % warn)
8598 instance = self.instance
8600 for disk_op, disk_dict in self.op.disks:
8601 if disk_op == constants.DDM_REMOVE:
8602 # remove the last disk
8603 device = instance.disks.pop()
8604 device_idx = len(instance.disks)
8605 for node, disk in device.ComputeNodeTree(instance.primary_node):
8606 self.cfg.SetDiskID(disk, node)
8607 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8609 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8610 " continuing anyway", device_idx, node, msg)
8611 result.append(("disk/%d" % device_idx, "remove"))
8612 elif disk_op == constants.DDM_ADD:
8614 if instance.disk_template == constants.DT_FILE:
8615 file_driver, file_path = instance.disks[0].logical_id
8616 file_path = os.path.dirname(file_path)
8618 file_driver = file_path = None
8619 disk_idx_base = len(instance.disks)
8620 new_disk = _GenerateDiskTemplate(self,
8621 instance.disk_template,
8622 instance.name, instance.primary_node,
8623 instance.secondary_nodes,
8628 instance.disks.append(new_disk)
8629 info = _GetInstanceInfoText(instance)
8631 logging.info("Creating volume %s for instance %s",
8632 new_disk.iv_name, instance.name)
8633 # Note: this needs to be kept in sync with _CreateDisks
8635 for node in instance.all_nodes:
8636 f_create = node == instance.primary_node
8638 _CreateBlockDev(self, node, instance, new_disk,
8639 f_create, info, f_create)
8640 except errors.OpExecError, err:
8641 self.LogWarning("Failed to create volume %s (%s) on"
8643 new_disk.iv_name, new_disk, node, err)
8644 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8645 (new_disk.size, new_disk.mode)))
8647 # change a given disk
8648 instance.disks[disk_op].mode = disk_dict['mode']
8649 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8651 if self.op.disk_template:
8652 r_shut = _ShutdownInstanceDisks(self, instance)
8654 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8655 " proceed with disk template conversion")
8656 mode = (instance.disk_template, self.op.disk_template)
8658 self._DISK_CONVERSIONS[mode](self, feedback_fn)
8660 self.cfg.ReleaseDRBDMinors(instance.name)
8662 result.append(("disk_template", self.op.disk_template))
8665 for nic_op, nic_dict in self.op.nics:
8666 if nic_op == constants.DDM_REMOVE:
8667 # remove the last nic
8668 del instance.nics[-1]
8669 result.append(("nic.%d" % len(instance.nics), "remove"))
8670 elif nic_op == constants.DDM_ADD:
8671 # mac and bridge should be set by now
8672 mac = nic_dict['mac']
8673 ip = nic_dict.get('ip', None)
8674 nicparams = self.nic_pinst[constants.DDM_ADD]
8675 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8676 instance.nics.append(new_nic)
8677 result.append(("nic.%d" % (len(instance.nics) - 1),
8678 "add:mac=%s,ip=%s,mode=%s,link=%s" %
8679 (new_nic.mac, new_nic.ip,
8680 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8681 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8684 for key in 'mac', 'ip':
8686 setattr(instance.nics[nic_op], key, nic_dict[key])
8687 if nic_op in self.nic_pinst:
8688 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8689 for key, val in nic_dict.iteritems():
8690 result.append(("nic.%s/%d" % (key, nic_op), val))
8693 if self.op.hvparams:
8694 instance.hvparams = self.hv_inst
8695 for key, val in self.op.hvparams.iteritems():
8696 result.append(("hv/%s" % key, val))
8699 if self.op.beparams:
8700 instance.beparams = self.be_inst
8701 for key, val in self.op.beparams.iteritems():
8702 result.append(("be/%s" % key, val))
8706 instance.os = self.op.os_name
8708 self.cfg.Update(instance, feedback_fn)
8712 _DISK_CONVERSIONS = {
8713 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8714 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
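
# Illustrative sketch (hypothetical reset marker, stdlib only): the merge
# semantics of LUSetInstanceParams._GetUpdatedParams above -- a value equal
# to the "reset to default" marker drops the key so the cluster default
# shows through again, any other value overrides, and the filled dict is the
# defaults updated with the surviving instance-level values.
def _SketchGetUpdatedParams(old_params, update_dict, default_values,
                            reset_marker="default"):
  """Return (new_params, filled_params) for the given parameter update."""
  new_params = dict(old_params)
  for key, val in update_dict.items():
    if val == reset_marker:
      new_params.pop(key, None)
    else:
      new_params[key] = val
  filled_params = dict(default_values)
  filled_params.update(new_params)
  return (new_params, filled_params)
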
8717 class LUQueryExports(NoHooksLU):
8718 """Query the exports list
8721 _OP_REQP = ['nodes']
8724 def ExpandNames(self):
8725 self.needed_locks = {}
8726 self.share_locks[locking.LEVEL_NODE] = 1
8727 if not self.op.nodes:
8728 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8730 self.needed_locks[locking.LEVEL_NODE] = \
8731 _GetWantedNodes(self, self.op.nodes)
8733 def CheckPrereq(self):
8734 """Check prerequisites.
8737 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8739 def Exec(self, feedback_fn):
8740 """Compute the list of all the exported system images.
8743 @return: a dictionary with the structure node->(export-list)
8744 where export-list is a list of the instances exported on that node
8748 rpcresult = self.rpc.call_export_list(self.nodes)
8750 for node in rpcresult:
8751 if rpcresult[node].fail_msg:
8752 result[node] = False
8754 result[node] = rpcresult[node].payload
8759 class LUExportInstance(LogicalUnit):
8760 """Export an instance to an image in the cluster.
8763 HPATH = "instance-export"
8764 HTYPE = constants.HTYPE_INSTANCE
8765 _OP_REQP = ["instance_name", "target_node", "shutdown"]
8768 def CheckArguments(self):
8769 """Check the arguments.
8772 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
8773 constants.DEFAULT_SHUTDOWN_TIMEOUT)
8775 def ExpandNames(self):
8776 self._ExpandAndLockInstance()
8777 # FIXME: lock only instance primary and destination node
8779 # Sad but true, for now we have to lock all nodes, as we don't know where
8780 # the previous export might be, and in this LU we search for it and
8781 # remove it from its current node. In the future we could fix this by:
8782 # - making a tasklet to search (share-lock all), then create the new one,
8783 # then one to remove, after
8784 # - removing the removal operation altogether
8785 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8787 def DeclareLocks(self, level):
8788 """Last minute lock declaration."""
8789 # All nodes are locked anyway, so nothing to do here.
8791 def BuildHooksEnv(self):
8794 This will run on the master, primary node and target node.
8798 "EXPORT_NODE": self.op.target_node,
8799 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
8800 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
8802 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8803 nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
8804 self.op.target_node]
8807 def CheckPrereq(self):
8808 """Check prerequisites.
8810 This checks that the instance and node names are valid.
8813 instance_name = self.op.instance_name
8814 self.instance = self.cfg.GetInstanceInfo(instance_name)
8815 assert self.instance is not None, \
8816 "Cannot retrieve locked instance %s" % self.op.instance_name
8817 _CheckNodeOnline(self, self.instance.primary_node)
8819 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8820 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
8821 assert self.dst_node is not None
8823 _CheckNodeOnline(self, self.dst_node.name)
8824 _CheckNodeNotDrained(self, self.dst_node.name)
8826 # instance disk type verification
8827 for disk in self.instance.disks:
8828 if disk.dev_type == constants.LD_FILE:
8829 raise errors.OpPrereqError("Export not supported for instances with"
8830 " file-based disks", errors.ECODE_INVAL)
8832 def Exec(self, feedback_fn):
8833 """Export an instance to an image in the cluster.
8836 instance = self.instance
8837 dst_node = self.dst_node
8838 src_node = instance.primary_node
8840 if self.op.shutdown:
8841 # shutdown the instance, but not the disks
8842 feedback_fn("Shutting down instance %s" % instance.name)
8843 result = self.rpc.call_instance_shutdown(src_node, instance,
8844 self.shutdown_timeout)
8845 result.Raise("Could not shutdown instance %s on"
8846 " node %s" % (instance.name, src_node))
8848 vgname = self.cfg.GetVGName()
8852 # set the disks ID correctly since call_instance_start needs the
8853 # correct drbd minor to create the symlinks
8854 for disk in instance.disks:
8855 self.cfg.SetDiskID(disk, src_node)
8857 activate_disks = (not instance.admin_up)
8860 # Activate the instance disks if we're exporting a stopped instance
8861 feedback_fn("Activating disks for %s" % instance.name)
8862 _StartInstanceDisks(self, instance, None)
8868 for idx, disk in enumerate(instance.disks):
8869 feedback_fn("Creating a snapshot of disk/%s on node %s" %
8872 # result.payload will be a snapshot of an lvm leaf of the one we passed
8874 result = self.rpc.call_blockdev_snapshot(src_node, disk)
8875 msg = result.fail_msg
8877 self.LogWarning("Could not snapshot disk/%s on node %s: %s",
8879 snap_disks.append(False)
8881 disk_id = (vgname, result.payload)
8882 new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
8883 logical_id=disk_id, physical_id=disk_id,
8884 iv_name=disk.iv_name)
8885 snap_disks.append(new_dev)
8888 if self.op.shutdown and instance.admin_up:
8889 feedback_fn("Starting instance %s" % instance.name)
8890 result = self.rpc.call_instance_start(src_node, instance, None, None)
8891 msg = result.fail_msg
8893 _ShutdownInstanceDisks(self, instance)
8894 raise errors.OpExecError("Could not start instance: %s" % msg)
8896 # TODO: check for size
8898 cluster_name = self.cfg.GetClusterName()
8899 for idx, dev in enumerate(snap_disks):
8900 feedback_fn("Exporting snapshot %s from %s to %s" %
8901 (idx, src_node, dst_node.name))
8903 # FIXME: pass debug from opcode to backend
8904 result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
8905 instance, cluster_name,
8906 idx, self.op.debug_level)
8907 msg = result.fail_msg
8909 self.LogWarning("Could not export disk/%s from node %s to"
8910 " node %s: %s", idx, src_node, dst_node.name, msg)
8911 dresults.append(False)
8913 dresults.append(True)
8914 msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
8916 self.LogWarning("Could not remove snapshot for disk/%d from node"
8917 " %s: %s", idx, src_node, msg)
8919 dresults.append(False)
8921 feedback_fn("Finalizing export on %s" % dst_node.name)
8922 result = self.rpc.call_finalize_export(dst_node.name, instance,
8925 msg = result.fail_msg
8927 self.LogWarning("Could not finalize export for instance %s"
8928 " on node %s: %s", instance.name, dst_node.name, msg)
8933 feedback_fn("Deactivating disks for %s" % instance.name)
8934 _ShutdownInstanceDisks(self, instance)
8936 nodelist = self.cfg.GetNodeList()
8937 nodelist.remove(dst_node.name)
8939 # on one-node clusters nodelist will be empty after the removal;
8940 # if we proceeded, the backup would be removed because OpQueryExports
8941 # substitutes an empty list with the full cluster node list.
8942 iname = instance.name
8944 feedback_fn("Removing old exports for instance %s" % iname)
8945 exportlist = self.rpc.call_export_list(nodelist)
8946 for node in exportlist:
8947 if exportlist[node].fail_msg:
8949 if iname in exportlist[node].payload:
8950 msg = self.rpc.call_export_remove(node, iname).fail_msg
8952 self.LogWarning("Could not remove older export for instance %s"
8953 " on node %s: %s", iname, node, msg)
8954 return fin_resu, dresults
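
# Illustrative sketch (hypothetical export listing, stdlib only): after a
# successful export, LUExportInstance above prunes stale exports of the same
# instance from every node except the destination, so at most one backup per
# instance is kept on the cluster.
def _SketchStaleExportNodes(export_lists, instance_name, dest_node):
  """export_lists: dict mapping node name to the list of exports it holds."""
  return [node for node, exports in export_lists.items()
          if node != dest_node and instance_name in exports]
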
8957 class LURemoveExport(NoHooksLU):
8958 """Remove exports related to the named instance.
8961 _OP_REQP = ["instance_name"]
8964 def ExpandNames(self):
8965 self.needed_locks = {}
8966 # We need all nodes to be locked in order for RemoveExport to work, but we
8967 # don't need to lock the instance itself, as nothing will happen to it (and
8968 # we can remove exports also for a removed instance)
8969 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8971 def CheckPrereq(self):
8972 """Check prerequisites.
8976 def Exec(self, feedback_fn):
8977 """Remove any export.
8980 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
8981 # If the instance was not found we'll try with the name that was passed in.
8982 # This will only work if it was an FQDN, though.
8984 if not instance_name:
8986 instance_name = self.op.instance_name
8988 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
8989 exportlist = self.rpc.call_export_list(locked_nodes)
8991 for node in exportlist:
8992 msg = exportlist[node].fail_msg
8994 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
8996 if instance_name in exportlist[node].payload:
8998 result = self.rpc.call_export_remove(node, instance_name)
8999 msg = result.fail_msg
9001 logging.error("Could not remove export for instance %s"
9002 " on node %s: %s", instance_name, node, msg)
9004 if fqdn_warn and not found:
9005 feedback_fn("Export not found. If trying to remove an export belonging"
9006 " to a deleted instance please use its Fully Qualified"
9010 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9013 This is an abstract class which is the parent of all the other tags LUs.
9017 def ExpandNames(self):
9018 self.needed_locks = {}
9019 if self.op.kind == constants.TAG_NODE:
9020 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9021 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9022 elif self.op.kind == constants.TAG_INSTANCE:
9023 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9024 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9026 def CheckPrereq(self):
9027 """Check prerequisites.
9030 if self.op.kind == constants.TAG_CLUSTER:
9031 self.target = self.cfg.GetClusterInfo()
9032 elif self.op.kind == constants.TAG_NODE:
9033 self.target = self.cfg.GetNodeInfo(self.op.name)
9034 elif self.op.kind == constants.TAG_INSTANCE:
9035 self.target = self.cfg.GetInstanceInfo(self.op.name)
9037 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9038 str(self.op.kind), errors.ECODE_INVAL)
9041 class LUGetTags(TagsLU):
9042 """Returns the tags of a given object.
9045 _OP_REQP = ["kind", "name"]
9048 def Exec(self, feedback_fn):
9049 """Returns the tag list.
9052 return list(self.target.GetTags())
9055 class LUSearchTags(NoHooksLU):
9056 """Searches the tags for a given pattern.
9059 _OP_REQP = ["pattern"]
9062 def ExpandNames(self):
9063 self.needed_locks = {}
9065 def CheckPrereq(self):
9066 """Check prerequisites.
9068 This checks the pattern passed for validity by compiling it.
9072 self.re = re.compile(self.op.pattern)
9073 except re.error, err:
9074 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9075 (self.op.pattern, err), errors.ECODE_INVAL)
9077 def Exec(self, feedback_fn):
9078 """Returns the tag list.
9082 tgts = [("/cluster", cfg.GetClusterInfo())]
9083 ilist = cfg.GetAllInstancesInfo().values()
9084 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9085 nlist = cfg.GetAllNodesInfo().values()
9086 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9088 for path, target in tgts:
9089 for tag in target.GetTags():
9090 if self.re.search(tag):
9091 results.append((path, tag))
9095 class LUAddTags(TagsLU):
9096 """Sets a tag on a given object.
9099 _OP_REQP = ["kind", "name", "tags"]
9102 def CheckPrereq(self):
9103 """Check prerequisites.
9105 This checks the type and length of the tag name and value.
9108 TagsLU.CheckPrereq(self)
9109 for tag in self.op.tags:
9110 objects.TaggableObject.ValidateTag(tag)
9112 def Exec(self, feedback_fn):
9117 for tag in self.op.tags:
9118 self.target.AddTag(tag)
9119 except errors.TagError, err:
9120 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9121 self.cfg.Update(self.target, feedback_fn)
9124 class LUDelTags(TagsLU):
9125 """Delete a list of tags from a given object.
9128 _OP_REQP = ["kind", "name", "tags"]
9131 def CheckPrereq(self):
9132 """Check prerequisites.
9134 This checks that we have the given tag.
9137 TagsLU.CheckPrereq(self)
9138 for tag in self.op.tags:
9139 objects.TaggableObject.ValidateTag(tag)
9140 del_tags = frozenset(self.op.tags)
9141 cur_tags = self.target.GetTags()
9142 if not del_tags <= cur_tags:
9143 diff_tags = del_tags - cur_tags
9144 diff_names = ["'%s'" % tag for tag in diff_tags]
9146 raise errors.OpPrereqError("Tag(s) %s not found" %
9147 (",".join(diff_names)), errors.ECODE_NOENT)
9149 def Exec(self, feedback_fn):
9150 """Remove the tag from the object.
9153 for tag in self.op.tags:
9154 self.target.RemoveTag(tag)
9155 self.cfg.Update(self.target, feedback_fn)
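
# Illustrative sketch (stdlib only): the prerequisite check of LUDelTags
# above -- removal is refused unless every requested tag is currently set on
# the target object.
def _SketchMissingTags(requested_tags, current_tags):
  """Return the requested tags that the object does not actually carry."""
  return frozenset(requested_tags) - frozenset(current_tags)
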
9158 class LUTestDelay(NoHooksLU):
9159 """Sleep for a specified amount of time.
9161 This LU sleeps on the master and/or nodes for a specified amount of time.
9165 _OP_REQP = ["duration", "on_master", "on_nodes"]
9168 def ExpandNames(self):
9169 """Expand names and set required locks.
9171 This expands the node list, if any.
9174 self.needed_locks = {}
9175 if self.op.on_nodes:
9176 # _GetWantedNodes can be used here, but is not always appropriate to use
9177 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9179 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9180 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9182 def CheckPrereq(self):
9183 """Check prerequisites.
9187 def Exec(self, feedback_fn):
9188 """Do the actual sleep.
9191 if self.op.on_master:
9192 if not utils.TestDelay(self.op.duration):
9193 raise errors.OpExecError("Error during master delay test")
9194 if self.op.on_nodes:
9195 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9196 for node, node_result in result.items():
9197 node_result.Raise("Failure during rpc call to node %s" % node)
9200 class IAllocator(object):
9201 """IAllocator framework.
9203 An IAllocator instance has four sets of attributes:
9204 - cfg that is needed to query the cluster
9205 - input data (all members of the _KEYS class attribute are required)
9206 - four buffer attributes (in|out_data|text), that represent the
9207 input (to the external script) in text and data structure format,
9208 and the output from it, again in two formats
9209 - the result variables from the script (success, info, nodes) for easy usage
9213 # pylint: disable-msg=R0902
9214 # lots of instance attributes
9216 "name", "mem_size", "disks", "disk_template",
9217 "os", "tags", "nics", "vcpus", "hypervisor",
9220 "name", "relocate_from",
9226 def __init__(self, cfg, rpc, mode, **kwargs):
9229 # init buffer variables
9230 self.in_text = self.out_text = self.in_data = self.out_data = None
9231 # init all input fields so that pylint is happy
9233 self.mem_size = self.disks = self.disk_template = None
9234 self.os = self.tags = self.nics = self.vcpus = None
9235 self.hypervisor = None
9236 self.relocate_from = None
9238 self.evac_nodes = None
9240 self.required_nodes = None
9241 # init result fields
9242 self.success = self.info = self.result = None
9243 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9244 keyset = self._ALLO_KEYS
9245 fn = self._AddNewInstance
9246 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9247 keyset = self._RELO_KEYS
9248 fn = self._AddRelocateInstance
9249 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9250 keyset = self._EVAC_KEYS
9251 fn = self._AddEvacuateNodes
9253 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9254 " IAllocator" % self.mode)
9256 if key not in keyset:
9257 raise errors.ProgrammerError("Invalid input parameter '%s' to"
9258 " IAllocator" % key)
9259 setattr(self, key, kwargs[key])
9262 if key not in kwargs:
9263 raise errors.ProgrammerError("Missing input parameter '%s' to"
9264 " IAllocator" % key)
9265 self._BuildInputData(fn)
9267 def _ComputeClusterData(self):
9268 """Compute the generic allocator input data.
9270 This is the data that is independent of the actual operation.
9274 cluster_info = cfg.GetClusterInfo()
9277 "version": constants.IALLOCATOR_VERSION,
9278 "cluster_name": cfg.GetClusterName(),
9279 "cluster_tags": list(cluster_info.GetTags()),
9280 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9281 # we don't have job IDs
9283 iinfo = cfg.GetAllInstancesInfo().values()
9284 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9288 node_list = cfg.GetNodeList()
9290 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9291 hypervisor_name = self.hypervisor
9292 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9293 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9294 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9295 hypervisor_name = cluster_info.enabled_hypervisors[0]
9297 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9300 self.rpc.call_all_instances_info(node_list,
9301 cluster_info.enabled_hypervisors)
9302 for nname, nresult in node_data.items():
9303 # first fill in static (config-based) values
9304 ninfo = cfg.GetNodeInfo(nname)
9306 "tags": list(ninfo.GetTags()),
9307 "primary_ip": ninfo.primary_ip,
9308 "secondary_ip": ninfo.secondary_ip,
9309 "offline": ninfo.offline,
9310 "drained": ninfo.drained,
9311 "master_candidate": ninfo.master_candidate,
9314 if not (ninfo.offline or ninfo.drained):
9315 nresult.Raise("Can't get data for node %s" % nname)
9316 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9318 remote_info = nresult.payload
9320 for attr in ['memory_total', 'memory_free', 'memory_dom0',
9321 'vg_size', 'vg_free', 'cpu_total']:
9322 if attr not in remote_info:
9323 raise errors.OpExecError("Node '%s' didn't return attribute"
9324 " '%s'" % (nname, attr))
9325 if not isinstance(remote_info[attr], int):
9326 raise errors.OpExecError("Node '%s' returned invalid value"
9328 (nname, attr, remote_info[attr]))
9329 # compute memory used by primary instances
9330 i_p_mem = i_p_up_mem = 0
9331 for iinfo, beinfo in i_list:
9332 if iinfo.primary_node == nname:
9333 i_p_mem += beinfo[constants.BE_MEMORY]
9334 if iinfo.name not in node_iinfo[nname].payload:
9337 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9338 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9339 remote_info['memory_free'] -= max(0, i_mem_diff)
9342 i_p_up_mem += beinfo[constants.BE_MEMORY]
9344 # compute memory used by instances
9346 "total_memory": remote_info['memory_total'],
9347 "reserved_memory": remote_info['memory_dom0'],
9348 "free_memory": remote_info['memory_free'],
9349 "total_disk": remote_info['vg_size'],
9350 "free_disk": remote_info['vg_free'],
9351 "total_cpus": remote_info['cpu_total'],
9352 "i_pri_memory": i_p_mem,
9353 "i_pri_up_memory": i_p_up_mem,
9357 node_results[nname] = pnr
9358 data["nodes"] = node_results
9362 for iinfo, beinfo in i_list:
9364 for nic in iinfo.nics:
9365 filled_params = objects.FillDict(
9366 cluster_info.nicparams[constants.PP_DEFAULT],
9368 nic_dict = {"mac": nic.mac,
9370 "mode": filled_params[constants.NIC_MODE],
9371 "link": filled_params[constants.NIC_LINK],
9373 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9374 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9375 nic_data.append(nic_dict)
9377 "tags": list(iinfo.GetTags()),
9378 "admin_up": iinfo.admin_up,
9379 "vcpus": beinfo[constants.BE_VCPUS],
9380 "memory": beinfo[constants.BE_MEMORY],
9382 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9384 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9385 "disk_template": iinfo.disk_template,
9386 "hypervisor": iinfo.hypervisor,
9388 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9390 instance_data[iinfo.name] = pir
9392 data["instances"] = instance_data
9396 def _AddNewInstance(self):
9397 """Add new instance data to allocator structure.
9399 This in combination with _ComputeClusterData will create the
9400 correct structure needed as input for the allocator.
9402 The checks for the completeness of the opcode must have already been done.
9406 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9408 if self.disk_template in constants.DTS_NET_MIRROR:
9409 self.required_nodes = 2
9411 self.required_nodes = 1
9414 "disk_template": self.disk_template,
9417 "vcpus": self.vcpus,
9418 "memory": self.mem_size,
9419 "disks": self.disks,
9420 "disk_space_total": disk_space,
9422 "required_nodes": self.required_nodes,
9426 def _AddRelocateInstance(self):
9427 """Add relocate instance data to allocator structure.
9429 This in combination with _ComputeClusterData will create the
9430 correct structure needed as input for the allocator.
9432 The checks for the completeness of the opcode must have already been done.
9436 instance = self.cfg.GetInstanceInfo(self.name)
9437 if instance is None:
9438 raise errors.ProgrammerError("Unknown instance '%s' passed to"
9439 " IAllocator" % self.name)
9441 if instance.disk_template not in constants.DTS_NET_MIRROR:
9442 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9445 if len(instance.secondary_nodes) != 1:
9446 raise errors.OpPrereqError("Instance has not exactly one secondary node",
9449 self.required_nodes = 1
9450 disk_sizes = [{'size': disk.size} for disk in instance.disks]
9451 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9455 "disk_space_total": disk_space,
9456 "required_nodes": self.required_nodes,
9457 "relocate_from": self.relocate_from,
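
  # Illustrative sketch only (hypothetical values): a relocation request as
  # returned by _AddRelocateInstance:
  #
  #   {
  #     "name": "instance1.example.com",
  #     "disk_space_total": 10240,
  #     "required_nodes": 1,
  #     "relocate_from": ["node2.example.com"],
  #   }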

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes,
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
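
    # The fn argument is expected to be one of the _Add* helpers above
    # (for example _AddNewInstance or _AddRelocateInstance); a hypothetical
    # call would be self._BuildInputData(self._AddNewInstance). The helper's
    # return value becomes the "request" section of the allocator input.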

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
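
    # The call_fn parameter allows callers (presumably tests) to inject a
    # replacement for the iallocator RPC; a hypothetical example:
    #
    #   ial.Run("dummy-allocator", validate=False,
    #           call_fn=lambda node, name, text: fake_result)
    #
    # where fake_result would mimic an RPC result object offering .Raise()
    # and .payload.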

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
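
    # Illustrative sketch only: a reply text that would pass the checks
    # above, shown here as JSON with hypothetical values:
    #
    #   {
    #     "success": true,
    #     "info": "allocation successful",
    #     "result": ["node1.example.com", "node3.example.com"]
    #   }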


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]
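
  # This LU is typically exercised via the "gnt-debug allocator" command;
  # the direction parameter selects whether only the generated input is
  # returned (IALLOCATOR_DIR_IN) or the named allocator script is actually
  # run and its output returned (IALLOCATOR_DIR_OUT).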

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result