4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
38 from ganeti import ssh
39 from ganeti import utils
40 from ganeti import errors
41 from ganeti import hypervisor
42 from ganeti import locking
43 from ganeti import constants
44 from ganeti import objects
45 from ganeti import serializer
46 from ganeti import ssconf
49 class LogicalUnit(object):
50 """Logical Unit base class.
52 Subclasses must follow these rules:
53 - implement ExpandNames
54 - implement CheckPrereq (except when tasklets are used)
55 - implement Exec (except when tasklets are used)
56 - implement BuildHooksEnv
57 - redefine HPATH and HTYPE
58 - optionally redefine their run requirements:
59 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
61 Note that all commands require root permissions.
63 @ivar dry_run_result: the value (if any) that will be returned to the caller
64 in dry-run mode (signalled by opcode dry_run parameter)
72 def __init__(self, processor, op, context, rpc):
73 """Constructor for LogicalUnit.
75 This needs to be overridden in derived classes in order to check op validity.
81 self.cfg = context.cfg
82 self.context = context
84 # Dicts used to declare locking needs to mcpu
85 self.needed_locks = None
86 self.acquired_locks = {}
87 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
89 self.remove_locks = {}
90 # Used to force good behavior when calling helper functions
91 self.recalculate_locks = {}
94 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
95 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
96 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
98 self.dry_run_result = None
99 # support for generic debug attribute
100 if (not hasattr(self.op, "debug_level") or
101 not isinstance(self.op.debug_level, int)):
102 self.op.debug_level = 0
107 for attr_name in self._OP_REQP:
108 attr_val = getattr(op, attr_name, None)
109 if attr_val is None:
110 raise errors.OpPrereqError("Required parameter '%s' missing" %
111 attr_name, errors.ECODE_INVAL)
113 self.CheckArguments()
116 """Returns the SshRunner object
120 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
123 ssh = property(fget=__GetSSH)
125 def CheckArguments(self):
126 """Check syntactic validity for the opcode arguments.
128 This method is for doing a simple syntactic check and ensuring the
129 validity of opcode parameters, without any cluster-related
130 checks. While the same can be accomplished in ExpandNames and/or
131 CheckPrereq, doing these separately is better because:
133 - ExpandNames is left as purely a lock-related function
134 - CheckPrereq is run after we have acquired locks (and possibly waited for them)
137 The function is allowed to change the self.op attribute so that
138 later methods no longer need to worry about missing parameters.
143 def ExpandNames(self):
144 """Expand names for this LU.
146 This method is called before starting to execute the opcode, and it should
147 update all the parameters of the opcode to their canonical form (e.g. a
148 short node name must be fully expanded after this method has successfully
149 completed). This way locking, hooks, logging, etc. can work correctly.
151 LUs which implement this method must also populate the self.needed_locks
152 member, as a dict with lock levels as keys, and a list of needed lock names as values. Rules:
155 - use an empty dict if you don't need any lock
156 - if you don't need any lock at a particular level omit that level
157 - don't put anything for the BGL level
158 - if you want all locks at a level use locking.ALL_SET as a value
160 If you need to share locks (rather than acquire them exclusively) at one
161 level you can modify self.share_locks, setting a true value (usually 1) for
162 that level. By default locks are not shared.
164 This function can also define a list of tasklets, which then will be
165 executed in order instead of the usual LU-level CheckPrereq and Exec
166 functions, if those are not defined by the LU.
170 # Acquire all nodes and one instance
171 self.needed_locks = {
172 locking.LEVEL_NODE: locking.ALL_SET,
173 locking.LEVEL_INSTANCE: ['instance1.example.tld'],
175 # Acquire just two nodes
176 self.needed_locks = {
177 locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
180 self.needed_locks = {} # No, you can't leave it to the default value None
183 # The implementation of this method is mandatory only if the new LU is
184 # concurrent, so that old LUs don't need to be changed all at the same time.
187 self.needed_locks = {} # Exclusive LUs don't need locks.
189 raise NotImplementedError
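# A minimal sketch (illustrative only, not part of this module) of an LU whose
# ExpandNames follows the rules above: it wants all node locks, but in shared
# mode, so it sets share_locks for that level. The class name
# LUExampleListNodes and its behaviour are assumed purely for illustration.
#
#   class LUExampleListNodes(NoHooksLU):
#     _OP_REQP = []
#
#     def ExpandNames(self):
#       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
#       self.share_locks[locking.LEVEL_NODE] = 1
#
#     def CheckPrereq(self):
#       pass
#
#     def Exec(self, feedback_fn):
#       return self.cfg.GetNodeList()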
191 def DeclareLocks(self, level):
192 """Declare LU locking needs for a level
194 While most LUs can just declare their locking needs at ExpandNames time,
195 sometimes there's the need to calculate some locks after having acquired
196 the ones before. This function is called just before acquiring locks at a
197 particular level, but after acquiring the ones at lower levels, and permits
198 such calculations. It can be used to modify self.needed_locks, and by
199 default it does nothing.
201 This function is only called if you have something already set in
202 self.needed_locks for the level.
204 @param level: Locking level which is going to be locked
205 @type level: member of ganeti.locking.LEVELS
209 def CheckPrereq(self):
210 """Check prerequisites for this LU.
212 This method should check that the prerequisites for the execution
213 of this LU are fulfilled. It can do internode communication, but
214 it should be idempotent - no cluster or system changes are allowed.
217 The method should raise errors.OpPrereqError in case something is
218 not fulfilled. Its return value is ignored.
220 This method should also update all the parameters of the opcode to
221 their canonical form if it hasn't been done by ExpandNames before.
224 if self.tasklets is not None:
225 for (idx, tl) in enumerate(self.tasklets):
226 logging.debug("Checking prerequisites for tasklet %s/%s",
227 idx + 1, len(self.tasklets))
228 tl.CheckPrereq()
229 else:
230 raise NotImplementedError
232 def Exec(self, feedback_fn):
235 This method should implement the actual work. It should raise
236 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
240 if self.tasklets is not None:
241 for (idx, tl) in enumerate(self.tasklets):
242 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
243 tl.Exec(feedback_fn)
244 else:
245 raise NotImplementedError
247 def BuildHooksEnv(self):
248 """Build hooks environment for this LU.
250 This method should return a three-element tuple consisting of: a dict
251 containing the environment that will be used for running the
252 specific hook for this LU, a list of node names on which the hook
253 should run before the execution, and a list of node names on which
254 the hook should run after the execution.
256 The keys of the dict must not have the 'GANETI_' prefix, as this will
257 be handled by the hooks runner. Also note additional keys will be
258 added by the hooks runner. If the LU doesn't define any
259 environment, an empty dict (and not None) should be returned.
261 If the hook should run on no nodes, an empty list (and not None) should be returned.
263 Note that if the HPATH for a LU class is None, this function will not be called.
267 raise NotImplementedError
269 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
270 """Notify the LU about the results of its hooks.
272 This method is called every time a hooks phase is executed, and notifies
273 the Logical Unit about the hooks' result. The LU can then use it to alter
274 its result based on the hooks. By default the method does nothing and the
275 previous result is passed back unchanged but any LU can define it if it
276 wants to use the local cluster hook-scripts somehow.
278 @param phase: one of L{constants.HOOKS_PHASE_POST} or
279 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
280 @param hook_results: the results of the multi-node hooks rpc call
282 @param feedback_fn: function used to send feedback back to the caller
282 @param lu_result: the previous Exec result this LU had, or None
284 @return: the new Exec result, based on the previous result
288 # API must be kept, thus we ignore the unused argument and 'could
289 # be a function' warnings
290 # pylint: disable-msg=W0613,R0201
293 def _ExpandAndLockInstance(self):
294 """Helper function to expand and lock an instance.
296 Many LUs that work on an instance take its name in self.op.instance_name
297 and need to expand it and then declare the expanded name for locking. This
298 function does it, and then updates self.op.instance_name to the expanded
299 name. It also initializes needed_locks as a dict, if this hasn't been done before.
303 if self.needed_locks is None:
304 self.needed_locks = {}
306 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
307 "_ExpandAndLockInstance called with instance-level locks set"
308 self.op.instance_name = _ExpandInstanceName(self.cfg,
309 self.op.instance_name)
310 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
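# Illustrative sketch (not part of this module): a typical ExpandNames for an
# instance-level LU builds on the helper above, then declares an empty node
# lock list plus a recalculation mode so DeclareLocks can fill it in later.
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE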
312 def _LockInstancesNodes(self, primary_only=False):
313 """Helper function to declare instances' nodes for locking.
315 This function should be called after locking one or more instances to lock
316 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
317 with all primary or secondary nodes for instances already locked and
318 present in self.needed_locks[locking.LEVEL_INSTANCE].
320 It should be called from DeclareLocks, and for safety only works if
321 self.recalculate_locks[locking.LEVEL_NODE] is set.
323 In the future it may grow parameters to just lock some instance's nodes, or
324 to just lock primaries or secondary nodes, if needed.
326 It should be called from DeclareLocks in a way similar to::
328 if level == locking.LEVEL_NODE:
329 self._LockInstancesNodes()
331 @type primary_only: boolean
332 @param primary_only: only lock primary nodes of locked instances
335 assert locking.LEVEL_NODE in self.recalculate_locks, \
336 "_LockInstancesNodes helper function called with no nodes to recalculate"
338 # TODO: check if we've really been called with the instance locks held
340 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
341 # future we might want to have different behaviors depending on the value
342 # of self.recalculate_locks[locking.LEVEL_NODE]
343 wanted_nodes = []
344 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
345 instance = self.context.cfg.GetInstanceInfo(instance_name)
346 wanted_nodes.append(instance.primary_node)
347 if not primary_only:
348 wanted_nodes.extend(instance.secondary_nodes)
350 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
351 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
352 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
353 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
355 del self.recalculate_locks[locking.LEVEL_NODE]
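# Illustrative sketch (assumed, mirroring the docstring above): the matching
# DeclareLocks for the pattern just described simply delegates to
# _LockInstancesNodes once the instance locks are held.
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()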
358 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
359 """Simple LU which runs no hooks.
361 This LU is intended as a parent for other LogicalUnits which will
362 run no hooks, in order to reduce duplicate code.
368 def BuildHooksEnv(self):
369 """Empty BuildHooksEnv for NoHooksLu.
371 This just raises an error.
374 assert False, "BuildHooksEnv called for NoHooksLUs"
378 """Tasklet base class.
380 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
381 they can mix legacy code with tasklets. Locking needs to be done in the LU,
382 tasklets know nothing about locks.
384 Subclasses must follow these rules:
385 - Implement CheckPrereq
386 - Implement Exec
389 def __init__(self, lu):
396 def CheckPrereq(self):
397 """Check prerequisites for this tasklets.
399 This method should check whether the prerequisites for the execution of
400 this tasklet are fulfilled. It can do internode communication, but it
401 should be idempotent - no cluster or system changes are allowed.
403 The method should raise errors.OpPrereqError in case something is not
404 fulfilled. Its return value is ignored.
406 This method should also update all parameters to their canonical form if it
407 hasn't been done before.
410 raise NotImplementedError
412 def Exec(self, feedback_fn):
413 """Execute the tasklet.
415 This method should implement the actual work. It should raise
416 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
420 raise NotImplementedError
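# A minimal sketch (hypothetical names) of how tasklets plug into an LU: the
# LU's ExpandNames populates self.tasklets, and the base-class CheckPrereq and
# Exec shown earlier then drive each tasklet in order.
#
#   class _ExampleNoopTasklet(Tasklet):
#     def CheckPrereq(self):
#       pass
#
#     def Exec(self, feedback_fn):
#       feedback_fn("example tasklet ran")
#
#   # ...and inside some LU's ExpandNames:
#   #   self.tasklets = [_ExampleNoopTasklet(self)]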
423 def _GetWantedNodes(lu, nodes):
424 """Returns list of checked and expanded node names.
426 @type lu: L{LogicalUnit}
427 @param lu: the logical unit on whose behalf we execute
429 @param nodes: list of node names or None for all nodes
431 @return: the list of nodes, sorted
432 @raise errors.ProgrammerError: if the nodes parameter is wrong type
435 if not isinstance(nodes, list):
436 raise errors.OpPrereqError("Invalid argument type 'nodes'",
440 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
441 " non-empty list of nodes whose name is to be expanded.")
443 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
444 return utils.NiceSort(wanted)
447 def _GetWantedInstances(lu, instances):
448 """Returns list of checked and expanded instance names.
450 @type lu: L{LogicalUnit}
451 @param lu: the logical unit on whose behalf we execute
452 @type instances: list
453 @param instances: list of instance names or None for all instances
455 @return: the list of instances, sorted
456 @raise errors.OpPrereqError: if the instances parameter is wrong type
457 @raise errors.OpPrereqError: if any of the passed instances is not found
460 if not isinstance(instances, list):
461 raise errors.OpPrereqError("Invalid argument type 'instances'",
462 errors.ECODE_INVAL)
464 if instances:
465 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
466 else:
467 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
471 def _CheckOutputFields(static, dynamic, selected):
472 """Checks whether all selected fields are valid.
474 @type static: L{utils.FieldSet}
475 @param static: static fields set
476 @type dynamic: L{utils.FieldSet}
477 @param dynamic: dynamic fields set
484 delta = f.NonMatching(selected)
485 if delta:
486 raise errors.OpPrereqError("Unknown output fields selected: %s"
487 % ",".join(delta), errors.ECODE_INVAL)
490 def _CheckBooleanOpField(op, name):
491 """Validates boolean opcode parameters.
493 This will ensure that an opcode parameter is either a boolean value,
494 or None (but that it always exists).
497 val = getattr(op, name, None)
498 if not (val is None or isinstance(val, bool)):
499 raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
500 (name, str(val)), errors.ECODE_INVAL)
501 setattr(op, name, val)
504 def _CheckGlobalHvParams(params):
505 """Validates that given hypervisor params are not global ones.
507 This will ensure that instances don't get customised versions of global parameters.
511 used_globals = constants.HVC_GLOBALS.intersection(params)
512 if used_globals:
513 msg = ("The following hypervisor parameters are global and cannot"
514 " be customized at instance level, please modify them at"
515 " cluster level: %s" % utils.CommaJoin(used_globals))
516 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
519 def _CheckNodeOnline(lu, node):
520 """Ensure that a given node is online.
522 @param lu: the LU on behalf of which we make the check
523 @param node: the node to check
524 @raise errors.OpPrereqError: if the node is offline
527 if lu.cfg.GetNodeInfo(node).offline:
528 raise errors.OpPrereqError("Can't use offline node %s" % node,
532 def _CheckNodeNotDrained(lu, node):
533 """Ensure that a given node is not drained.
535 @param lu: the LU on behalf of which we make the check
536 @param node: the node to check
537 @raise errors.OpPrereqError: if the node is drained
540 if lu.cfg.GetNodeInfo(node).drained:
541 raise errors.OpPrereqError("Can't use drained node %s" % node,
545 def _CheckNodeHasOS(lu, node, os_name, force_variant):
546 """Ensure that a node supports a given OS.
548 @param lu: the LU on behalf of which we make the check
549 @param node: the node to check
550 @param os_name: the OS to query about
551 @param force_variant: whether to ignore variant errors
552 @raise errors.OpPrereqError: if the node is not supporting the OS
555 result = lu.rpc.call_os_get(node, os_name)
556 result.Raise("OS '%s' not in supported OS list for node %s" %
558 prereq=True, ecode=errors.ECODE_INVAL)
559 if not force_variant:
560 _CheckOSVariant(result.payload, os_name)
563 def _CheckDiskTemplate(template):
564 """Ensure a given disk template is valid.
567 if template not in constants.DISK_TEMPLATES:
568 msg = ("Invalid disk template name '%s', valid templates are: %s" %
569 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
570 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
573 def _CheckInstanceDown(lu, instance, reason):
574 """Ensure that an instance is not running."""
575 if instance.admin_up:
576 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
577 (instance.name, reason), errors.ECODE_STATE)
579 pnode = instance.primary_node
580 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
581 ins_l.Raise("Can't contact node %s for instance information" % pnode,
582 prereq=True, ecode=errors.ECODE_ENVIRON)
584 if instance.name in ins_l.payload:
585 raise errors.OpPrereqError("Instance %s is running, %s" %
586 (instance.name, reason), errors.ECODE_STATE)
589 def _ExpandItemName(fn, name, kind):
590 """Expand an item name.
592 @param fn: the function to use for expansion
593 @param name: requested item name
594 @param kind: text description ('Node' or 'Instance')
595 @return: the resolved (full) name
596 @raise errors.OpPrereqError: if the item is not found
600 if full_name is None:
601 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
606 def _ExpandNodeName(cfg, name):
607 """Wrapper over L{_ExpandItemName} for nodes."""
608 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
611 def _ExpandInstanceName(cfg, name):
612 """Wrapper over L{_ExpandItemName} for instance."""
613 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
616 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
617 memory, vcpus, nics, disk_template, disks,
618 bep, hvp, hypervisor_name):
619 """Builds instance related env variables for hooks
621 This builds the hook environment from individual variables.
624 @param name: the name of the instance
625 @type primary_node: string
626 @param primary_node: the name of the instance's primary node
627 @type secondary_nodes: list
628 @param secondary_nodes: list of secondary nodes as strings
629 @type os_type: string
630 @param os_type: the name of the instance's OS
631 @type status: boolean
632 @param status: the should_run status of the instance
634 @param memory: the memory size of the instance
636 @param vcpus: the count of VCPUs the instance has
638 @param nics: list of tuples (ip, mac, mode, link) representing
639 the NICs the instance has
640 @type disk_template: string
641 @param disk_template: the disk template of the instance
643 @param disks: the list of (size, mode) pairs
645 @param bep: the backend parameters for the instance
647 @param hvp: the hypervisor parameters for the instance
648 @type hypervisor_name: string
649 @param hypervisor_name: the hypervisor for the instance
651 @return: the hook environment for this instance
660 "INSTANCE_NAME": name,
661 "INSTANCE_PRIMARY": primary_node,
662 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
663 "INSTANCE_OS_TYPE": os_type,
664 "INSTANCE_STATUS": str_status,
665 "INSTANCE_MEMORY": memory,
666 "INSTANCE_VCPUS": vcpus,
667 "INSTANCE_DISK_TEMPLATE": disk_template,
668 "INSTANCE_HYPERVISOR": hypervisor_name,
672 nic_count = len(nics)
673 for idx, (ip, mac, mode, link) in enumerate(nics):
676 env["INSTANCE_NIC%d_IP" % idx] = ip
677 env["INSTANCE_NIC%d_MAC" % idx] = mac
678 env["INSTANCE_NIC%d_MODE" % idx] = mode
679 env["INSTANCE_NIC%d_LINK" % idx] = link
680 if mode == constants.NIC_MODE_BRIDGED:
681 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
685 env["INSTANCE_NIC_COUNT"] = nic_count
688 disk_count = len(disks)
689 for idx, (size, mode) in enumerate(disks):
690 env["INSTANCE_DISK%d_SIZE" % idx] = size
691 env["INSTANCE_DISK%d_MODE" % idx] = mode
695 env["INSTANCE_DISK_COUNT"] = disk_count
697 for source, kind in [(bep, "BE"), (hvp, "HV")]:
698 for key, value in source.items():
699 env["INSTANCE_%s_%s" % (kind, key)] = value
704 def _NICListToTuple(lu, nics):
705 """Build a list of nic information tuples.
707 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
708 value in LUQueryInstanceData.
710 @type lu: L{LogicalUnit}
711 @param lu: the logical unit on whose behalf we execute
712 @type nics: list of L{objects.NIC}
713 @param nics: list of nics to convert to hooks tuples
717 c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
721 filled_params = objects.FillDict(c_nicparams, nic.nicparams)
722 mode = filled_params[constants.NIC_MODE]
723 link = filled_params[constants.NIC_LINK]
724 hooks_nics.append((ip, mac, mode, link))
728 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
729 """Builds instance related env variables for hooks from an object.
731 @type lu: L{LogicalUnit}
732 @param lu: the logical unit on whose behalf we execute
733 @type instance: L{objects.Instance}
734 @param instance: the instance for which we should build the
737 @param override: dictionary with key/values that will override
740 @return: the hook environment dictionary
743 cluster = lu.cfg.GetClusterInfo()
744 bep = cluster.FillBE(instance)
745 hvp = cluster.FillHV(instance)
747 'name': instance.name,
748 'primary_node': instance.primary_node,
749 'secondary_nodes': instance.secondary_nodes,
750 'os_type': instance.os,
751 'status': instance.admin_up,
752 'memory': bep[constants.BE_MEMORY],
753 'vcpus': bep[constants.BE_VCPUS],
754 'nics': _NICListToTuple(lu, instance.nics),
755 'disk_template': instance.disk_template,
756 'disks': [(disk.size, disk.mode) for disk in instance.disks],
759 'hypervisor_name': instance.hypervisor,
762 args.update(override)
763 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
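# Worked illustration (all values invented): for a one-NIC, one-disk instance
# the two helpers above produce hook environment entries along the lines of
#   INSTANCE_NAME=inst1.example.com    INSTANCE_PRIMARY=node1.example.com
#   INSTANCE_STATUS=up                 INSTANCE_MEMORY=512
#   INSTANCE_NIC_COUNT=1               INSTANCE_NIC0_MODE=bridged
#   INSTANCE_DISK_COUNT=1              INSTANCE_DISK0_SIZE=10240
# plus one INSTANCE_BE_*/INSTANCE_HV_* entry per backend or hypervisor
# parameter; the hooks runner later adds the GANETI_ prefix to each key.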
766 def _AdjustCandidatePool(lu, exceptions):
767 """Adjust the candidate pool after node operations.
770 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
772 lu.LogInfo("Promoted nodes to master candidate role: %s",
773 utils.CommaJoin(node.name for node in mod_list))
774 for name in mod_list:
775 lu.context.ReaddNode(name)
776 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
778 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
782 def _DecideSelfPromotion(lu, exceptions=None):
783 """Decide whether I should promote myself as a master candidate.
786 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
787 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
788 # the new node will increase mc_max by one, so:
789 mc_should = min(mc_should + 1, cp_size)
790 return mc_now < mc_should
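# Worked example (numbers invented): with candidate_pool_size = 10 and
# GetMasterCandidateStats reporting mc_now = 3 candidates where mc_should = 3,
# adding this node raises mc_should to min(3 + 1, 10) = 4, so 3 < 4 and the
# function returns True, i.e. the new node should promote itself.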
793 def _CheckNicsBridgesExist(lu, target_nics, target_node,
794 profile=constants.PP_DEFAULT):
795 """Check that the brigdes needed by a list of nics exist.
798 c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
799 paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
800 for nic in target_nics]
801 brlist = [params[constants.NIC_LINK] for params in paramslist
802 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
804 result = lu.rpc.call_bridges_exist(target_node, brlist)
805 result.Raise("Error checking bridges on destination node '%s'" %
806 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
809 def _CheckInstanceBridgesExist(lu, instance, node=None):
810 """Check that the brigdes needed by an instance exist.
813 if node is None:
814 node = instance.primary_node
815 _CheckNicsBridgesExist(lu, instance.nics, node)
818 def _CheckOSVariant(os_obj, name):
819 """Check whether an OS name conforms to the os variants specification.
821 @type os_obj: L{objects.OS}
822 @param os_obj: OS object to check
824 @param name: OS name passed by the user, to check for validity
827 if not os_obj.supported_variants:
828 return
829 try:
830 variant = name.split("+", 1)[1]
831 except IndexError:
832 raise errors.OpPrereqError("OS name must include a variant",
835 if variant not in os_obj.supported_variants:
836 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
839 def _GetNodeInstancesInner(cfg, fn):
840 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
843 def _GetNodeInstances(cfg, node_name):
844 """Returns a list of all primary and secondary instances on a node.
848 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
851 def _GetNodePrimaryInstances(cfg, node_name):
852 """Returns primary instances on a node.
855 return _GetNodeInstancesInner(cfg,
856 lambda inst: node_name == inst.primary_node)
859 def _GetNodeSecondaryInstances(cfg, node_name):
860 """Returns secondary instances on a node.
863 return _GetNodeInstancesInner(cfg,
864 lambda inst: node_name in inst.secondary_nodes)
867 def _GetStorageTypeArgs(cfg, storage_type):
868 """Returns the arguments for a storage type.
871 # Special case for file storage
872 if storage_type == constants.ST_FILE:
873 # storage.FileStorage wants a list of storage directories
874 return [[cfg.GetFileStorageDir()]]
879 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
882 for dev in instance.disks:
883 cfg.SetDiskID(dev, node_name)
885 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
886 result.Raise("Failed to get disk status from node %s" % node_name,
887 prereq=prereq, ecode=errors.ECODE_ENVIRON)
889 for idx, bdev_status in enumerate(result.payload):
890 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
896 def _FormatTimestamp(secs):
897 """Formats a Unix timestamp with the local timezone.
900 return time.strftime("%F %T %Z", time.gmtime(secs))
903 class LUPostInitCluster(LogicalUnit):
904 """Logical unit for running hooks after cluster initialization.
907 HPATH = "cluster-init"
908 HTYPE = constants.HTYPE_CLUSTER
911 def BuildHooksEnv(self):
915 env = {"OP_TARGET": self.cfg.GetClusterName()}
916 mn = self.cfg.GetMasterNode()
919 def CheckPrereq(self):
920 """No prerequisites to check.
925 def Exec(self, feedback_fn):
932 class LUDestroyCluster(LogicalUnit):
933 """Logical unit for destroying the cluster.
936 HPATH = "cluster-destroy"
937 HTYPE = constants.HTYPE_CLUSTER
940 def BuildHooksEnv(self):
944 env = {"OP_TARGET": self.cfg.GetClusterName()}
947 def CheckPrereq(self):
948 """Check prerequisites.
950 This checks whether the cluster is empty.
952 Any errors are signaled by raising errors.OpPrereqError.
955 master = self.cfg.GetMasterNode()
957 nodelist = self.cfg.GetNodeList()
958 if len(nodelist) != 1 or nodelist[0] != master:
959 raise errors.OpPrereqError("There are still %d node(s) in"
960 " this cluster." % (len(nodelist) - 1),
962 instancelist = self.cfg.GetInstanceList()
964 raise errors.OpPrereqError("There are still %d instance(s) in"
965 " this cluster." % len(instancelist),
968 def Exec(self, feedback_fn):
969 """Destroys the cluster.
972 master = self.cfg.GetMasterNode()
973 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
975 # Run post hooks on master node before it's removed
976 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
977 try:
978 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
979 except:
980 # pylint: disable-msg=W0702
981 self.LogWarning("Errors occurred running hooks on %s" % master)
983 result = self.rpc.call_node_stop_master(master, False)
984 result.Raise("Could not disable the master role")
986 if modify_ssh_setup:
987 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
988 utils.CreateBackup(priv_key)
989 utils.CreateBackup(pub_key)
994 def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
995 warn_days=constants.SSL_CERT_EXPIRATION_WARN,
996 error_days=constants.SSL_CERT_EXPIRATION_ERROR):
997 """Verifies certificate details for LUVerifyCluster.
1001 msg = "Certificate %s is expired" % filename
1003 if not_before is not None and not_after is not None:
1004 msg += (" (valid from %s to %s)" %
1005 (_FormatTimestamp(not_before),
1006 _FormatTimestamp(not_after)))
1007 elif not_before is not None:
1008 msg += " (valid from %s)" % _FormatTimestamp(not_before)
1009 elif not_after is not None:
1010 msg += " (valid until %s)" % _FormatTimestamp(not_after)
1012 return (LUVerifyCluster.ETYPE_ERROR, msg)
1014 elif not_before is not None and not_before > now:
1015 return (LUVerifyCluster.ETYPE_WARNING,
1016 "Certificate %s not yet valid (valid from %s)" %
1017 (filename, _FormatTimestamp(not_before)))
1019 elif not_after is not None:
1020 remaining_days = int((not_after - now) / (24 * 3600))
1022 msg = ("Certificate %s expires in %d days" % (filename, remaining_days))
1024 if remaining_days <= error_days:
1025 return (LUVerifyCluster.ETYPE_ERROR, msg)
1027 if remaining_days <= warn_days:
1028 return (LUVerifyCluster.ETYPE_WARNING, msg)
1033 def _VerifyCertificate(filename):
1034 """Verifies a certificate for LUVerifyCluster.
1036 @type filename: string
1037 @param filename: Path to PEM file
1041 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1042 utils.ReadFile(filename))
1043 except Exception, err: # pylint: disable-msg=W0703
1044 return (LUVerifyCluster.ETYPE_ERROR,
1045 "Failed to load X509 certificate %s: %s" % (filename, err))
1047 # Depending on the pyOpenSSL version, this can just return (None, None)
1048 (not_before, not_after) = utils.GetX509CertValidity(cert)
1050 return _VerifyCertificateInner(filename, cert.has_expired(),
1051 not_before, not_after, time.time())
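# Worked example (threshold values assumed; the real ones come from
# constants.SSL_CERT_EXPIRATION_WARN/_ERROR): if error_days were 7 and
# warn_days 30, a certificate with 10 days of validity left would fall through
# the error check (10 > 7) but trip the warning check (10 <= 30), so
# _VerifyCertificateInner would return an ETYPE_WARNING entry.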
1054 class LUVerifyCluster(LogicalUnit):
1055 """Verifies the cluster status.
1058 HPATH = "cluster-verify"
1059 HTYPE = constants.HTYPE_CLUSTER
1060 _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1063 TCLUSTER = "cluster"
1065 TINSTANCE = "instance"
1066 TNODE = "node"
1067 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1068 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1069 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1070 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1071 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1072 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1074 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1075 ENODEDRBD = (TNODE, "ENODEDRBD")
1076 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1077 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1078 ENODEHV = (TNODE, "ENODEHV")
1079 ENODELVM = (TNODE, "ENODELVM")
1080 ENODEN1 = (TNODE, "ENODEN1")
1081 ENODENET = (TNODE, "ENODENET")
1082 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1083 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1084 ENODERPC = (TNODE, "ENODERPC")
1085 ENODESSH = (TNODE, "ENODESSH")
1086 ENODEVERSION = (TNODE, "ENODEVERSION")
1087 ENODESETUP = (TNODE, "ENODESETUP")
1088 ENODETIME = (TNODE, "ENODETIME")
1090 ETYPE_FIELD = "code"
1091 ETYPE_ERROR = "ERROR"
1092 ETYPE_WARNING = "WARNING"
1094 class NodeImage(object):
1095 """A class representing the logical and physical status of a node.
1097 @ivar volumes: a structure as returned from
1098 L{ganeti.backend.GetVolumeList} (runtime)
1099 @ivar instances: a list of running instances (runtime)
1100 @ivar pinst: list of configured primary instances (config)
1101 @ivar sinst: list of configured secondary instances (config)
1102 @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1103 of this node (config)
1104 @ivar mfree: free memory, as reported by hypervisor (runtime)
1105 @ivar dfree: free disk, as reported by the node (runtime)
1106 @ivar offline: the offline status (config)
1107 @type rpc_fail: boolean
1108 @ivar rpc_fail: whether the RPC verify call failed (overall,
1109 not whether the individual keys were correct) (runtime)
1110 @type lvm_fail: boolean
1111 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1112 @type hyp_fail: boolean
1113 @ivar hyp_fail: whether the RPC call didn't return the instance list
1114 @type ghost: boolean
1115 @ivar ghost: whether this is a known node or not (config)
1118 def __init__(self, offline=False):
1126 self.offline = offline
1127 self.rpc_fail = False
1128 self.lvm_fail = False
1129 self.hyp_fail = False
1132 def ExpandNames(self):
1133 self.needed_locks = {
1134 locking.LEVEL_NODE: locking.ALL_SET,
1135 locking.LEVEL_INSTANCE: locking.ALL_SET,
1137 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1139 def _Error(self, ecode, item, msg, *args, **kwargs):
1140 """Format an error message.
1142 Based on the opcode's error_codes parameter, either format a
1143 parseable error code, or a simpler error string.
1145 This must be called only from Exec and functions called from Exec.
1148 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1150 # first complete the msg
1153 # then format the whole message
1154 if self.op.error_codes:
1155 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1161 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1162 # and finally report it via the feedback_fn
1163 self._feedback_fn(" - %s" % msg)
1165 def _ErrorIf(self, cond, *args, **kwargs):
1166 """Log an error message if the passed condition is True.
1169 cond = bool(cond) or self.op.debug_simulate_errors
1170 if cond:
1171 self._Error(*args, **kwargs)
1172 # do not mark the operation as failed for WARN cases only
1173 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1174 self.bad = self.bad or cond
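# Illustrative output formats (item name invented): with self.op.error_codes
# set, an instance-level problem is reported as a machine-parseable line like
#   ERROR:EINSTANCEDOWN:instance:inst1.example.com:instance not running ...
# while without error_codes the same condition is rendered as
#   ERROR: instance inst1.example.com: instance not running ...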
1176 def _VerifyNode(self, ninfo, nresult):
1177 """Run multiple tests against a node.
1181 - compares ganeti version
1182 - checks vg existence and size > 20G
1183 - checks config file checksum
1184 - checks ssh to other nodes
1186 @type ninfo: L{objects.Node}
1187 @param ninfo: the node to check
1188 @param nresult: the results from the node
1190 @return: whether overall this call was successful (and we can expect
1191 reasonable values in the response)
1195 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1197 # main result, nresult should be a non-empty dict
1198 test = not nresult or not isinstance(nresult, dict)
1199 _ErrorIf(test, self.ENODERPC, node,
1200 "unable to verify node: no data returned")
1204 # compares ganeti version
1205 local_version = constants.PROTOCOL_VERSION
1206 remote_version = nresult.get("version", None)
1207 test = not (remote_version and
1208 isinstance(remote_version, (list, tuple)) and
1209 len(remote_version) == 2)
1210 _ErrorIf(test, self.ENODERPC, node,
1211 "connection to node returned invalid data")
1215 test = local_version != remote_version[0]
1216 _ErrorIf(test, self.ENODEVERSION, node,
1217 "incompatible protocol versions: master %s,"
1218 " node %s", local_version, remote_version[0])
1222 # node seems compatible, we can actually try to look into its results
1224 # full package version
1225 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1226 self.ENODEVERSION, node,
1227 "software version mismatch: master %s, node %s",
1228 constants.RELEASE_VERSION, remote_version[1],
1229 code=self.ETYPE_WARNING)
1231 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1232 if isinstance(hyp_result, dict):
1233 for hv_name, hv_result in hyp_result.iteritems():
1234 test = hv_result is not None
1235 _ErrorIf(test, self.ENODEHV, node,
1236 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1239 test = nresult.get(constants.NV_NODESETUP,
1240 ["Missing NODESETUP results"])
1241 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1246 def _VerifyNodeTime(self, ninfo, nresult,
1247 nvinfo_starttime, nvinfo_endtime):
1248 """Check the node time.
1250 @type ninfo: L{objects.Node}
1251 @param ninfo: the node to check
1252 @param nresult: the remote results for the node
1253 @param nvinfo_starttime: the start time of the RPC call
1254 @param nvinfo_endtime: the end time of the RPC call
1258 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1260 ntime = nresult.get(constants.NV_TIME, None)
1261 try:
1262 ntime_merged = utils.MergeTime(ntime)
1263 except (ValueError, TypeError):
1264 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1267 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1268 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1269 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1270 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1274 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1275 "Node time diverges by at least %s from master node time",
1278 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1279 """Check the node time.
1281 @type ninfo: L{objects.Node}
1282 @param ninfo: the node to check
1283 @param nresult: the remote results for the node
1284 @param vg_name: the configured VG name
1291 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1293 # checks vg existence and size > 20G
1294 vglist = nresult.get(constants.NV_VGLIST, None)
1296 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1298 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1299 constants.MIN_VG_SIZE)
1300 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1303 pvlist = nresult.get(constants.NV_PVLIST, None)
1304 test = pvlist is None
1305 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1307 # check that ':' is not present in PV names, since it's a
1308 # special character for lvcreate (denotes the range of PEs to
1310 for _, pvname, owner_vg in pvlist:
1311 test = ":" in pvname
1312 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1313 " '%s' of VG '%s'", pvname, owner_vg)
1315 def _VerifyNodeNetwork(self, ninfo, nresult):
1316 """Check the node time.
1318 @type ninfo: L{objects.Node}
1319 @param ninfo: the node to check
1320 @param nresult: the remote results for the node
1324 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1326 test = constants.NV_NODELIST not in nresult
1327 _ErrorIf(test, self.ENODESSH, node,
1328 "node hasn't returned node ssh connectivity data")
1330 if nresult[constants.NV_NODELIST]:
1331 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1332 _ErrorIf(True, self.ENODESSH, node,
1333 "ssh communication with node '%s': %s", a_node, a_msg)
1335 test = constants.NV_NODENETTEST not in nresult
1336 _ErrorIf(test, self.ENODENET, node,
1337 "node hasn't returned node tcp connectivity data")
1339 if nresult[constants.NV_NODENETTEST]:
1340 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1341 for anode in nlist:
1342 _ErrorIf(True, self.ENODENET, node,
1343 "tcp communication with node '%s': %s",
1344 anode, nresult[constants.NV_NODENETTEST][anode])
1346 def _VerifyInstance(self, instance, instanceconfig, node_image):
1347 """Verify an instance.
1349 This function checks to see if the required block devices are
1350 available on the instance's node.
1353 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1354 node_current = instanceconfig.primary_node
1356 node_vol_should = {}
1357 instanceconfig.MapLVsByNode(node_vol_should)
1359 for node in node_vol_should:
1360 n_img = node_image[node]
1361 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1362 # ignore missing volumes on offline or broken nodes
1363 continue
1364 for volume in node_vol_should[node]:
1365 test = volume not in n_img.volumes
1366 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1367 "volume %s missing on node %s", volume, node)
1369 if instanceconfig.admin_up:
1370 pri_img = node_image[node_current]
1371 test = instance not in pri_img.instances and not pri_img.offline
1372 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1373 "instance not running on its primary node %s",
1376 for node, n_img in node_image.items():
1377 if (not node == node_current):
1378 test = instance in n_img.instances
1379 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1380 "instance should not run on node %s", node)
1382 def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1383 """Verify if there are any unknown volumes in the cluster.
1385 The .os, .swap and backup volumes are ignored. All other volumes are
1386 reported as unknown.
1389 for node, n_img in node_image.items():
1390 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1391 # skip non-healthy nodes
1392 continue
1393 for volume in n_img.volumes:
1394 test = (node not in node_vol_should or
1395 volume not in node_vol_should[node])
1396 self._ErrorIf(test, self.ENODEORPHANLV, node,
1397 "volume %s is unknown", volume)
1399 def _VerifyOrphanInstances(self, instancelist, node_image):
1400 """Verify the list of running instances.
1402 This checks what instances are running but unknown to the cluster.
1405 for node, n_img in node_image.items():
1406 for o_inst in n_img.instances:
1407 test = o_inst not in instancelist
1408 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1409 "instance %s on node %s should not exist", o_inst, node)
1411 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1412 """Verify N+1 Memory Resilience.
1414 Check that if one single node dies we can still start all the
1415 instances it was primary for.
1418 for node, n_img in node_image.items():
1419 # This code checks that every node which is now listed as
1420 # secondary has enough memory to host all instances it is
1421 # supposed to should a single other node in the cluster fail.
1422 # FIXME: not ready for failover to an arbitrary node
1423 # FIXME: does not support file-backed instances
1424 # WARNING: we currently take into account down instances as well
1425 # as up ones, considering that even if they're down someone
1426 # might want to start them even in the event of a node failure.
1427 for prinode, instances in n_img.sbp.items():
1428 needed_mem = 0
1429 for instance in instances:
1430 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1431 if bep[constants.BE_AUTO_BALANCE]:
1432 needed_mem += bep[constants.BE_MEMORY]
1433 test = n_img.mfree < needed_mem
1434 self._ErrorIf(test, self.ENODEN1, node,
1435 "not enough memory on to accommodate"
1436 " failovers should peer node %s fail", prinode)
1438 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1439 master_files):
1440 """Verifies and computes the node required file checksums.
1442 @type ninfo: L{objects.Node}
1443 @param ninfo: the node to check
1444 @param nresult: the remote results for the node
1445 @param file_list: required list of files
1446 @param local_cksum: dictionary of local files and their checksums
1447 @param master_files: list of files that only masters should have
1451 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1453 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1454 test = not isinstance(remote_cksum, dict)
1455 _ErrorIf(test, self.ENODEFILECHECK, node,
1456 "node hasn't returned file checksum data")
1460 for file_name in file_list:
1461 node_is_mc = ninfo.master_candidate
1462 must_have = (file_name not in master_files) or node_is_mc
1464 test1 = file_name not in remote_cksum
1466 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1468 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1469 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1470 "file '%s' missing", file_name)
1471 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1472 "file '%s' has wrong checksum", file_name)
1473 # not candidate and this is not a must-have file
1474 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1475 "file '%s' should not exist on non master"
1476 " candidates (and the file is outdated)", file_name)
1477 # all good, except non-master/non-must have combination
1478 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1479 "file '%s' should not exist"
1480 " on non master candidates", file_name)
1482 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1483 """Verifies and the node DRBD status.
1485 @type ninfo: L{objects.Node}
1486 @param ninfo: the node to check
1487 @param nresult: the remote results for the node
1488 @param instanceinfo: the dict of instances
1489 @param drbd_map: the DRBD map as returned by
1490 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1494 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1496 # compute the DRBD minors
1497 node_drbd = {}
1498 for minor, instance in drbd_map[node].items():
1499 test = instance not in instanceinfo
1500 _ErrorIf(test, self.ECLUSTERCFG, None,
1501 "ghost instance '%s' in temporary DRBD map", instance)
1502 # ghost instance should not be running, but otherwise we
1503 # don't give double warnings (both ghost instance and
1504 # unallocated minor in use)
1505 if test:
1506 node_drbd[minor] = (instance, False)
1507 else:
1508 instance = instanceinfo[instance]
1509 node_drbd[minor] = (instance.name, instance.admin_up)
1511 # and now check them
1512 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1513 test = not isinstance(used_minors, (tuple, list))
1514 _ErrorIf(test, self.ENODEDRBD, node,
1515 "cannot parse drbd status file: %s", str(used_minors))
1517 # we cannot check drbd status
1520 for minor, (iname, must_exist) in node_drbd.items():
1521 test = minor not in used_minors and must_exist
1522 _ErrorIf(test, self.ENODEDRBD, node,
1523 "drbd minor %d of instance %s is not active", minor, iname)
1524 for minor in used_minors:
1525 test = minor not in node_drbd
1526 _ErrorIf(test, self.ENODEDRBD, node,
1527 "unallocated drbd minor %d is in use", minor)
1529 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1530 """Verifies and updates the node volume data.
1532 This function will update a L{NodeImage}'s internal structures
1533 with data from the remote call.
1535 @type ninfo: L{objects.Node}
1536 @param ninfo: the node to check
1537 @param nresult: the remote results for the node
1538 @param nimg: the node image object
1539 @param vg_name: the configured VG name
1543 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1545 nimg.lvm_fail = True
1546 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1549 elif isinstance(lvdata, basestring):
1550 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1551 utils.SafeEncode(lvdata))
1552 elif not isinstance(lvdata, dict):
1553 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1554 else:
1555 nimg.volumes = lvdata
1556 nimg.lvm_fail = False
1558 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1559 """Verifies and updates the node instance list.
1561 If the listing was successful, then updates this node's instance
1562 list. Otherwise, it marks the RPC call as failed for the instance
1565 @type ninfo: L{objects.Node}
1566 @param ninfo: the node to check
1567 @param nresult: the remote results for the node
1568 @param nimg: the node image object
1571 idata = nresult.get(constants.NV_INSTANCELIST, None)
1572 test = not isinstance(idata, list)
1573 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1574 " (instancelist): %s", utils.SafeEncode(str(idata)))
1575 if test:
1576 nimg.hyp_fail = True
1577 else:
1578 nimg.instances = idata
1580 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1581 """Verifies and computes a node information map
1583 @type ninfo: L{objects.Node}
1584 @param ninfo: the node to check
1585 @param nresult: the remote results for the node
1586 @param nimg: the node image object
1587 @param vg_name: the configured VG name
1591 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1593 # try to read free memory (from the hypervisor)
1594 hv_info = nresult.get(constants.NV_HVINFO, None)
1595 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1596 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1597 if not test:
1598 try:
1599 nimg.mfree = int(hv_info["memory_free"])
1600 except (ValueError, TypeError):
1601 _ErrorIf(True, self.ENODERPC, node,
1602 "node returned invalid nodeinfo, check hypervisor")
1604 # FIXME: devise a free space model for file based instances as well
1605 if vg_name is not None:
1606 test = (constants.NV_VGLIST not in nresult or
1607 vg_name not in nresult[constants.NV_VGLIST])
1608 _ErrorIf(test, self.ENODELVM, node,
1609 "node didn't return data for the volume group '%s'"
1610 " - it is either missing or broken", vg_name)
1611 if not test:
1612 try:
1613 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1614 except (ValueError, TypeError):
1615 _ErrorIf(True, self.ENODERPC, node,
1616 "node returned invalid LVM info, check LVM status")
1618 def CheckPrereq(self):
1619 """Check prerequisites.
1621 Transform the list of checks we're going to skip into a set and check that
1622 all its members are valid.
1625 self.skip_set = frozenset(self.op.skip_checks)
1626 if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1627 raise errors.OpPrereqError("Invalid checks to be skipped specified",
1630 def BuildHooksEnv(self):
1633 Cluster-Verify hooks just ran in the post phase and their failure makes
1634 the output be logged in the verify output and the verification to fail.
1637 all_nodes = self.cfg.GetNodeList()
1639 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1641 for node in self.cfg.GetAllNodesInfo().values():
1642 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1644 return env, [], all_nodes
1646 def Exec(self, feedback_fn):
1647 """Verify integrity of cluster, performing various test on nodes.
1651 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1652 verbose = self.op.verbose
1653 self._feedback_fn = feedback_fn
1654 feedback_fn("* Verifying global settings")
1655 for msg in self.cfg.VerifyConfig():
1656 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1658 # Check the cluster certificates
1659 for cert_filename in constants.ALL_CERT_FILES:
1660 (errcode, msg) = _VerifyCertificate(cert_filename)
1661 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1663 vg_name = self.cfg.GetVGName()
1664 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1665 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1666 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1667 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1668 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1669 for iname in instancelist)
1670 i_non_redundant = [] # Non redundant instances
1671 i_non_a_balanced = [] # Non auto-balanced instances
1672 n_offline = 0 # Count of offline nodes
1673 n_drained = 0 # Count of nodes being drained
1674 node_vol_should = {}
1676 # FIXME: verify OS list
1677 # do local checksums
1678 master_files = [constants.CLUSTER_CONF_FILE]
1680 file_names = ssconf.SimpleStore().GetFileList()
1681 file_names.extend(constants.ALL_CERT_FILES)
1682 file_names.extend(master_files)
1684 local_checksums = utils.FingerprintFiles(file_names)
1686 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1687 node_verify_param = {
1688 constants.NV_FILELIST: file_names,
1689 constants.NV_NODELIST: [node.name for node in nodeinfo
1690 if not node.offline],
1691 constants.NV_HYPERVISOR: hypervisors,
1692 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1693 node.secondary_ip) for node in nodeinfo
1694 if not node.offline],
1695 constants.NV_INSTANCELIST: hypervisors,
1696 constants.NV_VERSION: None,
1697 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1698 constants.NV_NODESETUP: None,
1699 constants.NV_TIME: None,
1702 if vg_name is not None:
1703 node_verify_param[constants.NV_VGLIST] = None
1704 node_verify_param[constants.NV_LVLIST] = vg_name
1705 node_verify_param[constants.NV_PVLIST] = [vg_name]
1706 node_verify_param[constants.NV_DRBDLIST] = None
1708 # Build our expected cluster state
1709 node_image = dict((node.name, self.NodeImage(offline=node.offline))
1710 for node in nodeinfo)
1712 for instance in instancelist:
1713 inst_config = instanceinfo[instance]
1715 for nname in inst_config.all_nodes:
1716 if nname not in node_image:
1718 gnode = self.NodeImage()
1719 gnode.ghost = True
1720 node_image[nname] = gnode
1722 inst_config.MapLVsByNode(node_vol_should)
1724 pnode = inst_config.primary_node
1725 node_image[pnode].pinst.append(instance)
1727 for snode in inst_config.secondary_nodes:
1728 nimg = node_image[snode]
1729 nimg.sinst.append(instance)
1730 if pnode not in nimg.sbp:
1731 nimg.sbp[pnode] = []
1732 nimg.sbp[pnode].append(instance)
1734 # At this point, we have the in-memory data structures complete,
1735 # except for the runtime information, which we'll gather next
1737 # Due to the way our RPC system works, exact response times cannot be
1738 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1739 # time before and after executing the request, we can at least have a time
1741 nvinfo_starttime = time.time()
1742 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1743 self.cfg.GetClusterName())
1744 nvinfo_endtime = time.time()
1746 cluster = self.cfg.GetClusterInfo()
1747 master_node = self.cfg.GetMasterNode()
1748 all_drbd_map = self.cfg.ComputeDRBDMap()
1750 feedback_fn("* Verifying node status")
1751 for node_i in nodeinfo:
1753 nimg = node_image[node]
1757 feedback_fn("* Skipping offline node %s" % (node,))
1761 if node == master_node:
1763 elif node_i.master_candidate:
1764 ntype = "master candidate"
1765 elif node_i.drained:
1771 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1773 msg = all_nvinfo[node].fail_msg
1774 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1775 if msg:
1776 nimg.rpc_fail = True
1777 continue
1779 nresult = all_nvinfo[node].payload
1781 nimg.call_ok = self._VerifyNode(node_i, nresult)
1782 self._VerifyNodeNetwork(node_i, nresult)
1783 self._VerifyNodeLVM(node_i, nresult, vg_name)
1784 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1786 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1787 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1789 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1790 self._UpdateNodeInstances(node_i, nresult, nimg)
1791 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1793 feedback_fn("* Verifying instance status")
1794 for instance in instancelist:
1796 feedback_fn("* Verifying instance %s" % instance)
1797 inst_config = instanceinfo[instance]
1798 self._VerifyInstance(instance, inst_config, node_image)
1799 inst_nodes_offline = []
1801 pnode = inst_config.primary_node
1802 pnode_img = node_image[pnode]
1803 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1804 self.ENODERPC, pnode, "instance %s, connection to"
1805 " primary node failed", instance)
1807 if pnode_img.offline:
1808 inst_nodes_offline.append(pnode)
1810 # If the instance is non-redundant we cannot survive losing its primary
1811 # node, so we are not N+1 compliant. On the other hand we have no disk
1812 # templates with more than one secondary so that situation is not well
1814 # FIXME: does not support file-backed instances
1815 if not inst_config.secondary_nodes:
1816 i_non_redundant.append(instance)
1817 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
1818 instance, "instance has multiple secondary nodes: %s",
1819 utils.CommaJoin(inst_config.secondary_nodes),
1820 code=self.ETYPE_WARNING)
1822 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1823 i_non_a_balanced.append(instance)
1825 for snode in inst_config.secondary_nodes:
1826 s_img = node_image[snode]
1827 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
1828 "instance %s, connection to secondary node failed", instance)
1830 if s_img.offline:
1831 inst_nodes_offline.append(snode)
1833 # warn that the instance lives on offline nodes
1834 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1835 "instance lives on offline node(s) %s",
1836 utils.CommaJoin(inst_nodes_offline))
1837 # ... or ghost nodes
1838 for node in inst_config.all_nodes:
1839 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
1840 "instance lives on ghost node %s", node)
1842 feedback_fn("* Verifying orphan volumes")
1843 self._VerifyOrphanVolumes(node_vol_should, node_image)
1845 feedback_fn("* Verifying oprhan instances")
1846 self._VerifyOrphanInstances(instancelist, node_image)
1848 if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1849 feedback_fn("* Verifying N+1 Memory redundancy")
1850 self._VerifyNPlusOneMemory(node_image, instanceinfo)
1852 feedback_fn("* Other Notes")
1854 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
1855 % len(i_non_redundant))
1857 if i_non_a_balanced:
1858 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
1859 % len(i_non_a_balanced))
1862 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
1865 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
1869 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1870 """Analyze the post-hooks' result
1872 This method analyses the hook result, handles it, and sends some
1873 nicely-formatted feedback back to the user.
1875 @param phase: one of L{constants.HOOKS_PHASE_POST} or
1876 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1877 @param hooks_results: the results of the multi-node hooks rpc call
1878 @param feedback_fn: function used to send feedback back to the caller
1879 @param lu_result: previous Exec result
1880 @return: the new Exec result, based on the previous result
1884 # We only really run POST phase hooks, and are only interested in
1886 if phase == constants.HOOKS_PHASE_POST:
1887 # Used to change hooks' output to proper indentation
1888 indent_re = re.compile('^', re.M)
1889 feedback_fn("* Hooks Results")
1890 assert hooks_results, "invalid result from hooks"
1892 for node_name in hooks_results:
1893 res = hooks_results[node_name]
1894 msg = res.fail_msg
1895 test = msg and not res.offline
1896 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1897 "Communication failure in hooks execution: %s", msg)
1898 if res.offline or msg:
1899 # No need to investigate payload if node is offline or gave an error.
1900 # manually override lu_result here, as _ErrorIf only
1901 # overrides self.bad
1904 for script, hkr, output in res.payload:
1905 test = hkr == constants.HKR_FAIL
1906 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1907 "Script %s failed, output:", script)
1909 output = indent_re.sub(' ', output)
1910 feedback_fn("%s" % output)
1916 class LUVerifyDisks(NoHooksLU):
1917 """Verifies the cluster disks status.
1923 def ExpandNames(self):
1924 self.needed_locks = {
1925 locking.LEVEL_NODE: locking.ALL_SET,
1926 locking.LEVEL_INSTANCE: locking.ALL_SET,
1928 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1930 def CheckPrereq(self):
1931 """Check prerequisites.
1933 This has no prerequisites.
1938 def Exec(self, feedback_fn):
1939 """Verify integrity of cluster disks.
1941 @rtype: tuple of three items
1942 @return: a tuple of (dict of node-to-node_error, list of instances
1943 which need activate-disks, dict of instance: (node, volume) for
1944 missing volumes)
1947 result = res_nodes, res_instances, res_missing = {}, [], {}
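# Note (not in the original source): 'result' simply aliases the tuple of the
# three accumulators above, so the return value is built incrementally and
# handed back in one piece at the end of Exec.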
1949 vg_name = self.cfg.GetVGName()
1950 nodes = utils.NiceSort(self.cfg.GetNodeList())
1951 instances = [self.cfg.GetInstanceInfo(name)
1952 for name in self.cfg.GetInstanceList()]
1955 for inst in instances:
1957 if (not inst.admin_up or
1958 inst.disk_template not in constants.DTS_NET_MIRROR):
1960 inst.MapLVsByNode(inst_lvs)
1961 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1962 for node, vol_list in inst_lvs.iteritems():
1963 for vol in vol_list:
1964 nv_dict[(node, vol)] = inst
1969 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
1973 node_res = node_lvs[node]
1974 if node_res.offline:
1976 msg = node_res.fail_msg
1978 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
1979 res_nodes[node] = msg
1982 lvs = node_res.payload
1983 for lv_name, (_, _, lv_online) in lvs.items():
1984 inst = nv_dict.pop((node, lv_name), None)
1985 if (not lv_online and inst is not None
1986 and inst.name not in res_instances):
1987 res_instances.append(inst.name)
1989 # any leftover items in nv_dict are missing LVs, let's arrange the
1990 # data better
1991 for key, inst in nv_dict.iteritems():
1992 if inst.name not in res_missing:
1993 res_missing[inst.name] = []
1994 res_missing[inst.name].append(key)
1999 class LURepairDiskSizes(NoHooksLU):
2000 """Verifies the cluster disks sizes.
2003 _OP_REQP = ["instances"]
2006 def ExpandNames(self):
2007 if not isinstance(self.op.instances, list):
2008 raise errors.OpPrereqError("Invalid argument type 'instances'",
2011 if self.op.instances:
2012 self.wanted_names = []
2013 for name in self.op.instances:
2014 full_name = _ExpandInstanceName(self.cfg, name)
2015 self.wanted_names.append(full_name)
2016 self.needed_locks = {
2017 locking.LEVEL_NODE: [],
2018 locking.LEVEL_INSTANCE: self.wanted_names,
2020 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2022 self.wanted_names = None
2023 self.needed_locks = {
2024 locking.LEVEL_NODE: locking.ALL_SET,
2025 locking.LEVEL_INSTANCE: locking.ALL_SET,
2027 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2029 def DeclareLocks(self, level):
2030 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2031 self._LockInstancesNodes(primary_only=True)
2033 def CheckPrereq(self):
2034 """Check prerequisites.
2036 This only checks the optional instance list against the existing names.
2039 if self.wanted_names is None:
2040 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2042 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2043 in self.wanted_names]
2045 def _EnsureChildSizes(self, disk):
2046 """Ensure children of the disk have the needed disk size.
2048 This is valid mainly for DRBD8 and fixes an issue where the
2049 children have a smaller disk size than the parent.
2051 @param disk: an L{ganeti.objects.Disk} object
2054 if disk.dev_type == constants.LD_DRBD8:
2055 assert disk.children, "Empty children for DRBD8?"
2056 fchild = disk.children[0]
2057 mismatch = fchild.size < disk.size
2059 self.LogInfo("Child disk has size %d, parent %d, fixing",
2060 fchild.size, disk.size)
2061 fchild.size = disk.size
2063 # and we recurse on this child only, not on the metadev
2064 return self._EnsureChildSizes(fchild) or mismatch
2068 def Exec(self, feedback_fn):
2069 """Verify the size of cluster disks.
2072 # TODO: check child disks too
2073 # TODO: check differences in size between primary/secondary nodes
2075 for instance in self.wanted_instances:
2076 pnode = instance.primary_node
2077 if pnode not in per_node_disks:
2078 per_node_disks[pnode] = []
2079 for idx, disk in enumerate(instance.disks):
2080 per_node_disks[pnode].append((instance, idx, disk))
2083 for node, dskl in per_node_disks.items():
2084 newl = [v[2].Copy() for v in dskl]
2086 self.cfg.SetDiskID(dsk, node)
2087 result = self.rpc.call_blockdev_getsizes(node, newl)
2089 self.LogWarning("Failure in blockdev_getsizes call to node"
2090 " %s, ignoring", node)
2092 if len(result.data) != len(dskl):
2093 self.LogWarning("Invalid result from node %s, ignoring node results",
2096 for ((instance, idx, disk), size) in zip(dskl, result.data):
2098 self.LogWarning("Disk %d of instance %s did not return size"
2099 " information, ignoring", idx, instance.name)
2101 if not isinstance(size, (int, long)):
2102 self.LogWarning("Disk %d of instance %s did not return valid"
2103 " size information, ignoring", idx, instance.name)
2106 if size != disk.size:
2107 self.LogInfo("Disk %d of instance %s has mismatched size,"
2108 " correcting: recorded %d, actual %d", idx,
2109 instance.name, disk.size, size)
2111 self.cfg.Update(instance, feedback_fn)
2112 changed.append((instance.name, idx, size))
2113 if self._EnsureChildSizes(disk):
2114 self.cfg.Update(instance, feedback_fn)
2115 changed.append((instance.name, idx, disk.size))
2119 class LURenameCluster(LogicalUnit):
2120 """Rename the cluster.
2123 HPATH = "cluster-rename"
2124 HTYPE = constants.HTYPE_CLUSTER
2127 def BuildHooksEnv(self):
2132 "OP_TARGET": self.cfg.GetClusterName(),
2133 "NEW_NAME": self.op.name,
2135 mn = self.cfg.GetMasterNode()
2136 all_nodes = self.cfg.GetNodeList()
2137 return env, [mn], all_nodes
2139 def CheckPrereq(self):
2140 """Verify that the passed name is a valid one.
2143 hostname = utils.GetHostInfo(self.op.name)
2145 new_name = hostname.name
2146 self.ip = new_ip = hostname.ip
2147 old_name = self.cfg.GetClusterName()
2148 old_ip = self.cfg.GetMasterIP()
2149 if new_name == old_name and new_ip == old_ip:
2150 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2151 " cluster has changed",
2153 if new_ip != old_ip:
2154 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2155 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2156 " reachable on the network. Aborting." %
2157 new_ip, errors.ECODE_NOTUNIQUE)
2159 self.op.name = new_name
2161 def Exec(self, feedback_fn):
2162 """Rename the cluster.
2165 clustername = self.op.name
2166 ip = self.ip
2168 # shutdown the master IP
2169 master = self.cfg.GetMasterNode()
2170 result = self.rpc.call_node_stop_master(master, False)
2171 result.Raise("Could not disable the master role")
2174 cluster = self.cfg.GetClusterInfo()
2175 cluster.cluster_name = clustername
2176 cluster.master_ip = ip
2177 self.cfg.Update(cluster, feedback_fn)
2179 # update the known hosts file
2180 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2181 node_list = self.cfg.GetNodeList()
2183 node_list.remove(master)
2186 result = self.rpc.call_upload_file(node_list,
2187 constants.SSH_KNOWN_HOSTS_FILE)
2188 for to_node, to_result in result.iteritems():
2189 msg = to_result.fail_msg
2191 msg = ("Copy of file %s to node %s failed: %s" %
2192 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2193 self.proc.LogWarning(msg)
2196 result = self.rpc.call_node_start_master(master, False, False)
2197 msg = result.fail_msg
2199 self.LogWarning("Could not re-enable the master role on"
2200 " the master, please restart manually: %s", msg)
2203 def _RecursiveCheckIfLVMBased(disk):
2204 """Check if the given disk or its children are lvm-based.
2206 @type disk: L{objects.Disk}
2207 @param disk: the disk to check
2209 @return: boolean indicating whether an LD_LV dev_type was found or not
2213 for chdisk in disk.children:
2214 if _RecursiveCheckIfLVMBased(chdisk):
2216 return disk.dev_type == constants.LD_LV
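# Illustration (not from the original code): a DRBD8 disk whose data child is
# an LV is reported as lvm-based through the recursion above, while a purely
# file-based disk is not.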
2219 class LUSetClusterParams(LogicalUnit):
2220 """Change the parameters of the cluster.
2223 HPATH = "cluster-modify"
2224 HTYPE = constants.HTYPE_CLUSTER
2228 def CheckArguments(self):
2232 if not hasattr(self.op, "candidate_pool_size"):
2233 self.op.candidate_pool_size = None
2234 if self.op.candidate_pool_size is not None:
2236 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2237 except (ValueError, TypeError), err:
2238 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2239 str(err), errors.ECODE_INVAL)
2240 if self.op.candidate_pool_size < 1:
2241 raise errors.OpPrereqError("At least one master candidate needed",
2243 _CheckBooleanOpField(self.op, "maintain_node_health")
2245 def ExpandNames(self):
2246 # FIXME: in the future maybe other cluster params won't require checking on
2247 # all nodes to be modified.
2248 self.needed_locks = {
2249 locking.LEVEL_NODE: locking.ALL_SET,
2251 self.share_locks[locking.LEVEL_NODE] = 1
2253 def BuildHooksEnv(self):
2258 "OP_TARGET": self.cfg.GetClusterName(),
2259 "NEW_VG_NAME": self.op.vg_name,
2261 mn = self.cfg.GetMasterNode()
2262 return env, [mn], [mn]
2264 def CheckPrereq(self):
2265 """Check prerequisites.
2267 This checks that the given parameters do not conflict and
2268 that the given volume group is valid.
2271 if self.op.vg_name is not None and not self.op.vg_name:
2272 instances = self.cfg.GetAllInstancesInfo().values()
2273 for inst in instances:
2274 for disk in inst.disks:
2275 if _RecursiveCheckIfLVMBased(disk):
2276 raise errors.OpPrereqError("Cannot disable lvm storage while"
2277 " lvm-based instances exist",
2280 node_list = self.acquired_locks[locking.LEVEL_NODE]
2282 # if vg_name not None, checks given volume group on all nodes
2284 vglist = self.rpc.call_vg_list(node_list)
2285 for node in node_list:
2286 msg = vglist[node].fail_msg
2288 # ignoring down node
2289 self.LogWarning("Error while gathering data on node %s"
2290 " (ignoring node): %s", node, msg)
2292 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2294 constants.MIN_VG_SIZE)
2296 raise errors.OpPrereqError("Error on node '%s': %s" %
2297 (node, vgstatus), errors.ECODE_ENVIRON)
2299 self.cluster = cluster = self.cfg.GetClusterInfo()
2300 # validate params changes
2301 if self.op.beparams:
2302 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2303 self.new_beparams = objects.FillDict(
2304 cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2306 if self.op.nicparams:
2307 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2308 self.new_nicparams = objects.FillDict(
2309 cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2310 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2313 # check all instances for consistency
2314 for instance in self.cfg.GetAllInstancesInfo().values():
2315 for nic_idx, nic in enumerate(instance.nics):
2316 params_copy = copy.deepcopy(nic.nicparams)
2317 params_filled = objects.FillDict(self.new_nicparams, params_copy)
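# Note (not in the original source): FillDict lays the per-NIC overrides on
# top of the new cluster-level defaults, so instance-specific settings keep
# precedence over the defaults being introduced here.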
2319 # check parameter syntax
2321 objects.NIC.CheckParameterSyntax(params_filled)
2322 except errors.ConfigurationError, err:
2323 nic_errors.append("Instance %s, nic/%d: %s" %
2324 (instance.name, nic_idx, err))
2326 # if we're moving instances to routed, check that they have an ip
2327 target_mode = params_filled[constants.NIC_MODE]
2328 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2329 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2330 (instance.name, nic_idx))
2332 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2333 "\n".join(nic_errors))
2335 # hypervisor list/parameters
2336 self.new_hvparams = objects.FillDict(cluster.hvparams, {})
2337 if self.op.hvparams:
2338 if not isinstance(self.op.hvparams, dict):
2339 raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2341 for hv_name, hv_dict in self.op.hvparams.items():
2342 if hv_name not in self.new_hvparams:
2343 self.new_hvparams[hv_name] = hv_dict
2345 self.new_hvparams[hv_name].update(hv_dict)
2347 # os hypervisor parameters
2348 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2350 if not isinstance(self.op.os_hvp, dict):
2351 raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2353 for os_name, hvs in self.op.os_hvp.items():
2354 if not isinstance(hvs, dict):
2355 raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2356 " input"), errors.ECODE_INVAL)
2357 if os_name not in self.new_os_hvp:
2358 self.new_os_hvp[os_name] = hvs
2360 for hv_name, hv_dict in hvs.items():
2361 if hv_name not in self.new_os_hvp[os_name]:
2362 self.new_os_hvp[os_name][hv_name] = hv_dict
2364 self.new_os_hvp[os_name][hv_name].update(hv_dict)
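# Hypothetical example of the merge above (names are illustrative only):
# passing os_hvp={'debian-etch': {'xen-pvm': {'kernel_path': '/boot/vmlinuz'}}}
# updates only that key for that OS/hypervisor pair, leaving the other per-OS
# overrides already stored in the cluster untouched.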
2366 if self.op.enabled_hypervisors is not None:
2367 self.hv_list = self.op.enabled_hypervisors
2368 if not self.hv_list:
2369 raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2370 " least one member",
2372 invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2374 raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2376 utils.CommaJoin(invalid_hvs),
2379 self.hv_list = cluster.enabled_hypervisors
2381 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2382 # either the enabled list has changed, or the parameters have, validate
2383 for hv_name, hv_params in self.new_hvparams.items():
2384 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2385 (self.op.enabled_hypervisors and
2386 hv_name in self.op.enabled_hypervisors)):
2387 # either this is a new hypervisor, or its parameters have changed
2388 hv_class = hypervisor.GetHypervisor(hv_name)
2389 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2390 hv_class.CheckParameterSyntax(hv_params)
2391 _CheckHVParams(self, node_list, hv_name, hv_params)
2394 # no need to check any newly-enabled hypervisors, since the
2395 # defaults have already been checked in the above code-block
2396 for os_name, os_hvp in self.new_os_hvp.items():
2397 for hv_name, hv_params in os_hvp.items():
2398 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2399 # we need to fill in the new os_hvp on top of the actual hv_p
2400 cluster_defaults = self.new_hvparams.get(hv_name, {})
2401 new_osp = objects.FillDict(cluster_defaults, hv_params)
2402 hv_class = hypervisor.GetHypervisor(hv_name)
2403 hv_class.CheckParameterSyntax(new_osp)
2404 _CheckHVParams(self, node_list, hv_name, new_osp)
2407 def Exec(self, feedback_fn):
2408 """Change the parameters of the cluster.
2411 if self.op.vg_name is not None:
2412 new_volume = self.op.vg_name
2415 if new_volume != self.cfg.GetVGName():
2416 self.cfg.SetVGName(new_volume)
2417 else:
2418 feedback_fn("Cluster LVM configuration already in desired"
2419 " state, not changing")
2420 if self.op.hvparams:
2421 self.cluster.hvparams = self.new_hvparams
2423 self.cluster.os_hvp = self.new_os_hvp
2424 if self.op.enabled_hypervisors is not None:
2425 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2426 if self.op.beparams:
2427 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2428 if self.op.nicparams:
2429 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2431 if self.op.candidate_pool_size is not None:
2432 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2433 # we need to update the pool size here, otherwise the save will fail
2434 _AdjustCandidatePool(self, [])
2436 if self.op.maintain_node_health is not None:
2437 self.cluster.maintain_node_health = self.op.maintain_node_health
2439 self.cfg.Update(self.cluster, feedback_fn)
2442 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2443 """Distribute additional files which are part of the cluster configuration.
2445 ConfigWriter takes care of distributing the config and ssconf files, but
2446 there are more files which should be distributed to all nodes. This function
2447 makes sure those are copied.
2449 @param lu: calling logical unit
2450 @param additional_nodes: list of nodes not in the config to distribute to
2453 # 1. Gather target nodes
2454 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2455 dist_nodes = lu.cfg.GetOnlineNodeList()
2456 if additional_nodes is not None:
2457 dist_nodes.extend(additional_nodes)
2458 if myself.name in dist_nodes:
2459 dist_nodes.remove(myself.name)
2461 # 2. Gather files to distribute
2462 dist_files = set([constants.ETC_HOSTS,
2463 constants.SSH_KNOWN_HOSTS_FILE,
2464 constants.RAPI_CERT_FILE,
2465 constants.RAPI_USERS_FILE,
2466 constants.CONFD_HMAC_KEY,
2469 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2470 for hv_name in enabled_hypervisors:
2471 hv_class = hypervisor.GetHypervisor(hv_name)
2472 dist_files.update(hv_class.GetAncillaryFiles())
2474 # 3. Perform the files upload
2475 for fname in dist_files:
2476 if os.path.exists(fname):
2477 result = lu.rpc.call_upload_file(dist_nodes, fname)
2478 for to_node, to_result in result.items():
2479 msg = to_result.fail_msg
2481 msg = ("Copy of file %s to node %s failed: %s" %
2482 (fname, to_node, msg))
2483 lu.proc.LogWarning(msg)
2486 class LURedistributeConfig(NoHooksLU):
2487 """Force the redistribution of cluster configuration.
2489 This is a very simple LU.
2495 def ExpandNames(self):
2496 self.needed_locks = {
2497 locking.LEVEL_NODE: locking.ALL_SET,
2499 self.share_locks[locking.LEVEL_NODE] = 1
2501 def CheckPrereq(self):
2502 """Check prerequisites.
2506 def Exec(self, feedback_fn):
2507 """Redistribute the configuration.
2510 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2511 _RedistributeAncillaryFiles(self)
2514 def _WaitForSync(lu, instance, oneshot=False):
2515 """Sleep and poll for an instance's disk to sync.
2518 if not instance.disks:
2522 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2524 node = instance.primary_node
2526 for dev in instance.disks:
2527 lu.cfg.SetDiskID(dev, node)
2529 # TODO: Convert to utils.Retry
2532 degr_retries = 10 # in seconds, as we sleep 1 second each time
2536 cumul_degraded = False
2537 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2538 msg = rstats.fail_msg
2540 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2543 raise errors.RemoteError("Can't contact node %s for mirror data,"
2544 " aborting." % node)
2547 rstats = rstats.payload
2549 for i, mstat in enumerate(rstats):
2551 lu.LogWarning("Can't compute data for node %s/%s",
2552 node, instance.disks[i].iv_name)
2555 cumul_degraded = (cumul_degraded or
2556 (mstat.is_degraded and mstat.sync_percent is None))
2557 if mstat.sync_percent is not None:
2559 if mstat.estimated_time is not None:
2560 rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2561 max_time = mstat.estimated_time
2563 rem_time = "no time estimate"
2564 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2565 (instance.disks[i].iv_name, mstat.sync_percent,
2568 # if we're done but degraded, let's do a few small retries, to
2569 # make sure we see a stable and not transient situation; therefore
2570 # we force restart of the loop
2571 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2572 logging.info("Degraded disks found, %d retries left", degr_retries)
2580 time.sleep(min(60, max_time))
2583 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2584 return not cumul_degraded
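# Note (not in the original source): a True return value means all mirrors
# reported as healthy by the end of the wait; callers can treat False as
# "disks still degraded".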
2587 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2588 """Check that mirrors are not degraded.
2590 The ldisk parameter, if True, will change the test from the
2591 is_degraded attribute (which represents overall non-ok status for
2592 the device(s)) to the ldisk (representing the local storage status).
2595 lu.cfg.SetDiskID(dev, node)
2599 if on_primary or dev.AssembleOnSecondary():
2600 rstats = lu.rpc.call_blockdev_find(node, dev)
2601 msg = rstats.fail_msg
2603 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2605 elif not rstats.payload:
2606 lu.LogWarning("Can't find disk on node %s", node)
2610 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2612 result = result and not rstats.payload.is_degraded
2615 for child in dev.children:
2616 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2621 class LUDiagnoseOS(NoHooksLU):
2622 """Logical unit for OS diagnose/query.
2625 _OP_REQP = ["output_fields", "names"]
2627 _FIELDS_STATIC = utils.FieldSet()
2628 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2629 # Fields that need calculation of global os validity
2630 _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2632 def ExpandNames(self):
2634 raise errors.OpPrereqError("Selective OS query not supported",
2637 _CheckOutputFields(static=self._FIELDS_STATIC,
2638 dynamic=self._FIELDS_DYNAMIC,
2639 selected=self.op.output_fields)
2641 # Lock all nodes, in shared mode
2642 # Temporary removal of locks, should be reverted later
2643 # TODO: reintroduce locks when they are lighter-weight
2644 self.needed_locks = {}
2645 #self.share_locks[locking.LEVEL_NODE] = 1
2646 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2648 def CheckPrereq(self):
2649 """Check prerequisites.
2654 def _DiagnoseByOS(rlist):
2655 """Remaps a per-node return list into an a per-os per-node dictionary
2657 @param rlist: a map with node names as keys and OS objects as values
2660 @return: a dictionary with osnames as keys and as value another map, with
2661 nodes as keys and tuples of (path, status, diagnose) as values, eg::
2663 {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2664 (/srv/..., False, "invalid api")],
2665 "node2": [(/srv/..., True, "")]}
2670 # we build here the list of nodes that didn't fail the RPC (at RPC
2671 # level), so that nodes with a non-responding node daemon don't
2672 # make all OSes invalid
2673 good_nodes = [node_name for node_name in rlist
2674 if not rlist[node_name].fail_msg]
2675 for node_name, nr in rlist.items():
2676 if nr.fail_msg or not nr.payload:
2678 for name, path, status, diagnose, variants in nr.payload:
2679 if name not in all_os:
2680 # build a list of nodes for this os containing empty lists
2681 # for each node in node_list
2683 for nname in good_nodes:
2684 all_os[name][nname] = []
2685 all_os[name][node_name].append((path, status, diagnose, variants))
2688 def Exec(self, feedback_fn):
2689 """Compute the list of OSes.
2692 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2693 node_data = self.rpc.call_os_diagnose(valid_nodes)
2694 pol = self._DiagnoseByOS(node_data)
2696 calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2697 calc_variants = "variants" in self.op.output_fields
2699 for os_name, os_data in pol.items():
2704 for osl in os_data.values():
2705 valid = valid and osl and osl[0][1]
2710 node_variants = osl[0][3]
2711 if variants is None:
2712 variants = node_variants
2714 variants = [v for v in variants if v in node_variants]
2716 for field in self.op.output_fields:
2719 elif field == "valid":
2721 elif field == "node_status":
2722 # this is just a copy of the dict
2724 for node_name, nos_list in os_data.items():
2725 val[node_name] = nos_list
2726 elif field == "variants":
2729 raise errors.ParameterError(field)
2736 class LURemoveNode(LogicalUnit):
2737 """Logical unit for removing a node.
2740 HPATH = "node-remove"
2741 HTYPE = constants.HTYPE_NODE
2742 _OP_REQP = ["node_name"]
2744 def BuildHooksEnv(self):
2747 This doesn't run on the target node in the pre phase as a failed
2748 node would then be impossible to remove.
2752 "OP_TARGET": self.op.node_name,
2753 "NODE_NAME": self.op.node_name,
2755 all_nodes = self.cfg.GetNodeList()
2757 all_nodes.remove(self.op.node_name)
2759 logging.warning("Node %s which is about to be removed not found"
2760 " in the all nodes list", self.op.node_name)
2761 return env, all_nodes, all_nodes
2763 def CheckPrereq(self):
2764 """Check prerequisites.
2767 - the node exists in the configuration
2768 - it does not have primary or secondary instances
2769 - it's not the master
2771 Any errors are signaled by raising errors.OpPrereqError.
2774 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2775 node = self.cfg.GetNodeInfo(self.op.node_name)
2776 assert node is not None
2778 instance_list = self.cfg.GetInstanceList()
2780 masternode = self.cfg.GetMasterNode()
2781 if node.name == masternode:
2782 raise errors.OpPrereqError("Node is the master node,"
2783 " you need to failover first.",
2786 for instance_name in instance_list:
2787 instance = self.cfg.GetInstanceInfo(instance_name)
2788 if node.name in instance.all_nodes:
2789 raise errors.OpPrereqError("Instance %s is still running on the node,"
2790 " please remove first." % instance_name,
2792 self.op.node_name = node.name
2795 def Exec(self, feedback_fn):
2796 """Removes the node from the cluster.
2800 logging.info("Stopping the node daemon and removing configs from node %s",
2803 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2805 # Promote nodes to master candidate as needed
2806 _AdjustCandidatePool(self, exceptions=[node.name])
2807 self.context.RemoveNode(node.name)
2809 # Run post hooks on the node before it's removed
2810 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2812 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2814 # pylint: disable-msg=W0702
2815 self.LogWarning("Errors occurred running hooks on %s" % node.name)
2817 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2818 msg = result.fail_msg
2820 self.LogWarning("Errors encountered on the remote node while leaving"
2821 " the cluster: %s", msg)
2824 class LUQueryNodes(NoHooksLU):
2825 """Logical unit for querying nodes.
2828 # pylint: disable-msg=W0142
2829 _OP_REQP = ["output_fields", "names", "use_locking"]
2832 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2833 "master_candidate", "offline", "drained"]
2835 _FIELDS_DYNAMIC = utils.FieldSet(
2837 "mtotal", "mnode", "mfree",
2839 "ctotal", "cnodes", "csockets",
2842 _FIELDS_STATIC = utils.FieldSet(*[
2843 "pinst_cnt", "sinst_cnt",
2844 "pinst_list", "sinst_list",
2845 "pip", "sip", "tags",
2847 "role"] + _SIMPLE_FIELDS
2850 def ExpandNames(self):
2851 _CheckOutputFields(static=self._FIELDS_STATIC,
2852 dynamic=self._FIELDS_DYNAMIC,
2853 selected=self.op.output_fields)
2855 self.needed_locks = {}
2856 self.share_locks[locking.LEVEL_NODE] = 1
2859 self.wanted = _GetWantedNodes(self, self.op.names)
2861 self.wanted = locking.ALL_SET
2863 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2864 self.do_locking = self.do_node_query and self.op.use_locking
2866 # if we don't request only static fields, we need to lock the nodes
2867 self.needed_locks[locking.LEVEL_NODE] = self.wanted
2869 def CheckPrereq(self):
2870 """Check prerequisites.
2873 # The validation of the node list is done in _GetWantedNodes if the
2874 # list is non-empty; if it is empty, there is no validation to do
2877 def Exec(self, feedback_fn):
2878 """Computes the list of nodes and their attributes.
2881 all_info = self.cfg.GetAllNodesInfo()
2883 nodenames = self.acquired_locks[locking.LEVEL_NODE]
2884 elif self.wanted != locking.ALL_SET:
2885 nodenames = self.wanted
2886 missing = set(nodenames).difference(all_info.keys())
2888 raise errors.OpExecError(
2889 "Some nodes were removed before retrieving their data: %s" % missing)
2891 nodenames = all_info.keys()
2893 nodenames = utils.NiceSort(nodenames)
2894 nodelist = [all_info[name] for name in nodenames]
2896 # begin data gathering
2898 if self.do_node_query:
2900 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2901 self.cfg.GetHypervisorType())
2902 for name in nodenames:
2903 nodeinfo = node_data[name]
2904 if not nodeinfo.fail_msg and nodeinfo.payload:
2905 nodeinfo = nodeinfo.payload
2906 fn = utils.TryConvert
2908 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2909 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2910 "mfree": fn(int, nodeinfo.get('memory_free', None)),
2911 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2912 "dfree": fn(int, nodeinfo.get('vg_free', None)),
2913 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2914 "bootid": nodeinfo.get('bootid', None),
2915 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2916 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2919 live_data[name] = {}
2921 live_data = dict.fromkeys(nodenames, {})
2923 node_to_primary = dict([(name, set()) for name in nodenames])
2924 node_to_secondary = dict([(name, set()) for name in nodenames])
2926 inst_fields = frozenset(("pinst_cnt", "pinst_list",
2927 "sinst_cnt", "sinst_list"))
2928 if inst_fields & frozenset(self.op.output_fields):
2929 inst_data = self.cfg.GetAllInstancesInfo()
2931 for inst in inst_data.values():
2932 if inst.primary_node in node_to_primary:
2933 node_to_primary[inst.primary_node].add(inst.name)
2934 for secnode in inst.secondary_nodes:
2935 if secnode in node_to_secondary:
2936 node_to_secondary[secnode].add(inst.name)
2938 master_node = self.cfg.GetMasterNode()
2940 # end data gathering
2943 for node in nodelist:
2945 for field in self.op.output_fields:
2946 if field in self._SIMPLE_FIELDS:
2947 val = getattr(node, field)
2948 elif field == "pinst_list":
2949 val = list(node_to_primary[node.name])
2950 elif field == "sinst_list":
2951 val = list(node_to_secondary[node.name])
2952 elif field == "pinst_cnt":
2953 val = len(node_to_primary[node.name])
2954 elif field == "sinst_cnt":
2955 val = len(node_to_secondary[node.name])
2956 elif field == "pip":
2957 val = node.primary_ip
2958 elif field == "sip":
2959 val = node.secondary_ip
2960 elif field == "tags":
2961 val = list(node.GetTags())
2962 elif field == "master":
2963 val = node.name == master_node
2964 elif self._FIELDS_DYNAMIC.Matches(field):
2965 val = live_data[node.name].get(field, None)
2966 elif field == "role":
2967 if node.name == master_node:
2969 elif node.master_candidate:
2978 raise errors.ParameterError(field)
2979 node_output.append(val)
2980 output.append(node_output)
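# Note (not in the original source): 'output' now holds one row per node,
# with values ordered exactly like the requested output_fields, ready to be
# returned to the caller.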
2985 class LUQueryNodeVolumes(NoHooksLU):
2986 """Logical unit for getting volumes on node(s).
2989 _OP_REQP = ["nodes", "output_fields"]
2991 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2992 _FIELDS_STATIC = utils.FieldSet("node")
2994 def ExpandNames(self):
2995 _CheckOutputFields(static=self._FIELDS_STATIC,
2996 dynamic=self._FIELDS_DYNAMIC,
2997 selected=self.op.output_fields)
2999 self.needed_locks = {}
3000 self.share_locks[locking.LEVEL_NODE] = 1
3001 if not self.op.nodes:
3002 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3004 self.needed_locks[locking.LEVEL_NODE] = \
3005 _GetWantedNodes(self, self.op.nodes)
3007 def CheckPrereq(self):
3008 """Check prerequisites.
3010 This checks that the fields required are valid output fields.
3013 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3015 def Exec(self, feedback_fn):
3016 """Computes the list of nodes and their attributes.
3019 nodenames = self.nodes
3020 volumes = self.rpc.call_node_volumes(nodenames)
3022 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3023 in self.cfg.GetInstanceList()]
3025 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3028 for node in nodenames:
3029 nresult = volumes[node]
3032 msg = nresult.fail_msg
3034 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3037 node_vols = nresult.payload[:]
3038 node_vols.sort(key=lambda vol: vol['dev'])
3040 for vol in node_vols:
3042 for field in self.op.output_fields:
3045 elif field == "phys":
3049 elif field == "name":
3051 elif field == "size":
3052 val = int(float(vol['size']))
3053 elif field == "instance":
3055 if node not in lv_by_node[inst]:
3057 if vol['name'] in lv_by_node[inst][node]:
3063 raise errors.ParameterError(field)
3064 node_output.append(str(val))
3066 output.append(node_output)
3071 class LUQueryNodeStorage(NoHooksLU):
3072 """Logical unit for getting information on storage units on node(s).
3075 _OP_REQP = ["nodes", "storage_type", "output_fields"]
3077 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3079 def ExpandNames(self):
3080 storage_type = self.op.storage_type
3082 if storage_type not in constants.VALID_STORAGE_TYPES:
3083 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
3086 _CheckOutputFields(static=self._FIELDS_STATIC,
3087 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3088 selected=self.op.output_fields)
3090 self.needed_locks = {}
3091 self.share_locks[locking.LEVEL_NODE] = 1
3094 self.needed_locks[locking.LEVEL_NODE] = \
3095 _GetWantedNodes(self, self.op.nodes)
3097 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3099 def CheckPrereq(self):
3100 """Check prerequisites.
3102 This checks that the fields required are valid output fields.
3105 self.op.name = getattr(self.op, "name", None)
3107 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3109 def Exec(self, feedback_fn):
3110 """Computes the list of nodes and their attributes.
3113 # Always get name to sort by
3114 if constants.SF_NAME in self.op.output_fields:
3115 fields = self.op.output_fields[:]
3117 fields = [constants.SF_NAME] + self.op.output_fields
3119 # Never ask for node or type as it's only known to the LU
3120 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3121 while extra in fields:
3122 fields.remove(extra)
3124 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3125 name_idx = field_idx[constants.SF_NAME]
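# Illustrative example (hypothetical field names, not from the original code):
# fields = ['name', 'size', 'used'] would give
# field_idx = {'name': 0, 'size': 1, 'used': 2}, so rows returned by the
# storage backend can be indexed by field name below.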
3127 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3128 data = self.rpc.call_storage_list(self.nodes,
3129 self.op.storage_type, st_args,
3130 self.op.name, fields)
3134 for node in utils.NiceSort(self.nodes):
3135 nresult = data[node]
3139 msg = nresult.fail_msg
3141 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3144 rows = dict([(row[name_idx], row) for row in nresult.payload])
3146 for name in utils.NiceSort(rows.keys()):
3151 for field in self.op.output_fields:
3152 if field == constants.SF_NODE:
3154 elif field == constants.SF_TYPE:
3155 val = self.op.storage_type
3156 elif field in field_idx:
3157 val = row[field_idx[field]]
3159 raise errors.ParameterError(field)
3168 class LUModifyNodeStorage(NoHooksLU):
3169 """Logical unit for modifying a storage volume on a node.
3172 _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3175 def CheckArguments(self):
3176 self.opnode_name = _ExpandNodeName(self.cfg, self.op.node_name)
3178 storage_type = self.op.storage_type
3179 if storage_type not in constants.VALID_STORAGE_TYPES:
3180 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
3183 def ExpandNames(self):
3184 self.needed_locks = {
3185 locking.LEVEL_NODE: self.op.node_name,
3188 def CheckPrereq(self):
3189 """Check prerequisites.
3192 storage_type = self.op.storage_type
3195 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3197 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3198 " modified" % storage_type,
3201 diff = set(self.op.changes.keys()) - modifiable
3203 raise errors.OpPrereqError("The following fields can not be modified for"
3204 " storage units of type '%s': %r" %
3205 (storage_type, list(diff)),
3208 def Exec(self, feedback_fn):
3209 """Computes the list of nodes and their attributes.
3212 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3213 result = self.rpc.call_storage_modify(self.op.node_name,
3214 self.op.storage_type, st_args,
3215 self.op.name, self.op.changes)
3216 result.Raise("Failed to modify storage unit '%s' on %s" %
3217 (self.op.name, self.op.node_name))
3220 class LUAddNode(LogicalUnit):
3221 """Logical unit for adding node to the cluster.
3225 HTYPE = constants.HTYPE_NODE
3226 _OP_REQP = ["node_name"]
3228 def CheckArguments(self):
3229 # validate/normalize the node name
3230 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3232 def BuildHooksEnv(self):
3235 This will run on all nodes before, and on all nodes + the new node after.
3239 "OP_TARGET": self.op.node_name,
3240 "NODE_NAME": self.op.node_name,
3241 "NODE_PIP": self.op.primary_ip,
3242 "NODE_SIP": self.op.secondary_ip,
3244 nodes_0 = self.cfg.GetNodeList()
3245 nodes_1 = nodes_0 + [self.op.node_name, ]
3246 return env, nodes_0, nodes_1
3248 def CheckPrereq(self):
3249 """Check prerequisites.
3252 - the new node is not already in the config
3254 - its parameters (single/dual homed) matches the cluster
3256 Any errors are signaled by raising errors.OpPrereqError.
3259 node_name = self.op.node_name
3262 dns_data = utils.GetHostInfo(node_name)
3264 node = dns_data.name
3265 primary_ip = self.op.primary_ip = dns_data.ip
3266 secondary_ip = getattr(self.op, "secondary_ip", None)
3267 if secondary_ip is None:
3268 secondary_ip = primary_ip
3269 if not utils.IsValidIP(secondary_ip):
3270 raise errors.OpPrereqError("Invalid secondary IP given",
3272 self.op.secondary_ip = secondary_ip
3274 node_list = cfg.GetNodeList()
3275 if not self.op.readd and node in node_list:
3276 raise errors.OpPrereqError("Node %s is already in the configuration" %
3277 node, errors.ECODE_EXISTS)
3278 elif self.op.readd and node not in node_list:
3279 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3282 for existing_node_name in node_list:
3283 existing_node = cfg.GetNodeInfo(existing_node_name)
3285 if self.op.readd and node == existing_node_name:
3286 if (existing_node.primary_ip != primary_ip or
3287 existing_node.secondary_ip != secondary_ip):
3288 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3289 " address configuration as before",
3293 if (existing_node.primary_ip == primary_ip or
3294 existing_node.secondary_ip == primary_ip or
3295 existing_node.primary_ip == secondary_ip or
3296 existing_node.secondary_ip == secondary_ip):
3297 raise errors.OpPrereqError("New node ip address(es) conflict with"
3298 " existing node %s" % existing_node.name,
3299 errors.ECODE_NOTUNIQUE)
3301 # check that the type of the node (single versus dual homed) is the
3302 # same as for the master
3303 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3304 master_singlehomed = myself.secondary_ip == myself.primary_ip
3305 newbie_singlehomed = secondary_ip == primary_ip
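# Note (not in the original source): "single-homed" here simply means the
# node uses the same address for both its primary and secondary (replication)
# IP, i.e. it has no separate secondary network.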
3306 if master_singlehomed != newbie_singlehomed:
3307 if master_singlehomed:
3308 raise errors.OpPrereqError("The master has no private ip but the"
3309 " new node has one",
3312 raise errors.OpPrereqError("The master has a private ip but the"
3313 " new node doesn't have one",
3316 # checks reachability
3317 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3318 raise errors.OpPrereqError("Node not reachable by ping",
3319 errors.ECODE_ENVIRON)
3321 if not newbie_singlehomed:
3322 # check reachability from my secondary ip to newbie's secondary ip
3323 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3324 source=myself.secondary_ip):
3325 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3326 " based ping to noded port",
3327 errors.ECODE_ENVIRON)
3334 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3337 self.new_node = self.cfg.GetNodeInfo(node)
3338 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3340 self.new_node = objects.Node(name=node,
3341 primary_ip=primary_ip,
3342 secondary_ip=secondary_ip,
3343 master_candidate=self.master_candidate,
3344 offline=False, drained=False)
3346 def Exec(self, feedback_fn):
3347 """Adds the new node to the cluster.
3350 new_node = self.new_node
3351 node = new_node.name
3353 # for re-adds, reset the offline/drained/master-candidate flags;
3354 # we need to reset here, otherwise offline would prevent RPC calls
3355 # later in the procedure; this also means that if the re-add
3356 # fails, we are left with a non-offlined, broken node
3358 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3359 self.LogInfo("Readding a node, the offline/drained flags were reset")
3360 # if we demote the node, we do cleanup later in the procedure
3361 new_node.master_candidate = self.master_candidate
3363 # notify the user about any possible mc promotion
3364 if new_node.master_candidate:
3365 self.LogInfo("Node will be a master candidate")
3367 # check connectivity
3368 result = self.rpc.call_version([node])[node]
3369 result.Raise("Can't get version information from node %s" % node)
3370 if constants.PROTOCOL_VERSION == result.payload:
3371 logging.info("Communication to node %s fine, sw version %s match",
3372 node, result.payload)
3374 raise errors.OpExecError("Version mismatch master version %s,"
3375 " node version %s" %
3376 (constants.PROTOCOL_VERSION, result.payload))
3379 if self.cfg.GetClusterInfo().modify_ssh_setup:
3380 logging.info("Copy ssh key to node %s", node)
3381 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3383 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3384 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3388 keyarray.append(utils.ReadFile(i))
3390 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3391 keyarray[2], keyarray[3], keyarray[4],
3393 result.Raise("Cannot transfer ssh keys to the new node")
3395 # Add node to our /etc/hosts, and add key to known_hosts
3396 if self.cfg.GetClusterInfo().modify_etc_hosts:
3397 utils.AddHostToEtcHosts(new_node.name)
3399 if new_node.secondary_ip != new_node.primary_ip:
3400 result = self.rpc.call_node_has_ip_address(new_node.name,
3401 new_node.secondary_ip)
3402 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3403 prereq=True, ecode=errors.ECODE_ENVIRON)
3404 if not result.payload:
3405 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3406 " you gave (%s). Please fix and re-run this"
3407 " command." % new_node.secondary_ip)
3409 node_verify_list = [self.cfg.GetMasterNode()]
3410 node_verify_param = {
3411 constants.NV_NODELIST: [node],
3412 # TODO: do a node-net-test as well?
3415 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3416 self.cfg.GetClusterName())
3417 for verifier in node_verify_list:
3418 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3419 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3421 for failed in nl_payload:
3422 feedback_fn("ssh/hostname verification failed"
3423 " (checking from %s): %s" %
3424 (verifier, nl_payload[failed]))
3425 raise errors.OpExecError("ssh/hostname verification failed.")
3428 _RedistributeAncillaryFiles(self)
3429 self.context.ReaddNode(new_node)
3430 # make sure we redistribute the config
3431 self.cfg.Update(new_node, feedback_fn)
3432 # and make sure the new node will not have old files around
3433 if not new_node.master_candidate:
3434 result = self.rpc.call_node_demote_from_mc(new_node.name)
3435 msg = result.fail_msg
3437 self.LogWarning("Node failed to demote itself from master"
3438 " candidate status: %s" % msg)
3440 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3441 self.context.AddNode(new_node, self.proc.GetECId())
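# Note (not in the original source): the two paths above differ in that a
# re-add goes through ReaddNode (the node is already known to the config),
# while a fresh add registers the node via AddNode together with the
# execution context id.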
3444 class LUSetNodeParams(LogicalUnit):
3445 """Modifies the parameters of a node.
3448 HPATH = "node-modify"
3449 HTYPE = constants.HTYPE_NODE
3450 _OP_REQP = ["node_name"]
3453 def CheckArguments(self):
3454 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3455 _CheckBooleanOpField(self.op, 'master_candidate')
3456 _CheckBooleanOpField(self.op, 'offline')
3457 _CheckBooleanOpField(self.op, 'drained')
3458 _CheckBooleanOpField(self.op, 'auto_promote')
3459 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3460 if all_mods.count(None) == 3:
3461 raise errors.OpPrereqError("Please pass at least one modification",
3463 if all_mods.count(True) > 1:
3464 raise errors.OpPrereqError("Can't set the node into more than one"
3465 " state at the same time",
3468 # Boolean value that tells us whether we're offlining or draining the node
3469 self.offline_or_drain = (self.op.offline == True or
3470 self.op.drained == True)
3471 self.deoffline_or_drain = (self.op.offline == False or
3472 self.op.drained == False)
3473 self.might_demote = (self.op.master_candidate == False or
3474 self.offline_or_drain)
3476 self.lock_all = self.op.auto_promote and self.might_demote
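# Note (not in the original source): all nodes only need to be locked when an
# automatic promotion may follow a demotion, since promoting replacement
# master candidates can touch nodes other than the one being modified.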
3479 def ExpandNames(self):
3481 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3483 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3485 def BuildHooksEnv(self):
3488 This runs on the master node.
3492 "OP_TARGET": self.op.node_name,
3493 "MASTER_CANDIDATE": str(self.op.master_candidate),
3494 "OFFLINE": str(self.op.offline),
3495 "DRAINED": str(self.op.drained),
3497 nl = [self.cfg.GetMasterNode(),
3501 def CheckPrereq(self):
3502 """Check prerequisites.
3504 This only checks the instance list against the existing names.
3507 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3509 if (self.op.master_candidate is not None or
3510 self.op.drained is not None or
3511 self.op.offline is not None):
3512 # we can't change the master's node flags
3513 if self.op.node_name == self.cfg.GetMasterNode():
3514 raise errors.OpPrereqError("The master role can be changed"
3515 " only via masterfailover",
3519 if node.master_candidate and self.might_demote and not self.lock_all:
3520 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3521 # check if after removing the current node, we're missing master
3522 # candidates
3523 (mc_remaining, mc_should, _) = \
3524 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3525 if mc_remaining < mc_should:
3526 raise errors.OpPrereqError("Not enough master candidates, please"
3527 " pass auto_promote to allow promotion",
3530 if (self.op.master_candidate == True and
3531 ((node.offline and not self.op.offline == False) or
3532 (node.drained and not self.op.drained == False))):
3533 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3534 " to master_candidate" % node.name,
3537 # If we're being deofflined/drained, we'll MC ourself if needed
3538 if (self.deoffline_or_drain and not self.offline_or_drain and not
3539 self.op.master_candidate == True and not node.master_candidate):
3540 self.op.master_candidate = _DecideSelfPromotion(self)
3541 if self.op.master_candidate:
3542 self.LogInfo("Autopromoting node to master candidate")
3546 def Exec(self, feedback_fn):
3555 if self.op.offline is not None:
3556 node.offline = self.op.offline
3557 result.append(("offline", str(self.op.offline)))
3558 if self.op.offline == True:
3559 if node.master_candidate:
3560 node.master_candidate = False
3562 result.append(("master_candidate", "auto-demotion due to offline"))
3564 node.drained = False
3565 result.append(("drained", "clear drained status due to offline"))
3567 if self.op.master_candidate is not None:
3568 node.master_candidate = self.op.master_candidate
3570 result.append(("master_candidate", str(self.op.master_candidate)))
3571 if self.op.master_candidate == False:
3572 rrc = self.rpc.call_node_demote_from_mc(node.name)
3575 self.LogWarning("Node failed to demote itself: %s" % msg)
3577 if self.op.drained is not None:
3578 node.drained = self.op.drained
3579 result.append(("drained", str(self.op.drained)))
3580 if self.op.drained == True:
3581 if node.master_candidate:
3582 node.master_candidate = False
3584 result.append(("master_candidate", "auto-demotion due to drain"))
3585 rrc = self.rpc.call_node_demote_from_mc(node.name)
3588 self.LogWarning("Node failed to demote itself: %s" % msg)
3590 node.offline = False
3591 result.append(("offline", "clear offline status due to drain"))
3593 # we locked all nodes, we adjust the CP before updating this node
3595 _AdjustCandidatePool(self, [node.name])
3597 # this will trigger configuration file update, if needed
3598 self.cfg.Update(node, feedback_fn)
3600 # this will trigger job queue propagation or cleanup
3602 self.context.ReaddNode(node)
3607 class LUPowercycleNode(NoHooksLU):
3608 """Powercycles a node.
3611 _OP_REQP = ["node_name", "force"]
3614 def CheckArguments(self):
3615 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3616 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3617 raise errors.OpPrereqError("The node is the master and the force"
3618 " parameter was not set",
3621 def ExpandNames(self):
3622 """Locking for PowercycleNode.
3624 This is a last-resort option and shouldn't block on other
3625 jobs. Therefore, we grab no locks.
3628 self.needed_locks = {}
3630 def CheckPrereq(self):
3631 """Check prerequisites.
3633 This LU has no prereqs.
3638 def Exec(self, feedback_fn):
3642 result = self.rpc.call_node_powercycle(self.op.node_name,
3643 self.cfg.GetHypervisorType())
3644 result.Raise("Failed to schedule the reboot")
3645 return result.payload
3648 class LUQueryClusterInfo(NoHooksLU):
3649 """Query cluster configuration.
3655 def ExpandNames(self):
3656 self.needed_locks = {}
3658 def CheckPrereq(self):
3659 """No prerequsites needed for this LU.
3664 def Exec(self, feedback_fn):
3665 """Return cluster config.
3668 cluster = self.cfg.GetClusterInfo()
3671 # Filter just for enabled hypervisors
3672 for os_name, hv_dict in cluster.os_hvp.items():
3673 os_hvp[os_name] = {}
3674 for hv_name, hv_params in hv_dict.items():
3675 if hv_name in cluster.enabled_hypervisors:
3676 os_hvp[os_name][hv_name] = hv_params
3679 "software_version": constants.RELEASE_VERSION,
3680 "protocol_version": constants.PROTOCOL_VERSION,
3681 "config_version": constants.CONFIG_VERSION,
3682 "os_api_version": max(constants.OS_API_VERSIONS),
3683 "export_version": constants.EXPORT_VERSION,
3684 "architecture": (platform.architecture()[0], platform.machine()),
3685 "name": cluster.cluster_name,
3686 "master": cluster.master_node,
3687 "default_hypervisor": cluster.enabled_hypervisors[0],
3688 "enabled_hypervisors": cluster.enabled_hypervisors,
3689 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3690 for hypervisor_name in cluster.enabled_hypervisors]),
3692 "beparams": cluster.beparams,
3693 "nicparams": cluster.nicparams,
3694 "candidate_pool_size": cluster.candidate_pool_size,
3695 "master_netdev": cluster.master_netdev,
3696 "volume_group_name": cluster.volume_group_name,
3697 "file_storage_dir": cluster.file_storage_dir,
3698 "maintain_node_health": cluster.maintain_node_health,
3699 "ctime": cluster.ctime,
3700 "mtime": cluster.mtime,
3701 "uuid": cluster.uuid,
3702 "tags": list(cluster.GetTags()),
3708 class LUQueryConfigValues(NoHooksLU):
3709 """Return configuration values.
3714 _FIELDS_DYNAMIC = utils.FieldSet()
3715 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3718 def ExpandNames(self):
3719 self.needed_locks = {}
3721 _CheckOutputFields(static=self._FIELDS_STATIC,
3722 dynamic=self._FIELDS_DYNAMIC,
3723 selected=self.op.output_fields)
3725 def CheckPrereq(self):
3726 """No prerequisites.
3731 def Exec(self, feedback_fn):
3732 """Dump a representation of the cluster config to the standard output.
3736 for field in self.op.output_fields:
3737 if field == "cluster_name":
3738 entry = self.cfg.GetClusterName()
3739 elif field == "master_node":
3740 entry = self.cfg.GetMasterNode()
3741 elif field == "drain_flag":
3742 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3743 elif field == "watcher_pause":
3744 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3746 raise errors.ParameterError(field)
3747 values.append(entry)
3751 class LUActivateInstanceDisks(NoHooksLU):
3752 """Bring up an instance's disks.
3755 _OP_REQP = ["instance_name"]
3758 def ExpandNames(self):
3759 self._ExpandAndLockInstance()
3760 self.needed_locks[locking.LEVEL_NODE] = []
3761 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3763 def DeclareLocks(self, level):
3764 if level == locking.LEVEL_NODE:
3765 self._LockInstancesNodes()
3767 def CheckPrereq(self):
3768 """Check prerequisites.
3770 This checks that the instance is in the cluster.
3773 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3774 assert self.instance is not None, \
3775 "Cannot retrieve locked instance %s" % self.op.instance_name
3776 _CheckNodeOnline(self, self.instance.primary_node)
3777 if not hasattr(self.op, "ignore_size"):
3778 self.op.ignore_size = False
3780 def Exec(self, feedback_fn):
3781 """Activate the disks.
3784 disks_ok, disks_info = \
3785 _AssembleInstanceDisks(self, self.instance,
3786 ignore_size=self.op.ignore_size)
3788 raise errors.OpExecError("Cannot activate block devices")
3793 def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3795 """Prepare the block devices for an instance.
3797 This sets up the block devices on all nodes.
3799 @type lu: L{LogicalUnit}
3800 @param lu: the logical unit on whose behalf we execute
3801 @type instance: L{objects.Instance}
3802 @param instance: the instance for whose disks we assemble
3803 @type ignore_secondaries: boolean
3804 @param ignore_secondaries: if true, errors on secondary nodes
3805 won't result in an error return from the function
3806 @type ignore_size: boolean
3807 @param ignore_size: if true, the current known size of the disk
3808 will not be used during the disk activation, useful for cases
3809 when the size is wrong
3810 @return: False if the operation failed, otherwise a list of
3811 (host, instance_visible_name, node_visible_name)
3812 with the mapping from node devices to instance devices
3817 iname = instance.name
3818 # With the two-pass mechanism we try to reduce the window of
3819 # opportunity for the race condition of switching DRBD to primary
3820 # before handshaking occurred, but we do not eliminate it
3822 # The proper fix would be to wait (with some limits) until the
3823 # connection has been made and drbd transitions from WFConnection
3824 # into any other network-connected state (Connected, SyncTarget,
3825 # SyncSource, etc.)
3827 # 1st pass, assemble on all nodes in secondary mode
3828 for inst_disk in instance.disks:
3829 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3831 node_disk = node_disk.Copy()
3832 node_disk.UnsetSize()
3833 lu.cfg.SetDiskID(node_disk, node)
3834 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3835 msg = result.fail_msg
3837 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3838 " (is_primary=False, pass=1): %s",
3839 inst_disk.iv_name, node, msg)
3840 if not ignore_secondaries:
3843 # FIXME: race condition on drbd migration to primary
3845 # 2nd pass, do only the primary node
3846 for inst_disk in instance.disks:
3849 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3850 if node != instance.primary_node:
3853 node_disk = node_disk.Copy()
3854 node_disk.UnsetSize()
3855 lu.cfg.SetDiskID(node_disk, node)
3856 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3857 msg = result.fail_msg
3859 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3860 " (is_primary=True, pass=2): %s",
3861 inst_disk.iv_name, node, msg)
3864 dev_path = result.payload
3866 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3868 # leave the disks configured for the primary node
3869 # this is a workaround that would be fixed better by
3870 # improving the logical/physical id handling
3871 for disk in instance.disks:
3872 lu.cfg.SetDiskID(disk, instance.primary_node)
3874 return disks_ok, device_info
3877 def _StartInstanceDisks(lu, instance, force):
3878 """Start the disks of an instance.
3881 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3882 ignore_secondaries=force)
3884 _ShutdownInstanceDisks(lu, instance)
3885 if force is not None and not force:
3886 lu.proc.LogWarning("", hint="If the message above refers to a"
3888 " secondary node, you can retry the operation using '--force'.")
3889 raise errors.OpExecError("Disk consistency error")
3892 class LUDeactivateInstanceDisks(NoHooksLU):
3893 """Shutdown an instance's disks.
3896 _OP_REQP = ["instance_name"]
3899 def ExpandNames(self):
3900 self._ExpandAndLockInstance()
3901 self.needed_locks[locking.LEVEL_NODE] = []
3902 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3904 def DeclareLocks(self, level):
3905 if level == locking.LEVEL_NODE:
3906 self._LockInstancesNodes()
3908 def CheckPrereq(self):
3909 """Check prerequisites.
3911 This checks that the instance is in the cluster.
3914 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3915 assert self.instance is not None, \
3916 "Cannot retrieve locked instance %s" % self.op.instance_name
3918 def Exec(self, feedback_fn):
3919 """Deactivate the disks
3922 instance = self.instance
3923 _SafeShutdownInstanceDisks(self, instance)
3926 def _SafeShutdownInstanceDisks(lu, instance):
3927 """Shutdown block devices of an instance.
3929 This function checks if an instance is running, before calling
3930 _ShutdownInstanceDisks.
3933 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
3934 _ShutdownInstanceDisks(lu, instance)
3937 def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3938 """Shutdown block devices of an instance.
3940 This does the shutdown on all nodes of the instance.
3942 If ignore_primary is false, errors on the primary node are not ignored and cause the shutdown to be reported as failed.
3947 for disk in instance.disks:
3948 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3949 lu.cfg.SetDiskID(top_disk, node)
3950 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3951 msg = result.fail_msg
3953 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3954 disk.iv_name, node, msg)
3955 if not ignore_primary or node != instance.primary_node:
3960 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3961 """Checks if a node has enough free memory.
3963 This function checks if a given node has the needed amount of free
3964 memory. In case the node has less memory or we cannot get the
3965 information from the node, this function raises an OpPrereqError
3968 @type lu: C{LogicalUnit}
3969 @param lu: a logical unit from which we get configuration data
3971 @param node: the node to check
3972 @type reason: C{str}
3973 @param reason: string to use in the error message
3974 @type requested: C{int}
3975 @param requested: the amount of memory in MiB to check for
3976 @type hypervisor_name: C{str}
3977 @param hypervisor_name: the hypervisor to ask for memory stats
3978 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3979 we cannot check the node
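Hedged usage sketch, mirroring the call made by LUStartupInstance below
(the 2048 MiB figure is purely illustrative)::

  _CheckNodeFreeMemory(self, instance.primary_node,
                       "starting instance %s" % instance.name,
                       2048, instance.hypervisor)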
3982 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
3983 nodeinfo[node].Raise("Can't get data from node %s" % node,
3984 prereq=True, ecode=errors.ECODE_ENVIRON)
3985 free_mem = nodeinfo[node].payload.get('memory_free', None)
3986 if not isinstance(free_mem, int):
3987 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3988 " was '%s'" % (node, free_mem),
3989 errors.ECODE_ENVIRON)
3990 if requested > free_mem:
3991 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3992 " needed %s MiB, available %s MiB" %
3993 (node, reason, requested, free_mem),
3997 def _CheckNodesFreeDisk(lu, nodenames, requested):
3998 """Checks if nodes have enough free disk space in the default VG.
4000 This function checks if all given nodes have the needed amount of
4001 free disk. In case any node has less disk or we cannot get the
4002 information from the node, this function raises an OpPrereqError
4005 @type lu: C{LogicalUnit}
4006 @param lu: a logical unit from which we get configuration data
4007 @type nodenames: C{list}
4008 @param nodenames: the list of node names to check
4009 @type requested: C{int}
4010 @param requested: the amount of disk in MiB to check for
4011 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4012 we cannot check the node
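Hedged usage sketch (the node list and the 1024 MiB figure are
illustrative only)::

  _CheckNodesFreeDisk(self, [instance.primary_node], 1024)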
4015 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4016 lu.cfg.GetHypervisorType())
4017 for node in nodenames:
4018 info = nodeinfo[node]
4019 info.Raise("Cannot get current information from node %s" % node,
4020 prereq=True, ecode=errors.ECODE_ENVIRON)
4021 vg_free = info.payload.get("vg_free", None)
4022 if not isinstance(vg_free, int):
4023 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4024 " result was '%s'" % (node, vg_free),
4025 errors.ECODE_ENVIRON)
4026 if requested > vg_free:
4027 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4028 " required %d MiB, available %d MiB" %
4029 (node, requested, vg_free),
4033 class LUStartupInstance(LogicalUnit):
4034 """Starts an instance.
4037 HPATH = "instance-start"
4038 HTYPE = constants.HTYPE_INSTANCE
4039 _OP_REQP = ["instance_name", "force"]
4042 def ExpandNames(self):
4043 self._ExpandAndLockInstance()
4045 def BuildHooksEnv(self):
4048 This runs on master, primary and secondary nodes of the instance.
4052 "FORCE": self.op.force,
4054 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4055 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4058 def CheckPrereq(self):
4059 """Check prerequisites.
4061 This checks that the instance is in the cluster.
4064 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4065 assert self.instance is not None, \
4066 "Cannot retrieve locked instance %s" % self.op.instance_name
4069 self.beparams = getattr(self.op, "beparams", {})
4071 if not isinstance(self.beparams, dict):
4072 raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4073 " dict" % (type(self.beparams), ),
4075 # fill the beparams dict
4076 utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
4077 self.op.beparams = self.beparams
4080 self.hvparams = getattr(self.op, "hvparams", {})
4082 if not isinstance(self.hvparams, dict):
4083 raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4084 " dict" % (type(self.hvparams), ),
4087 # check hypervisor parameter syntax (locally)
4088 cluster = self.cfg.GetClusterInfo()
4089 utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
4090 filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
4092 filled_hvp.update(self.hvparams)
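# Descriptive note: objects.FillDict(defaults, overrides) returns a copy of
# the first dict updated with the second, and the update() above then layers
# the per-opcode hvparams on top of that, e.g. (illustrative values only)
# FillDict({"acpi": True}, {"acpi": False}) -> {"acpi": False}.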
4093 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4094 hv_type.CheckParameterSyntax(filled_hvp)
4095 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4096 self.op.hvparams = self.hvparams
4098 _CheckNodeOnline(self, instance.primary_node)
4100 bep = self.cfg.GetClusterInfo().FillBE(instance)
4101 # check bridges existence
4102 _CheckInstanceBridgesExist(self, instance)
4104 remote_info = self.rpc.call_instance_info(instance.primary_node,
4106 instance.hypervisor)
4107 remote_info.Raise("Error checking node %s" % instance.primary_node,
4108 prereq=True, ecode=errors.ECODE_ENVIRON)
4109 if not remote_info.payload: # not running already
4110 _CheckNodeFreeMemory(self, instance.primary_node,
4111 "starting instance %s" % instance.name,
4112 bep[constants.BE_MEMORY], instance.hypervisor)
4114 def Exec(self, feedback_fn):
4115 """Start the instance.
4118 instance = self.instance
4119 force = self.op.force
4121 self.cfg.MarkInstanceUp(instance.name)
4123 node_current = instance.primary_node
4125 _StartInstanceDisks(self, instance, force)
4127 result = self.rpc.call_instance_start(node_current, instance,
4128 self.hvparams, self.beparams)
4129 msg = result.fail_msg
4131 _ShutdownInstanceDisks(self, instance)
4132 raise errors.OpExecError("Could not start instance: %s" % msg)
4135 class LURebootInstance(LogicalUnit):
4136 """Reboot an instance.
4139 HPATH = "instance-reboot"
4140 HTYPE = constants.HTYPE_INSTANCE
4141 _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
4144 def CheckArguments(self):
4145 """Check the arguments.
4148 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4149 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4151 def ExpandNames(self):
4152 if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
4153 constants.INSTANCE_REBOOT_HARD,
4154 constants.INSTANCE_REBOOT_FULL]:
4155 raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
4156 (constants.INSTANCE_REBOOT_SOFT,
4157 constants.INSTANCE_REBOOT_HARD,
4158 constants.INSTANCE_REBOOT_FULL))
4159 self._ExpandAndLockInstance()
4161 def BuildHooksEnv(self):
4164 This runs on master, primary and secondary nodes of the instance.
4168 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4169 "REBOOT_TYPE": self.op.reboot_type,
4170 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4172 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4173 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4176 def CheckPrereq(self):
4177 """Check prerequisites.
4179 This checks that the instance is in the cluster.
4182 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4183 assert self.instance is not None, \
4184 "Cannot retrieve locked instance %s" % self.op.instance_name
4186 _CheckNodeOnline(self, instance.primary_node)
4188 # check bridges existence
4189 _CheckInstanceBridgesExist(self, instance)
4191 def Exec(self, feedback_fn):
4192 """Reboot the instance.
4195 instance = self.instance
4196 ignore_secondaries = self.op.ignore_secondaries
4197 reboot_type = self.op.reboot_type
4199 node_current = instance.primary_node
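# Descriptive note: soft and hard reboots are delegated to the hypervisor
# via call_instance_reboot below, while a full reboot is emulated by a
# clean shutdown, a disk deactivation/reactivation cycle and a fresh
# instance start.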
4201 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4202 constants.INSTANCE_REBOOT_HARD]:
4203 for disk in instance.disks:
4204 self.cfg.SetDiskID(disk, node_current)
4205 result = self.rpc.call_instance_reboot(node_current, instance,
4207 self.shutdown_timeout)
4208 result.Raise("Could not reboot instance")
4210 result = self.rpc.call_instance_shutdown(node_current, instance,
4211 self.shutdown_timeout)
4212 result.Raise("Could not shutdown instance for full reboot")
4213 _ShutdownInstanceDisks(self, instance)
4214 _StartInstanceDisks(self, instance, ignore_secondaries)
4215 result = self.rpc.call_instance_start(node_current, instance, None, None)
4216 msg = result.fail_msg
4218 _ShutdownInstanceDisks(self, instance)
4219 raise errors.OpExecError("Could not start instance for"
4220 " full reboot: %s" % msg)
4222 self.cfg.MarkInstanceUp(instance.name)
4225 class LUShutdownInstance(LogicalUnit):
4226 """Shutdown an instance.
4229 HPATH = "instance-stop"
4230 HTYPE = constants.HTYPE_INSTANCE
4231 _OP_REQP = ["instance_name"]
4234 def CheckArguments(self):
4235 """Check the arguments.
4238 self.timeout = getattr(self.op, "timeout",
4239 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4241 def ExpandNames(self):
4242 self._ExpandAndLockInstance()
4244 def BuildHooksEnv(self):
4247 This runs on master, primary and secondary nodes of the instance.
4250 env = _BuildInstanceHookEnvByObject(self, self.instance)
4251 env["TIMEOUT"] = self.timeout
4252 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4255 def CheckPrereq(self):
4256 """Check prerequisites.
4258 This checks that the instance is in the cluster.
4261 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4262 assert self.instance is not None, \
4263 "Cannot retrieve locked instance %s" % self.op.instance_name
4264 _CheckNodeOnline(self, self.instance.primary_node)
4266 def Exec(self, feedback_fn):
4267 """Shutdown the instance.
4270 instance = self.instance
4271 node_current = instance.primary_node
4272 timeout = self.timeout
4273 self.cfg.MarkInstanceDown(instance.name)
4274 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4275 msg = result.fail_msg
4277 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4279 _ShutdownInstanceDisks(self, instance)
4282 class LUReinstallInstance(LogicalUnit):
4283 """Reinstall an instance.
4286 HPATH = "instance-reinstall"
4287 HTYPE = constants.HTYPE_INSTANCE
4288 _OP_REQP = ["instance_name"]
4291 def ExpandNames(self):
4292 self._ExpandAndLockInstance()
4294 def BuildHooksEnv(self):
4297 This runs on master, primary and secondary nodes of the instance.
4300 env = _BuildInstanceHookEnvByObject(self, self.instance)
4301 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4304 def CheckPrereq(self):
4305 """Check prerequisites.
4307 This checks that the instance is in the cluster and is not running.
4310 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4311 assert instance is not None, \
4312 "Cannot retrieve locked instance %s" % self.op.instance_name
4313 _CheckNodeOnline(self, instance.primary_node)
4315 if instance.disk_template == constants.DT_DISKLESS:
4316 raise errors.OpPrereqError("Instance '%s' has no disks" %
4317 self.op.instance_name,
4319 _CheckInstanceDown(self, instance, "cannot reinstall")
4321 self.op.os_type = getattr(self.op, "os_type", None)
4322 self.op.force_variant = getattr(self.op, "force_variant", False)
4323 if self.op.os_type is not None:
4325 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4326 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4328 self.instance = instance
4330 def Exec(self, feedback_fn):
4331 """Reinstall the instance.
4334 inst = self.instance
4336 if self.op.os_type is not None:
4337 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4338 inst.os = self.op.os_type
4339 self.cfg.Update(inst, feedback_fn)
4341 _StartInstanceDisks(self, inst, None)
4343 feedback_fn("Running the instance OS create scripts...")
4344 # FIXME: pass debug option from opcode to backend
4345 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4346 self.op.debug_level)
4347 result.Raise("Could not install OS for instance %s on node %s" %
4348 (inst.name, inst.primary_node))
4350 _ShutdownInstanceDisks(self, inst)
4353 class LURecreateInstanceDisks(LogicalUnit):
4354 """Recreate an instance's missing disks.
4357 HPATH = "instance-recreate-disks"
4358 HTYPE = constants.HTYPE_INSTANCE
4359 _OP_REQP = ["instance_name", "disks"]
4362 def CheckArguments(self):
4363 """Check the arguments.
4366 if not isinstance(self.op.disks, list):
4367 raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4368 for item in self.op.disks:
4369 if (not isinstance(item, int) or
4371 raise errors.OpPrereqError("Invalid disk specification '%s'" %
4372 str(item), errors.ECODE_INVAL)
4374 def ExpandNames(self):
4375 self._ExpandAndLockInstance()
4377 def BuildHooksEnv(self):
4380 This runs on master, primary and secondary nodes of the instance.
4383 env = _BuildInstanceHookEnvByObject(self, self.instance)
4384 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4387 def CheckPrereq(self):
4388 """Check prerequisites.
4390 This checks that the instance is in the cluster and is not running.
4393 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4394 assert instance is not None, \
4395 "Cannot retrieve locked instance %s" % self.op.instance_name
4396 _CheckNodeOnline(self, instance.primary_node)
4398 if instance.disk_template == constants.DT_DISKLESS:
4399 raise errors.OpPrereqError("Instance '%s' has no disks" %
4400 self.op.instance_name, errors.ECODE_INVAL)
4401 _CheckInstanceDown(self, instance, "cannot recreate disks")
4403 if not self.op.disks:
4404 self.op.disks = range(len(instance.disks))
4406 for idx in self.op.disks:
4407 if idx >= len(instance.disks):
4408 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4411 self.instance = instance
4413 def Exec(self, feedback_fn):
4414 """Recreate the disks.
4418 for idx, _ in enumerate(self.instance.disks):
4419 if idx not in self.op.disks: # disk idx has not been passed in
4423 _CreateDisks(self, self.instance, to_skip=to_skip)
4426 class LURenameInstance(LogicalUnit):
4427 """Rename an instance.
4430 HPATH = "instance-rename"
4431 HTYPE = constants.HTYPE_INSTANCE
4432 _OP_REQP = ["instance_name", "new_name"]
4434 def BuildHooksEnv(self):
4437 This runs on master, primary and secondary nodes of the instance.
4440 env = _BuildInstanceHookEnvByObject(self, self.instance)
4441 env["INSTANCE_NEW_NAME"] = self.op.new_name
4442 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4445 def CheckPrereq(self):
4446 """Check prerequisites.
4448 This checks that the instance is in the cluster and is not running.
4451 self.op.instance_name = _ExpandInstanceName(self.cfg,
4452 self.op.instance_name)
4453 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4454 assert instance is not None
4455 _CheckNodeOnline(self, instance.primary_node)
4456 _CheckInstanceDown(self, instance, "cannot rename")
4457 self.instance = instance
4459 # new name verification
4460 name_info = utils.GetHostInfo(self.op.new_name)
4462 self.op.new_name = new_name = name_info.name
4463 instance_list = self.cfg.GetInstanceList()
4464 if new_name in instance_list:
4465 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4466 new_name, errors.ECODE_EXISTS)
4468 if not getattr(self.op, "ignore_ip", False):
4469 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4470 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4471 (name_info.ip, new_name),
4472 errors.ECODE_NOTUNIQUE)
4475 def Exec(self, feedback_fn):
4476 """Rename the instance.
4479 inst = self.instance
4480 old_name = inst.name
4482 if inst.disk_template == constants.DT_FILE:
4483 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4485 self.cfg.RenameInstance(inst.name, self.op.new_name)
4486 # Change the instance lock. This is definitely safe while we hold the BGL
4487 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4488 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4490 # re-read the instance from the configuration after rename
4491 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4493 if inst.disk_template == constants.DT_FILE:
4494 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4495 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4496 old_file_storage_dir,
4497 new_file_storage_dir)
4498 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4499 " (but the instance has been renamed in Ganeti)" %
4500 (inst.primary_node, old_file_storage_dir,
4501 new_file_storage_dir))
4503 _StartInstanceDisks(self, inst, None)
4505 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4506 old_name, self.op.debug_level)
4507 msg = result.fail_msg
4509 msg = ("Could not run OS rename script for instance %s on node %s"
4510 " (but the instance has been renamed in Ganeti): %s" %
4511 (inst.name, inst.primary_node, msg))
4512 self.proc.LogWarning(msg)
4514 _ShutdownInstanceDisks(self, inst)
4517 class LURemoveInstance(LogicalUnit):
4518 """Remove an instance.
4521 HPATH = "instance-remove"
4522 HTYPE = constants.HTYPE_INSTANCE
4523 _OP_REQP = ["instance_name", "ignore_failures"]
4526 def CheckArguments(self):
4527 """Check the arguments.
4530 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4531 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4533 def ExpandNames(self):
4534 self._ExpandAndLockInstance()
4535 self.needed_locks[locking.LEVEL_NODE] = []
4536 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4538 def DeclareLocks(self, level):
4539 if level == locking.LEVEL_NODE:
4540 self._LockInstancesNodes()
4542 def BuildHooksEnv(self):
4545 This runs on master, primary and secondary nodes of the instance.
4548 env = _BuildInstanceHookEnvByObject(self, self.instance)
4549 env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4550 nl = [self.cfg.GetMasterNode()]
4551 nl_post = list(self.instance.all_nodes) + nl
4552 return env, nl, nl_post
4554 def CheckPrereq(self):
4555 """Check prerequisites.
4557 This checks that the instance is in the cluster.
4560 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4561 assert self.instance is not None, \
4562 "Cannot retrieve locked instance %s" % self.op.instance_name
4564 def Exec(self, feedback_fn):
4565 """Remove the instance.
4568 instance = self.instance
4569 logging.info("Shutting down instance %s on node %s",
4570 instance.name, instance.primary_node)
4572 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4573 self.shutdown_timeout)
4574 msg = result.fail_msg
4576 if self.op.ignore_failures:
4577 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4579 raise errors.OpExecError("Could not shutdown instance %s on"
4581 (instance.name, instance.primary_node, msg))
4583 logging.info("Removing block devices for instance %s", instance.name)
4585 if not _RemoveDisks(self, instance):
4586 if self.op.ignore_failures:
4587 feedback_fn("Warning: can't remove instance's disks")
4589 raise errors.OpExecError("Can't remove instance's disks")
4591 logging.info("Removing instance %s out of cluster config", instance.name)
4593 self.cfg.RemoveInstance(instance.name)
4594 self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4597 class LUQueryInstances(NoHooksLU):
4598 """Logical unit for querying instances.
4601 # pylint: disable-msg=W0142
4602 _OP_REQP = ["output_fields", "names", "use_locking"]
4604 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4605 "serial_no", "ctime", "mtime", "uuid"]
4606 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4608 "disk_template", "ip", "mac", "bridge",
4609 "nic_mode", "nic_link",
4610 "sda_size", "sdb_size", "vcpus", "tags",
4611 "network_port", "beparams",
4612 r"(disk)\.(size)/([0-9]+)",
4613 r"(disk)\.(sizes)", "disk_usage",
4614 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4615 r"(nic)\.(bridge)/([0-9]+)",
4616 r"(nic)\.(macs|ips|modes|links|bridges)",
4617 r"(disk|nic)\.(count)",
4619 ] + _SIMPLE_FIELDS +
4621 for name in constants.HVS_PARAMETERS
4622 if name not in constants.HVC_GLOBALS] +
4624 for name in constants.BES_PARAMETERS])
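# Examples of field names matched by the regular expressions above
# (illustrative): "disk.size/0", "disk.sizes", "nic.mac/1", "nic.modes",
# "disk.count".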
4625 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4628 def ExpandNames(self):
4629 _CheckOutputFields(static=self._FIELDS_STATIC,
4630 dynamic=self._FIELDS_DYNAMIC,
4631 selected=self.op.output_fields)
4633 self.needed_locks = {}
4634 self.share_locks[locking.LEVEL_INSTANCE] = 1
4635 self.share_locks[locking.LEVEL_NODE] = 1
4638 self.wanted = _GetWantedInstances(self, self.op.names)
4640 self.wanted = locking.ALL_SET
4642 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4643 self.do_locking = self.do_node_query and self.op.use_locking
4645 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4646 self.needed_locks[locking.LEVEL_NODE] = []
4647 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4649 def DeclareLocks(self, level):
4650 if level == locking.LEVEL_NODE and self.do_locking:
4651 self._LockInstancesNodes()
4653 def CheckPrereq(self):
4654 """Check prerequisites.
4659 def Exec(self, feedback_fn):
4660 """Computes the list of instances and their attributes.
4663 # pylint: disable-msg=R0912
4664 # way too many branches here
4665 all_info = self.cfg.GetAllInstancesInfo()
4666 if self.wanted == locking.ALL_SET:
4667 # caller didn't specify instance names, so ordering is not important
4669 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4671 instance_names = all_info.keys()
4672 instance_names = utils.NiceSort(instance_names)
4674 # caller did specify names, so we must keep the ordering
4676 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4678 tgt_set = all_info.keys()
4679 missing = set(self.wanted).difference(tgt_set)
4681 raise errors.OpExecError("Some instances were removed before"
4682 " retrieving their data: %s" % missing)
4683 instance_names = self.wanted
4685 instance_list = [all_info[iname] for iname in instance_names]
4687 # begin data gathering
4689 nodes = frozenset([inst.primary_node for inst in instance_list])
4690 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4694 if self.do_node_query:
4696 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4698 result = node_data[name]
4700 # offline nodes will be in both lists
4701 off_nodes.append(name)
4703 bad_nodes.append(name)
4706 live_data.update(result.payload)
4707 # else no instance is alive
4709 live_data = dict([(name, {}) for name in instance_names])
4711 # end data gathering
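# Descriptive note: at this point live_data maps each instance name to the
# info dict returned by call_all_instances_info (e.g. its "memory" key is
# used for oper_ram below), or to {} when no node query was performed;
# bad_nodes and off_nodes hold the unreachable and offline node names.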
4716 cluster = self.cfg.GetClusterInfo()
4717 for instance in instance_list:
4719 i_hv = cluster.FillHV(instance, skip_globals=True)
4720 i_be = cluster.FillBE(instance)
4721 i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4722 nic.nicparams) for nic in instance.nics]
4723 for field in self.op.output_fields:
4724 st_match = self._FIELDS_STATIC.Matches(field)
4725 if field in self._SIMPLE_FIELDS:
4726 val = getattr(instance, field)
4727 elif field == "pnode":
4728 val = instance.primary_node
4729 elif field == "snodes":
4730 val = list(instance.secondary_nodes)
4731 elif field == "admin_state":
4732 val = instance.admin_up
4733 elif field == "oper_state":
4734 if instance.primary_node in bad_nodes:
4737 val = bool(live_data.get(instance.name))
4738 elif field == "status":
4739 if instance.primary_node in off_nodes:
4740 val = "ERROR_nodeoffline"
4741 elif instance.primary_node in bad_nodes:
4742 val = "ERROR_nodedown"
4744 running = bool(live_data.get(instance.name))
4746 if instance.admin_up:
4751 if instance.admin_up:
4755 elif field == "oper_ram":
4756 if instance.primary_node in bad_nodes:
4758 elif instance.name in live_data:
4759 val = live_data[instance.name].get("memory", "?")
4762 elif field == "vcpus":
4763 val = i_be[constants.BE_VCPUS]
4764 elif field == "disk_template":
4765 val = instance.disk_template
4768 val = instance.nics[0].ip
4771 elif field == "nic_mode":
4773 val = i_nicp[0][constants.NIC_MODE]
4776 elif field == "nic_link":
4778 val = i_nicp[0][constants.NIC_LINK]
4781 elif field == "bridge":
4782 if (instance.nics and
4783 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4784 val = i_nicp[0][constants.NIC_LINK]
4787 elif field == "mac":
4789 val = instance.nics[0].mac
4792 elif field == "sda_size" or field == "sdb_size":
4793 idx = ord(field[2]) - ord('a')
4795 val = instance.FindDisk(idx).size
4796 except errors.OpPrereqError:
4798 elif field == "disk_usage": # total disk usage per node
4799 disk_sizes = [{'size': disk.size} for disk in instance.disks]
4800 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4801 elif field == "tags":
4802 val = list(instance.GetTags())
4803 elif field == "hvparams":
4805 elif (field.startswith(HVPREFIX) and
4806 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4807 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4808 val = i_hv.get(field[len(HVPREFIX):], None)
4809 elif field == "beparams":
4811 elif (field.startswith(BEPREFIX) and
4812 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4813 val = i_be.get(field[len(BEPREFIX):], None)
4814 elif st_match and st_match.groups():
4815 # matches a variable list
4816 st_groups = st_match.groups()
4817 if st_groups and st_groups[0] == "disk":
4818 if st_groups[1] == "count":
4819 val = len(instance.disks)
4820 elif st_groups[1] == "sizes":
4821 val = [disk.size for disk in instance.disks]
4822 elif st_groups[1] == "size":
4824 val = instance.FindDisk(st_groups[2]).size
4825 except errors.OpPrereqError:
4828 assert False, "Unhandled disk parameter"
4829 elif st_groups[0] == "nic":
4830 if st_groups[1] == "count":
4831 val = len(instance.nics)
4832 elif st_groups[1] == "macs":
4833 val = [nic.mac for nic in instance.nics]
4834 elif st_groups[1] == "ips":
4835 val = [nic.ip for nic in instance.nics]
4836 elif st_groups[1] == "modes":
4837 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4838 elif st_groups[1] == "links":
4839 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4840 elif st_groups[1] == "bridges":
4843 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4844 val.append(nicp[constants.NIC_LINK])
4849 nic_idx = int(st_groups[2])
4850 if nic_idx >= len(instance.nics):
4853 if st_groups[1] == "mac":
4854 val = instance.nics[nic_idx].mac
4855 elif st_groups[1] == "ip":
4856 val = instance.nics[nic_idx].ip
4857 elif st_groups[1] == "mode":
4858 val = i_nicp[nic_idx][constants.NIC_MODE]
4859 elif st_groups[1] == "link":
4860 val = i_nicp[nic_idx][constants.NIC_LINK]
4861 elif st_groups[1] == "bridge":
4862 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4863 if nic_mode == constants.NIC_MODE_BRIDGED:
4864 val = i_nicp[nic_idx][constants.NIC_LINK]
4868 assert False, "Unhandled NIC parameter"
4870 assert False, ("Declared but unhandled variable parameter '%s'" %
4873 assert False, "Declared but unhandled parameter '%s'" % field
4880 class LUFailoverInstance(LogicalUnit):
4881 """Failover an instance.
4884 HPATH = "instance-failover"
4885 HTYPE = constants.HTYPE_INSTANCE
4886 _OP_REQP = ["instance_name", "ignore_consistency"]
4889 def CheckArguments(self):
4890 """Check the arguments.
4893 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4894 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4896 def ExpandNames(self):
4897 self._ExpandAndLockInstance()
4898 self.needed_locks[locking.LEVEL_NODE] = []
4899 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4901 def DeclareLocks(self, level):
4902 if level == locking.LEVEL_NODE:
4903 self._LockInstancesNodes()
4905 def BuildHooksEnv(self):
4908 This runs on master, primary and secondary nodes of the instance.
4911 instance = self.instance
4912 source_node = instance.primary_node
4913 target_node = instance.secondary_nodes[0]
4915 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4916 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4917 "OLD_PRIMARY": source_node,
4918 "OLD_SECONDARY": target_node,
4919 "NEW_PRIMARY": target_node,
4920 "NEW_SECONDARY": source_node,
4922 env.update(_BuildInstanceHookEnvByObject(self, instance))
4923 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4925 nl_post.append(source_node)
4926 return env, nl, nl_post
4928 def CheckPrereq(self):
4929 """Check prerequisites.
4931 This checks that the instance is in the cluster.
4934 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4935 assert self.instance is not None, \
4936 "Cannot retrieve locked instance %s" % self.op.instance_name
4938 bep = self.cfg.GetClusterInfo().FillBE(instance)
4939 if instance.disk_template not in constants.DTS_NET_MIRROR:
4940 raise errors.OpPrereqError("Instance's disk layout is not"
4941 " network mirrored, cannot failover.",
4944 secondary_nodes = instance.secondary_nodes
4945 if not secondary_nodes:
4946 raise errors.ProgrammerError("no secondary node but using "
4947 "a mirrored disk template")
4949 target_node = secondary_nodes[0]
4950 _CheckNodeOnline(self, target_node)
4951 _CheckNodeNotDrained(self, target_node)
4952 if instance.admin_up:
4953 # check memory requirements on the secondary node
4954 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4955 instance.name, bep[constants.BE_MEMORY],
4956 instance.hypervisor)
4958 self.LogInfo("Not checking memory on the secondary node as"
4959 " instance will not be started")
4961 # check bridge existence
4962 _CheckInstanceBridgesExist(self, instance, node=target_node)
4964 def Exec(self, feedback_fn):
4965 """Failover an instance.
4967 The failover is done by shutting it down on its present node and
4968 starting it on the secondary.
4971 instance = self.instance
4973 source_node = instance.primary_node
4974 target_node = instance.secondary_nodes[0]
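# Descriptive outline of the failover below: check disk consistency (unless
# ignore_consistency is set), shut the instance down on the source node,
# deactivate its disks everywhere, flip primary_node in the configuration,
# then reassemble the disks and restart the instance on the old secondary
# if it was marked as up.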
4976 if instance.admin_up:
4977 feedback_fn("* checking disk consistency between source and target")
4978 for dev in instance.disks:
4979 # for drbd, these are drbd over lvm
4980 if not _CheckDiskConsistency(self, dev, target_node, False):
4981 if not self.op.ignore_consistency:
4982 raise errors.OpExecError("Disk %s is degraded on target node,"
4983 " aborting failover." % dev.iv_name)
4985 feedback_fn("* not checking disk consistency as instance is not running")
4987 feedback_fn("* shutting down instance on source node")
4988 logging.info("Shutting down instance %s on node %s",
4989 instance.name, source_node)
4991 result = self.rpc.call_instance_shutdown(source_node, instance,
4992 self.shutdown_timeout)
4993 msg = result.fail_msg
4995 if self.op.ignore_consistency:
4996 self.proc.LogWarning("Could not shutdown instance %s on node %s."
4997 " Proceeding anyway. Please make sure node"
4998 " %s is down. Error details: %s",
4999 instance.name, source_node, source_node, msg)
5001 raise errors.OpExecError("Could not shutdown instance %s on"
5003 (instance.name, source_node, msg))
5005 feedback_fn("* deactivating the instance's disks on source node")
5006 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5007 raise errors.OpExecError("Can't shut down the instance's disks.")
5009 instance.primary_node = target_node
5010 # distribute new instance config to the other nodes
5011 self.cfg.Update(instance, feedback_fn)
5013 # Only start the instance if it's marked as up
5014 if instance.admin_up:
5015 feedback_fn("* activating the instance's disks on target node")
5016 logging.info("Starting instance %s on node %s",
5017 instance.name, target_node)
5019 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5020 ignore_secondaries=True)
5022 _ShutdownInstanceDisks(self, instance)
5023 raise errors.OpExecError("Can't activate the instance's disks")
5025 feedback_fn("* starting the instance on the target node")
5026 result = self.rpc.call_instance_start(target_node, instance, None, None)
5027 msg = result.fail_msg
5029 _ShutdownInstanceDisks(self, instance)
5030 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5031 (instance.name, target_node, msg))
5034 class LUMigrateInstance(LogicalUnit):
5035 """Migrate an instance.
5037 This is migration without shutting the instance down, as opposed to
5038 failover, which requires the instance to be shut down.
5041 HPATH = "instance-migrate"
5042 HTYPE = constants.HTYPE_INSTANCE
5043 _OP_REQP = ["instance_name", "live", "cleanup"]
5047 def ExpandNames(self):
5048 self._ExpandAndLockInstance()
5050 self.needed_locks[locking.LEVEL_NODE] = []
5051 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5053 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5054 self.op.live, self.op.cleanup)
5055 self.tasklets = [self._migrater]
5057 def DeclareLocks(self, level):
5058 if level == locking.LEVEL_NODE:
5059 self._LockInstancesNodes()
5061 def BuildHooksEnv(self):
5064 This runs on master, primary and secondary nodes of the instance.
5067 instance = self._migrater.instance
5068 source_node = instance.primary_node
5069 target_node = instance.secondary_nodes[0]
5070 env = _BuildInstanceHookEnvByObject(self, instance)
5071 env["MIGRATE_LIVE"] = self.op.live
5072 env["MIGRATE_CLEANUP"] = self.op.cleanup
5074 "OLD_PRIMARY": source_node,
5075 "OLD_SECONDARY": target_node,
5076 "NEW_PRIMARY": target_node,
5077 "NEW_SECONDARY": source_node,
5079 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5081 nl_post.append(source_node)
5082 return env, nl, nl_post
5085 class LUMoveInstance(LogicalUnit):
5086 """Move an instance by data-copying.
5089 HPATH = "instance-move"
5090 HTYPE = constants.HTYPE_INSTANCE
5091 _OP_REQP = ["instance_name", "target_node"]
5094 def CheckArguments(self):
5095 """Check the arguments.
5098 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5099 constants.DEFAULT_SHUTDOWN_TIMEOUT)
5101 def ExpandNames(self):
5102 self._ExpandAndLockInstance()
5103 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5104 self.op.target_node = target_node
5105 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5106 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5108 def DeclareLocks(self, level):
5109 if level == locking.LEVEL_NODE:
5110 self._LockInstancesNodes(primary_only=True)
5112 def BuildHooksEnv(self):
5115 This runs on master, primary and secondary nodes of the instance.
5119 "TARGET_NODE": self.op.target_node,
5120 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5122 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5123 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5124 self.op.target_node]
5127 def CheckPrereq(self):
5128 """Check prerequisites.
5130 This checks that the instance is in the cluster.
5133 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5134 assert self.instance is not None, \
5135 "Cannot retrieve locked instance %s" % self.op.instance_name
5137 node = self.cfg.GetNodeInfo(self.op.target_node)
5138 assert node is not None, \
5139 "Cannot retrieve locked node %s" % self.op.target_node
5141 self.target_node = target_node = node.name
5143 if target_node == instance.primary_node:
5144 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5145 (instance.name, target_node),
5148 bep = self.cfg.GetClusterInfo().FillBE(instance)
5150 for idx, dsk in enumerate(instance.disks):
5151 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5152 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5153 " cannot copy" % idx, errors.ECODE_STATE)
5155 _CheckNodeOnline(self, target_node)
5156 _CheckNodeNotDrained(self, target_node)
5158 if instance.admin_up:
5159 # check memory requirements on the target node
5160 _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
5161 instance.name, bep[constants.BE_MEMORY],
5162 instance.hypervisor)
5164 self.LogInfo("Not checking memory on the target node as"
5165 " instance will not be started")
5167 # check bridge existence
5168 _CheckInstanceBridgesExist(self, instance, node=target_node)
5170 def Exec(self, feedback_fn):
5171 """Move an instance.
5173 The move is done by shutting it down on its present node, copying
5174 the data over (slow) and starting it on the new node.
5177 instance = self.instance
5179 source_node = instance.primary_node
5180 target_node = self.target_node
5182 self.LogInfo("Shutting down instance %s on source node %s",
5183 instance.name, source_node)
5185 result = self.rpc.call_instance_shutdown(source_node, instance,
5186 self.shutdown_timeout)
5187 msg = result.fail_msg
5189 if self.op.ignore_consistency:
5190 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5191 " Proceeding anyway. Please make sure node"
5192 " %s is down. Error details: %s",
5193 instance.name, source_node, source_node, msg)
5195 raise errors.OpExecError("Could not shutdown instance %s on"
5197 (instance.name, source_node, msg))
5199 # create the target disks
5201 _CreateDisks(self, instance, target_node=target_node)
5202 except errors.OpExecError:
5203 self.LogWarning("Device creation failed, reverting...")
5205 _RemoveDisks(self, instance, target_node=target_node)
5207 self.cfg.ReleaseDRBDMinors(instance.name)
5210 cluster_name = self.cfg.GetClusterInfo().cluster_name
5213 # activate, get path, copy the data over
5214 for idx, disk in enumerate(instance.disks):
5215 self.LogInfo("Copying data for disk %d", idx)
5216 result = self.rpc.call_blockdev_assemble(target_node, disk,
5217 instance.name, True)
5219 self.LogWarning("Can't assemble newly created disk %d: %s",
5220 idx, result.fail_msg)
5221 errs.append(result.fail_msg)
5223 dev_path = result.payload
5224 result = self.rpc.call_blockdev_export(source_node, disk,
5225 target_node, dev_path,
5228 self.LogWarning("Can't copy data over for disk %d: %s",
5229 idx, result.fail_msg)
5230 errs.append(result.fail_msg)
5234 self.LogWarning("Some disks failed to copy, aborting")
5236 _RemoveDisks(self, instance, target_node=target_node)
5238 self.cfg.ReleaseDRBDMinors(instance.name)
5239 raise errors.OpExecError("Errors during disk copy: %s" %
5242 instance.primary_node = target_node
5243 self.cfg.Update(instance, feedback_fn)
5245 self.LogInfo("Removing the disks on the original node")
5246 _RemoveDisks(self, instance, target_node=source_node)
5248 # Only start the instance if it's marked as up
5249 if instance.admin_up:
5250 self.LogInfo("Starting instance %s on node %s",
5251 instance.name, target_node)
5253 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5254 ignore_secondaries=True)
5256 _ShutdownInstanceDisks(self, instance)
5257 raise errors.OpExecError("Can't activate the instance's disks")
5259 result = self.rpc.call_instance_start(target_node, instance, None, None)
5260 msg = result.fail_msg
5262 _ShutdownInstanceDisks(self, instance)
5263 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5264 (instance.name, target_node, msg))
5267 class LUMigrateNode(LogicalUnit):
5268 """Migrate all instances from a node.
5271 HPATH = "node-migrate"
5272 HTYPE = constants.HTYPE_NODE
5273 _OP_REQP = ["node_name", "live"]
5276 def ExpandNames(self):
5277 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5279 self.needed_locks = {
5280 locking.LEVEL_NODE: [self.op.node_name],
5283 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5285 # Create tasklets for migrating all instances on this node
5289 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5290 logging.debug("Migrating instance %s", inst.name)
5291 names.append(inst.name)
5293 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5295 self.tasklets = tasklets
5297 # Declare instance locks
5298 self.needed_locks[locking.LEVEL_INSTANCE] = names
5300 def DeclareLocks(self, level):
5301 if level == locking.LEVEL_NODE:
5302 self._LockInstancesNodes()
5304 def BuildHooksEnv(self):
5307 This runs on the master, the primary and all the secondaries.
5311 "NODE_NAME": self.op.node_name,
5314 nl = [self.cfg.GetMasterNode()]
5316 return (env, nl, nl)
5319 class TLMigrateInstance(Tasklet):
5320 def __init__(self, lu, instance_name, live, cleanup):
5321 """Initializes this class.
5324 Tasklet.__init__(self, lu)
5327 self.instance_name = instance_name
5329 self.cleanup = cleanup
5331 def CheckPrereq(self):
5332 """Check prerequisites.
5334 This checks that the instance is in the cluster.
5337 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5338 instance = self.cfg.GetInstanceInfo(instance_name)
5339 assert instance is not None
5341 if instance.disk_template != constants.DT_DRBD8:
5342 raise errors.OpPrereqError("Instance's disk layout is not"
5343 " drbd8, cannot migrate.", errors.ECODE_STATE)
5345 secondary_nodes = instance.secondary_nodes
5346 if not secondary_nodes:
5347 raise errors.ConfigurationError("No secondary node but using"
5348 " drbd8 disk template")
5350 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5352 target_node = secondary_nodes[0]
5353 # check memory requirements on the secondary node
5354 _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5355 instance.name, i_be[constants.BE_MEMORY],
5356 instance.hypervisor)
5358 # check bridge existence
5359 _CheckInstanceBridgesExist(self, instance, node=target_node)
5361 if not self.cleanup:
5362 _CheckNodeNotDrained(self, target_node)
5363 result = self.rpc.call_instance_migratable(instance.primary_node,
5365 result.Raise("Can't migrate, please use failover",
5366 prereq=True, ecode=errors.ECODE_STATE)
5368 self.instance = instance
5370 def _WaitUntilSync(self):
5371 """Poll with custom rpc for disk sync.
5373 This uses our own step-based rpc call.
5376 self.feedback_fn("* wait until resync is done")
5380 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5382 self.instance.disks)
5384 for node, nres in result.items():
5385 nres.Raise("Cannot resync disks on node %s" % node)
5386 node_done, node_percent = nres.payload
5387 all_done = all_done and node_done
5388 if node_percent is not None:
5389 min_percent = min(min_percent, node_percent)
5391 if min_percent < 100:
5392 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5395 def _EnsureSecondary(self, node):
5396 """Demote a node to secondary.
5399 self.feedback_fn("* switching node %s to secondary mode" % node)
5401 for dev in self.instance.disks:
5402 self.cfg.SetDiskID(dev, node)
5404 result = self.rpc.call_blockdev_close(node, self.instance.name,
5405 self.instance.disks)
5406 result.Raise("Cannot change disk to secondary on node %s" % node)
5408 def _GoStandalone(self):
5409 """Disconnect from the network.
5412 self.feedback_fn("* changing into standalone mode")
5413 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5414 self.instance.disks)
5415 for node, nres in result.items():
5416 nres.Raise("Cannot disconnect disks node %s" % node)
5418 def _GoReconnect(self, multimaster):
5419 """Reconnect to the network.
5425 msg = "single-master"
5426 self.feedback_fn("* changing disks into %s mode" % msg)
5427 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5428 self.instance.disks,
5429 self.instance.name, multimaster)
5430 for node, nres in result.items():
5431 nres.Raise("Cannot change disks config on node %s" % node)
5433 def _ExecCleanup(self):
5434 """Try to cleanup after a failed migration.
5436 The cleanup is done by:
5437 - check that the instance is running only on one node
5438 (and update the config if needed)
5439 - change disks on its secondary node to secondary
5440 - wait until disks are fully synchronized
5441 - disconnect from the network
5442 - change disks into single-master mode
5443 - wait again until disks are fully synchronized
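In terms of the helpers used below, this corresponds roughly to
_EnsureSecondary on the demoted node, _WaitUntilSync, _GoStandalone,
_GoReconnect(False) and a final _WaitUntilSync.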
5446 instance = self.instance
5447 target_node = self.target_node
5448 source_node = self.source_node
5450 # check running on only one node
5451 self.feedback_fn("* checking where the instance actually runs"
5452 " (if this hangs, the hypervisor might be in a bad state)")
5454 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5455 for node, result in ins_l.items():
5456 result.Raise("Can't contact node %s" % node)
5458 runningon_source = instance.name in ins_l[source_node].payload
5459 runningon_target = instance.name in ins_l[target_node].payload
5461 if runningon_source and runningon_target:
5462 raise errors.OpExecError("Instance seems to be running on two nodes,"
5463 " or the hypervisor is confused. You will have"
5464 " to ensure manually that it runs only on one"
5465 " and restart this operation.")
5467 if not (runningon_source or runningon_target):
5468 raise errors.OpExecError("Instance does not seem to be running at all."
5469 " In this case, it's safer to repair by"
5470 " running 'gnt-instance stop' to ensure disk"
5471 " shutdown, and then restarting it.")
5473 if runningon_target:
5474 # the migration has actually succeeded, we need to update the config
5475 self.feedback_fn("* instance running on secondary node (%s),"
5476 " updating config" % target_node)
5477 instance.primary_node = target_node
5478 self.cfg.Update(instance, self.feedback_fn)
5479 demoted_node = source_node
5481 self.feedback_fn("* instance confirmed to be running on its"
5482 " primary node (%s)" % source_node)
5483 demoted_node = target_node
5485 self._EnsureSecondary(demoted_node)
5487 self._WaitUntilSync()
5488 except errors.OpExecError:
5489 # we ignore errors here, since if the device is standalone, it
5490 # won't be able to sync
5492 self._GoStandalone()
5493 self._GoReconnect(False)
5494 self._WaitUntilSync()
5496 self.feedback_fn("* done")
5498 def _RevertDiskStatus(self):
5499 """Try to revert the disk status after a failed migration.
5502 target_node = self.target_node
5504 self._EnsureSecondary(target_node)
5505 self._GoStandalone()
5506 self._GoReconnect(False)
5507 self._WaitUntilSync()
5508 except errors.OpExecError, err:
5509 self.lu.LogWarning("Migration failed and I can't reconnect the"
5510 " drives: error '%s'\n"
5511 "Please look and recover the instance status" %
5514 def _AbortMigration(self):
5515 """Call the hypervisor code to abort a started migration.
5518 instance = self.instance
5519 target_node = self.target_node
5520 migration_info = self.migration_info
5522 abort_result = self.rpc.call_finalize_migration(target_node,
5526 abort_msg = abort_result.fail_msg
5528 logging.error("Aborting migration failed on target node %s: %s",
5529 target_node, abort_msg)
5530 # Don't raise an exception here, as we still have to try to revert the
5531 # disk status, even if this step failed.
5533 def _ExecMigration(self):
5534 """Migrate an instance.
5536 The migrate is done by:
5537 - change the disks into dual-master mode
5538 - wait until disks are fully synchronized again
5539 - migrate the instance
5540 - change disks on the new secondary node (the old primary) to secondary
5541 - wait until disks are fully synchronized
5542 - change disks into single-master mode
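Roughly, these steps map onto the helpers used below: _GoReconnect(True)
and _WaitUntilSync before the move, call_instance_migrate for the move
itself, then _EnsureSecondary on the old primary, _WaitUntilSync,
_GoStandalone, _GoReconnect(False) and a final _WaitUntilSync.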
5545 instance = self.instance
5546 target_node = self.target_node
5547 source_node = self.source_node
5549 self.feedback_fn("* checking disk consistency between source and target")
5550 for dev in instance.disks:
5551 if not _CheckDiskConsistency(self, dev, target_node, False):
5552 raise errors.OpExecError("Disk %s is degraded or not fully"
5553 " synchronized on target node,"
5554 " aborting migrate." % dev.iv_name)
5556 # First get the migration information from the remote node
5557 result = self.rpc.call_migration_info(source_node, instance)
5558 msg = result.fail_msg
5560 log_err = ("Failed fetching source migration information from %s: %s" %
5562 logging.error(log_err)
5563 raise errors.OpExecError(log_err)
5565 self.migration_info = migration_info = result.payload
5567 # Then switch the disks to master/master mode
5568 self._EnsureSecondary(target_node)
5569 self._GoStandalone()
5570 self._GoReconnect(True)
5571 self._WaitUntilSync()
5573 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5574 result = self.rpc.call_accept_instance(target_node,
5577 self.nodes_ip[target_node])
5579 msg = result.fail_msg
5581 logging.error("Instance pre-migration failed, trying to revert"
5582 " disk status: %s", msg)
5583 self.feedback_fn("Pre-migration failed, aborting")
5584 self._AbortMigration()
5585 self._RevertDiskStatus()
5586 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5587 (instance.name, msg))
5589 self.feedback_fn("* migrating instance to %s" % target_node)
5591 result = self.rpc.call_instance_migrate(source_node, instance,
5592 self.nodes_ip[target_node],
5594 msg = result.fail_msg
5596 logging.error("Instance migration failed, trying to revert"
5597 " disk status: %s", msg)
5598 self.feedback_fn("Migration failed, aborting")
5599 self._AbortMigration()
5600 self._RevertDiskStatus()
5601 raise errors.OpExecError("Could not migrate instance %s: %s" %
5602 (instance.name, msg))
5605 instance.primary_node = target_node
5606 # distribute new instance config to the other nodes
5607 self.cfg.Update(instance, self.feedback_fn)
5609 result = self.rpc.call_finalize_migration(target_node,
5613 msg = result.fail_msg
5615 logging.error("Instance migration succeeded, but finalization failed:"
5617 raise errors.OpExecError("Could not finalize instance migration: %s" %
5620 self._EnsureSecondary(source_node)
5621 self._WaitUntilSync()
5622 self._GoStandalone()
5623 self._GoReconnect(False)
5624 self._WaitUntilSync()
5626 self.feedback_fn("* done")
5628 def Exec(self, feedback_fn):
5629 """Perform the migration.
5632 feedback_fn("Migrating instance %s" % self.instance.name)
5634 self.feedback_fn = feedback_fn
5636 self.source_node = self.instance.primary_node
5637 self.target_node = self.instance.secondary_nodes[0]
5638 self.all_nodes = [self.source_node, self.target_node]
5640 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5641 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5645 return self._ExecCleanup()
5647 return self._ExecMigration()
5650 def _CreateBlockDev(lu, node, instance, device, force_create,
5652 """Create a tree of block devices on a given node.
5654 If this device type has to be created on secondaries, create it and all its children.
5657 If not, just recurse to children keeping the same 'force' value.
5659 @param lu: the lu on whose behalf we execute
5660 @param node: the node on which to create the device
5661 @type instance: L{objects.Instance}
5662 @param instance: the instance which owns the device
5663 @type device: L{objects.Disk}
5664 @param device: the device to create
5665 @type force_create: boolean
5666 @param force_create: whether to force creation of this device; this
5667 will be changed to True whenever we find a device which has
5668 CreateOnSecondary() attribute
5669 @param info: the extra 'metadata' we should attach to the device
5670 (this will be represented as a LVM tag)
5671 @type force_open: boolean
5672 @param force_open: this parameter will be passed to the
5673 L{backend.BlockdevCreate} function where it specifies
5674 whether we run on primary or not, and it affects both
5675 the child assembly and the device's own Open() execution
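Hedged usage sketch, mirroring the call made from _CreateDisks below,
where f_create is True only for the primary (or override target) node::

  _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)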
5678 if device.CreateOnSecondary():
5682 for child in device.children:
5683 _CreateBlockDev(lu, node, instance, child, force_create,
5686 if not force_create:
5689 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5692 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5693 """Create a single block device on a given node.
5695 This will not recurse over children of the device, so they must be created in advance.
5698 @param lu: the lu on whose behalf we execute
5699 @param node: the node on which to create the device
5700 @type instance: L{objects.Instance}
5701 @param instance: the instance which owns the device
5702 @type device: L{objects.Disk}
5703 @param device: the device to create
5704 @param info: the extra 'metadata' we should attach to the device
5705 (this will be represented as a LVM tag)
5706 @type force_open: boolean
5707 @param force_open: this parameter will be passed to the
5708 L{backend.BlockdevCreate} function where it specifies
5709 whether we run on primary or not, and it affects both
5710 the child assembly and the device's own Open() execution
5713 lu.cfg.SetDiskID(device, node)
5714 result = lu.rpc.call_blockdev_create(node, device, device.size,
5715 instance.name, force_open, info)
5716 result.Raise("Can't create block device %s on"
5717 " node %s for instance %s" % (device, node, instance.name))
5718 if device.physical_id is None:
5719 device.physical_id = result.payload
5722 def _GenerateUniqueNames(lu, exts):
5723 """Generate a suitable LV name.
5725 This will generate a logical volume name for the given instance.
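For example, with exts of [".disk0", ".disk1"] (as passed in by
_GenerateDiskTemplate below) the result is a list such as
["<unique-id>.disk0", "<unique-id>.disk1"], where <unique-id> comes from
the configuration's GenerateUniqueID.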
5730 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5731 results.append("%s%s" % (new_id, val))
5735 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5737 """Generate a drbd8 device complete with its children.
5740 port = lu.cfg.AllocatePort()
5741 vgname = lu.cfg.GetVGName()
5742 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
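# Descriptive note: the branch built below is a DRBD8 disk whose two LV
# children hold the instance data (names[0], the full requested size) and
# the DRBD metadata (names[1], a fixed 128 MiB volume).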
5743 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5744 logical_id=(vgname, names[0]))
5745 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5746 logical_id=(vgname, names[1]))
5747 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5748 logical_id=(primary, secondary, port,
5751 children=[dev_data, dev_meta],
5756 def _GenerateDiskTemplate(lu, template_name,
5757 instance_name, primary_node,
5758 secondary_nodes, disk_info,
5759 file_storage_dir, file_driver,
5761 """Generate the entire disk layout for a given template type.
5764 #TODO: compute space requirements
5766 vgname = lu.cfg.GetVGName()
5767 disk_count = len(disk_info)
5769 if template_name == constants.DT_DISKLESS:
5771 elif template_name == constants.DT_PLAIN:
5772 if len(secondary_nodes) != 0:
5773 raise errors.ProgrammerError("Wrong template configuration")
5775 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5776 for i in range(disk_count)])
5777 for idx, disk in enumerate(disk_info):
5778 disk_index = idx + base_index
5779 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5780 logical_id=(vgname, names[idx]),
5781 iv_name="disk/%d" % disk_index,
5783 disks.append(disk_dev)
5784 elif template_name == constants.DT_DRBD8:
5785 if len(secondary_nodes) != 1:
5786 raise errors.ProgrammerError("Wrong template configuration")
5787 remote_node = secondary_nodes[0]
5788 minors = lu.cfg.AllocateDRBDMinor(
5789 [primary_node, remote_node] * len(disk_info), instance_name)
5792 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5793 for i in range(disk_count)]):
5794 names.append(lv_prefix + "_data")
5795 names.append(lv_prefix + "_meta")
5796 for idx, disk in enumerate(disk_info):
5797 disk_index = idx + base_index
5798 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5799 disk["size"], names[idx*2:idx*2+2],
5800 "disk/%d" % disk_index,
5801 minors[idx*2], minors[idx*2+1])
5802 disk_dev.mode = disk["mode"]
5803 disks.append(disk_dev)
5804 elif template_name == constants.DT_FILE:
5805 if len(secondary_nodes) != 0:
5806 raise errors.ProgrammerError("Wrong template configuration")
5808 for idx, disk in enumerate(disk_info):
5809 disk_index = idx + base_index
5810 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5811 iv_name="disk/%d" % disk_index,
5812 logical_id=(file_driver,
5813 "%s/disk%d" % (file_storage_dir,
5816 disks.append(disk_dev)
5818 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
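# Example (illustrative, assuming base_index == 0): for
# template_name == constants.DT_PLAIN and two disks of 512 and 1024 MiB,
# the generated layout contains two LD_LV disks roughly of the form
#
#   objects.Disk(dev_type=constants.LD_LV, size=512,
#                logical_id=(vgname, "<unique-id>.disk0"), iv_name="disk/0")
#   objects.Disk(dev_type=constants.LD_LV, size=1024,
#                logical_id=(vgname, "<unique-id>.disk1"), iv_name="disk/1")
#
# while DT_DRBD8 wraps each data LV (plus a 128 MiB metadata LV) in an
# LD_DRBD8 device via _GenerateDRBD8Branch above.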
5822 def _GetInstanceInfoText(instance):
5823 """Compute the text that should be added to the disk's metadata.
5826 return "originstname+%s" % instance.name
5829 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5830 """Create all disks for an instance.
5832 This abstracts away some work from AddInstance.
5834 @type lu: L{LogicalUnit}
5835 @param lu: the logical unit on whose behalf we execute
5836 @type instance: L{objects.Instance}
5837 @param instance: the instance whose disks we should create
5839 @param to_skip: list of indices to skip
5840 @type target_node: string
5841 @param target_node: if passed, overrides the target node for creation
5843 @return: the success of the creation
5846 info = _GetInstanceInfoText(instance)
5847 if target_node is None:
5848 pnode = instance.primary_node
5849 all_nodes = instance.all_nodes
5854 if instance.disk_template == constants.DT_FILE:
5855 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5856 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5858 result.Raise("Failed to create directory '%s' on"
5859 " node %s" % (file_storage_dir, pnode))
5861 # Note: this needs to be kept in sync with adding of disks in
5862 # LUSetInstanceParams
5863 for idx, device in enumerate(instance.disks):
5864 if to_skip and idx in to_skip:
5866 logging.info("Creating volume %s for instance %s",
5867 device.iv_name, instance.name)
5869 for node in all_nodes:
5870 f_create = node == pnode
5871 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5874 def _RemoveDisks(lu, instance, target_node=None):
5875 """Remove all disks for an instance.
5877 This abstracts away some work from `AddInstance()` and
5878 `RemoveInstance()`. Note that in case some of the devices couldn't
5879 be removed, the removal will continue with the other ones (compare
5880 with `_CreateDisks()`).
5882 @type lu: L{LogicalUnit}
5883 @param lu: the logical unit on whose behalf we execute
5884 @type instance: L{objects.Instance}
5885 @param instance: the instance whose disks we should remove
5886 @type target_node: string
5887 @param target_node: used to override the node on which to remove the disks
5889 @return: the success of the removal
5892 logging.info("Removing block devices for instance %s", instance.name)
5895 for device in instance.disks:
5897 edata = [(target_node, device)]
5899 edata = device.ComputeNodeTree(instance.primary_node)
5900 for node, disk in edata:
5901 lu.cfg.SetDiskID(disk, node)
5902 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5904 lu.LogWarning("Could not remove block device %s on node %s,"
5905 " continuing anyway: %s", device.iv_name, node, msg)
5908 if instance.disk_template == constants.DT_FILE:
5909 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5913 tgt = instance.primary_node
5914 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5916 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5917 file_storage_dir, instance.primary_node, result.fail_msg)
5923 def _ComputeDiskSize(disk_template, disks):
5924 """Compute disk size requirements in the volume group
5927 # Required free disk space as a function of the disk template and disk sizes
5929 constants.DT_DISKLESS: None,
5930 constants.DT_PLAIN: sum(d["size"] for d in disks),
5931 # 128 MB are added for drbd metadata for each disk
5932 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
5933 constants.DT_FILE: None,
5936 if disk_template not in req_size_dict:
5937 raise errors.ProgrammerError("Disk template '%s' size requirement"
5938 " is unknown" % disk_template)
5940 return req_size_dict[disk_template]
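# Worked example (illustrative): for two disks of 1024 MiB each,
#   _ComputeDiskSize(constants.DT_PLAIN, disks) -> 1024 + 1024 = 2048
#   _ComputeDiskSize(constants.DT_DRBD8, disks) -> (1024 + 128) * 2 = 2304
# while DT_DISKLESS and DT_FILE need no space in the volume group (None).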
5943 def _CheckHVParams(lu, nodenames, hvname, hvparams):
5944 """Hypervisor parameter validation.
5946 This function abstracts the hypervisor parameter validation to be
5947 used in both instance create and instance modify.
5949 @type lu: L{LogicalUnit}
5950 @param lu: the logical unit for which we check
5951 @type nodenames: list
5952 @param nodenames: the list of nodes on which we should check
5953 @type hvname: string
5954 @param hvname: the name of the hypervisor we should use
5955 @type hvparams: dict
5956 @param hvparams: the parameters which we need to check
5957 @raise errors.OpPrereqError: if the parameters are not valid
5960 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5963 for node in nodenames:
5967 info.Raise("Hypervisor parameter validation failed on node %s" % node)
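# Usage sketch (illustrative): LUCreateInstance.CheckPrereq below calls
#   _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
# so that the hypervisor-specific checks run on every node that may host
# the instance before any disk or configuration change is made.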
5970 class LUCreateInstance(LogicalUnit):
5971 """Create an instance.
5974 HPATH = "instance-add"
5975 HTYPE = constants.HTYPE_INSTANCE
5976 _OP_REQP = ["instance_name", "disks", "disk_template",
5978 "wait_for_sync", "ip_check", "nics",
5979 "hvparams", "beparams"]
5982 def CheckArguments(self):
5986 # set optional parameters to None if they don't exist
5987 for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
5988 if not hasattr(self.op, attr):
5989 setattr(self.op, attr, None)
5991 # do not require name_check to ease forward/backward compatibility
5993 if not hasattr(self.op, "name_check"):
5994 self.op.name_check = True
5995 if not hasattr(self.op, "no_install"):
5996 self.op.no_install = False
5997 if self.op.no_install and self.op.start:
5998 self.LogInfo("No-installation mode selected, disabling startup")
5999 self.op.start = False
6000 # validate/normalize the instance name
6001 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6002 if self.op.ip_check and not self.op.name_check:
6003 # TODO: make the ip check more flexible and not depend on the name check
6004 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6006 if (self.op.disk_template == constants.DT_FILE and
6007 not constants.ENABLE_FILE_STORAGE):
6008 raise errors.OpPrereqError("File storage disabled at configure time",
6010 # check disk information: either all adopt, or no adopt
6011 has_adopt = has_no_adopt = False
6012 for disk in self.op.disks:
6017 if has_adopt and has_no_adopt:
6018 raise errors.OpPrereqError("Either all disks are adopted or none is",
6021 if self.op.disk_template != constants.DT_PLAIN:
6022 raise errors.OpPrereqError("Disk adoption is only supported for the"
6023 " 'plain' disk template",
6025 if self.op.iallocator is not None:
6026 raise errors.OpPrereqError("Disk adoption not allowed with an"
6027 " iallocator script", errors.ECODE_INVAL)
6028 if self.op.mode == constants.INSTANCE_IMPORT:
6029 raise errors.OpPrereqError("Disk adoption not allowed for"
6030 " instance import", errors.ECODE_INVAL)
6032 self.adopt_disks = has_adopt
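# Example (illustrative): a disk adoption request names existing LVs via the
# "adopt" key, e.g.
#
#   disks=[{"size": 1024, "adopt": "existing-lv-1"},
#          {"size": 1024, "adopt": "existing-lv-2"}]
#
# (the sizes are later overwritten with the real LV sizes in CheckPrereq);
# this is only accepted with the 'plain' disk template, without an
# iallocator and not in import mode, as enforced above.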
6034 def ExpandNames(self):
6035 """ExpandNames for CreateInstance.
6037 Figure out the right locks for instance creation.
6040 self.needed_locks = {}
6042 # cheap checks, mostly valid constants given
6044 # verify creation mode
6045 if self.op.mode not in (constants.INSTANCE_CREATE,
6046 constants.INSTANCE_IMPORT):
6047 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6048 self.op.mode, errors.ECODE_INVAL)
6050 # disk template and mirror node verification
6051 _CheckDiskTemplate(self.op.disk_template)
6053 if self.op.hypervisor is None:
6054 self.op.hypervisor = self.cfg.GetHypervisorType()
6056 cluster = self.cfg.GetClusterInfo()
6057 enabled_hvs = cluster.enabled_hypervisors
6058 if self.op.hypervisor not in enabled_hvs:
6059 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6060 " cluster (%s)" % (self.op.hypervisor,
6061 ",".join(enabled_hvs)),
6064 # check hypervisor parameter syntax (locally)
6065 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6066 filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
6068 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6069 hv_type.CheckParameterSyntax(filled_hvp)
6070 self.hv_full = filled_hvp
6071 # check that we don't specify global parameters on an instance
6072 _CheckGlobalHvParams(self.op.hvparams)
6074 # fill and remember the beparams dict
6075 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6076 self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6079 #### instance parameters check
6081 # instance name verification
6082 if self.op.name_check:
6083 hostname1 = utils.GetHostInfo(self.op.instance_name)
6084 self.op.instance_name = instance_name = hostname1.name
6085 # used in CheckPrereq for ip ping check
6086 self.check_ip = hostname1.ip
6088 instance_name = self.op.instance_name
6089 self.check_ip = None
6091 # this is just a preventive check, but someone might still add this
6092 # instance in the meantime, and creation will fail at lock-add time
6093 if instance_name in self.cfg.GetInstanceList():
6094 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6095 instance_name, errors.ECODE_EXISTS)
6097 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6101 for idx, nic in enumerate(self.op.nics):
6102 nic_mode_req = nic.get("mode", None)
6103 nic_mode = nic_mode_req
6104 if nic_mode is None:
6105 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6107 # in routed mode, for the first nic, the default ip is 'auto'
6108 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6109 default_ip_mode = constants.VALUE_AUTO
6111 default_ip_mode = constants.VALUE_NONE
6113 # ip validity checks
6114 ip = nic.get("ip", default_ip_mode)
6115 if ip is None or ip.lower() == constants.VALUE_NONE:
6117 elif ip.lower() == constants.VALUE_AUTO:
6118 if not self.op.name_check:
6119 raise errors.OpPrereqError("IP address set to auto but name checks"
6120 " have been skipped. Aborting.",
6122 nic_ip = hostname1.ip
6124 if not utils.IsValidIP(ip):
6125 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6126 " like a valid IP" % ip,
6130 # TODO: check the ip address for uniqueness
6131 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6132 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6135 # MAC address verification
6136 mac = nic.get("mac", constants.VALUE_AUTO)
6137 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6138 mac = utils.NormalizeAndValidateMac(mac)
6141 self.cfg.ReserveMAC(mac, self.proc.GetECId())
6142 except errors.ReservationError:
6143 raise errors.OpPrereqError("MAC address %s already in use"
6144 " in cluster" % mac,
6145 errors.ECODE_NOTUNIQUE)
6147 # bridge verification
6148 bridge = nic.get("bridge", None)
6149 link = nic.get("link", None)
6151 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6152 " at the same time", errors.ECODE_INVAL)
6153 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6154 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6161 nicparams[constants.NIC_MODE] = nic_mode_req
6163 nicparams[constants.NIC_LINK] = link
6165 check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6167 objects.NIC.CheckParameterSyntax(check_params)
6168 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6170 # disk checks/pre-build
6172 for disk in self.op.disks:
6173 mode = disk.get("mode", constants.DISK_RDWR)
6174 if mode not in constants.DISK_ACCESS_SET:
6175 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6176 mode, errors.ECODE_INVAL)
6177 size = disk.get("size", None)
6179 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6182 except (TypeError, ValueError):
6183 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6185 new_disk = {"size": size, "mode": mode}
6187 new_disk["adopt"] = disk["adopt"]
6188 self.disks.append(new_disk)
6190 # file storage checks
6191 if (self.op.file_driver and
6192 not self.op.file_driver in constants.FILE_DRIVER):
6193 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6194 self.op.file_driver, errors.ECODE_INVAL)
6196 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6197 raise errors.OpPrereqError("File storage directory path not absolute",
6200 ### Node/iallocator related checks
6201 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6202 raise errors.OpPrereqError("One and only one of iallocator and primary"
6203 " node must be given",
6206 if self.op.iallocator:
6207 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6209 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6210 nodelist = [self.op.pnode]
6211 if self.op.snode is not None:
6212 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6213 nodelist.append(self.op.snode)
6214 self.needed_locks[locking.LEVEL_NODE] = nodelist
6216 # in case of import lock the source node too
6217 if self.op.mode == constants.INSTANCE_IMPORT:
6218 src_node = getattr(self.op, "src_node", None)
6219 src_path = getattr(self.op, "src_path", None)
6221 if src_path is None:
6222 self.op.src_path = src_path = self.op.instance_name
6224 if src_node is None:
6225 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6226 self.op.src_node = None
6227 if os.path.isabs(src_path):
6228 raise errors.OpPrereqError("Importing an instance from an absolute"
6229 " path requires a source node option.",
6232 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6233 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6234 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6235 if not os.path.isabs(src_path):
6236 self.op.src_path = src_path = \
6237 utils.PathJoin(constants.EXPORT_DIR, src_path)
6239 # On import force_variant must be True, because if we forced it at
6240 # initial install, our only chance when importing it back is that it works again.
6242 self.op.force_variant = True
6244 if self.op.no_install:
6245 self.LogInfo("No-installation mode has no effect during import")
6247 else: # INSTANCE_CREATE
6248 if getattr(self.op, "os_type", None) is None:
6249 raise errors.OpPrereqError("No guest OS specified",
6251 self.op.force_variant = getattr(self.op, "force_variant", False)
6253 def _RunAllocator(self):
6254 """Run the allocator based on input opcode.
6257 nics = [n.ToDict() for n in self.nics]
6258 ial = IAllocator(self.cfg, self.rpc,
6259 mode=constants.IALLOCATOR_MODE_ALLOC,
6260 name=self.op.instance_name,
6261 disk_template=self.op.disk_template,
6264 vcpus=self.be_full[constants.BE_VCPUS],
6265 mem_size=self.be_full[constants.BE_MEMORY],
6268 hypervisor=self.op.hypervisor,
6271 ial.Run(self.op.iallocator)
6274 raise errors.OpPrereqError("Can't compute nodes using"
6275 " iallocator '%s': %s" %
6276 (self.op.iallocator, ial.info),
6278 if len(ial.result) != ial.required_nodes:
6279 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6280 " of nodes (%s), required %s" %
6281 (self.op.iallocator, len(ial.result),
6282 ial.required_nodes), errors.ECODE_FAULT)
6283 self.op.pnode = ial.result[0]
6284 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6285 self.op.instance_name, self.op.iallocator,
6286 utils.CommaJoin(ial.result))
6287 if ial.required_nodes == 2:
6288 self.op.snode = ial.result[1]
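# Illustrative result handling: for a template that needs a secondary node
# (e.g. DRBD8) the allocator is expected to return exactly
# ial.required_nodes == 2 node names, e.g.
#   ial.result == ["node1.example.com", "node2.example.com"]
# of which the first becomes the primary and the second the secondary node
# (the host names here are hypothetical).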
6290 def BuildHooksEnv(self):
6293 This runs on master, primary and secondary nodes of the instance.
6297 "ADD_MODE": self.op.mode,
6299 if self.op.mode == constants.INSTANCE_IMPORT:
6300 env["SRC_NODE"] = self.op.src_node
6301 env["SRC_PATH"] = self.op.src_path
6302 env["SRC_IMAGES"] = self.src_images
6304 env.update(_BuildInstanceHookEnv(
6305 name=self.op.instance_name,
6306 primary_node=self.op.pnode,
6307 secondary_nodes=self.secondaries,
6308 status=self.op.start,
6309 os_type=self.op.os_type,
6310 memory=self.be_full[constants.BE_MEMORY],
6311 vcpus=self.be_full[constants.BE_VCPUS],
6312 nics=_NICListToTuple(self, self.nics),
6313 disk_template=self.op.disk_template,
6314 disks=[(d["size"], d["mode"]) for d in self.disks],
6317 hypervisor_name=self.op.hypervisor,
6320 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6325 def CheckPrereq(self):
6326 """Check prerequisites.
6329 if (not self.cfg.GetVGName() and
6330 self.op.disk_template not in constants.DTS_NOT_LVM):
6331 raise errors.OpPrereqError("Cluster does not support lvm-based"
6332 " instances", errors.ECODE_STATE)
6334 if self.op.mode == constants.INSTANCE_IMPORT:
6335 src_node = self.op.src_node
6336 src_path = self.op.src_path
6338 if src_node is None:
6339 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6340 exp_list = self.rpc.call_export_list(locked_nodes)
6342 for node in exp_list:
6343 if exp_list[node].fail_msg:
6345 if src_path in exp_list[node].payload:
6347 self.op.src_node = src_node = node
6348 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6352 raise errors.OpPrereqError("No export found for relative path %s" %
6353 src_path, errors.ECODE_INVAL)
6355 _CheckNodeOnline(self, src_node)
6356 result = self.rpc.call_export_info(src_node, src_path)
6357 result.Raise("No export or invalid export found in dir %s" % src_path)
6359 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6360 if not export_info.has_section(constants.INISECT_EXP):
6361 raise errors.ProgrammerError("Corrupted export config",
6362 errors.ECODE_ENVIRON)
6364 ei_version = export_info.get(constants.INISECT_EXP, 'version')
6365 if (int(ei_version) != constants.EXPORT_VERSION):
6366 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6367 (ei_version, constants.EXPORT_VERSION),
6368 errors.ECODE_ENVIRON)
6370 # Check that the new instance doesn't have less disks than the export
6371 instance_disks = len(self.disks)
6372 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6373 if instance_disks < export_disks:
6374 raise errors.OpPrereqError("Not enough disks to import."
6375 " (instance: %d, export: %d)" %
6376 (instance_disks, export_disks),
6379 self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
6381 for idx in range(export_disks):
6382 option = 'disk%d_dump' % idx
6383 if export_info.has_option(constants.INISECT_INS, option):
6384 # FIXME: are the old os-es, disk sizes, etc. useful?
6385 export_name = export_info.get(constants.INISECT_INS, option)
6386 image = utils.PathJoin(src_path, export_name)
6387 disk_images.append(image)
6389 disk_images.append(False)
6391 self.src_images = disk_images
6393 old_name = export_info.get(constants.INISECT_INS, 'name')
6394 # FIXME: int() here could throw a ValueError on broken exports
6395 exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
6396 if self.op.instance_name == old_name:
6397 for idx, nic in enumerate(self.nics):
6398 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6399 nic_mac_ini = 'nic%d_mac' % idx
6400 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6402 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6404 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6405 if self.op.ip_check:
6406 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6407 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6408 (self.check_ip, self.op.instance_name),
6409 errors.ECODE_NOTUNIQUE)
6411 #### mac address generation
6412 # By generating the MAC address here, both the allocator and the hooks get
6413 # the real final mac address rather than the 'auto' or 'generate' value.
6414 # There is a race condition between the generation and the instance object
6415 # creation, which means that we know the mac is valid now, but we're not
6416 # sure it will be when we actually add the instance. If things go bad
6417 # adding the instance will abort because of a duplicate mac, and the
6418 # creation job will fail.
6419 for nic in self.nics:
6420 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6421 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6425 if self.op.iallocator is not None:
6426 self._RunAllocator()
6428 #### node related checks
6430 # check primary node
6431 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6432 assert self.pnode is not None, \
6433 "Cannot retrieve locked node %s" % self.op.pnode
6435 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6436 pnode.name, errors.ECODE_STATE)
6438 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6439 pnode.name, errors.ECODE_STATE)
6441 self.secondaries = []
6443 # mirror node verification
6444 if self.op.disk_template in constants.DTS_NET_MIRROR:
6445 if self.op.snode is None:
6446 raise errors.OpPrereqError("The networked disk templates need"
6447 " a mirror node", errors.ECODE_INVAL)
6448 if self.op.snode == pnode.name:
6449 raise errors.OpPrereqError("The secondary node cannot be the"
6450 " primary node.", errors.ECODE_INVAL)
6451 _CheckNodeOnline(self, self.op.snode)
6452 _CheckNodeNotDrained(self, self.op.snode)
6453 self.secondaries.append(self.op.snode)
6455 nodenames = [pnode.name] + self.secondaries
6457 req_size = _ComputeDiskSize(self.op.disk_template,
6460 # Check lv size requirements, if not adopting
6461 if req_size is not None and not self.adopt_disks:
6462 _CheckNodesFreeDisk(self, nodenames, req_size)
6464 if self.adopt_disks: # instead, we must check the adoption data
6465 all_lvs = set([i["adopt"] for i in self.disks])
6466 if len(all_lvs) != len(self.disks):
6467 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6469 for lv_name in all_lvs:
6471 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6472 except errors.ReservationError:
6473 raise errors.OpPrereqError("LV named %s used by another instance" %
6474 lv_name, errors.ECODE_NOTUNIQUE)
6476 node_lvs = self.rpc.call_lv_list([pnode.name],
6477 self.cfg.GetVGName())[pnode.name]
6478 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6479 node_lvs = node_lvs.payload
6480 delta = all_lvs.difference(node_lvs.keys())
6482 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6483 utils.CommaJoin(delta),
6485 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6487 raise errors.OpPrereqError("Online logical volumes found, cannot"
6488 " adopt: %s" % utils.CommaJoin(online_lvs),
6490 # update the size of disk based on what is found
6491 for dsk in self.disks:
6492 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6494 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6496 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6498 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6500 # memory check on primary node
6502 _CheckNodeFreeMemory(self, self.pnode.name,
6503 "creating instance %s" % self.op.instance_name,
6504 self.be_full[constants.BE_MEMORY],
6507 self.dry_run_result = list(nodenames)
6509 def Exec(self, feedback_fn):
6510 """Create and add the instance to the cluster.
6513 instance = self.op.instance_name
6514 pnode_name = self.pnode.name
6516 ht_kind = self.op.hypervisor
6517 if ht_kind in constants.HTS_REQ_PORT:
6518 network_port = self.cfg.AllocatePort()
6522 ##if self.op.vnc_bind_address is None:
6523 ## self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
6525 # this is needed because os.path.join does not accept None arguments
6526 if self.op.file_storage_dir is None:
6527 string_file_storage_dir = ""
6529 string_file_storage_dir = self.op.file_storage_dir
6531 # build the full file storage dir path
6532 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6533 string_file_storage_dir, instance)
6536 disks = _GenerateDiskTemplate(self,
6537 self.op.disk_template,
6538 instance, pnode_name,
6542 self.op.file_driver,
6545 iobj = objects.Instance(name=instance, os=self.op.os_type,
6546 primary_node=pnode_name,
6547 nics=self.nics, disks=disks,
6548 disk_template=self.op.disk_template,
6550 network_port=network_port,
6551 beparams=self.op.beparams,
6552 hvparams=self.op.hvparams,
6553 hypervisor=self.op.hypervisor,
6556 if self.adopt_disks:
6557 # rename LVs to the newly-generated names; we need to construct
6558 # 'fake' LV disks with the old data, plus the new unique_id
6559 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6561 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
6562 rename_to.append(t_dsk.logical_id)
6563 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6564 self.cfg.SetDiskID(t_dsk, pnode_name)
6565 result = self.rpc.call_blockdev_rename(pnode_name,
6566 zip(tmp_disks, rename_to))
6567 result.Raise("Failed to rename adopted LVs")
6569 feedback_fn("* creating instance disks...")
6571 _CreateDisks(self, iobj)
6572 except errors.OpExecError:
6573 self.LogWarning("Device creation failed, reverting...")
6575 _RemoveDisks(self, iobj)
6577 self.cfg.ReleaseDRBDMinors(instance)
6580 feedback_fn("adding instance %s to cluster config" % instance)
6582 self.cfg.AddInstance(iobj, self.proc.GetECId())
6584 # Declare that we don't want to remove the instance lock anymore, as we've
6585 # added the instance to the config
6586 del self.remove_locks[locking.LEVEL_INSTANCE]
6587 # Unlock all the nodes
6588 if self.op.mode == constants.INSTANCE_IMPORT:
6589 nodes_keep = [self.op.src_node]
6590 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6591 if node != self.op.src_node]
6592 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6593 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6595 self.context.glm.release(locking.LEVEL_NODE)
6596 del self.acquired_locks[locking.LEVEL_NODE]
6598 if self.op.wait_for_sync:
6599 disk_abort = not _WaitForSync(self, iobj)
6600 elif iobj.disk_template in constants.DTS_NET_MIRROR:
6601 # make sure the disks are not degraded (still sync-ing is ok)
6603 feedback_fn("* checking mirrors status")
6604 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6609 _RemoveDisks(self, iobj)
6610 self.cfg.RemoveInstance(iobj.name)
6611 # Make sure the instance lock gets removed
6612 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6613 raise errors.OpExecError("There are some degraded disks for"
6616 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6617 if self.op.mode == constants.INSTANCE_CREATE:
6618 if not self.op.no_install:
6619 feedback_fn("* running the instance OS create scripts...")
6620 # FIXME: pass debug option from opcode to backend
6621 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6622 self.op.debug_level)
6623 result.Raise("Could not add os for instance %s"
6624 " on node %s" % (instance, pnode_name))
6626 elif self.op.mode == constants.INSTANCE_IMPORT:
6627 feedback_fn("* running the instance OS import scripts...")
6628 src_node = self.op.src_node
6629 src_images = self.src_images
6630 cluster_name = self.cfg.GetClusterName()
6631 # FIXME: pass debug option from opcode to backend
6632 import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6633 src_node, src_images,
6635 self.op.debug_level)
6636 msg = import_result.fail_msg
6638 self.LogWarning("Error while importing the disk images for instance"
6639 " %s on node %s: %s" % (instance, pnode_name, msg))
6641 # also checked in the prereq part
6642 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6646 iobj.admin_up = True
6647 self.cfg.Update(iobj, feedback_fn)
6648 logging.info("Starting instance %s on node %s", instance, pnode_name)
6649 feedback_fn("* starting instance...")
6650 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6651 result.Raise("Could not start instance")
6653 return list(iobj.all_nodes)
6656 class LUConnectConsole(NoHooksLU):
6657 """Connect to an instance's console.
6659 This is somewhat special in that it returns the command line that
6660 you need to run on the master node in order to connect to the console.
6664 _OP_REQP = ["instance_name"]
6667 def ExpandNames(self):
6668 self._ExpandAndLockInstance()
6670 def CheckPrereq(self):
6671 """Check prerequisites.
6673 This checks that the instance is in the cluster.
6676 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6677 assert self.instance is not None, \
6678 "Cannot retrieve locked instance %s" % self.op.instance_name
6679 _CheckNodeOnline(self, self.instance.primary_node)
6681 def Exec(self, feedback_fn):
6682 """Connect to the console of an instance
6685 instance = self.instance
6686 node = instance.primary_node
6688 node_insts = self.rpc.call_instance_list([node],
6689 [instance.hypervisor])[node]
6690 node_insts.Raise("Can't get node information from %s" % node)
6692 if instance.name not in node_insts.payload:
6693 raise errors.OpExecError("Instance %s is not running." % instance.name)
6695 logging.debug("Connecting to console of %s on %s", instance.name, node)
6697 hyper = hypervisor.GetHypervisor(instance.hypervisor)
6698 cluster = self.cfg.GetClusterInfo()
6699 # beparams and hvparams are passed separately, to avoid editing the
6700 # instance and then saving the defaults in the instance itself.
6701 hvparams = cluster.FillHV(instance)
6702 beparams = cluster.FillBE(instance)
6703 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6706 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
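# Usage sketch (illustrative): the returned value is a full SSH command
# line built by self.ssh.BuildCmd() around the hypervisor's console command,
# e.g. something of the shape
#   ssh -t root@<primary-node> <hypervisor console command>
# which the caller is expected to run on the master node; the exact form
# depends on hyper.GetShellCommandForConsole() and the SSH runner.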
6709 class LUReplaceDisks(LogicalUnit):
6710 """Replace the disks of an instance.
6713 HPATH = "mirrors-replace"
6714 HTYPE = constants.HTYPE_INSTANCE
6715 _OP_REQP = ["instance_name", "mode", "disks"]
6718 def CheckArguments(self):
6719 if not hasattr(self.op, "remote_node"):
6720 self.op.remote_node = None
6721 if not hasattr(self.op, "iallocator"):
6722 self.op.iallocator = None
6723 if not hasattr(self.op, "early_release"):
6724 self.op.early_release = False
6726 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6729 def ExpandNames(self):
6730 self._ExpandAndLockInstance()
6732 if self.op.iallocator is not None:
6733 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6735 elif self.op.remote_node is not None:
6736 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6737 self.op.remote_node = remote_node
6739 # Warning: do not remove the locking of the new secondary here
6740 # unless DRBD8.AddChildren is changed to work in parallel;
6741 # currently it doesn't since parallel invocations of
6742 # FindUnusedMinor will conflict
6743 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6744 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6747 self.needed_locks[locking.LEVEL_NODE] = []
6748 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6750 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6751 self.op.iallocator, self.op.remote_node,
6752 self.op.disks, False, self.op.early_release)
6754 self.tasklets = [self.replacer]
6756 def DeclareLocks(self, level):
6757 # If we're not already locking all nodes in the set we have to declare the
6758 # instance's primary/secondary nodes.
6759 if (level == locking.LEVEL_NODE and
6760 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6761 self._LockInstancesNodes()
6763 def BuildHooksEnv(self):
6766 This runs on the master, the primary and all the secondaries.
6769 instance = self.replacer.instance
6771 "MODE": self.op.mode,
6772 "NEW_SECONDARY": self.op.remote_node,
6773 "OLD_SECONDARY": instance.secondary_nodes[0],
6775 env.update(_BuildInstanceHookEnvByObject(self, instance))
6777 self.cfg.GetMasterNode(),
6778 instance.primary_node,
6780 if self.op.remote_node is not None:
6781 nl.append(self.op.remote_node)
6785 class LUEvacuateNode(LogicalUnit):
6786 """Relocate the secondary instances from a node.
6789 HPATH = "node-evacuate"
6790 HTYPE = constants.HTYPE_NODE
6791 _OP_REQP = ["node_name"]
6794 def CheckArguments(self):
6795 if not hasattr(self.op, "remote_node"):
6796 self.op.remote_node = None
6797 if not hasattr(self.op, "iallocator"):
6798 self.op.iallocator = None
6799 if not hasattr(self.op, "early_release"):
6800 self.op.early_release = False
6802 TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6803 self.op.remote_node,
6806 def ExpandNames(self):
6807 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6809 self.needed_locks = {}
6811 # Declare node locks
6812 if self.op.iallocator is not None:
6813 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6815 elif self.op.remote_node is not None:
6816 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6818 # Warning: do not remove the locking of the new secondary here
6819 # unless DRBD8.AddChildren is changed to work in parallel;
6820 # currently it doesn't since parallel invocations of
6821 # FindUnusedMinor will conflict
6822 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
6823 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6826 raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
6828 # Create tasklets for replacing disks for all secondary instances on this
6833 for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
6834 logging.debug("Replacing disks for instance %s", inst.name)
6835 names.append(inst.name)
6837 replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
6838 self.op.iallocator, self.op.remote_node, [],
6839 True, self.op.early_release)
6840 tasklets.append(replacer)
6842 self.tasklets = tasklets
6843 self.instance_names = names
6845 # Declare instance locks
6846 self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
6848 def DeclareLocks(self, level):
6849 # If we're not already locking all nodes in the set we have to declare the
6850 # instance's primary/secondary nodes.
6851 if (level == locking.LEVEL_NODE and
6852 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6853 self._LockInstancesNodes()
6855 def BuildHooksEnv(self):
6858 This runs on the master, the primary and all the secondaries.
6862 "NODE_NAME": self.op.node_name,
6865 nl = [self.cfg.GetMasterNode()]
6867 if self.op.remote_node is not None:
6868 env["NEW_SECONDARY"] = self.op.remote_node
6869 nl.append(self.op.remote_node)
6871 return (env, nl, nl)
6874 class TLReplaceDisks(Tasklet):
6875 """Replaces disks for an instance.
6877 Note: Locking is not within the scope of this class.
6880 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
6881 disks, delay_iallocator, early_release):
6882 """Initializes this class.
6885 Tasklet.__init__(self, lu)
6888 self.instance_name = instance_name
6890 self.iallocator_name = iallocator_name
6891 self.remote_node = remote_node
6893 self.delay_iallocator = delay_iallocator
6894 self.early_release = early_release
6897 self.instance = None
6898 self.new_node = None
6899 self.target_node = None
6900 self.other_node = None
6901 self.remote_node_info = None
6902 self.node_secondary_ip = None
6905 def CheckArguments(mode, remote_node, iallocator):
6906 """Helper function for users of this class.
6909 # check for valid parameter combination
6910 if mode == constants.REPLACE_DISK_CHG:
6911 if remote_node is None and iallocator is None:
6912 raise errors.OpPrereqError("When changing the secondary either an"
6913 " iallocator script must be used or the"
6914 " new node given", errors.ECODE_INVAL)
6916 if remote_node is not None and iallocator is not None:
6917 raise errors.OpPrereqError("Give either the iallocator or the new"
6918 " secondary, not both", errors.ECODE_INVAL)
6920 elif remote_node is not None or iallocator is not None:
6921 # Not replacing the secondary
6922 raise errors.OpPrereqError("The iallocator and new node options can"
6923 " only be used when changing the"
6924 " secondary node", errors.ECODE_INVAL)
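# Valid combinations checked above (illustrative summary):
#   mode == REPLACE_DISK_CHG  -> exactly one of remote_node / iallocator
#   any other mode            -> neither remote_node nor iallocator
# e.g. CheckArguments(constants.REPLACE_DISK_CHG, None, "my-allocator")
# is accepted ("my-allocator" being a hypothetical iallocator script name),
# while passing both a remote node and an iallocator raises OpPrereqError.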
6927 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
6928 """Compute a new secondary node using an IAllocator.
6931 ial = IAllocator(lu.cfg, lu.rpc,
6932 mode=constants.IALLOCATOR_MODE_RELOC,
6934 relocate_from=relocate_from)
6936 ial.Run(iallocator_name)
6939 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
6940 " %s" % (iallocator_name, ial.info),
6943 if len(ial.result) != ial.required_nodes:
6944 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6945 " of nodes (%s), required %s" %
6947 len(ial.result), ial.required_nodes),
6950 remote_node_name = ial.result[0]
6952 lu.LogInfo("Selected new secondary for instance '%s': %s",
6953 instance_name, remote_node_name)
6955 return remote_node_name
6957 def _FindFaultyDisks(self, node_name):
6958 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
6961 def CheckPrereq(self):
6962 """Check prerequisites.
6964 This checks that the instance is in the cluster.
6967 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
6968 assert instance is not None, \
6969 "Cannot retrieve locked instance %s" % self.instance_name
6971 if instance.disk_template != constants.DT_DRBD8:
6972 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
6973 " instances", errors.ECODE_INVAL)
6975 if len(instance.secondary_nodes) != 1:
6976 raise errors.OpPrereqError("The instance has a strange layout,"
6977 " expected one secondary but found %d" %
6978 len(instance.secondary_nodes),
6981 if not self.delay_iallocator:
6982 self._CheckPrereq2()
6984 def _CheckPrereq2(self):
6985 """Check prerequisites, second part.
6987 This function should always be part of CheckPrereq. It was separated and is
6988 now called from Exec because during node evacuation iallocator was only
6989 called with an unmodified cluster model, not taking planned changes into account.
6993 instance = self.instance
6994 secondary_node = instance.secondary_nodes[0]
6996 if self.iallocator_name is None:
6997 remote_node = self.remote_node
6999 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7000 instance.name, instance.secondary_nodes)
7002 if remote_node is not None:
7003 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7004 assert self.remote_node_info is not None, \
7005 "Cannot retrieve locked node %s" % remote_node
7007 self.remote_node_info = None
7009 if remote_node == self.instance.primary_node:
7010 raise errors.OpPrereqError("The specified node is the primary node of"
7011 " the instance.", errors.ECODE_INVAL)
7013 if remote_node == secondary_node:
7014 raise errors.OpPrereqError("The specified node is already the"
7015 " secondary node of the instance.",
7018 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7019 constants.REPLACE_DISK_CHG):
7020 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7023 if self.mode == constants.REPLACE_DISK_AUTO:
7024 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7025 faulty_secondary = self._FindFaultyDisks(secondary_node)
7027 if faulty_primary and faulty_secondary:
7028 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7029 " one node and can not be repaired"
7030 " automatically" % self.instance_name,
7034 self.disks = faulty_primary
7035 self.target_node = instance.primary_node
7036 self.other_node = secondary_node
7037 check_nodes = [self.target_node, self.other_node]
7038 elif faulty_secondary:
7039 self.disks = faulty_secondary
7040 self.target_node = secondary_node
7041 self.other_node = instance.primary_node
7042 check_nodes = [self.target_node, self.other_node]
7048 # Non-automatic modes
7049 if self.mode == constants.REPLACE_DISK_PRI:
7050 self.target_node = instance.primary_node
7051 self.other_node = secondary_node
7052 check_nodes = [self.target_node, self.other_node]
7054 elif self.mode == constants.REPLACE_DISK_SEC:
7055 self.target_node = secondary_node
7056 self.other_node = instance.primary_node
7057 check_nodes = [self.target_node, self.other_node]
7059 elif self.mode == constants.REPLACE_DISK_CHG:
7060 self.new_node = remote_node
7061 self.other_node = instance.primary_node
7062 self.target_node = secondary_node
7063 check_nodes = [self.new_node, self.other_node]
7065 _CheckNodeNotDrained(self.lu, remote_node)
7067 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7068 assert old_node_info is not None
7069 if old_node_info.offline and not self.early_release:
7070 # doesn't make sense to delay the release
7071 self.early_release = True
7072 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7073 " early-release mode", secondary_node)
7076 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7079 # If not specified all disks should be replaced
7081 self.disks = range(len(self.instance.disks))
7083 for node in check_nodes:
7084 _CheckNodeOnline(self.lu, node)
7086 # Check whether disks are valid
7087 for disk_idx in self.disks:
7088 instance.FindDisk(disk_idx)
7090 # Get secondary node IP addresses
7093 for node_name in [self.target_node, self.other_node, self.new_node]:
7094 if node_name is not None:
7095 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7097 self.node_secondary_ip = node_2nd_ip
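# Summary of the node roles set above (illustrative):
#   REPLACE_DISK_PRI:  target_node = primary,   other_node = secondary
#   REPLACE_DISK_SEC:  target_node = secondary, other_node = primary
#   REPLACE_DISK_CHG:  target_node = secondary, other_node = primary,
#                      new_node = the chosen replacement secondary
#   REPLACE_DISK_AUTO: target/other depend on which side has faulty disks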
7099 def Exec(self, feedback_fn):
7100 """Execute disk replacement.
7102 This dispatches the disk replacement to the appropriate handler.
7105 if self.delay_iallocator:
7106 self._CheckPrereq2()
7109 feedback_fn("No disks need replacement")
7112 feedback_fn("Replacing disk(s) %s for %s" %
7113 (utils.CommaJoin(self.disks), self.instance.name))
7115 activate_disks = (not self.instance.admin_up)
7117 # Activate the instance disks if we're replacing them on a down instance
7119 _StartInstanceDisks(self.lu, self.instance, True)
7122 # Should we replace the secondary node?
7123 if self.new_node is not None:
7124 fn = self._ExecDrbd8Secondary
7126 fn = self._ExecDrbd8DiskOnly
7128 return fn(feedback_fn)
7131 # Deactivate the instance disks if we're replacing them on a
7134 _SafeShutdownInstanceDisks(self.lu, self.instance)
7136 def _CheckVolumeGroup(self, nodes):
7137 self.lu.LogInfo("Checking volume groups")
7139 vgname = self.cfg.GetVGName()
7141 # Make sure volume group exists on all involved nodes
7142 results = self.rpc.call_vg_list(nodes)
7144 raise errors.OpExecError("Can't list volume groups on the nodes")
7148 res.Raise("Error checking node %s" % node)
7149 if vgname not in res.payload:
7150 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7153 def _CheckDisksExistence(self, nodes):
7154 # Check disk existence
7155 for idx, dev in enumerate(self.instance.disks):
7156 if idx not in self.disks:
7160 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7161 self.cfg.SetDiskID(dev, node)
7163 result = self.rpc.call_blockdev_find(node, dev)
7165 msg = result.fail_msg
7166 if msg or not result.payload:
7168 msg = "disk not found"
7169 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7172 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7173 for idx, dev in enumerate(self.instance.disks):
7174 if idx not in self.disks:
7177 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7180 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7182 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7183 " replace disks for instance %s" %
7184 (node_name, self.instance.name))
7186 def _CreateNewStorage(self, node_name):
7187 vgname = self.cfg.GetVGName()
7190 for idx, dev in enumerate(self.instance.disks):
7191 if idx not in self.disks:
7194 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7196 self.cfg.SetDiskID(dev, node_name)
7198 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7199 names = _GenerateUniqueNames(self.lu, lv_names)
7201 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7202 logical_id=(vgname, names[0]))
7203 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7204 logical_id=(vgname, names[1]))
7206 new_lvs = [lv_data, lv_meta]
7207 old_lvs = dev.children
7208 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7210 # we pass force_create=True to force the LVM creation
7211 for new_lv in new_lvs:
7212 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7213 _GetInstanceInfoText(self.instance), False)
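# Illustrative shape of iv_names as built above: it maps each DRBD device's
# iv_name to a (dev, old_lvs, new_lvs) tuple, e.g.
#   iv_names["disk/0"] == (drbd_dev, [old_data, old_meta], [new_data, new_meta])
# and _ExecDrbd8DiskOnly below iterates over exactly these tuples.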
7217 def _CheckDevices(self, node_name, iv_names):
7218 for name, (dev, _, _) in iv_names.iteritems():
7219 self.cfg.SetDiskID(dev, node_name)
7221 result = self.rpc.call_blockdev_find(node_name, dev)
7223 msg = result.fail_msg
7224 if msg or not result.payload:
7226 msg = "disk not found"
7227 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7230 if result.payload.is_degraded:
7231 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7233 def _RemoveOldStorage(self, node_name, iv_names):
7234 for name, (_, old_lvs, _) in iv_names.iteritems():
7235 self.lu.LogInfo("Remove logical volumes for %s" % name)
7238 self.cfg.SetDiskID(lv, node_name)
7240 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7242 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7243 hint="remove unused LVs manually")
7245 def _ReleaseNodeLock(self, node_name):
7246 """Releases the lock for a given node."""
7247 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7249 def _ExecDrbd8DiskOnly(self, feedback_fn):
7250 """Replace a disk on the primary or secondary for DRBD 8.
7252 The algorithm for replace is quite complicated:
7254 1. for each disk to be replaced:
7256 1. create new LVs on the target node with unique names
7257 1. detach old LVs from the drbd device
7258 1. rename old LVs to name_replaced.<time_t>
7259 1. rename new LVs to old LVs
7260 1. attach the new LVs (with the old names now) to the drbd device
7262 1. wait for sync across all devices
7264 1. for each modified disk:
7266 1. remove old LVs (which have the name name_replaced.<time_t>)
7268 Failures are not very well handled.
7273 # Step: check device activation
7274 self.lu.LogStep(1, steps_total, "Check device existence")
7275 self._CheckDisksExistence([self.other_node, self.target_node])
7276 self._CheckVolumeGroup([self.target_node, self.other_node])
7278 # Step: check other node consistency
7279 self.lu.LogStep(2, steps_total, "Check peer consistency")
7280 self._CheckDisksConsistency(self.other_node,
7281 self.other_node == self.instance.primary_node,
7284 # Step: create new storage
7285 self.lu.LogStep(3, steps_total, "Allocate new storage")
7286 iv_names = self._CreateNewStorage(self.target_node)
7288 # Step: for each lv, detach+rename*2+attach
7289 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7290 for dev, old_lvs, new_lvs in iv_names.itervalues():
7291 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7293 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7295 result.Raise("Can't detach drbd from local storage on node"
7296 " %s for device %s" % (self.target_node, dev.iv_name))
7298 #cfg.Update(instance)
7300 # ok, we created the new LVs, so now we know we have the needed
7301 # storage; as such, we proceed on the target node to rename
7302 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7303 # using the assumption that logical_id == physical_id (which in
7304 # turn is the unique_id on that node)
7306 # FIXME(iustin): use a better name for the replaced LVs
7307 temp_suffix = int(time.time())
7308 ren_fn = lambda d, suff: (d.physical_id[0],
7309 d.physical_id[1] + "_replaced-%s" % suff)
7311 # Build the rename list based on what LVs exist on the node
7312 rename_old_to_new = []
7313 for to_ren in old_lvs:
7314 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7315 if not result.fail_msg and result.payload:
7317 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7319 self.lu.LogInfo("Renaming the old LVs on the target node")
7320 result = self.rpc.call_blockdev_rename(self.target_node,
7322 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7324 # Now we rename the new LVs to the old LVs
7325 self.lu.LogInfo("Renaming the new LVs on the target node")
7326 rename_new_to_old = [(new, old.physical_id)
7327 for old, new in zip(old_lvs, new_lvs)]
7328 result = self.rpc.call_blockdev_rename(self.target_node,
7330 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7332 for old, new in zip(old_lvs, new_lvs):
7333 new.logical_id = old.logical_id
7334 self.cfg.SetDiskID(new, self.target_node)
7336 for disk in old_lvs:
7337 disk.logical_id = ren_fn(disk, temp_suffix)
7338 self.cfg.SetDiskID(disk, self.target_node)
7340 # Now that the new lvs have the old name, we can add them to the device
7341 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7342 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7344 msg = result.fail_msg
7346 for new_lv in new_lvs:
7347 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7350 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7351 hint=("cleanup manually the unused logical"
7353 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7355 dev.children = new_lvs
7357 self.cfg.Update(self.instance, feedback_fn)
7360 if self.early_release:
7361 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7363 self._RemoveOldStorage(self.target_node, iv_names)
7364 # WARNING: we release both node locks here, do not do other RPCs
7365 # than WaitForSync to the primary node
7366 self._ReleaseNodeLock([self.target_node, self.other_node])
7369 # This can fail as the old devices are degraded and _WaitForSync
7370 # does a combined result over all disks, so we don't check its return value
7371 self.lu.LogStep(cstep, steps_total, "Sync devices")
7373 _WaitForSync(self.lu, self.instance)
7375 # Check all devices manually
7376 self._CheckDevices(self.instance.primary_node, iv_names)
7378 # Step: remove old storage
7379 if not self.early_release:
7380 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7382 self._RemoveOldStorage(self.target_node, iv_names)
7384 def _ExecDrbd8Secondary(self, feedback_fn):
7385 """Replace the secondary node for DRBD 8.
7387 The algorithm for replace is quite complicated:
7388 - for all disks of the instance:
7389 - create new LVs on the new node with same names
7390 - shutdown the drbd device on the old secondary
7391 - disconnect the drbd network on the primary
7392 - create the drbd device on the new secondary
7393 - network attach the drbd on the primary, using an artifice:
7394 the drbd code for Attach() will connect to the network if it
7395 finds a device which is connected to the good local disks but is not network enabled
7397 - wait for sync across all devices
7398 - remove all disks from the old secondary
7400 Failures are not very well handled.
7405 # Step: check device activation
7406 self.lu.LogStep(1, steps_total, "Check device existence")
7407 self._CheckDisksExistence([self.instance.primary_node])
7408 self._CheckVolumeGroup([self.instance.primary_node])
7410 # Step: check other node consistency
7411 self.lu.LogStep(2, steps_total, "Check peer consistency")
7412 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7414 # Step: create new storage
7415 self.lu.LogStep(3, steps_total, "Allocate new storage")
7416 for idx, dev in enumerate(self.instance.disks):
7417 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7418 (self.new_node, idx))
7419 # we pass force_create=True to force LVM creation
7420 for new_lv in dev.children:
7421 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7422 _GetInstanceInfoText(self.instance), False)
7424 # Step 4: drbd minors and drbd setup changes
7425 # after this, we must manually remove the drbd minors on both the
7426 # error and the success paths
7427 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7428 minors = self.cfg.AllocateDRBDMinor([self.new_node
7429 for dev in self.instance.disks],
7431 logging.debug("Allocated minors %r", minors)
7434 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7435 self.lu.LogInfo("Activating a new drbd on %s for disk/%d" %
7436 (self.new_node, idx))
7437 # create new devices on new_node; note that we create two IDs:
7438 # one without port, so the drbd will be activated without
7439 # networking information on the new node at this stage, and one
7440 # with network, for the latter activation in step 4
7441 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7442 if self.instance.primary_node == o_node1:
7445 assert self.instance.primary_node == o_node2, "Three-node instance?"
7448 new_alone_id = (self.instance.primary_node, self.new_node, None,
7449 p_minor, new_minor, o_secret)
7450 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7451 p_minor, new_minor, o_secret)
7453 iv_names[idx] = (dev, dev.children, new_net_id)
7454 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7456 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7457 logical_id=new_alone_id,
7458 children=dev.children,
7461 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7462 _GetInstanceInfoText(self.instance), False)
7463 except errors.GenericError:
7464 self.cfg.ReleaseDRBDMinors(self.instance.name)
7467 # We have new devices, shutdown the drbd on the old secondary
7468 for idx, dev in enumerate(self.instance.disks):
7469 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7470 self.cfg.SetDiskID(dev, self.target_node)
7471 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7473 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7474 " node: %s" % (idx, msg),
7475 hint=("Please cleanup this device manually as"
7476 " soon as possible"))
7478 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7479 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7480 self.node_secondary_ip,
7481 self.instance.disks)\
7482 [self.instance.primary_node]
7484 msg = result.fail_msg
7486 # detaches didn't succeed (unlikely)
7487 self.cfg.ReleaseDRBDMinors(self.instance.name)
7488 raise errors.OpExecError("Can't detach the disks from the network on"
7489 " old node: %s" % (msg,))
7491 # if we managed to detach at least one, we update all the disks of
7492 # the instance to point to the new secondary
7493 self.lu.LogInfo("Updating instance configuration")
7494 for dev, _, new_logical_id in iv_names.itervalues():
7495 dev.logical_id = new_logical_id
7496 self.cfg.SetDiskID(dev, self.instance.primary_node)
7498 self.cfg.Update(self.instance, feedback_fn)
7500 # and now perform the drbd attach
7501 self.lu.LogInfo("Attaching primary drbds to new secondary"
7502 " (standalone => connected)")
7503 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7505 self.node_secondary_ip,
7506 self.instance.disks,
7509 for to_node, to_result in result.items():
7510 msg = to_result.fail_msg
7512 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7514 hint=("please do a gnt-instance info to see the"
7515 " status of disks"))
7517 if self.early_release:
7518 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7520 self._RemoveOldStorage(self.target_node, iv_names)
7521 # WARNING: we release all node locks here, do not do other RPCs
7522 # than WaitForSync to the primary node
7523 self._ReleaseNodeLock([self.instance.primary_node,
7528 # This can fail as the old devices are degraded and _WaitForSync
7529 # does a combined result over all disks, so we don't check its return value
7530 self.lu.LogStep(cstep, steps_total, "Sync devices")
7532 _WaitForSync(self.lu, self.instance)
7534 # Check all devices manually
7535 self._CheckDevices(self.instance.primary_node, iv_names)
7537 # Step: remove old storage
7538 if not self.early_release:
7539 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7540 self._RemoveOldStorage(self.target_node, iv_names)
7543 class LURepairNodeStorage(NoHooksLU):
7544 """Repairs the volume group on a node.
7547 _OP_REQP = ["node_name"]
7550 def CheckArguments(self):
7551 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7553 def ExpandNames(self):
7554 self.needed_locks = {
7555 locking.LEVEL_NODE: [self.op.node_name],
7558 def _CheckFaultyDisks(self, instance, node_name):
7559 """Ensure faulty disks abort the opcode or at least warn."""
7561 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance, node_name, True):
7563 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7564 " node '%s'" % (instance.name, node_name),
7566 except errors.OpPrereqError, err:
7567 if self.op.ignore_consistency:
7568 self.proc.LogWarning(str(err.args[0]))
7572 def CheckPrereq(self):
7573 """Check prerequisites.
7576 storage_type = self.op.storage_type
7578 if (constants.SO_FIX_CONSISTENCY not in
7579 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7580 raise errors.OpPrereqError("Storage units of type '%s' can not be"
7581 " repaired" % storage_type,
7584 # Check whether any instance on this node has faulty disks
7585 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7586 if not inst.admin_up:
7588 check_nodes = set(inst.all_nodes)
7589 check_nodes.discard(self.op.node_name)
7590 for inst_node_name in check_nodes:
7591 self._CheckFaultyDisks(inst, inst_node_name)
7593 def Exec(self, feedback_fn):
7594 feedback_fn("Repairing storage unit '%s' on %s ..." %
7595 (self.op.name, self.op.node_name))
7597 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7598 result = self.rpc.call_storage_execute(self.op.node_name,
7599 self.op.storage_type, st_args,
7601 constants.SO_FIX_CONSISTENCY)
7602 result.Raise("Failed to repair storage unit '%s' on %s" %
7603 (self.op.name, self.op.node_name))
7606 class LUNodeEvacuationStrategy(NoHooksLU):
7607 """Computes the node evacuation strategy.
7610 _OP_REQP = ["nodes"]
7613 def CheckArguments(self):
7614 if not hasattr(self.op, "remote_node"):
7615 self.op.remote_node = None
7616 if not hasattr(self.op, "iallocator"):
7617 self.op.iallocator = None
7618 if self.op.remote_node is not None and self.op.iallocator is not None:
7619 raise errors.OpPrereqError("Give either the iallocator or the new"
7620 " secondary, not both", errors.ECODE_INVAL)
7622 def ExpandNames(self):
7623 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7624 self.needed_locks = locks = {}
7625 if self.op.remote_node is None:
7626 locks[locking.LEVEL_NODE] = locking.ALL_SET
7628 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7629 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7631 def CheckPrereq(self):
7634 def Exec(self, feedback_fn):
7635 if self.op.remote_node is not None:
7637 for node in self.op.nodes:
7638 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7641 if i.primary_node == self.op.remote_node:
7642 raise errors.OpPrereqError("Node %s is the primary node of"
7643 " instance %s, cannot use it as"
7645 (self.op.remote_node, i.name),
7647 result.append([i.name, self.op.remote_node])
7649 ial = IAllocator(self.cfg, self.rpc,
7650 mode=constants.IALLOCATOR_MODE_MEVAC,
7651 evac_nodes=self.op.nodes)
7652 ial.Run(self.op.iallocator, validate=True)
7654 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
7660 class LUGrowDisk(LogicalUnit):
7661 """Grow a disk of an instance.
7665 HTYPE = constants.HTYPE_INSTANCE
7666 _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7669 def ExpandNames(self):
7670 self._ExpandAndLockInstance()
7671 self.needed_locks[locking.LEVEL_NODE] = []
7672 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7674 def DeclareLocks(self, level):
7675 if level == locking.LEVEL_NODE:
7676 self._LockInstancesNodes()
7678 def BuildHooksEnv(self):
7681 This runs on the master, the primary and all the secondaries.
7685 "DISK": self.op.disk,
7686 "AMOUNT": self.op.amount,
7688 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7689 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7692 def CheckPrereq(self):
7693 """Check prerequisites.
7695 This checks that the instance is in the cluster.
7698 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7699 assert instance is not None, \
7700 "Cannot retrieve locked instance %s" % self.op.instance_name
7701 nodenames = list(instance.all_nodes)
7702 for node in nodenames:
7703 _CheckNodeOnline(self, node)
7706 self.instance = instance
7708 if instance.disk_template not in constants.DTS_GROWABLE:
7709 raise errors.OpPrereqError("Instance's disk layout does not support"
7710 " growing.", errors.ECODE_INVAL)
7712 self.disk = instance.FindDisk(self.op.disk)
7714 if instance.disk_template != constants.DT_FILE:
7715 # TODO: check the free disk space for file disks, once that feature is implemented
7717 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
7719 def Exec(self, feedback_fn):
7720 """Execute disk grow.
7723 instance = self.instance
7725 for node in instance.all_nodes:
7726 self.cfg.SetDiskID(disk, node)
7727 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7728 result.Raise("Grow request failed to node %s" % node)
7730 # TODO: Rewrite code to work properly
7731 # DRBD goes into sync mode for a short amount of time after executing the
7732 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7733 # calling "resize" in sync mode fails. Sleeping for a short amount of
7734 # time is a work-around.
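# Illustrative sketch only: the work-around referred to above amounts to a
# brief pause before recording the new size, along the lines of
#   time.sleep(5)
# (the exact delay is a judgment call, not a documented DRBD requirement).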
7737 disk.RecordGrow(self.op.amount)
7738 self.cfg.Update(instance, feedback_fn)
7739 if self.op.wait_for_sync:
7740 disk_abort = not _WaitForSync(self, instance)
7742 self.proc.LogWarning("Warning: disk syncing has not returned a good"
7743 " status.\nPlease check the instance.")
7746 class LUQueryInstanceData(NoHooksLU):
7747 """Query runtime instance data.
7750 _OP_REQP = ["instances", "static"]
7753 def ExpandNames(self):
7754 self.needed_locks = {}
7755 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7757 if not isinstance(self.op.instances, list):
7758 raise errors.OpPrereqError("Invalid argument type 'instances'",
7761 if self.op.instances:
7762 self.wanted_names = []
7763 for name in self.op.instances:
7764 full_name = _ExpandInstanceName(self.cfg, name)
7765 self.wanted_names.append(full_name)
7766 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7768 self.wanted_names = None
7769 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7771 self.needed_locks[locking.LEVEL_NODE] = []
7772 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7774 def DeclareLocks(self, level):
7775 if level == locking.LEVEL_NODE:
7776 self._LockInstancesNodes()
7778 def CheckPrereq(self):
7779 """Check prerequisites.
7781 This only checks the optional instance list against the existing names.
7784 if self.wanted_names is None:
7785 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7787 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7788 in self.wanted_names]
7791 def _ComputeBlockdevStatus(self, node, instance_name, dev):
7792 """Returns the status of a block device
7795 if self.op.static or not node:
7798 self.cfg.SetDiskID(dev, node)
7800 result = self.rpc.call_blockdev_find(node, dev)
7804 result.Raise("Can't compute disk status for %s" % instance_name)
7806 status = result.payload
7810 return (status.dev_path, status.major, status.minor,
7811 status.sync_percent, status.estimated_time,
7812 status.is_degraded, status.ldisk_status)
7814 def _ComputeDiskStatus(self, instance, snode, dev):
7815 """Compute block device status.
7818 if dev.dev_type in constants.LDS_DRBD:
7819 # we change the snode then (otherwise we use the one passed in)
7820 if dev.logical_id[0] == instance.primary_node:
7821 snode = dev.logical_id[1]
7823 snode = dev.logical_id[0]
7825 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
7827 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
7830 dev_children = [self._ComputeDiskStatus(instance, snode, child)
7831 for child in dev.children]
7836 "iv_name": dev.iv_name,
7837 "dev_type": dev.dev_type,
7838 "logical_id": dev.logical_id,
7839 "physical_id": dev.physical_id,
7840 "pstatus": dev_pstatus,
7841 "sstatus": dev_sstatus,
7842 "children": dev_children,
7849 def Exec(self, feedback_fn):
7850 """Gather and return data"""
7853 cluster = self.cfg.GetClusterInfo()
7855 for instance in self.wanted_instances:
7856 if not self.op.static:
7857 remote_info = self.rpc.call_instance_info(instance.primary_node,
7859 instance.hypervisor)
7860 remote_info.Raise("Error checking node %s" % instance.primary_node)
7861 remote_info = remote_info.payload
7862 if remote_info and "state" in remote_info:
7865 remote_state = "down"
7868 if instance.admin_up:
7871 config_state = "down"
7873 disks = [self._ComputeDiskStatus(instance, None, device)
7874 for device in instance.disks]
7877 "name": instance.name,
7878 "config_state": config_state,
7879 "run_state": remote_state,
7880 "pnode": instance.primary_node,
7881 "snodes": instance.secondary_nodes,
7883 # this happens to be the same format used for hooks
7884 "nics": _NICListToTuple(self, instance.nics),
7886 "hypervisor": instance.hypervisor,
7887 "network_port": instance.network_port,
7888 "hv_instance": instance.hvparams,
7889 "hv_actual": cluster.FillHV(instance, skip_globals=True),
7890 "be_instance": instance.beparams,
7891 "be_actual": cluster.FillBE(instance),
7892 "serial_no": instance.serial_no,
7893 "mtime": instance.mtime,
7894 "ctime": instance.ctime,
7895 "uuid": instance.uuid,
7898 result[instance.name] = idict
7903 class LUSetInstanceParams(LogicalUnit):
7904 """Modifies an instances's parameters.
7907 HPATH = "instance-modify"
7908 HTYPE = constants.HTYPE_INSTANCE
7909 _OP_REQP = ["instance_name"]
7912 def CheckArguments(self):
7913 if not hasattr(self.op, 'nics'):
7915 if not hasattr(self.op, 'disks'):
7917 if not hasattr(self.op, 'beparams'):
7918 self.op.beparams = {}
7919 if not hasattr(self.op, 'hvparams'):
7920 self.op.hvparams = {}
7921 if not hasattr(self.op, "disk_template"):
7922 self.op.disk_template = None
7923 if not hasattr(self.op, "remote_node"):
7924 self.op.remote_node = None
7925 if not hasattr(self.op, "os_name"):
7926 self.op.os_name = None
7927 if not hasattr(self.op, "force_variant"):
7928 self.op.force_variant = False
7929 self.op.force = getattr(self.op, "force", False)
7930 if not (self.op.nics or self.op.disks or self.op.disk_template or
7931 self.op.hvparams or self.op.beparams or self.op.os_name):
7932 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
7934 if self.op.hvparams:
7935 _CheckGlobalHvParams(self.op.hvparams)
7939 for disk_op, disk_dict in self.op.disks:
7940 if disk_op == constants.DDM_REMOVE:
7943 elif disk_op == constants.DDM_ADD:
7946 if not isinstance(disk_op, int):
7947 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
7948 if not isinstance(disk_dict, dict):
7949 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
7950 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7952 if disk_op == constants.DDM_ADD:
7953 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
7954 if mode not in constants.DISK_ACCESS_SET:
7955 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
7957 size = disk_dict.get('size', None)
7959 raise errors.OpPrereqError("Required disk parameter size missing",
7963 except (TypeError, ValueError), err:
7964 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
7965 str(err), errors.ECODE_INVAL)
7966 disk_dict['size'] = size
7968 # modification of disk
7969 if 'size' in disk_dict:
7970 raise errors.OpPrereqError("Disk size change not possible, use"
7971 " grow-disk", errors.ECODE_INVAL)
7973 if disk_addremove > 1:
7974 raise errors.OpPrereqError("Only one disk add or remove operation"
7975 " supported at a time", errors.ECODE_INVAL)
7977 if self.op.disks and self.op.disk_template is not None:
7978 raise errors.OpPrereqError("Disk template conversion and other disk"
7979 " changes not supported at the same time",
7982 if self.op.disk_template:
7983 _CheckDiskTemplate(self.op.disk_template)
7984 if (self.op.disk_template in constants.DTS_NET_MIRROR and
7985 self.op.remote_node is None):
7986 raise errors.OpPrereqError("Changing the disk template to a mirrored"
7987 " one requires specifying a secondary node",
7992 for nic_op, nic_dict in self.op.nics:
7993 if nic_op == constants.DDM_REMOVE:
7996 elif nic_op == constants.DDM_ADD:
7999 if not isinstance(nic_op, int):
8000 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8001 if not isinstance(nic_dict, dict):
8002 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8003 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8005 # nic_dict should be a dict
8006 nic_ip = nic_dict.get('ip', None)
8007 if nic_ip is not None:
8008 if nic_ip.lower() == constants.VALUE_NONE:
8009 nic_dict['ip'] = None
8011 if not utils.IsValidIP(nic_ip):
8012 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8015 nic_bridge = nic_dict.get('bridge', None)
8016 nic_link = nic_dict.get('link', None)
8017 if nic_bridge and nic_link:
8018 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8019 " at the same time", errors.ECODE_INVAL)
8020 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8021 nic_dict['bridge'] = None
8022 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8023 nic_dict['link'] = None
8025 if nic_op == constants.DDM_ADD:
8026 nic_mac = nic_dict.get('mac', None)
8028 nic_dict['mac'] = constants.VALUE_AUTO
8030 if 'mac' in nic_dict:
8031 nic_mac = nic_dict['mac']
8032 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8033 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8035 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8036 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8037 " modifying an existing nic",
8040 if nic_addremove > 1:
8041 raise errors.OpPrereqError("Only one NIC add or remove operation"
8042 " supported at a time", errors.ECODE_INVAL)
8044 def ExpandNames(self):
8045 self._ExpandAndLockInstance()
8046 self.needed_locks[locking.LEVEL_NODE] = []
8047 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8049 def DeclareLocks(self, level):
8050 if level == locking.LEVEL_NODE:
8051 self._LockInstancesNodes()
8052 if self.op.disk_template and self.op.remote_node:
8053 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8054 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8056 def BuildHooksEnv(self):
8059 This runs on the master, primary and secondaries.
8063 if constants.BE_MEMORY in self.be_new:
8064 args['memory'] = self.be_new[constants.BE_MEMORY]
8065 if constants.BE_VCPUS in self.be_new:
8066 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8067 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8068 # information at all.
8071 nic_override = dict(self.op.nics)
8072 c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
8073 for idx, nic in enumerate(self.instance.nics):
8074 if idx in nic_override:
8075 this_nic_override = nic_override[idx]
8077 this_nic_override = {}
8078 if 'ip' in this_nic_override:
8079 ip = this_nic_override['ip']
8082 if 'mac' in this_nic_override:
8083 mac = this_nic_override['mac']
8086 if idx in self.nic_pnew:
8087 nicparams = self.nic_pnew[idx]
8089 nicparams = objects.FillDict(c_nicparams, nic.nicparams)
8090 mode = nicparams[constants.NIC_MODE]
8091 link = nicparams[constants.NIC_LINK]
8092 args['nics'].append((ip, mac, mode, link))
8093 if constants.DDM_ADD in nic_override:
8094 ip = nic_override[constants.DDM_ADD].get('ip', None)
8095 mac = nic_override[constants.DDM_ADD]['mac']
8096 nicparams = self.nic_pnew[constants.DDM_ADD]
8097 mode = nicparams[constants.NIC_MODE]
8098 link = nicparams[constants.NIC_LINK]
8099 args['nics'].append((ip, mac, mode, link))
8100 elif constants.DDM_REMOVE in nic_override:
8101 del args['nics'][-1]
8103 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8104 if self.op.disk_template:
8105 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8106 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8110 def _GetUpdatedParams(old_params, update_dict,
8111 default_values, parameter_types):
8112 """Return the new params dict for the given params.
8114 @type old_params: dict
8115 @param old_params: old parameters
8116 @type update_dict: dict
8117 @param update_dict: dict containing new parameter values,
8118 or constants.VALUE_DEFAULT to reset the
8119 parameter to its default value
8120 @type default_values: dict
8121 @param default_values: default values for the filled parameters
8122 @type parameter_types: dict
8123 @param parameter_types: dict mapping target dict keys to types
8124 in constants.ENFORCEABLE_TYPES
8125 @rtype: (dict, dict)
8126 @return: (new_parameters, filled_parameters)
8129 params_copy = copy.deepcopy(old_params)
8130 for key, val in update_dict.iteritems():
8131 if val == constants.VALUE_DEFAULT:
8133 del params_copy[key]
8137 params_copy[key] = val
8138 utils.ForceDictType(params_copy, parameter_types)
8139 params_filled = objects.FillDict(default_values, params_copy)
8140 return (params_copy, params_filled)
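# A small worked example of the semantics above (hypothetical values):
#   old_params     = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda1"}
#   update_dict    = {"root_path": constants.VALUE_DEFAULT}
#   default_values = {"kernel_path": "", "root_path": "/dev/vda1"}
# The first returned dict drops "root_path" (reset to its default), while the
# filled dict gets the default value back:
#   new_parameters    == {"kernel_path": "/boot/vmlinuz"}
#   filled_parameters == {"kernel_path": "/boot/vmlinuz",
#                         "root_path": "/dev/vda1"}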
8142 def CheckPrereq(self):
8143 """Check prerequisites.
8145 This only checks the instance list against the existing names.
8148 self.force = self.op.force
8150 # checking the new params on the primary/secondary nodes
8152 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8153 cluster = self.cluster = self.cfg.GetClusterInfo()
8154 assert self.instance is not None, \
8155 "Cannot retrieve locked instance %s" % self.op.instance_name
8156 pnode = instance.primary_node
8157 nodelist = list(instance.all_nodes)
8159 if self.op.disk_template:
8160 if instance.disk_template == self.op.disk_template:
8161 raise errors.OpPrereqError("Instance already has disk template %s" %
8162 instance.disk_template, errors.ECODE_INVAL)
8164 if (instance.disk_template,
8165 self.op.disk_template) not in self._DISK_CONVERSIONS:
8166 raise errors.OpPrereqError("Unsupported disk template conversion from"
8167 " %s to %s" % (instance.disk_template,
8168 self.op.disk_template),
8170 if self.op.disk_template in constants.DTS_NET_MIRROR:
8171 _CheckNodeOnline(self, self.op.remote_node)
8172 _CheckNodeNotDrained(self, self.op.remote_node)
8173 disks = [{"size": d.size} for d in instance.disks]
8174 required = _ComputeDiskSize(self.op.disk_template, disks)
8175 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8176 _CheckInstanceDown(self, instance, "cannot change disk template")
8178 # hvparams processing
8179 if self.op.hvparams:
8180 i_hvdict, hv_new = self._GetUpdatedParams(
8181 instance.hvparams, self.op.hvparams,
8182 cluster.hvparams[instance.hypervisor],
8183 constants.HVS_PARAMETER_TYPES)
8185 hypervisor.GetHypervisor(
8186 instance.hypervisor).CheckParameterSyntax(hv_new)
8187 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8188 self.hv_new = hv_new # the new actual values
8189 self.hv_inst = i_hvdict # the new dict (without defaults)
8191 self.hv_new = self.hv_inst = {}
8193 # beparams processing
8194 if self.op.beparams:
8195 i_bedict, be_new = self._GetUpdatedParams(
8196 instance.beparams, self.op.beparams,
8197 cluster.beparams[constants.PP_DEFAULT],
8198 constants.BES_PARAMETER_TYPES)
8199 self.be_new = be_new # the new actual values
8200 self.be_inst = i_bedict # the new dict (without defaults)
8202 self.be_new = self.be_inst = {}
8206 if constants.BE_MEMORY in self.op.beparams and not self.force:
8207 mem_check_list = [pnode]
8208 if be_new[constants.BE_AUTO_BALANCE]:
8209 # either we changed auto_balance to yes or it was already enabled before
8210 mem_check_list.extend(instance.secondary_nodes)
8211 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8212 instance.hypervisor)
8213 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8214 instance.hypervisor)
8215 pninfo = nodeinfo[pnode]
8216 msg = pninfo.fail_msg
8218 # Assume the primary node is unreachable and go ahead
8219 self.warn.append("Can't get info from primary node %s: %s" %
8221 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8222 self.warn.append("Node data from primary node %s doesn't contain"
8223 " free memory information" % pnode)
8224 elif instance_info.fail_msg:
8225 self.warn.append("Can't get instance runtime information: %s" %
8226 instance_info.fail_msg)
8228 if instance_info.payload:
8229 current_mem = int(instance_info.payload['memory'])
8231 # Assume instance not running
8232 # (there is a slight race condition here, but it's not very probable,
8233 # and we have no other way to check)
8235 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8236 pninfo.payload['memory_free'])
8238 raise errors.OpPrereqError("This change will prevent the instance"
8239 " from starting, due to %d MB of memory"
8240 " missing on its primary node" % miss_mem,
8243 if be_new[constants.BE_AUTO_BALANCE]:
8244 for node, nres in nodeinfo.items():
8245 if node not in instance.secondary_nodes:
8249 self.warn.append("Can't get info from secondary node %s: %s" %
8251 elif not isinstance(nres.payload.get('memory_free', None), int):
8252 self.warn.append("Secondary node %s didn't return free"
8253 " memory information" % node)
8254 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8255 self.warn.append("Not enough memory to failover instance to"
8256 " secondary node %s" % node)
8261 for nic_op, nic_dict in self.op.nics:
8262 if nic_op == constants.DDM_REMOVE:
8263 if not instance.nics:
8264 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8267 if nic_op != constants.DDM_ADD:
8269 if not instance.nics:
8270 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8271 " no NICs" % nic_op,
8273 if nic_op < 0 or nic_op >= len(instance.nics):
8274 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8276 (nic_op, len(instance.nics) - 1),
8278 old_nic_params = instance.nics[nic_op].nicparams
8279 old_nic_ip = instance.nics[nic_op].ip
8284 update_params_dict = dict([(key, nic_dict[key])
8285 for key in constants.NICS_PARAMETERS
8286 if key in nic_dict])
8288 if 'bridge' in nic_dict:
8289 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8291 new_nic_params, new_filled_nic_params = \
8292 self._GetUpdatedParams(old_nic_params, update_params_dict,
8293 cluster.nicparams[constants.PP_DEFAULT],
8294 constants.NICS_PARAMETER_TYPES)
8295 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8296 self.nic_pinst[nic_op] = new_nic_params
8297 self.nic_pnew[nic_op] = new_filled_nic_params
8298 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8300 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8301 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8302 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8304 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8306 self.warn.append(msg)
8308 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8309 if new_nic_mode == constants.NIC_MODE_ROUTED:
8310 if 'ip' in nic_dict:
8311 nic_ip = nic_dict['ip']
8315 raise errors.OpPrereqError('Cannot set the nic ip to None'
8316 ' on a routed nic', errors.ECODE_INVAL)
8317 if 'mac' in nic_dict:
8318 nic_mac = nic_dict['mac']
8320 raise errors.OpPrereqError('Cannot set the nic mac to None',
8322 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8323 # otherwise generate the mac
8324 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8326 # or validate/reserve the current one
8328 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8329 except errors.ReservationError:
8330 raise errors.OpPrereqError("MAC address %s already in use"
8331 " in cluster" % nic_mac,
8332 errors.ECODE_NOTUNIQUE)
8335 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8336 raise errors.OpPrereqError("Disk operations not supported for"
8337 " diskless instances",
8339 for disk_op, _ in self.op.disks:
8340 if disk_op == constants.DDM_REMOVE:
8341 if len(instance.disks) == 1:
8342 raise errors.OpPrereqError("Cannot remove the last disk of"
8343 " an instance", errors.ECODE_INVAL)
8344 _CheckInstanceDown(self, instance, "cannot remove disks")
8346 if (disk_op == constants.DDM_ADD and
8347 len(instance.disks) >= constants.MAX_DISKS):
8348 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8349 " add more" % constants.MAX_DISKS,
8351 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8353 if disk_op < 0 or disk_op >= len(instance.disks):
8354 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8356 (disk_op, len(instance.disks)),
8360 if self.op.os_name and not self.op.force:
8361 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8362 self.op.force_variant)
8366 def _ConvertPlainToDrbd(self, feedback_fn):
8367 """Converts an instance from plain to drbd.
8370 feedback_fn("Converting template to drbd")
8371 instance = self.instance
8372 pnode = instance.primary_node
8373 snode = self.op.remote_node
8375 # create a fake disk info for _GenerateDiskTemplate
8376 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8377 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8378 instance.name, pnode, [snode],
8379 disk_info, None, None, 0)
8380 info = _GetInstanceInfoText(instance)
8381 feedback_fn("Creating aditional volumes...")
8382 # first, create the missing data and meta devices
8383 for disk in new_disks:
8384 # unfortunately this is... not too nice
8385 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8387 for child in disk.children:
8388 _CreateSingleBlockDev(self, snode, instance, child, info, True)
8389 # at this stage, all new LVs have been created, so we can rename the old ones
8391 feedback_fn("Renaming original volumes...")
8392 rename_list = [(o, n.children[0].logical_id)
8393 for (o, n) in zip(instance.disks, new_disks)]
8394 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8395 result.Raise("Failed to rename original LVs")
8397 feedback_fn("Initializing DRBD devices...")
8398 # all child devices are in place, we can now create the DRBD devices
8399 for disk in new_disks:
8400 for node in [pnode, snode]:
8401 f_create = node == pnode
8402 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8404 # at this point, the instance has been modified
8405 instance.disk_template = constants.DT_DRBD8
8406 instance.disks = new_disks
8407 self.cfg.Update(instance, feedback_fn)
8409 # disks are created, waiting for sync
8410 disk_abort = not _WaitForSync(self, instance)
8412 raise errors.OpExecError("There are some degraded disks for"
8413 " this instance, please cleanup manually")
8415 def _ConvertDrbdToPlain(self, feedback_fn):
8416 """Converts an instance from drbd to plain.
8419 instance = self.instance
8420 assert len(instance.secondary_nodes) == 1
8421 pnode = instance.primary_node
8422 snode = instance.secondary_nodes[0]
8423 feedback_fn("Converting template to plain")
8425 old_disks = instance.disks
8426 new_disks = [d.children[0] for d in old_disks]
8428 # copy over size and mode
8429 for parent, child in zip(old_disks, new_disks):
8430 child.size = parent.size
8431 child.mode = parent.mode
8433 # update instance structure
8434 instance.disks = new_disks
8435 instance.disk_template = constants.DT_PLAIN
8436 self.cfg.Update(instance, feedback_fn)
8438 feedback_fn("Removing volumes on the secondary node...")
8439 for disk in old_disks:
8440 self.cfg.SetDiskID(disk, snode)
8441 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8443 self.LogWarning("Could not remove block device %s on node %s,"
8444 " continuing anyway: %s", disk.iv_name, snode, msg)
8446 feedback_fn("Removing unneeded volumes on the primary node...")
8447 for idx, disk in enumerate(old_disks):
8448 meta = disk.children[1]
8449 self.cfg.SetDiskID(meta, pnode)
8450 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8452 self.LogWarning("Could not remove metadata for disk %d on node %s,"
8453 " continuing anyway: %s", idx, pnode, msg)
8456 def Exec(self, feedback_fn):
8457 """Modifies an instance.
8459 All parameters take effect only at the next restart of the instance.
8462 # Process here the warnings from CheckPrereq, as we don't have a
8463 # feedback_fn there.
8464 for warn in self.warn:
8465 feedback_fn("WARNING: %s" % warn)
8468 instance = self.instance
8470 for disk_op, disk_dict in self.op.disks:
8471 if disk_op == constants.DDM_REMOVE:
8472 # remove the last disk
8473 device = instance.disks.pop()
8474 device_idx = len(instance.disks)
8475 for node, disk in device.ComputeNodeTree(instance.primary_node):
8476 self.cfg.SetDiskID(disk, node)
8477 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8479 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8480 " continuing anyway", device_idx, node, msg)
8481 result.append(("disk/%d" % device_idx, "remove"))
8482 elif disk_op == constants.DDM_ADD:
8484 if instance.disk_template == constants.DT_FILE:
8485 file_driver, file_path = instance.disks[0].logical_id
8486 file_path = os.path.dirname(file_path)
8488 file_driver = file_path = None
8489 disk_idx_base = len(instance.disks)
8490 new_disk = _GenerateDiskTemplate(self,
8491 instance.disk_template,
8492 instance.name, instance.primary_node,
8493 instance.secondary_nodes,
8498 instance.disks.append(new_disk)
8499 info = _GetInstanceInfoText(instance)
8501 logging.info("Creating volume %s for instance %s",
8502 new_disk.iv_name, instance.name)
8503 # Note: this needs to be kept in sync with _CreateDisks
8505 for node in instance.all_nodes:
8506 f_create = node == instance.primary_node
8508 _CreateBlockDev(self, node, instance, new_disk,
8509 f_create, info, f_create)
8510 except errors.OpExecError, err:
8511 self.LogWarning("Failed to create volume %s (%s) on"
8513 new_disk.iv_name, new_disk, node, err)
8514 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8515 (new_disk.size, new_disk.mode)))
8517 # change a given disk
8518 instance.disks[disk_op].mode = disk_dict['mode']
8519 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8521 if self.op.disk_template:
8522 r_shut = _ShutdownInstanceDisks(self, instance)
8524 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8525 " proceed with disk template conversion")
8526 mode = (instance.disk_template, self.op.disk_template)
8528 self._DISK_CONVERSIONS[mode](self, feedback_fn)
8530 self.cfg.ReleaseDRBDMinors(instance.name)
8532 result.append(("disk_template", self.op.disk_template))
8535 for nic_op, nic_dict in self.op.nics:
8536 if nic_op == constants.DDM_REMOVE:
8537 # remove the last nic
8538 del instance.nics[-1]
8539 result.append(("nic.%d" % len(instance.nics), "remove"))
8540 elif nic_op == constants.DDM_ADD:
8541 # mac and bridge should be set by now
8542 mac = nic_dict['mac']
8543 ip = nic_dict.get('ip', None)
8544 nicparams = self.nic_pinst[constants.DDM_ADD]
8545 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8546 instance.nics.append(new_nic)
8547 result.append(("nic.%d" % (len(instance.nics) - 1),
8548 "add:mac=%s,ip=%s,mode=%s,link=%s" %
8549 (new_nic.mac, new_nic.ip,
8550 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8551 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8554 for key in 'mac', 'ip':
8556 setattr(instance.nics[nic_op], key, nic_dict[key])
8557 if nic_op in self.nic_pinst:
8558 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8559 for key, val in nic_dict.iteritems():
8560 result.append(("nic.%s/%d" % (key, nic_op), val))
8563 if self.op.hvparams:
8564 instance.hvparams = self.hv_inst
8565 for key, val in self.op.hvparams.iteritems():
8566 result.append(("hv/%s" % key, val))
8569 if self.op.beparams:
8570 instance.beparams = self.be_inst
8571 for key, val in self.op.beparams.iteritems():
8572 result.append(("be/%s" % key, val))
8576 instance.os = self.op.os_name
8578 self.cfg.Update(instance, feedback_fn)
8582 _DISK_CONVERSIONS = {
8583 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8584 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8587 class LUQueryExports(NoHooksLU):
8588 """Query the exports list
8591 _OP_REQP = ['nodes']
8594 def ExpandNames(self):
8595 self.needed_locks = {}
8596 self.share_locks[locking.LEVEL_NODE] = 1
8597 if not self.op.nodes:
8598 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8600 self.needed_locks[locking.LEVEL_NODE] = \
8601 _GetWantedNodes(self, self.op.nodes)
8603 def CheckPrereq(self):
8604 """Check prerequisites.
8607 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8609 def Exec(self, feedback_fn):
8610 """Compute the list of all the exported system images.
8613 @return: a dictionary with the structure node->(export-list)
8614 where export-list is a list of the instances exported on that node
8618 rpcresult = self.rpc.call_export_list(self.nodes)
8620 for node in rpcresult:
8621 if rpcresult[node].fail_msg:
8622 result[node] = False
8624 result[node] = rpcresult[node].payload
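# Example of the structure built above (illustrative names): nodes that could
# not be queried map to False, all others to their export list:
#   {"node1.example.com": ["instance1.example.com", "instance3.example.com"],
#    "node2.example.com": False}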
8629 class LUExportInstance(LogicalUnit):
8630 """Export an instance to an image in the cluster.
8633 HPATH = "instance-export"
8634 HTYPE = constants.HTYPE_INSTANCE
8635 _OP_REQP = ["instance_name", "target_node", "shutdown"]
8638 def CheckArguments(self):
8639 """Check the arguments.
8642 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
8643 constants.DEFAULT_SHUTDOWN_TIMEOUT)
8645 def ExpandNames(self):
8646 self._ExpandAndLockInstance()
8647 # FIXME: lock only instance primary and destination node
8649 # Sad but true, for now we have to lock all nodes, as we don't know where
8650 # the previous export might be, and in this LU we search for it and
8651 # remove it from its current node. In the future we could fix this by:
8652 # - making a tasklet to search (share-lock all), then create the new one,
8653 # then one to remove, after
8654 # - removing the removal operation altogether
8655 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8657 def DeclareLocks(self, level):
8658 """Last minute lock declaration."""
8659 # All nodes are locked anyway, so nothing to do here.
8661 def BuildHooksEnv(self):
8664 This will run on the master, primary node and target node.
8668 "EXPORT_NODE": self.op.target_node,
8669 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
8670 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
8672 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8673 nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
8674 self.op.target_node]
8677 def CheckPrereq(self):
8678 """Check prerequisites.
8680 This checks that the instance and node names are valid.
8683 instance_name = self.op.instance_name
8684 self.instance = self.cfg.GetInstanceInfo(instance_name)
8685 assert self.instance is not None, \
8686 "Cannot retrieve locked instance %s" % self.op.instance_name
8687 _CheckNodeOnline(self, self.instance.primary_node)
8689 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8690 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
8691 assert self.dst_node is not None
8693 _CheckNodeOnline(self, self.dst_node.name)
8694 _CheckNodeNotDrained(self, self.dst_node.name)
8696 # instance disk type verification
8697 for disk in self.instance.disks:
8698 if disk.dev_type == constants.LD_FILE:
8699 raise errors.OpPrereqError("Export not supported for instances with"
8700 " file-based disks", errors.ECODE_INVAL)
8702 def Exec(self, feedback_fn):
8703 """Export an instance to an image in the cluster.
8706 instance = self.instance
8707 dst_node = self.dst_node
8708 src_node = instance.primary_node
8710 if self.op.shutdown:
8711 # shutdown the instance, but not the disks
8712 feedback_fn("Shutting down instance %s" % instance.name)
8713 result = self.rpc.call_instance_shutdown(src_node, instance,
8714 self.shutdown_timeout)
8715 result.Raise("Could not shutdown instance %s on"
8716 " node %s" % (instance.name, src_node))
8718 vgname = self.cfg.GetVGName()
8722 # set the disk IDs correctly since call_instance_start needs the
8723 # correct drbd minor to create the symlinks
8724 for disk in instance.disks:
8725 self.cfg.SetDiskID(disk, src_node)
8727 activate_disks = (not instance.admin_up)
8730 # Activate the instance disks if we're exporting a stopped instance
8731 feedback_fn("Activating disks for %s" % instance.name)
8732 _StartInstanceDisks(self, instance, None)
8738 for idx, disk in enumerate(instance.disks):
8739 feedback_fn("Creating a snapshot of disk/%s on node %s" %
8742 # result.payload will be a snapshot of an lvm leaf of the one we
8744 result = self.rpc.call_blockdev_snapshot(src_node, disk)
8745 msg = result.fail_msg
8747 self.LogWarning("Could not snapshot disk/%s on node %s: %s",
8749 snap_disks.append(False)
8751 disk_id = (vgname, result.payload)
8752 new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
8753 logical_id=disk_id, physical_id=disk_id,
8754 iv_name=disk.iv_name)
8755 snap_disks.append(new_dev)
8758 if self.op.shutdown and instance.admin_up:
8759 feedback_fn("Starting instance %s" % instance.name)
8760 result = self.rpc.call_instance_start(src_node, instance, None, None)
8761 msg = result.fail_msg
8763 _ShutdownInstanceDisks(self, instance)
8764 raise errors.OpExecError("Could not start instance: %s" % msg)
8766 # TODO: check for size
8768 cluster_name = self.cfg.GetClusterName()
8769 for idx, dev in enumerate(snap_disks):
8770 feedback_fn("Exporting snapshot %s from %s to %s" %
8771 (idx, src_node, dst_node.name))
8773 # FIXME: pass debug from opcode to backend
8774 result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
8775 instance, cluster_name,
8776 idx, self.op.debug_level)
8777 msg = result.fail_msg
8779 self.LogWarning("Could not export disk/%s from node %s to"
8780 " node %s: %s", idx, src_node, dst_node.name, msg)
8781 dresults.append(False)
8783 dresults.append(True)
8784 msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
8786 self.LogWarning("Could not remove snapshot for disk/%d from node"
8787 " %s: %s", idx, src_node, msg)
8789 dresults.append(False)
8791 feedback_fn("Finalizing export on %s" % dst_node.name)
8792 result = self.rpc.call_finalize_export(dst_node.name, instance,
8795 msg = result.fail_msg
8797 self.LogWarning("Could not finalize export for instance %s"
8798 " on node %s: %s", instance.name, dst_node.name, msg)
8803 feedback_fn("Deactivating disks for %s" % instance.name)
8804 _ShutdownInstanceDisks(self, instance)
8806 nodelist = self.cfg.GetNodeList()
8807 nodelist.remove(dst_node.name)
8809 # on one-node clusters nodelist will be empty after the removal
8810 # if we proceed the backup would be removed because OpQueryExports
8811 # substitutes an empty list with the full cluster node list.
8812 iname = instance.name
8814 feedback_fn("Removing old exports for instance %s" % iname)
8815 exportlist = self.rpc.call_export_list(nodelist)
8816 for node in exportlist:
8817 if exportlist[node].fail_msg:
8819 if iname in exportlist[node].payload:
8820 msg = self.rpc.call_export_remove(node, iname).fail_msg
8822 self.LogWarning("Could not remove older export for instance %s"
8823 " on node %s: %s", iname, node, msg)
8824 return fin_resu, dresults
8827 class LURemoveExport(NoHooksLU):
8828 """Remove exports related to the named instance.
8831 _OP_REQP = ["instance_name"]
8834 def ExpandNames(self):
8835 self.needed_locks = {}
8836 # We need all nodes to be locked in order for RemoveExport to work, but we
8837 # don't need to lock the instance itself, as nothing will happen to it (and
8838 # we can remove exports also for a removed instance)
8839 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8841 def CheckPrereq(self):
8842 """Check prerequisites.
8846 def Exec(self, feedback_fn):
8847 """Remove any export.
8850 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
8851 # If the instance was not found we'll try with the name that was passed in.
8852 # This will only work if it was an FQDN, though.
8854 if not instance_name:
8856 instance_name = self.op.instance_name
8858 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
8859 exportlist = self.rpc.call_export_list(locked_nodes)
8861 for node in exportlist:
8862 msg = exportlist[node].fail_msg
8864 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
8866 if instance_name in exportlist[node].payload:
8868 result = self.rpc.call_export_remove(node, instance_name)
8869 msg = result.fail_msg
8871 logging.error("Could not remove export for instance %s"
8872 " on node %s: %s", instance_name, node, msg)
8874 if fqdn_warn and not found:
8875 feedback_fn("Export not found. If trying to remove an export belonging"
8876 " to a deleted instance please use its Fully Qualified"
8880 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
8883 This is an abstract class which is the parent of all the other tags LUs.
8887 def ExpandNames(self):
8888 self.needed_locks = {}
8889 if self.op.kind == constants.TAG_NODE:
8890 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
8891 self.needed_locks[locking.LEVEL_NODE] = self.op.name
8892 elif self.op.kind == constants.TAG_INSTANCE:
8893 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
8894 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
8896 def CheckPrereq(self):
8897 """Check prerequisites.
8900 if self.op.kind == constants.TAG_CLUSTER:
8901 self.target = self.cfg.GetClusterInfo()
8902 elif self.op.kind == constants.TAG_NODE:
8903 self.target = self.cfg.GetNodeInfo(self.op.name)
8904 elif self.op.kind == constants.TAG_INSTANCE:
8905 self.target = self.cfg.GetInstanceInfo(self.op.name)
8907 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
8908 str(self.op.kind), errors.ECODE_INVAL)
8911 class LUGetTags(TagsLU):
8912 """Returns the tags of a given object.
8915 _OP_REQP = ["kind", "name"]
8918 def Exec(self, feedback_fn):
8919 """Returns the tag list.
8922 return list(self.target.GetTags())
8925 class LUSearchTags(NoHooksLU):
8926 """Searches the tags for a given pattern.
8929 _OP_REQP = ["pattern"]
8932 def ExpandNames(self):
8933 self.needed_locks = {}
8935 def CheckPrereq(self):
8936 """Check prerequisites.
8938 This checks the pattern passed for validity by compiling it.
8942 self.re = re.compile(self.op.pattern)
8943 except re.error, err:
8944 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
8945 (self.op.pattern, err), errors.ECODE_INVAL)
8947 def Exec(self, feedback_fn):
8948 """Returns the tag list.
8952 tgts = [("/cluster", cfg.GetClusterInfo())]
8953 ilist = cfg.GetAllInstancesInfo().values()
8954 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
8955 nlist = cfg.GetAllNodesInfo().values()
8956 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
8958 for path, target in tgts:
8959 for tag in target.GetTags():
8960 if self.re.search(tag):
8961 results.append((path, tag))
8965 class LUAddTags(TagsLU):
8966 """Sets a tag on a given object.
8969 _OP_REQP = ["kind", "name", "tags"]
8972 def CheckPrereq(self):
8973 """Check prerequisites.
8975 This checks the type and length of the tag name and value.
8978 TagsLU.CheckPrereq(self)
8979 for tag in self.op.tags:
8980 objects.TaggableObject.ValidateTag(tag)
8982 def Exec(self, feedback_fn):
8987 for tag in self.op.tags:
8988 self.target.AddTag(tag)
8989 except errors.TagError, err:
8990 raise errors.OpExecError("Error while setting tag: %s" % str(err))
8991 self.cfg.Update(self.target, feedback_fn)
8994 class LUDelTags(TagsLU):
8995 """Delete a list of tags from a given object.
8998 _OP_REQP = ["kind", "name", "tags"]
9001 def CheckPrereq(self):
9002 """Check prerequisites.
9004 This checks that we have the given tag.
9007 TagsLU.CheckPrereq(self)
9008 for tag in self.op.tags:
9009 objects.TaggableObject.ValidateTag(tag)
9010 del_tags = frozenset(self.op.tags)
9011 cur_tags = self.target.GetTags()
9012 if not del_tags <= cur_tags:
9013 diff_tags = del_tags - cur_tags
9014 diff_names = ["'%s'" % tag for tag in diff_tags]
9016 raise errors.OpPrereqError("Tag(s) %s not found" %
9017 (",".join(diff_names)), errors.ECODE_NOENT)
9019 def Exec(self, feedback_fn):
9020 """Remove the tag from the object.
9023 for tag in self.op.tags:
9024 self.target.RemoveTag(tag)
9025 self.cfg.Update(self.target, feedback_fn)
9028 class LUTestDelay(NoHooksLU):
9029 """Sleep for a specified amount of time.
9031 This LU sleeps on the master and/or nodes for a specified amount of time.
9035 _OP_REQP = ["duration", "on_master", "on_nodes"]
9038 def ExpandNames(self):
9039 """Expand names and set required locks.
9041 This expands the node list, if any.
9044 self.needed_locks = {}
9045 if self.op.on_nodes:
9046 # _GetWantedNodes can be used here, but is not always appropriate to use
9047 # this way in ExpandNames. Check the LogicalUnit.ExpandNames docstring for more information.
9049 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9050 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9052 def CheckPrereq(self):
9053 """Check prerequisites.
9057 def Exec(self, feedback_fn):
9058 """Do the actual sleep.
9061 if self.op.on_master:
9062 if not utils.TestDelay(self.op.duration):
9063 raise errors.OpExecError("Error during master delay test")
9064 if self.op.on_nodes:
9065 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9066 for node, node_result in result.items():
9067 node_result.Raise("Failure during rpc call to node %s" % node)
9070 class IAllocator(object):
9071 """IAllocator framework.
9073 An IAllocator instance has three sets of attributes:
9074 - cfg that is needed to query the cluster
9075 - input data (all members of the _KEYS class attribute are required)
9076 - four buffer attributes (in|out_data|text), that represent the
9077 input (to the external script) in text and data structure format,
9078 and the output from it, again in two formats
9079 - the result variables from the script (success, info, nodes) for
9083 # pylint: disable-msg=R0902
9084 # lots of instance attributes
9086 "name", "mem_size", "disks", "disk_template",
9087 "os", "tags", "nics", "vcpus", "hypervisor",
9090 "name", "relocate_from",
9096 def __init__(self, cfg, rpc, mode, **kwargs):
9099 # init buffer variables
9100 self.in_text = self.out_text = self.in_data = self.out_data = None
9101 # init all input fields so that pylint is happy
9103 self.mem_size = self.disks = self.disk_template = None
9104 self.os = self.tags = self.nics = self.vcpus = None
9105 self.hypervisor = None
9106 self.relocate_from = None
9108 self.evac_nodes = None
9110 self.required_nodes = None
9111 # init result fields
9112 self.success = self.info = self.result = None
9113 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9114 keyset = self._ALLO_KEYS
9115 fn = self._AddNewInstance
9116 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9117 keyset = self._RELO_KEYS
9118 fn = self._AddRelocateInstance
9119 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9120 keyset = self._EVAC_KEYS
9121 fn = self._AddEvacuateNodes
9123 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9124 " IAllocator" % self.mode)
9126 if key not in keyset:
9127 raise errors.ProgrammerError("Invalid input parameter '%s' to"
9128 " IAllocator" % key)
9129 setattr(self, key, kwargs[key])
9132 if key not in kwargs:
9133 raise errors.ProgrammerError("Missing input parameter '%s' to"
9134 " IAllocator" % key)
9135 self._BuildInputData(fn)
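# Minimal construction sketch for the relocation mode (illustrative values;
# the required keys are those in _RELO_KEYS above):
#   ial = IAllocator(cfg, rpc,
#                    mode=constants.IALLOCATOR_MODE_RELOC,
#                    name="instance1.example.com",
#                    relocate_from=["node2.example.com"])
#   ial.Run("hail")
#   if not ial.success:
#     raise errors.OpExecError("iallocator failure: %s" % ial.info)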
9137 def _ComputeClusterData(self):
9138 """Compute the generic allocator input data.
9140 This is the data that is independent of the actual operation.
9144 cluster_info = cfg.GetClusterInfo()
9147 "version": constants.IALLOCATOR_VERSION,
9148 "cluster_name": cfg.GetClusterName(),
9149 "cluster_tags": list(cluster_info.GetTags()),
9150 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9151 # we don't have job IDs
9153 iinfo = cfg.GetAllInstancesInfo().values()
9154 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9158 node_list = cfg.GetNodeList()
9160 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9161 hypervisor_name = self.hypervisor
9162 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9163 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9164 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9165 hypervisor_name = cluster_info.enabled_hypervisors[0]
9167 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9170 self.rpc.call_all_instances_info(node_list,
9171 cluster_info.enabled_hypervisors)
9172 for nname, nresult in node_data.items():
9173 # first fill in static (config-based) values
9174 ninfo = cfg.GetNodeInfo(nname)
9176 "tags": list(ninfo.GetTags()),
9177 "primary_ip": ninfo.primary_ip,
9178 "secondary_ip": ninfo.secondary_ip,
9179 "offline": ninfo.offline,
9180 "drained": ninfo.drained,
9181 "master_candidate": ninfo.master_candidate,
9184 if not (ninfo.offline or ninfo.drained):
9185 nresult.Raise("Can't get data for node %s" % nname)
9186 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9188 remote_info = nresult.payload
9190 for attr in ['memory_total', 'memory_free', 'memory_dom0',
9191 'vg_size', 'vg_free', 'cpu_total']:
9192 if attr not in remote_info:
9193 raise errors.OpExecError("Node '%s' didn't return attribute"
9194 " '%s'" % (nname, attr))
9195 if not isinstance(remote_info[attr], int):
9196 raise errors.OpExecError("Node '%s' returned invalid value"
9198 (nname, attr, remote_info[attr]))
9199 # compute memory used by primary instances
9200 i_p_mem = i_p_up_mem = 0
9201 for iinfo, beinfo in i_list:
9202 if iinfo.primary_node == nname:
9203 i_p_mem += beinfo[constants.BE_MEMORY]
9204 if iinfo.name not in node_iinfo[nname].payload:
9207 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9208 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9209 remote_info['memory_free'] -= max(0, i_mem_diff)
9212 i_p_up_mem += beinfo[constants.BE_MEMORY]
9214 # compute memory used by instances
9216 "total_memory": remote_info['memory_total'],
9217 "reserved_memory": remote_info['memory_dom0'],
9218 "free_memory": remote_info['memory_free'],
9219 "total_disk": remote_info['vg_size'],
9220 "free_disk": remote_info['vg_free'],
9221 "total_cpus": remote_info['cpu_total'],
9222 "i_pri_memory": i_p_mem,
9223 "i_pri_up_memory": i_p_up_mem,
9227 node_results[nname] = pnr
9228 data["nodes"] = node_results
9232 for iinfo, beinfo in i_list:
9234 for nic in iinfo.nics:
9235 filled_params = objects.FillDict(
9236 cluster_info.nicparams[constants.PP_DEFAULT],
9238 nic_dict = {"mac": nic.mac,
9240 "mode": filled_params[constants.NIC_MODE],
9241 "link": filled_params[constants.NIC_LINK],
9243 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9244 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9245 nic_data.append(nic_dict)
9247 "tags": list(iinfo.GetTags()),
9248 "admin_up": iinfo.admin_up,
9249 "vcpus": beinfo[constants.BE_VCPUS],
9250 "memory": beinfo[constants.BE_MEMORY],
9252 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9254 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9255 "disk_template": iinfo.disk_template,
9256 "hypervisor": iinfo.hypervisor,
9258 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9260 instance_data[iinfo.name] = pir
9262 data["instances"] = instance_data
9266 def _AddNewInstance(self):
9267 """Add new instance data to allocator structure.
9269 This in combination with _ComputeClusterData will create the
9270 correct structure needed as input for the allocator.
9272 The checks for the completeness of the opcode must have already been done.
9276 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9278 if self.disk_template in constants.DTS_NET_MIRROR:
9279 self.required_nodes = 2
9281 self.required_nodes = 1
9284 "disk_template": self.disk_template,
9287 "vcpus": self.vcpus,
9288 "memory": self.mem_size,
9289 "disks": self.disks,
9290 "disk_space_total": disk_space,
9292 "required_nodes": self.required_nodes,
9296 def _AddRelocateInstance(self):
9297 """Add relocate instance data to allocator structure.
9299 This in combination with _ComputeClusterData will create the
9300 correct structure needed as input for the allocator.
9302 The checks for the completeness of the opcode must have already been done.
9306 instance = self.cfg.GetInstanceInfo(self.name)
9307 if instance is None:
9308 raise errors.ProgrammerError("Unknown instance '%s' passed to"
9309 " IAllocator" % self.name)
9311 if instance.disk_template not in constants.DTS_NET_MIRROR:
9312 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9315 if len(instance.secondary_nodes) != 1:
9316 raise errors.OpPrereqError("Instance has not exactly one secondary node",
9319 self.required_nodes = 1
9320 disk_sizes = [{'size': disk.size} for disk in instance.disks]
9321 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9325 "disk_space_total": disk_space,
9326 "required_nodes": self.required_nodes,
9327 "relocate_from": self.relocate_from,
9331 def _AddEvacuateNodes(self):
9332 """Add evacuate nodes data to allocator structure.
9336 "evac_nodes": self.evac_nodes
9340 def _BuildInputData(self, fn):
9341 """Build input data structures.
9344 self._ComputeClusterData()
9347 request["type"] = self.mode
9348 self.in_data["request"] = request
9350 self.in_text = serializer.Dump(self.in_data)
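# The resulting text is the serialized (JSON) document handed to the external
# script; a trimmed sketch of its layout (contents elided):
#   {"version": ..., "cluster_name": ..., "cluster_tags": [...],
#    "enabled_hypervisors": [...],
#    "nodes": {...}, "instances": {...},
#    "request": {"type": <one of the IALLOCATOR_MODE_* values>, ...}}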
9352 def Run(self, name, validate=True, call_fn=None):
9353 """Run an instance allocator and return the results.
9357 call_fn = self.rpc.call_iallocator_runner
9359 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9360 result.Raise("Failure while running the iallocator script")
9362 self.out_text = result.payload
9364 self._ValidateResult()
9366 def _ValidateResult(self):
9367 """Process the allocator results.
9369 This will process the results and, if successful, save them in
9370 self.out_data and the other result attributes.
9374 rdict = serializer.Load(self.out_text)
9375 except Exception, err:
9376 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9378 if not isinstance(rdict, dict):
9379 raise errors.OpExecError("Can't parse iallocator results: not a dict")
9381 # TODO: remove backwards compatibility in later versions
9382 if "nodes" in rdict and "result" not in rdict:
9383 rdict["result"] = rdict["nodes"]
9386 for key in "success", "info", "result":
9387 if key not in rdict:
9388 raise errors.OpExecError("Can't parse iallocator results:"
9389 " missing key '%s'" % key)
9390 setattr(self, key, rdict[key])
9392 if not isinstance(rdict["result"], list):
9393 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9395 self.out_data = rdict
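# A well-formed reply from the script therefore looks like (sketch):
#   {"success": true, "info": "allocation successful",
#    "result": ["node2.example.com"]}
# while a failure keeps the same keys, e.g.:
#   {"success": false, "info": "not enough memory", "result": []}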
9398 class LUTestAllocator(NoHooksLU):
9399 """Run allocator tests.
9401 This LU runs the allocator tests
9404 _OP_REQP = ["direction", "mode", "name"]
9406 def CheckPrereq(self):
9407 """Check prerequisites.
9409 This checks the opcode parameters depending on the direction and mode of the test.
9412 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9413 for attr in ["name", "mem_size", "disks", "disk_template",
9414 "os", "tags", "nics", "vcpus"]:
9415 if not hasattr(self.op, attr):
9416 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
9417 attr, errors.ECODE_INVAL)
9418 iname = self.cfg.ExpandInstanceName(self.op.name)
9419 if iname is not None:
9420 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
9421 iname, errors.ECODE_EXISTS)
9422 if not isinstance(self.op.nics, list):
9423 raise errors.OpPrereqError("Invalid parameter 'nics'",
9425 for row in self.op.nics:
9426 if (not isinstance(row, dict) or
9429 "bridge" not in row):
9430 raise errors.OpPrereqError("Invalid contents of the 'nics'"
9431 " parameter", errors.ECODE_INVAL)
9432 if not isinstance(self.op.disks, list):
9433 raise errors.OpPrereqError("Invalid parameter 'disks'",
9435 for row in self.op.disks:
9436 if (not isinstance(row, dict) or
9437 "size" not in row or
9438 not isinstance(row["size"], int) or
9439 "mode" not in row or
9440 row["mode"] not in ['r', 'w']):
9441 raise errors.OpPrereqError("Invalid contents of the 'disks'"
9442 " parameter", errors.ECODE_INVAL)
9443 if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
9444 self.op.hypervisor = self.cfg.GetHypervisorType()
9445 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9446 if not hasattr(self.op, "name"):
9447 raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
9449 fname = _ExpandInstanceName(self.cfg, self.op.name)
9450 self.op.name = fname
9451 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
9452 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9453 if not hasattr(self.op, "evac_nodes"):
9454 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
9455 " opcode input", errors.ECODE_INVAL)
9457 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
9458 self.op.mode, errors.ECODE_INVAL)
9460 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
9461 if not hasattr(self.op, "allocator") or self.op.allocator is None:
9462 raise errors.OpPrereqError("Missing allocator name",
9464 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
9465 raise errors.OpPrereqError("Wrong allocator test '%s'" %
9466 self.op.direction, errors.ECODE_INVAL)
9468 def Exec(self, feedback_fn):
9469 """Run the allocator test.
9472 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9473 ial = IAllocator(self.cfg, self.rpc,
9476 mem_size=self.op.mem_size,
9477 disks=self.op.disks,
9478 disk_template=self.op.disk_template,
9482 vcpus=self.op.vcpus,
9483 hypervisor=self.op.hypervisor,
9485 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9486 ial = IAllocator(self.cfg, self.rpc,
9489 relocate_from=list(self.relocate_from),
9491 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9492 ial = IAllocator(self.cfg, self.rpc,
9494 evac_nodes=self.op.evac_nodes)
9496 raise errors.ProgrammerError("Uncatched mode %s in"
9497 " LUTestAllocator.Exec", self.op.mode)
9499 if self.op.direction == constants.IALLOCATOR_DIR_IN:
9500 result = ial.in_text
9502 ial.Run(self.op.allocator, validate=False)
9503 result = ial.out_text