4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
38 from ganeti import ssh
39 from ganeti import utils
40 from ganeti import errors
41 from ganeti import hypervisor
42 from ganeti import locking
43 from ganeti import constants
44 from ganeti import objects
45 from ganeti import serializer
46 from ganeti import ssconf
49 class LogicalUnit(object):
50 """Logical Unit base class.
52 Subclasses must follow these rules:
53 - implement ExpandNames
54 - implement CheckPrereq (except when tasklets are used)
55 - implement Exec (except when tasklets are used)
56 - implement BuildHooksEnv
57 - redefine HPATH and HTYPE
58 - optionally redefine their run requirements:
59 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
61 Note that all commands require root permissions.
63 @ivar dry_run_result: the value (if any) that will be returned to the caller
64 in dry-run mode (signalled by opcode dry_run parameter)
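  A minimal, purely illustrative subclass sketch (the class name, HPATH value
  and empty hook environment below are hypothetical, not taken from this
  module)::

    class LUDoNothing(LogicalUnit):
      HPATH = "do-nothing"
      HTYPE = constants.HTYPE_CLUSTER
      _OP_REQP = []

      def ExpandNames(self):
        self.needed_locks = {}

      def BuildHooksEnv(self):
        return {}, [], []

      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        return None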
72 def __init__(self, processor, op, context, rpc):
73 """Constructor for LogicalUnit.
75 This needs to be overridden in derived classes in order to check op validity.
81 self.cfg = context.cfg
82 self.context = context
84 # Dicts used to declare locking needs to mcpu
85 self.needed_locks = None
86 self.acquired_locks = {}
87 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
89 self.remove_locks = {}
90 # Used to force good behavior when calling helper functions
91 self.recalculate_locks = {}
94 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
95 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
96 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
98 self.dry_run_result = None
99 # support for generic debug attribute
100 if (not hasattr(self.op, "debug_level") or
101 not isinstance(self.op.debug_level, int)):
102 self.op.debug_level = 0
107 for attr_name in self._OP_REQP:
108 attr_val = getattr(op, attr_name, None)
110 raise errors.OpPrereqError("Required parameter '%s' missing" %
111 attr_name, errors.ECODE_INVAL)
113 self.CheckArguments()
116 """Returns the SshRunner object
120 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
123 ssh = property(fget=__GetSSH)
125 def CheckArguments(self):
126 """Check syntactic validity for the opcode arguments.
128 This method is for doing a simple syntactic check and ensuring
129 validity of opcode parameters, without any cluster-related
130 checks. While the same can be accomplished in ExpandNames and/or
131 CheckPrereq, doing these separately is better because:
133 - ExpandNames is left as purely a lock-related function
134 - CheckPrereq is run after we have acquired locks (and possibly waited for them)
137 The function is allowed to change the self.op attribute so that
138 later methods no longer need to worry about missing parameters.
143 def ExpandNames(self):
144 """Expand names for this LU.
146 This method is called before starting to execute the opcode, and it should
147 update all the parameters of the opcode to their canonical form (e.g. a
148 short node name must be fully expanded after this method has successfully
149 completed). This way locking, hooks, logging, etc. can work correctly.
151 LUs which implement this method must also populate the self.needed_locks
152 member, as a dict with lock levels as keys, and a list of needed lock names
155 - use an empty dict if you don't need any lock
156 - if you don't need any lock at a particular level omit that level
157 - don't put anything for the BGL level
158 - if you want all locks at a level use locking.ALL_SET as a value
160 If you need to share locks (rather than acquire them exclusively) at one
161 level you can modify self.share_locks, setting a true value (usually 1) for
162 that level. By default locks are not shared.
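    For example, to declare that node-level locks should be acquired in shared
    mode (an illustrative snippet)::

      self.share_locks[locking.LEVEL_NODE] = 1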
164 This function can also define a list of tasklets, which then will be
165 executed in order instead of the usual LU-level CheckPrereq and Exec
166 functions, if those are not defined by the LU.
170 # Acquire all nodes and one instance
171 self.needed_locks = {
172 locking.LEVEL_NODE: locking.ALL_SET,
173 locking.LEVEL_INSTANCE: ['instance1.example.tld'],
175 # Acquire just two nodes
176 self.needed_locks = {
177 locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
180 self.needed_locks = {} # No, you can't leave it to the default value None
183 # The implementation of this method is mandatory only if the new LU is
184 # concurrent, so that old LUs don't need to be changed all at the same time.
187 self.needed_locks = {} # Exclusive LUs don't need locks.
189 raise NotImplementedError
191 def DeclareLocks(self, level):
192 """Declare LU locking needs for a level
194 While most LUs can just declare their locking needs at ExpandNames time,
195 sometimes there's the need to calculate some locks after having acquired
196 the ones before. This function is called just before acquiring locks at a
197 particular level, but after acquiring the ones at lower levels, and permits
198 such calculations. It can be used to modify self.needed_locks, and by
199 default it does nothing.
201 This function is only called if you have something already set in
202 self.needed_locks for the level.
204 @param level: Locking level which is going to be locked
205 @type level: member of ganeti.locking.LEVELS
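    A sketch of a typical implementation, assuming ExpandNames already
    declared the instance lock and set
    self.recalculate_locks[locking.LEVEL_NODE]::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()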
209 def CheckPrereq(self):
210 """Check prerequisites for this LU.
212 This method should check that the prerequisites for the execution
213 of this LU are fulfilled. It can do internode communication, but
214 it should be idempotent - no cluster or system changes are allowed.
217 The method should raise errors.OpPrereqError in case something is
218 not fulfilled. Its return value is ignored.
220 This method should also update all the parameters of the opcode to
221 their canonical form if it hasn't been done by ExpandNames before.
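    An illustrative check (the opcode attribute and error code are examples,
    not requirements of the base class)::

      self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
      if self.instance is None:
        raise errors.OpPrereqError("Instance '%s' not known" %
                                   self.op.instance_name, errors.ECODE_NOENT)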
224 if self.tasklets is not None:
225 for (idx, tl) in enumerate(self.tasklets):
226 logging.debug("Checking prerequisites for tasklet %s/%s",
227 idx + 1, len(self.tasklets))
230 raise NotImplementedError
232 def Exec(self, feedback_fn):
235 This method should implement the actual work. It should raise
236 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
240 if self.tasklets is not None:
241 for (idx, tl) in enumerate(self.tasklets):
242 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
245 raise NotImplementedError
247 def BuildHooksEnv(self):
248 """Build hooks environment for this LU.
250 This method should return a three-element tuple consisting of: a dict
251 containing the environment that will be used for running the
252 specific hook for this LU, a list of node names on which the hook
253 should run before the execution, and a list of node names on which
254 the hook should run after the execution.
256 The keys of the dict must not be prefixed with 'GANETI_', as this will
257 be handled by the hooks runner. Also note that additional keys will be
258 added by the hooks runner. If the LU doesn't define any
259 environment, an empty dict (and not None) should be returned.
261 If no nodes are needed, an empty list (and not None) should be returned.
263 Note that if the HPATH for a LU class is None, this function will not be called.
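    An illustrative return value for a cluster-wide LU, mirroring the pattern
    used by the cluster LUs later in this module::

      env = {"OP_TARGET": self.cfg.GetClusterName()}
      mn = self.cfg.GetMasterNode()
      return env, [mn], [mn]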
267 raise NotImplementedError
269 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
270 """Notify the LU about the results of its hooks.
272 This method is called every time a hooks phase is executed, and notifies
273 the Logical Unit about the hooks' result. The LU can then use it to alter
274 its result based on the hooks. By default the method does nothing and the
275 previous result is passed back unchanged, but any LU can override it if it
276 wants to use the local cluster hook-scripts somehow.
278 @param phase: one of L{constants.HOOKS_PHASE_POST} or
279 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
280 @param hook_results: the results of the multi-node hooks rpc call
281 @param feedback_fn: function used to send feedback back to the caller
282 @param lu_result: the previous Exec result this LU had, or None
284 @return: the new Exec result, based on the previous result
288 # API must be kept, thus we ignore the unused-argument and
289 # could-be-a-function warnings
290 # pylint: disable-msg=W0613,R0201
293 def _ExpandAndLockInstance(self):
294 """Helper function to expand and lock an instance.
296 Many LUs that work on an instance take its name in self.op.instance_name
297 and need to expand it and then declare the expanded name for locking. This
298 function does it, and then updates self.op.instance_name to the expanded
299 name. It also initializes needed_locks as a dict, if this hasn't been done before.
303 if self.needed_locks is None:
304 self.needed_locks = {}
306 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
307 "_ExpandAndLockInstance called with instance-level locks set"
308 self.op.instance_name = _ExpandInstanceName(self.cfg,
309 self.op.instance_name)
310 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
312 def _LockInstancesNodes(self, primary_only=False):
313 """Helper function to declare instances' nodes for locking.
315 This function should be called after locking one or more instances to lock
316 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
317 with all primary or secondary nodes for instances already locked and
318 present in self.needed_locks[locking.LEVEL_INSTANCE].
320 It should be called from DeclareLocks, and for safety only works if
321 self.recalculate_locks[locking.LEVEL_NODE] is set.
323 In the future it may grow parameters to lock only some instances' nodes, or
324 to lock only primary or secondary nodes, if needed.
326 It should be called from DeclareLocks in a way similar to::
328 if level == locking.LEVEL_NODE:
329 self._LockInstancesNodes()
331 @type primary_only: boolean
332 @param primary_only: only lock primary nodes of locked instances
335 assert locking.LEVEL_NODE in self.recalculate_locks, \
336 "_LockInstancesNodes helper function called with no nodes to recalculate"
338 # TODO: check whether we've really been called with the instance locks held
340 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
341 # future we might want to have different behaviors depending on the value
342 # of self.recalculate_locks[locking.LEVEL_NODE]
344 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
345 instance = self.context.cfg.GetInstanceInfo(instance_name)
346 wanted_nodes.append(instance.primary_node)
348 wanted_nodes.extend(instance.secondary_nodes)
350 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
351 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
352 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
353 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
355 del self.recalculate_locks[locking.LEVEL_NODE]
358 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
359 """Simple LU which runs no hooks.
361 This LU is intended as a parent for other LogicalUnits which will
362 run no hooks, in order to reduce duplicate code.
368 def BuildHooksEnv(self):
369 """Empty BuildHooksEnv for NoHooksLu.
371 This just raises an error.
374 assert False, "BuildHooksEnv called for NoHooksLUs"
378 """Tasklet base class.
380 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
381 they can mix legacy code with tasklets. Locking needs to be done in the LU,
382 tasklets know nothing about locks.
384 Subclasses must follow these rules:
385 - Implement CheckPrereq
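  A minimal, illustrative tasklet sketch (the class name is hypothetical)::

    class _NoopTasklet(Tasklet):
      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        feedback_fn("Nothing to do")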
389 def __init__(self, lu):
396 def CheckPrereq(self):
397 """Check prerequisites for this tasklets.
399 This method should check whether the prerequisites for the execution of
400 this tasklet are fulfilled. It can do internode communication, but it
401 should be idempotent - no cluster or system changes are allowed.
403 The method should raise errors.OpPrereqError in case something is not
404 fulfilled. Its return value is ignored.
406 This method should also update all parameters to their canonical form if it
407 hasn't been done before.
410 raise NotImplementedError
412 def Exec(self, feedback_fn):
413 """Execute the tasklet.
415 This method should implement the actual work. It should raise
416 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
420 raise NotImplementedError
423 def _GetWantedNodes(lu, nodes):
424 """Returns list of checked and expanded node names.
426 @type lu: L{LogicalUnit}
427 @param lu: the logical unit on whose behalf we execute
429 @param nodes: list of node names or None for all nodes
431 @return: the list of nodes, sorted
432 @raise errors.ProgrammerError: if the nodes parameter is wrong type
435 if not isinstance(nodes, list):
436 raise errors.OpPrereqError("Invalid argument type 'nodes'",
440 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
441 " non-empty list of nodes whose name is to be expanded.")
443 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
444 return utils.NiceSort(wanted)
447 def _GetWantedInstances(lu, instances):
448 """Returns list of checked and expanded instance names.
450 @type lu: L{LogicalUnit}
451 @param lu: the logical unit on whose behalf we execute
452 @type instances: list
453 @param instances: list of instance names or None for all instances
455 @return: the list of instances, sorted
456 @raise errors.OpPrereqError: if the instances parameter is wrong type
457 @raise errors.OpPrereqError: if any of the passed instances is not found
460 if not isinstance(instances, list):
461 raise errors.OpPrereqError("Invalid argument type 'instances'",
465 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
467 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
471 def _CheckOutputFields(static, dynamic, selected):
472 """Checks whether all selected fields are valid.
474 @type static: L{utils.FieldSet}
475 @param static: static fields set
476 @type dynamic: L{utils.FieldSet}
477 @param dynamic: dynamic fields set
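  Illustrative usage (the field names are made up)::

    _CheckOutputFields(static=utils.FieldSet("name"),
                       dynamic=utils.FieldSet("free_memory"),
                       selected=self.op.output_fields)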
484 delta = f.NonMatching(selected)
486 raise errors.OpPrereqError("Unknown output fields selected: %s"
487 % ",".join(delta), errors.ECODE_INVAL)
490 def _CheckBooleanOpField(op, name):
491 """Validates boolean opcode parameters.
493 This will ensure that an opcode parameter is either a boolean value,
494 or None (but that it always exists).
497 val = getattr(op, name, None)
498 if not (val is None or isinstance(val, bool)):
499 raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
500 (name, str(val)), errors.ECODE_INVAL)
501 setattr(op, name, val)
504 def _CheckGlobalHvParams(params):
505 """Validates that given hypervisor params are not global ones.
507 This will ensure that instances don't get customised versions of global parameters.
511 used_globals = constants.HVC_GLOBALS.intersection(params)
513 msg = ("The following hypervisor parameters are global and cannot"
514 " be customized at instance level, please modify them at"
515 " cluster level: %s" % utils.CommaJoin(used_globals))
516 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
519 def _CheckNodeOnline(lu, node):
520 """Ensure that a given node is online.
522 @param lu: the LU on behalf of which we make the check
523 @param node: the node to check
524 @raise errors.OpPrereqError: if the node is offline
527 if lu.cfg.GetNodeInfo(node).offline:
528 raise errors.OpPrereqError("Can't use offline node %s" % node,
532 def _CheckNodeNotDrained(lu, node):
533 """Ensure that a given node is not drained.
535 @param lu: the LU on behalf of which we make the check
536 @param node: the node to check
537 @raise errors.OpPrereqError: if the node is drained
540 if lu.cfg.GetNodeInfo(node).drained:
541 raise errors.OpPrereqError("Can't use drained node %s" % node,
545 def _ExpandItemName(fn, name, kind):
546 """Expand an item name.
548 @param fn: the function to use for expansion
549 @param name: requested item name
550 @param kind: text description ('Node' or 'Instance')
551 @return: the resolved (full) name
552 @raise errors.OpPrereqError: if the item is not found
556 if full_name is None:
557 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
562 def _ExpandNodeName(cfg, name):
563 """Wrapper over L{_ExpandItemName} for nodes."""
564 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
567 def _ExpandInstanceName(cfg, name):
568 """Wrapper over L{_ExpandItemName} for instance."""
569 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
572 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
573 memory, vcpus, nics, disk_template, disks,
574 bep, hvp, hypervisor_name):
575 """Builds instance related env variables for hooks
577 This builds the hook environment from individual variables.
580 @param name: the name of the instance
581 @type primary_node: string
582 @param primary_node: the name of the instance's primary node
583 @type secondary_nodes: list
584 @param secondary_nodes: list of secondary nodes as strings
585 @type os_type: string
586 @param os_type: the name of the instance's OS
587 @type status: boolean
588 @param status: the should_run status of the instance
590 @param memory: the memory size of the instance
592 @param vcpus: the count of VCPUs the instance has
594 @param nics: list of tuples (ip, mac, mode, link) representing
595 the NICs the instance has
596 @type disk_template: string
597 @param disk_template: the disk template of the instance
599 @param disks: the list of (size, mode) pairs
601 @param bep: the backend parameters for the instance
603 @param hvp: the hypervisor parameters for the instance
604 @type hypervisor_name: string
605 @param hypervisor_name: the hypervisor for the instance
607 @return: the hook environment for this instance
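  For a one-NIC, one-disk instance the result would look roughly like
  (illustrative values, only a subset of keys shown)::

    {
      "INSTANCE_NAME": "inst1.example.com",
      "INSTANCE_PRIMARY": "node1.example.com",
      "INSTANCE_NIC_COUNT": 1,
      "INSTANCE_NIC0_MAC": "aa:00:00:11:22:33",
      "INSTANCE_DISK_COUNT": 1,
      "INSTANCE_DISK0_SIZE": 10240,
      ...
    }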
616 "INSTANCE_NAME": name,
617 "INSTANCE_PRIMARY": primary_node,
618 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
619 "INSTANCE_OS_TYPE": os_type,
620 "INSTANCE_STATUS": str_status,
621 "INSTANCE_MEMORY": memory,
622 "INSTANCE_VCPUS": vcpus,
623 "INSTANCE_DISK_TEMPLATE": disk_template,
624 "INSTANCE_HYPERVISOR": hypervisor_name,
628 nic_count = len(nics)
629 for idx, (ip, mac, mode, link) in enumerate(nics):
632 env["INSTANCE_NIC%d_IP" % idx] = ip
633 env["INSTANCE_NIC%d_MAC" % idx] = mac
634 env["INSTANCE_NIC%d_MODE" % idx] = mode
635 env["INSTANCE_NIC%d_LINK" % idx] = link
636 if mode == constants.NIC_MODE_BRIDGED:
637 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
641 env["INSTANCE_NIC_COUNT"] = nic_count
644 disk_count = len(disks)
645 for idx, (size, mode) in enumerate(disks):
646 env["INSTANCE_DISK%d_SIZE" % idx] = size
647 env["INSTANCE_DISK%d_MODE" % idx] = mode
651 env["INSTANCE_DISK_COUNT"] = disk_count
653 for source, kind in [(bep, "BE"), (hvp, "HV")]:
654 for key, value in source.items():
655 env["INSTANCE_%s_%s" % (kind, key)] = value
660 def _NICListToTuple(lu, nics):
661 """Build a list of nic information tuples.
663 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
664 value in LUQueryInstanceData.
666 @type lu: L{LogicalUnit}
667 @param lu: the logical unit on whose behalf we execute
668 @type nics: list of L{objects.NIC}
669 @param nics: list of nics to convert to hooks tuples
673 c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
677 filled_params = objects.FillDict(c_nicparams, nic.nicparams)
678 mode = filled_params[constants.NIC_MODE]
679 link = filled_params[constants.NIC_LINK]
680 hooks_nics.append((ip, mac, mode, link))
684 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
685 """Builds instance related env variables for hooks from an object.
687 @type lu: L{LogicalUnit}
688 @param lu: the logical unit on whose behalf we execute
689 @type instance: L{objects.Instance}
690 @param instance: the instance for which we should build the
693 @param override: dictionary with key/values that will override
696 @return: the hook environment dictionary
699 cluster = lu.cfg.GetClusterInfo()
700 bep = cluster.FillBE(instance)
701 hvp = cluster.FillHV(instance)
703 'name': instance.name,
704 'primary_node': instance.primary_node,
705 'secondary_nodes': instance.secondary_nodes,
706 'os_type': instance.os,
707 'status': instance.admin_up,
708 'memory': bep[constants.BE_MEMORY],
709 'vcpus': bep[constants.BE_VCPUS],
710 'nics': _NICListToTuple(lu, instance.nics),
711 'disk_template': instance.disk_template,
712 'disks': [(disk.size, disk.mode) for disk in instance.disks],
715 'hypervisor_name': instance.hypervisor,
718 args.update(override)
719 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
722 def _AdjustCandidatePool(lu, exceptions):
723 """Adjust the candidate pool after node operations.
726 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
728 lu.LogInfo("Promoted nodes to master candidate role: %s",
729 utils.CommaJoin(node.name for node in mod_list))
730 for name in mod_list:
731 lu.context.ReaddNode(name)
732 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
734 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
738 def _DecideSelfPromotion(lu, exceptions=None):
739 """Decide whether I should promote myself as a master candidate.
742 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
743 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
744 # the new node will increase mc_max by one, so:
745 mc_should = min(mc_should + 1, cp_size)
746 return mc_now < mc_should
749 def _CheckNicsBridgesExist(lu, target_nics, target_node,
750 profile=constants.PP_DEFAULT):
751 """Check that the brigdes needed by a list of nics exist.
754 c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
755 paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
756 for nic in target_nics]
757 brlist = [params[constants.NIC_LINK] for params in paramslist
758 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
760 result = lu.rpc.call_bridges_exist(target_node, brlist)
761 result.Raise("Error checking bridges on destination node '%s'" %
762 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
765 def _CheckInstanceBridgesExist(lu, instance, node=None):
766 """Check that the brigdes needed by an instance exist.
770 node = instance.primary_node
771 _CheckNicsBridgesExist(lu, instance.nics, node)
774 def _CheckOSVariant(os_obj, name):
775 """Check whether an OS name conforms to the os variants specification.
777 @type os_obj: L{objects.OS}
778 @param os_obj: OS object to check
780 @param name: OS name passed by the user, to check for validity
783 if not os_obj.supported_variants:
786 variant = name.split("+", 1)[1]
788 raise errors.OpPrereqError("OS name must include a variant",
791 if variant not in os_obj.supported_variants:
792 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
795 def _GetNodeInstancesInner(cfg, fn):
796 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
799 def _GetNodeInstances(cfg, node_name):
800 """Returns a list of all primary and secondary instances on a node.
804 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
807 def _GetNodePrimaryInstances(cfg, node_name):
808 """Returns primary instances on a node.
811 return _GetNodeInstancesInner(cfg,
812 lambda inst: node_name == inst.primary_node)
815 def _GetNodeSecondaryInstances(cfg, node_name):
816 """Returns secondary instances on a node.
819 return _GetNodeInstancesInner(cfg,
820 lambda inst: node_name in inst.secondary_nodes)
823 def _GetStorageTypeArgs(cfg, storage_type):
824 """Returns the arguments for a storage type.
827 # Special case for file storage
828 if storage_type == constants.ST_FILE:
829 # storage.FileStorage wants a list of storage directories
830 return [[cfg.GetFileStorageDir()]]
835 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
838 for dev in instance.disks:
839 cfg.SetDiskID(dev, node_name)
841 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
842 result.Raise("Failed to get disk status from node %s" % node_name,
843 prereq=prereq, ecode=errors.ECODE_ENVIRON)
845 for idx, bdev_status in enumerate(result.payload):
846 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
852 def _FormatTimestamp(secs):
853 """Formats a Unix timestamp with the local timezone.
856 return time.strftime("%F %T %Z", time.gmtime(secs))
859 class LUPostInitCluster(LogicalUnit):
860 """Logical unit for running hooks after cluster initialization.
863 HPATH = "cluster-init"
864 HTYPE = constants.HTYPE_CLUSTER
867 def BuildHooksEnv(self):
871 env = {"OP_TARGET": self.cfg.GetClusterName()}
872 mn = self.cfg.GetMasterNode()
875 def CheckPrereq(self):
876 """No prerequisites to check.
881 def Exec(self, feedback_fn):
888 class LUDestroyCluster(LogicalUnit):
889 """Logical unit for destroying the cluster.
892 HPATH = "cluster-destroy"
893 HTYPE = constants.HTYPE_CLUSTER
896 def BuildHooksEnv(self):
900 env = {"OP_TARGET": self.cfg.GetClusterName()}
903 def CheckPrereq(self):
904 """Check prerequisites.
906 This checks whether the cluster is empty.
908 Any errors are signaled by raising errors.OpPrereqError.
911 master = self.cfg.GetMasterNode()
913 nodelist = self.cfg.GetNodeList()
914 if len(nodelist) != 1 or nodelist[0] != master:
915 raise errors.OpPrereqError("There are still %d node(s) in"
916 " this cluster." % (len(nodelist) - 1),
918 instancelist = self.cfg.GetInstanceList()
920 raise errors.OpPrereqError("There are still %d instance(s) in"
921 " this cluster." % len(instancelist),
924 def Exec(self, feedback_fn):
925 """Destroys the cluster.
928 master = self.cfg.GetMasterNode()
929 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
931 # Run post hooks on master node before it's removed
932 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
934 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
936 # pylint: disable-msg=W0702
937 self.LogWarning("Errors occurred running hooks on %s" % master)
939 result = self.rpc.call_node_stop_master(master, False)
940 result.Raise("Could not disable the master role")
943 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
944 utils.CreateBackup(priv_key)
945 utils.CreateBackup(pub_key)
950 def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
951 warn_days=constants.SSL_CERT_EXPIRATION_WARN,
952 error_days=constants.SSL_CERT_EXPIRATION_ERROR):
953 """Verifies certificate details for LUVerifyCluster.
957 msg = "Certificate %s is expired" % filename
959 if not_before is not None and not_after is not None:
960 msg += (" (valid from %s to %s)" %
961 (_FormatTimestamp(not_before),
962 _FormatTimestamp(not_after)))
963 elif not_before is not None:
964 msg += " (valid from %s)" % _FormatTimestamp(not_before)
965 elif not_after is not None:
966 msg += " (valid until %s)" % _FormatTimestamp(not_after)
968 return (LUVerifyCluster.ETYPE_ERROR, msg)
970 elif not_before is not None and not_before > now:
971 return (LUVerifyCluster.ETYPE_WARNING,
972 "Certificate %s not yet valid (valid from %s)" %
973 (filename, _FormatTimestamp(not_before)))
975 elif not_after is not None:
976 remaining_days = int((not_after - now) / (24 * 3600))
978 msg = ("Certificate %s expires in %d days" % (filename, remaining_days))
980 if remaining_days <= error_days:
981 return (LUVerifyCluster.ETYPE_ERROR, msg)
983 if remaining_days <= warn_days:
984 return (LUVerifyCluster.ETYPE_WARNING, msg)
989 def _VerifyCertificate(filename):
990 """Verifies a certificate for LUVerifyCluster.
992 @type filename: string
993 @param filename: Path to PEM file
997 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
998 utils.ReadFile(filename))
999 except Exception, err: # pylint: disable-msg=W0703
1000 return (LUVerifyCluster.ETYPE_ERROR,
1001 "Failed to load X509 certificate %s: %s" % (filename, err))
1003 # Depending on the pyOpenSSL version, this can just return (None, None)
1004 (not_before, not_after) = utils.GetX509CertValidity(cert)
1006 return _VerifyCertificateInner(filename, cert.has_expired(),
1007 not_before, not_after, time.time())
1010 class LUVerifyCluster(LogicalUnit):
1011 """Verifies the cluster status.
1014 HPATH = "cluster-verify"
1015 HTYPE = constants.HTYPE_CLUSTER
1016 _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1019 TCLUSTER = "cluster"
1021 TINSTANCE = "instance"
1023 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1024 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1025 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1026 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1027 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1028 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1030 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1031 ENODEDRBD = (TNODE, "ENODEDRBD")
1032 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1033 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1034 ENODEHV = (TNODE, "ENODEHV")
1035 ENODELVM = (TNODE, "ENODELVM")
1036 ENODEN1 = (TNODE, "ENODEN1")
1037 ENODENET = (TNODE, "ENODENET")
1038 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1039 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1040 ENODERPC = (TNODE, "ENODERPC")
1041 ENODESSH = (TNODE, "ENODESSH")
1042 ENODEVERSION = (TNODE, "ENODEVERSION")
1043 ENODESETUP = (TNODE, "ENODESETUP")
1044 ENODETIME = (TNODE, "ENODETIME")
1046 ETYPE_FIELD = "code"
1047 ETYPE_ERROR = "ERROR"
1048 ETYPE_WARNING = "WARNING"
1050 def ExpandNames(self):
1051 self.needed_locks = {
1052 locking.LEVEL_NODE: locking.ALL_SET,
1053 locking.LEVEL_INSTANCE: locking.ALL_SET,
1055 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1057 def _Error(self, ecode, item, msg, *args, **kwargs):
1058 """Format an error message.
1060 Based on the opcode's error_codes parameter, either format a
1061 parseable error code, or a simpler error string.
1063 This must be called only from Exec and functions called from Exec.
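    An illustrative example: with error_codes enabled, an ENODELVM error for
    node "node1.example.com" would be reported roughly as
    "ERROR:ENODELVM:node:node1.example.com:<message>".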
1066 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1068 # first complete the msg
1071 # then format the whole message
1072 if self.op.error_codes:
1073 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1079 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1080 # and finally report it via the feedback_fn
1081 self._feedback_fn(" - %s" % msg)
1083 def _ErrorIf(self, cond, *args, **kwargs):
1084 """Log an error message if the passed condition is True.
1087 cond = bool(cond) or self.op.debug_simulate_errors
1089 self._Error(*args, **kwargs)
1090 # do not mark the operation as failed for WARN cases only
1091 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1092 self.bad = self.bad or cond
1094 def _VerifyNode(self, nodeinfo, file_list, local_cksum,
1095 node_result, master_files, drbd_map, vg_name):
1096 """Run multiple tests against a node.
1100 - compares ganeti version
1101 - checks vg existence and size > 20G
1102 - checks config file checksum
1103 - checks ssh to other nodes
1105 @type nodeinfo: L{objects.Node}
1106 @param nodeinfo: the node to check
1107 @param file_list: required list of files
1108 @param local_cksum: dictionary of local files and their checksums
1109 @param node_result: the results from the node
1110 @param master_files: list of files that only masters should have
1111 @param drbd_map: the used DRBD minors for this node, in
1112 form of minor: (instance, must_exist) which correspond to instances
1113 and their running status
1114 @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
1117 node = nodeinfo.name
1118 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1120 # main result, node_result should be a non-empty dict
1121 test = not node_result or not isinstance(node_result, dict)
1122 _ErrorIf(test, self.ENODERPC, node,
1123 "unable to verify node: no data returned")
1127 # compares ganeti version
1128 local_version = constants.PROTOCOL_VERSION
1129 remote_version = node_result.get('version', None)
1130 test = not (remote_version and
1131 isinstance(remote_version, (list, tuple)) and
1132 len(remote_version) == 2)
1133 _ErrorIf(test, self.ENODERPC, node,
1134 "connection to node returned invalid data")
1138 test = local_version != remote_version[0]
1139 _ErrorIf(test, self.ENODEVERSION, node,
1140 "incompatible protocol versions: master %s,"
1141 " node %s", local_version, remote_version[0])
1145 # node seems compatible, we can actually try to look into its results
1147 # full package version
1148 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1149 self.ENODEVERSION, node,
1150 "software version mismatch: master %s, node %s",
1151 constants.RELEASE_VERSION, remote_version[1],
1152 code=self.ETYPE_WARNING)
1154 # checks vg existence and size > 20G
1155 if vg_name is not None:
1156 vglist = node_result.get(constants.NV_VGLIST, None)
1158 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1160 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1161 constants.MIN_VG_SIZE)
1162 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1164 # checks config file checksum
1166 remote_cksum = node_result.get(constants.NV_FILELIST, None)
1167 test = not isinstance(remote_cksum, dict)
1168 _ErrorIf(test, self.ENODEFILECHECK, node,
1169 "node hasn't returned file checksum data")
1171 for file_name in file_list:
1172 node_is_mc = nodeinfo.master_candidate
1173 must_have = (file_name not in master_files) or node_is_mc
1175 test1 = file_name not in remote_cksum
1177 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1179 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1180 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1181 "file '%s' missing", file_name)
1182 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1183 "file '%s' has wrong checksum", file_name)
1184 # not candidate and this is not a must-have file
1185 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1186 "file '%s' should not exist on non master"
1187 " candidates (and the file is outdated)", file_name)
1188 # all good, except non-master/non-must have combination
1189 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1190 "file '%s' should not exist"
1191 " on non master candidates", file_name)
1195 test = constants.NV_NODELIST not in node_result
1196 _ErrorIf(test, self.ENODESSH, node,
1197 "node hasn't returned node ssh connectivity data")
1199 if node_result[constants.NV_NODELIST]:
1200 for a_node, a_msg in node_result[constants.NV_NODELIST].items():
1201 _ErrorIf(True, self.ENODESSH, node,
1202 "ssh communication with node '%s': %s", a_node, a_msg)
1204 test = constants.NV_NODENETTEST not in node_result
1205 _ErrorIf(test, self.ENODENET, node,
1206 "node hasn't returned node tcp connectivity data")
1208 if node_result[constants.NV_NODENETTEST]:
1209 nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
1211 _ErrorIf(True, self.ENODENET, node,
1212 "tcp communication with node '%s': %s",
1213 anode, node_result[constants.NV_NODENETTEST][anode])
1215 hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
1216 if isinstance(hyp_result, dict):
1217 for hv_name, hv_result in hyp_result.iteritems():
1218 test = hv_result is not None
1219 _ErrorIf(test, self.ENODEHV, node,
1220 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1222 # check used drbd list
1223 if vg_name is not None:
1224 used_minors = node_result.get(constants.NV_DRBDLIST, [])
1225 test = not isinstance(used_minors, (tuple, list))
1226 _ErrorIf(test, self.ENODEDRBD, node,
1227 "cannot parse drbd status file: %s", str(used_minors))
1229 for minor, (iname, must_exist) in drbd_map.items():
1230 test = minor not in used_minors and must_exist
1231 _ErrorIf(test, self.ENODEDRBD, node,
1232 "drbd minor %d of instance %s is not active",
1234 for minor in used_minors:
1235 test = minor not in drbd_map
1236 _ErrorIf(test, self.ENODEDRBD, node,
1237 "unallocated drbd minor %d is in use", minor)
1238 test = node_result.get(constants.NV_NODESETUP,
1239 ["Missing NODESETUP results"])
1240 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1244 if vg_name is not None:
1245 pvlist = node_result.get(constants.NV_PVLIST, None)
1246 test = pvlist is None
1247 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1249 # check that ':' is not present in PV names, since it's a
1250 # special character for lvcreate (denotes the range of PEs to use on the PV)
1252 for _, pvname, owner_vg in pvlist:
1253 test = ":" in pvname
1254 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1255 " '%s' of VG '%s'", pvname, owner_vg)
1257 def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
1258 node_instance, n_offline):
1259 """Verify an instance.
1261 This function checks to see if the required block devices are
1262 available on the instance's node.
1265 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1266 node_current = instanceconfig.primary_node
1268 node_vol_should = {}
1269 instanceconfig.MapLVsByNode(node_vol_should)
1271 for node in node_vol_should:
1272 if node in n_offline:
1273 # ignore missing volumes on offline nodes
1275 for volume in node_vol_should[node]:
1276 test = node not in node_vol_is or volume not in node_vol_is[node]
1277 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1278 "volume %s missing on node %s", volume, node)
1280 if instanceconfig.admin_up:
1281 test = ((node_current not in node_instance or
1282 not instance in node_instance[node_current]) and
1283 node_current not in n_offline)
1284 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1285 "instance not running on its primary node %s",
1288 for node in node_instance:
1289 if node != node_current:
1290 test = instance in node_instance[node]
1291 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1292 "instance should not run on node %s", node)
1294 def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
1295 """Verify if there are any unknown volumes in the cluster.
1297 The .os, .swap and backup volumes are ignored. All other volumes are
1298 reported as unknown.
1301 for node in node_vol_is:
1302 for volume in node_vol_is[node]:
1303 test = (node not in node_vol_should or
1304 volume not in node_vol_should[node])
1305 self._ErrorIf(test, self.ENODEORPHANLV, node,
1306 "volume %s is unknown", volume)
1308 def _VerifyOrphanInstances(self, instancelist, node_instance):
1309 """Verify the list of running instances.
1311 This checks what instances are running but unknown to the cluster.
1314 for node in node_instance:
1315 for o_inst in node_instance[node]:
1316 test = o_inst not in instancelist
1317 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1318 "instance %s on node %s should not exist", o_inst, node)
1320 def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
1321 """Verify N+1 Memory Resilience.
1323 Check that if one single node dies we can still start all the instances it was primary for.
1327 for node, nodeinfo in node_info.iteritems():
1328 # This code checks that every node which is now listed as secondary has
1329 # enough memory to host all instances it is supposed to should a single
1330 # other node in the cluster fail.
1331 # FIXME: not ready for failover to an arbitrary node
1332 # FIXME: does not support file-backed instances
1333 # WARNING: we currently take into account down instances as well as up
1334 # ones, considering that even if they're down someone might want to start
1335 # them even in the event of a node failure.
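1335 # Illustrative example (not from the source): if this node is secondary
1335 # for two auto-balanced instances whose primary is node B, needing
1335 # 2048 MB and 1024 MB respectively, then this node must have at least
1335 # 3072 MB of free memory for the check to pass should node B fail.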
1336 for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
1338 for instance in instances:
1339 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1340 if bep[constants.BE_AUTO_BALANCE]:
1341 needed_mem += bep[constants.BE_MEMORY]
1342 test = nodeinfo['mfree'] < needed_mem
1343 self._ErrorIf(test, self.ENODEN1, node,
1344 "not enough memory on to accommodate"
1345 " failovers should peer node %s fail", prinode)
1347 def CheckPrereq(self):
1348 """Check prerequisites.
1350 Transform the list of checks we're going to skip into a set and check that
1351 all its members are valid.
1354 self.skip_set = frozenset(self.op.skip_checks)
1355 if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1356 raise errors.OpPrereqError("Invalid checks to be skipped specified",
1359 def BuildHooksEnv(self):
1362 Cluster-Verify hooks are run only in the post phase, and their failure causes
1363 the output to be logged in the verify output and the verification to fail.
1366 all_nodes = self.cfg.GetNodeList()
1368 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1370 for node in self.cfg.GetAllNodesInfo().values():
1371 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1373 return env, [], all_nodes
1375 def Exec(self, feedback_fn):
1376 """Verify integrity of cluster, performing various test on nodes.
1380 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1381 verbose = self.op.verbose
1382 self._feedback_fn = feedback_fn
1383 feedback_fn("* Verifying global settings")
1384 for msg in self.cfg.VerifyConfig():
1385 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1387 # Check the cluster certificates
1388 for cert_filename in constants.ALL_CERT_FILES:
1389 (errcode, msg) = _VerifyCertificate(cert_filename)
1390 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1392 vg_name = self.cfg.GetVGName()
1393 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1394 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1395 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1396 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1397 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1398 for iname in instancelist)
1399 i_non_redundant = [] # Non redundant instances
1400 i_non_a_balanced = [] # Non auto-balanced instances
1401 n_offline = [] # List of offline nodes
1402 n_drained = [] # List of nodes being drained
1408 # FIXME: verify OS list
1409 # do local checksums
1410 master_files = [constants.CLUSTER_CONF_FILE]
1412 file_names = ssconf.SimpleStore().GetFileList()
1413 file_names.extend(constants.ALL_CERT_FILES)
1414 file_names.extend(master_files)
1416 local_checksums = utils.FingerprintFiles(file_names)
1418 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1419 node_verify_param = {
1420 constants.NV_FILELIST: file_names,
1421 constants.NV_NODELIST: [node.name for node in nodeinfo
1422 if not node.offline],
1423 constants.NV_HYPERVISOR: hypervisors,
1424 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1425 node.secondary_ip) for node in nodeinfo
1426 if not node.offline],
1427 constants.NV_INSTANCELIST: hypervisors,
1428 constants.NV_VERSION: None,
1429 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1430 constants.NV_NODESETUP: None,
1431 constants.NV_TIME: None,
1434 if vg_name is not None:
1435 node_verify_param[constants.NV_VGLIST] = None
1436 node_verify_param[constants.NV_LVLIST] = vg_name
1437 node_verify_param[constants.NV_PVLIST] = [vg_name]
1438 node_verify_param[constants.NV_DRBDLIST] = None
1440 # Due to the way our RPC system works, exact response times cannot be
1441 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1442 # time before and after executing the request, we can at least have a time window.
1444 nvinfo_starttime = time.time()
1445 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1446 self.cfg.GetClusterName())
1447 nvinfo_endtime = time.time()
1449 cluster = self.cfg.GetClusterInfo()
1450 master_node = self.cfg.GetMasterNode()
1451 all_drbd_map = self.cfg.ComputeDRBDMap()
1453 feedback_fn("* Verifying node status")
1454 for node_i in nodeinfo:
1459 feedback_fn("* Skipping offline node %s" % (node,))
1460 n_offline.append(node)
1463 if node == master_node:
1465 elif node_i.master_candidate:
1466 ntype = "master candidate"
1467 elif node_i.drained:
1469 n_drained.append(node)
1473 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1475 msg = all_nvinfo[node].fail_msg
1476 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1480 nresult = all_nvinfo[node].payload
1482 for minor, instance in all_drbd_map[node].items():
1483 test = instance not in instanceinfo
1484 _ErrorIf(test, self.ECLUSTERCFG, None,
1485 "ghost instance '%s' in temporary DRBD map", instance)
1486 # ghost instance should not be running, but otherwise we
1487 # don't give double warnings (both ghost instance and
1488 # unallocated minor in use)
1490 node_drbd[minor] = (instance, False)
1492 instance = instanceinfo[instance]
1493 node_drbd[minor] = (instance.name, instance.admin_up)
1495 self._VerifyNode(node_i, file_names, local_checksums,
1496 nresult, master_files, node_drbd, vg_name)
1498 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1500 node_volume[node] = {}
1501 elif isinstance(lvdata, basestring):
1502 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1503 utils.SafeEncode(lvdata))
1504 node_volume[node] = {}
1505 elif not isinstance(lvdata, dict):
1506 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1509 node_volume[node] = lvdata
1512 idata = nresult.get(constants.NV_INSTANCELIST, None)
1513 test = not isinstance(idata, list)
1514 _ErrorIf(test, self.ENODEHV, node,
1515 "rpc call to node failed (instancelist): %s",
1516 utils.SafeEncode(str(idata)))
1520 node_instance[node] = idata
1523 nodeinfo = nresult.get(constants.NV_HVINFO, None)
1524 test = not isinstance(nodeinfo, dict)
1525 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1530 ntime = nresult.get(constants.NV_TIME, None)
1532 ntime_merged = utils.MergeTime(ntime)
1533 except (ValueError, TypeError):
1534 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1536 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1537 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1538 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1539 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1543 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1544 "Node time diverges by at least %s from master node time",
1547 if ntime_diff is not None:
1552 "mfree": int(nodeinfo['memory_free']),
1555 # dictionary holding all instances this node is secondary for,
1556 # grouped by their primary node. Each key is a cluster node, and each
1557 # value is a list of instances which have the key as primary and the
1558 # current node as secondary. this is handy to calculate N+1 memory
1559 # availability if you can only failover from a primary to its secondary.
1561 "sinst-by-pnode": {},
1563 # FIXME: devise a free space model for file based instances as well
1564 if vg_name is not None:
1565 test = (constants.NV_VGLIST not in nresult or
1566 vg_name not in nresult[constants.NV_VGLIST])
1567 _ErrorIf(test, self.ENODELVM, node,
1568 "node didn't return data for the volume group '%s'"
1569 " - it is either missing or broken", vg_name)
1572 node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1573 except (ValueError, KeyError):
1574 _ErrorIf(True, self.ENODERPC, node,
1575 "node returned invalid nodeinfo, check lvm/hypervisor")
1578 node_vol_should = {}
1580 feedback_fn("* Verifying instance status")
1581 for instance in instancelist:
1583 feedback_fn("* Verifying instance %s" % instance)
1584 inst_config = instanceinfo[instance]
1585 self._VerifyInstance(instance, inst_config, node_volume,
1586 node_instance, n_offline)
1587 inst_nodes_offline = []
1589 inst_config.MapLVsByNode(node_vol_should)
1591 instance_cfg[instance] = inst_config
1593 pnode = inst_config.primary_node
1594 _ErrorIf(pnode not in node_info and pnode not in n_offline,
1595 self.ENODERPC, pnode, "instance %s, connection to"
1596 " primary node failed", instance)
1597 if pnode in node_info:
1598 node_info[pnode]['pinst'].append(instance)
1600 if pnode in n_offline:
1601 inst_nodes_offline.append(pnode)
1603 # If the instance is non-redundant we cannot survive losing its primary
1604 # node, so we are not N+1 compliant. On the other hand we have no disk
1605 # templates with more than one secondary so that situation is not well supported either.
1607 # FIXME: does not support file-backed instances
1608 if len(inst_config.secondary_nodes) == 0:
1609 i_non_redundant.append(instance)
1610 _ErrorIf(len(inst_config.secondary_nodes) > 1,
1611 self.EINSTANCELAYOUT, instance,
1612 "instance has multiple secondary nodes", code="WARNING")
1614 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1615 i_non_a_balanced.append(instance)
1617 for snode in inst_config.secondary_nodes:
1618 _ErrorIf(snode not in node_info and snode not in n_offline,
1619 self.ENODERPC, snode,
1620 "instance %s, connection to secondary node"
1621 " failed", instance)
1623 if snode in node_info:
1624 node_info[snode]['sinst'].append(instance)
1625 if pnode not in node_info[snode]['sinst-by-pnode']:
1626 node_info[snode]['sinst-by-pnode'][pnode] = []
1627 node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1629 if snode in n_offline:
1630 inst_nodes_offline.append(snode)
1632 # warn that the instance lives on offline nodes
1633 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1634 "instance lives on offline node(s) %s",
1635 utils.CommaJoin(inst_nodes_offline))
1637 feedback_fn("* Verifying orphan volumes")
1638 self._VerifyOrphanVolumes(node_vol_should, node_volume)
1640 feedback_fn("* Verifying remaining instances")
1641 self._VerifyOrphanInstances(instancelist, node_instance)
1643 if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1644 feedback_fn("* Verifying N+1 Memory redundancy")
1645 self._VerifyNPlusOneMemory(node_info, instance_cfg)
1647 feedback_fn("* Other Notes")
1649 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
1650 % len(i_non_redundant))
1652 if i_non_a_balanced:
1653 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
1654 % len(i_non_a_balanced))
1657 feedback_fn(" - NOTICE: %d offline node(s) found." % len(n_offline))
1660 feedback_fn(" - NOTICE: %d drained node(s) found." % len(n_drained))
1664 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1665 """Analyze the post-hooks' result
1667 This method analyses the hook result, handles it, and sends some
1668 nicely-formatted feedback back to the user.
1670 @param phase: one of L{constants.HOOKS_PHASE_POST} or
1671 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1672 @param hooks_results: the results of the multi-node hooks rpc call
1673 @param feedback_fn: function used to send feedback back to the caller
1674 @param lu_result: previous Exec result
1675 @return: the new Exec result, based on the previous result
1679 # We only really run POST phase hooks, and are only interested in their results
1681 if phase == constants.HOOKS_PHASE_POST:
1682 # Used to change hooks' output to proper indentation
1683 indent_re = re.compile('^', re.M)
1684 feedback_fn("* Hooks Results")
1685 assert hooks_results, "invalid result from hooks"
1687 for node_name in hooks_results:
1688 res = hooks_results[node_name]
1690 test = msg and not res.offline
1691 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1692 "Communication failure in hooks execution: %s", msg)
1693 if res.offline or msg:
1694 # No need to investigate payload if node is offline or gave an error.
1695 # override manually lu_result here as _ErrorIf only
1696 # overrides self.bad
1699 for script, hkr, output in res.payload:
1700 test = hkr == constants.HKR_FAIL
1701 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1702 "Script %s failed, output:", script)
1704 output = indent_re.sub(' ', output)
1705 feedback_fn("%s" % output)
1711 class LUVerifyDisks(NoHooksLU):
1712 """Verifies the cluster disks status.
1718 def ExpandNames(self):
1719 self.needed_locks = {
1720 locking.LEVEL_NODE: locking.ALL_SET,
1721 locking.LEVEL_INSTANCE: locking.ALL_SET,
1723 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1725 def CheckPrereq(self):
1726 """Check prerequisites.
1728 This has no prerequisites.
1733 def Exec(self, feedback_fn):
1734 """Verify integrity of cluster disks.
1736 @rtype: tuple of three items
1737 @return: a tuple of (dict of node-to-node_error, list of instances
1738 which need activate-disks, dict of instance: (node, volume) for
1742 result = res_nodes, res_instances, res_missing = {}, [], {}
1744 vg_name = self.cfg.GetVGName()
1745 nodes = utils.NiceSort(self.cfg.GetNodeList())
1746 instances = [self.cfg.GetInstanceInfo(name)
1747 for name in self.cfg.GetInstanceList()]
1750 for inst in instances:
1752 if (not inst.admin_up or
1753 inst.disk_template not in constants.DTS_NET_MIRROR):
1755 inst.MapLVsByNode(inst_lvs)
1756 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1757 for node, vol_list in inst_lvs.iteritems():
1758 for vol in vol_list:
1759 nv_dict[(node, vol)] = inst
1764 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
1768 node_res = node_lvs[node]
1769 if node_res.offline:
1771 msg = node_res.fail_msg
1773 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
1774 res_nodes[node] = msg
1777 lvs = node_res.payload
1778 for lv_name, (_, _, lv_online) in lvs.items():
1779 inst = nv_dict.pop((node, lv_name), None)
1780 if (not lv_online and inst is not None
1781 and inst.name not in res_instances):
1782 res_instances.append(inst.name)
1784 # any leftover items in nv_dict are missing LVs, let's arrange the data better
1786 for key, inst in nv_dict.iteritems():
1787 if inst.name not in res_missing:
1788 res_missing[inst.name] = []
1789 res_missing[inst.name].append(key)
1794 class LURepairDiskSizes(NoHooksLU):
1795 """Verifies the cluster disks sizes.
1798 _OP_REQP = ["instances"]
1801 def ExpandNames(self):
1802 if not isinstance(self.op.instances, list):
1803 raise errors.OpPrereqError("Invalid argument type 'instances'",
1806 if self.op.instances:
1807 self.wanted_names = []
1808 for name in self.op.instances:
1809 full_name = _ExpandInstanceName(self.cfg, name)
1810 self.wanted_names.append(full_name)
1811 self.needed_locks = {
1812 locking.LEVEL_NODE: [],
1813 locking.LEVEL_INSTANCE: self.wanted_names,
1815 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
1817 self.wanted_names = None
1818 self.needed_locks = {
1819 locking.LEVEL_NODE: locking.ALL_SET,
1820 locking.LEVEL_INSTANCE: locking.ALL_SET,
1822 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
1824 def DeclareLocks(self, level):
1825 if level == locking.LEVEL_NODE and self.wanted_names is not None:
1826 self._LockInstancesNodes(primary_only=True)
1828 def CheckPrereq(self):
1829 """Check prerequisites.
1831 This only checks the optional instance list against the existing names.
1834 if self.wanted_names is None:
1835 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
1837 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
1838 in self.wanted_names]
1840 def _EnsureChildSizes(self, disk):
1841 """Ensure children of the disk have the needed disk size.
1843 This is valid mainly for DRBD8 and fixes an issue where the
1844 children have smaller disk size.
1846 @param disk: an L{ganeti.objects.Disk} object
1849 if disk.dev_type == constants.LD_DRBD8:
1850 assert disk.children, "Empty children for DRBD8?"
1851 fchild = disk.children[0]
1852 mismatch = fchild.size < disk.size
1854 self.LogInfo("Child disk has size %d, parent %d, fixing",
1855 fchild.size, disk.size)
1856 fchild.size = disk.size
1858 # and we recurse on this child only, not on the metadev
1859 return self._EnsureChildSizes(fchild) or mismatch
1863 def Exec(self, feedback_fn):
1864 """Verify the size of cluster disks.
1867 # TODO: check child disks too
1868 # TODO: check differences in size between primary/secondary nodes
1870 for instance in self.wanted_instances:
1871 pnode = instance.primary_node
1872 if pnode not in per_node_disks:
1873 per_node_disks[pnode] = []
1874 for idx, disk in enumerate(instance.disks):
1875 per_node_disks[pnode].append((instance, idx, disk))
1878 for node, dskl in per_node_disks.items():
1879 newl = [v[2].Copy() for v in dskl]
1881 self.cfg.SetDiskID(dsk, node)
1882 result = self.rpc.call_blockdev_getsizes(node, newl)
1884 self.LogWarning("Failure in blockdev_getsizes call to node"
1885 " %s, ignoring", node)
1887 if len(result.data) != len(dskl):
1888 self.LogWarning("Invalid result from node %s, ignoring node results",
1891 for ((instance, idx, disk), size) in zip(dskl, result.data):
1893 self.LogWarning("Disk %d of instance %s did not return size"
1894 " information, ignoring", idx, instance.name)
1896 if not isinstance(size, (int, long)):
1897 self.LogWarning("Disk %d of instance %s did not return valid"
1898 " size information, ignoring", idx, instance.name)
1901 if size != disk.size:
1902 self.LogInfo("Disk %d of instance %s has mismatched size,"
1903 " correcting: recorded %d, actual %d", idx,
1904 instance.name, disk.size, size)
1906 self.cfg.Update(instance, feedback_fn)
1907 changed.append((instance.name, idx, size))
1908 if self._EnsureChildSizes(disk):
1909 self.cfg.Update(instance, feedback_fn)
1910 changed.append((instance.name, idx, disk.size))
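# The "changed" list built above collects one tuple per corrected disk, e.g.
# (illustrative values only) [("inst1.example.com", 0, 2048)], meaning disk 0
# of inst1.example.com was updated to 2048 MiB in the configuration.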
1914 class LURenameCluster(LogicalUnit):
1915 """Rename the cluster.
1918 HPATH = "cluster-rename"
1919 HTYPE = constants.HTYPE_CLUSTER
1922 def BuildHooksEnv(self):
1927 "OP_TARGET": self.cfg.GetClusterName(),
1928 "NEW_NAME": self.op.name,
1930 mn = self.cfg.GetMasterNode()
1931 all_nodes = self.cfg.GetNodeList()
1932 return env, [mn], all_nodes
1934 def CheckPrereq(self):
1935 """Verify that the passed name is a valid one.
1938 hostname = utils.GetHostInfo(self.op.name)
1940 new_name = hostname.name
1941 self.ip = new_ip = hostname.ip
1942 old_name = self.cfg.GetClusterName()
1943 old_ip = self.cfg.GetMasterIP()
1944 if new_name == old_name and new_ip == old_ip:
1945 raise errors.OpPrereqError("Neither the name nor the IP address of the"
1946 " cluster has changed",
1948 if new_ip != old_ip:
1949 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1950 raise errors.OpPrereqError("The given cluster IP address (%s) is"
1951 " reachable on the network. Aborting." %
1952 new_ip, errors.ECODE_NOTUNIQUE)
1954 self.op.name = new_name
1956 def Exec(self, feedback_fn):
1957 """Rename the cluster.
1960 clustername = self.op.name
1963 # shutdown the master IP
1964 master = self.cfg.GetMasterNode()
1965 result = self.rpc.call_node_stop_master(master, False)
1966 result.Raise("Could not disable the master role")
1969 cluster = self.cfg.GetClusterInfo()
1970 cluster.cluster_name = clustername
1971 cluster.master_ip = ip
1972 self.cfg.Update(cluster, feedback_fn)
1974 # update the known hosts file
1975 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1976 node_list = self.cfg.GetNodeList()
1978 node_list.remove(master)
1981 result = self.rpc.call_upload_file(node_list,
1982 constants.SSH_KNOWN_HOSTS_FILE)
1983 for to_node, to_result in result.iteritems():
1984 msg = to_result.fail_msg
1986 msg = ("Copy of file %s to node %s failed: %s" %
1987 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
1988 self.proc.LogWarning(msg)
1991 result = self.rpc.call_node_start_master(master, False, False)
1992 msg = result.fail_msg
1994 self.LogWarning("Could not re-enable the master role on"
1995 " the master, please restart manually: %s", msg)
1998 def _RecursiveCheckIfLVMBased(disk):
1999 """Check if the given disk or its children are lvm-based.
2001 @type disk: L{objects.Disk}
2002 @param disk: the disk to check
2004 @return: boolean indicating whether an LD_LV dev_type was found or not
2008 for chdisk in disk.children:
2009 if _RecursiveCheckIfLVMBased(chdisk):
2011 return disk.dev_type == constants.LD_LV
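# Usage sketch (hypothetical disk objects): a DRBD8 disk whose children are
# LD_LV volumes makes the recursion into disk.children return True, while a
# disk tree containing no logical volumes (e.g. file-based storage) returns
# False.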
2014 class LUSetClusterParams(LogicalUnit):
2015 """Change the parameters of the cluster.
2018 HPATH = "cluster-modify"
2019 HTYPE = constants.HTYPE_CLUSTER
2023 def CheckArguments(self):
2027 if not hasattr(self.op, "candidate_pool_size"):
2028 self.op.candidate_pool_size = None
2029 if self.op.candidate_pool_size is not None:
2031 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2032 except (ValueError, TypeError), err:
2033 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2034 str(err), errors.ECODE_INVAL)
2035 if self.op.candidate_pool_size < 1:
2036 raise errors.OpPrereqError("At least one master candidate needed",
2039 def ExpandNames(self):
2040 # FIXME: in the future maybe other cluster params won't require checking on
2041 # all nodes to be modified.
2042 self.needed_locks = {
2043 locking.LEVEL_NODE: locking.ALL_SET,
2045 self.share_locks[locking.LEVEL_NODE] = 1
2047 def BuildHooksEnv(self):
2052 "OP_TARGET": self.cfg.GetClusterName(),
2053 "NEW_VG_NAME": self.op.vg_name,
2055 mn = self.cfg.GetMasterNode()
2056 return env, [mn], [mn]
2058 def CheckPrereq(self):
2059 """Check prerequisites.
2061 This checks that the given parameters don't conflict and
2062 that the given volume group is valid.
2065 if self.op.vg_name is not None and not self.op.vg_name:
2066 instances = self.cfg.GetAllInstancesInfo().values()
2067 for inst in instances:
2068 for disk in inst.disks:
2069 if _RecursiveCheckIfLVMBased(disk):
2070 raise errors.OpPrereqError("Cannot disable lvm storage while"
2071 " lvm-based instances exist",
2074 node_list = self.acquired_locks[locking.LEVEL_NODE]
2076 # if vg_name is not None, check the given volume group on all nodes
2078 vglist = self.rpc.call_vg_list(node_list)
2079 for node in node_list:
2080 msg = vglist[node].fail_msg
2082 # ignoring down node
2083 self.LogWarning("Error while gathering data on node %s"
2084 " (ignoring node): %s", node, msg)
2086 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2088 constants.MIN_VG_SIZE)
2090 raise errors.OpPrereqError("Error on node '%s': %s" %
2091 (node, vgstatus), errors.ECODE_ENVIRON)
2093 self.cluster = cluster = self.cfg.GetClusterInfo()
2094 # validate params changes
2095 if self.op.beparams:
2096 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2097 self.new_beparams = objects.FillDict(
2098 cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2100 if self.op.nicparams:
2101 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2102 self.new_nicparams = objects.FillDict(
2103 cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2104 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2107 # check all instances for consistency
2108 for instance in self.cfg.GetAllInstancesInfo().values():
2109 for nic_idx, nic in enumerate(instance.nics):
2110 params_copy = copy.deepcopy(nic.nicparams)
2111 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2113 # check parameter syntax
2115 objects.NIC.CheckParameterSyntax(params_filled)
2116 except errors.ConfigurationError, err:
2117 nic_errors.append("Instance %s, nic/%d: %s" %
2118 (instance.name, nic_idx, err))
2120 # if we're moving instances to routed, check that they have an ip
2121 target_mode = params_filled[constants.NIC_MODE]
2122 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2123 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2124 (instance.name, nic_idx))
2126 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2127 "\n".join(nic_errors))
2129 # hypervisor list/parameters
2130 self.new_hvparams = objects.FillDict(cluster.hvparams, {})
2131 if self.op.hvparams:
2132 if not isinstance(self.op.hvparams, dict):
2133 raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2135 for hv_name, hv_dict in self.op.hvparams.items():
2136 if hv_name not in self.new_hvparams:
2137 self.new_hvparams[hv_name] = hv_dict
2139 self.new_hvparams[hv_name].update(hv_dict)
2141 # os hypervisor parameters
2142 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2144 if not isinstance(self.op.os_hvp, dict):
2145 raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2147 for os_name, hvs in self.op.os_hvp.items():
2148 if not isinstance(hvs, dict):
2149 raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2150 " input"), errors.ECODE_INVAL)
2151 if os_name not in self.new_os_hvp:
2152 self.new_os_hvp[os_name] = hvs
2154 for hv_name, hv_dict in hvs.items():
2155 if hv_name not in self.new_os_hvp[os_name]:
2156 self.new_os_hvp[os_name][hv_name] = hv_dict
2158 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2160 if self.op.enabled_hypervisors is not None:
2161 self.hv_list = self.op.enabled_hypervisors
2162 if not self.hv_list:
2163 raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2164 " least one member",
2166 invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2168 raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2170 utils.CommaJoin(invalid_hvs),
2173 self.hv_list = cluster.enabled_hypervisors
2175 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2176 # either the enabled list has changed, or the parameters have, validate
2177 for hv_name, hv_params in self.new_hvparams.items():
2178 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2179 (self.op.enabled_hypervisors and
2180 hv_name in self.op.enabled_hypervisors)):
2181 # either this is a new hypervisor, or its parameters have changed
2182 hv_class = hypervisor.GetHypervisor(hv_name)
2183 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2184 hv_class.CheckParameterSyntax(hv_params)
2185 _CheckHVParams(self, node_list, hv_name, hv_params)
2188 # no need to check any newly-enabled hypervisors, since the
2189 # defaults have already been checked in the above code-block
2190 for os_name, os_hvp in self.new_os_hvp.items():
2191 for hv_name, hv_params in os_hvp.items():
2192 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2193 # we need to fill in the new os_hvp on top of the actual hv_p
2194 cluster_defaults = self.new_hvparams.get(hv_name, {})
2195 new_osp = objects.FillDict(cluster_defaults, hv_params)
2196 hv_class = hypervisor.GetHypervisor(hv_name)
2197 hv_class.CheckParameterSyntax(new_osp)
2198 _CheckHVParams(self, node_list, hv_name, new_osp)
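# Illustrative layering performed above (parameter names and values are
# examples only, not taken from any real configuration):
#   cluster_defaults = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/xvda1"}
#   hv_params (per-OS override) = {"root_path": "/dev/xvda2"}
#   new_osp = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/xvda2"}
# i.e. per-OS values win over the cluster-wide hypervisor defaults.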
2201 def Exec(self, feedback_fn):
2202 """Change the parameters of the cluster.
2205 if self.op.vg_name is not None:
2206 new_volume = self.op.vg_name
2209 if new_volume != self.cfg.GetVGName():
2210 self.cfg.SetVGName(new_volume)
2212 feedback_fn("Cluster LVM configuration already in desired"
2213 " state, not changing")
2214 if self.op.hvparams:
2215 self.cluster.hvparams = self.new_hvparams
2217 self.cluster.os_hvp = self.new_os_hvp
2218 if self.op.enabled_hypervisors is not None:
2219 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2220 if self.op.beparams:
2221 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2222 if self.op.nicparams:
2223 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2225 if self.op.candidate_pool_size is not None:
2226 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2227 # we need to update the pool size here, otherwise the save will fail
2228 _AdjustCandidatePool(self, [])
2230 self.cfg.Update(self.cluster, feedback_fn)
2233 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2234 """Distribute additional files which are part of the cluster configuration.
2236 ConfigWriter takes care of distributing the config and ssconf files, but
2237 there are more files which should be distributed to all nodes. This function
2238 makes sure those are copied.
2240 @param lu: calling logical unit
2241 @param additional_nodes: list of nodes not in the config to distribute to
2244 # 1. Gather target nodes
2245 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2246 dist_nodes = lu.cfg.GetOnlineNodeList()
2247 if additional_nodes is not None:
2248 dist_nodes.extend(additional_nodes)
2249 if myself.name in dist_nodes:
2250 dist_nodes.remove(myself.name)
2252 # 2. Gather files to distribute
2253 dist_files = set([constants.ETC_HOSTS,
2254 constants.SSH_KNOWN_HOSTS_FILE,
2255 constants.RAPI_CERT_FILE,
2256 constants.RAPI_USERS_FILE,
2257 constants.HMAC_CLUSTER_KEY,
2260 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2261 for hv_name in enabled_hypervisors:
2262 hv_class = hypervisor.GetHypervisor(hv_name)
2263 dist_files.update(hv_class.GetAncillaryFiles())
2265 # 3. Perform the files upload
2266 for fname in dist_files:
2267 if os.path.exists(fname):
2268 result = lu.rpc.call_upload_file(dist_nodes, fname)
2269 for to_node, to_result in result.items():
2270 msg = to_result.fail_msg
2272 msg = ("Copy of file %s to node %s failed: %s" %
2273 (fname, to_node, msg))
2274 lu.proc.LogWarning(msg)
2277 class LURedistributeConfig(NoHooksLU):
2278 """Force the redistribution of cluster configuration.
2280 This is a very simple LU.
2286 def ExpandNames(self):
2287 self.needed_locks = {
2288 locking.LEVEL_NODE: locking.ALL_SET,
2290 self.share_locks[locking.LEVEL_NODE] = 1
2292 def CheckPrereq(self):
2293 """Check prerequisites.
2297 def Exec(self, feedback_fn):
2298 """Redistribute the configuration.
2301 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2302 _RedistributeAncillaryFiles(self)
2305 def _WaitForSync(lu, instance, oneshot=False):
2306 """Sleep and poll for an instance's disk to sync.
2309 if not instance.disks:
2313 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2315 node = instance.primary_node
2317 for dev in instance.disks:
2318 lu.cfg.SetDiskID(dev, node)
2320 # TODO: Convert to utils.Retry
2323 degr_retries = 10 # in seconds, as we sleep 1 second each time
2327 cumul_degraded = False
2328 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2329 msg = rstats.fail_msg
2331 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2334 raise errors.RemoteError("Can't contact node %s for mirror data,"
2335 " aborting." % node)
2338 rstats = rstats.payload
2340 for i, mstat in enumerate(rstats):
2342 lu.LogWarning("Can't compute data for node %s/%s",
2343 node, instance.disks[i].iv_name)
2346 cumul_degraded = (cumul_degraded or
2347 (mstat.is_degraded and mstat.sync_percent is None))
2348 if mstat.sync_percent is not None:
2350 if mstat.estimated_time is not None:
2351 rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2352 max_time = mstat.estimated_time
2354 rem_time = "no time estimate"
2355 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2356 (instance.disks[i].iv_name, mstat.sync_percent,
2359 # if we're done but degraded, let's do a few small retries, to
2360 # make sure we see a stable and not transient situation; therefore
2361 # we force restart of the loop
2362 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2363 logging.info("Degraded disks found, %d retries left", degr_retries)
2371 time.sleep(min(60, max_time))
2374 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2375 return not cumul_degraded
2378 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2379 """Check that mirrors are not degraded.
2381 The ldisk parameter, if True, will change the test from the
2382 is_degraded attribute (which represents overall non-ok status for
2383 the device(s)) to the ldisk (representing the local storage status).
2386 lu.cfg.SetDiskID(dev, node)
2390 if on_primary or dev.AssembleOnSecondary():
2391 rstats = lu.rpc.call_blockdev_find(node, dev)
2392 msg = rstats.fail_msg
2394 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2396 elif not rstats.payload:
2397 lu.LogWarning("Can't find disk on node %s", node)
2401 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2403 result = result and not rstats.payload.is_degraded
2406 for child in dev.children:
2407 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2412 class LUDiagnoseOS(NoHooksLU):
2413 """Logical unit for OS diagnose/query.
2416 _OP_REQP = ["output_fields", "names"]
2418 _FIELDS_STATIC = utils.FieldSet()
2419 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2420 # Fields that need calculation of global os validity
2421 _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2423 def ExpandNames(self):
2425 raise errors.OpPrereqError("Selective OS query not supported",
2428 _CheckOutputFields(static=self._FIELDS_STATIC,
2429 dynamic=self._FIELDS_DYNAMIC,
2430 selected=self.op.output_fields)
2432 # Lock all nodes, in shared mode
2433 # Temporary removal of locks, should be reverted later
2434 # TODO: reintroduce locks when they are lighter-weight
2435 self.needed_locks = {}
2436 #self.share_locks[locking.LEVEL_NODE] = 1
2437 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2439 def CheckPrereq(self):
2440 """Check prerequisites.
2445 def _DiagnoseByOS(rlist):
2446 """Remaps a per-node return list into an a per-os per-node dictionary
2448 @param rlist: a map with node names as keys and OS objects as values
2451 @return: a dictionary with osnames as keys and as value another map, with
2452 nodes as keys and tuples of (path, status, diagnose, variants) as values, eg::
2454 {"debian-etch": {"node1": [(/usr/lib/..., True, "", []),
2455 (/srv/..., False, "invalid api", [])],
2456 "node2": [(/srv/..., True, "", [])]}
2461 # we build here the list of nodes that didn't fail the RPC (at RPC
2462 # level), so that nodes with a non-responding node daemon don't
2463 # make all OSes invalid
2464 good_nodes = [node_name for node_name in rlist
2465 if not rlist[node_name].fail_msg]
2466 for node_name, nr in rlist.items():
2467 if nr.fail_msg or not nr.payload:
2469 for name, path, status, diagnose, variants in nr.payload:
2470 if name not in all_os:
2471 # build a list of nodes for this os containing empty lists
2472 # for each node in node_list
2474 for nname in good_nodes:
2475 all_os[name][nname] = []
2476 all_os[name][node_name].append((path, status, diagnose, variants))
2479 def Exec(self, feedback_fn):
2480 """Compute the list of OSes.
2483 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2484 node_data = self.rpc.call_os_diagnose(valid_nodes)
2485 pol = self._DiagnoseByOS(node_data)
2487 calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2488 calc_variants = "variants" in self.op.output_fields
2490 for os_name, os_data in pol.items():
2495 for osl in os_data.values():
2496 valid = valid and osl and osl[0][1]
2501 node_variants = osl[0][3]
2502 if variants is None:
2503 variants = node_variants
2505 variants = [v for v in variants if v in node_variants]
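# Example of the intersection above (variant names are illustrative): if
# node1 reports variants ["squeeze", "lenny"] and node2 reports ["lenny"],
# the OS ends up with variants == ["lenny"], i.e. only variants available on
# every node that has the OS are kept.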
2507 for field in self.op.output_fields:
2510 elif field == "valid":
2512 elif field == "node_status":
2513 # this is just a copy of the dict
2515 for node_name, nos_list in os_data.items():
2516 val[node_name] = nos_list
2517 elif field == "variants":
2520 raise errors.ParameterError(field)
2527 class LURemoveNode(LogicalUnit):
2528 """Logical unit for removing a node.
2531 HPATH = "node-remove"
2532 HTYPE = constants.HTYPE_NODE
2533 _OP_REQP = ["node_name"]
2535 def BuildHooksEnv(self):
2538 This doesn't run on the target node in the pre phase as a failed
2539 node would then be impossible to remove.
2543 "OP_TARGET": self.op.node_name,
2544 "NODE_NAME": self.op.node_name,
2546 all_nodes = self.cfg.GetNodeList()
2548 all_nodes.remove(self.op.node_name)
2550 logging.warning("Node %s which is about to be removed not found"
2551 " in the all nodes list", self.op.node_name)
2552 return env, all_nodes, all_nodes
2554 def CheckPrereq(self):
2555 """Check prerequisites.
2558 - the node exists in the configuration
2559 - it does not have primary or secondary instances
2560 - it's not the master
2562 Any errors are signaled by raising errors.OpPrereqError.
2565 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2566 node = self.cfg.GetNodeInfo(self.op.node_name)
2567 assert node is not None
2569 instance_list = self.cfg.GetInstanceList()
2571 masternode = self.cfg.GetMasterNode()
2572 if node.name == masternode:
2573 raise errors.OpPrereqError("Node is the master node,"
2574 " you need to failover first.",
2577 for instance_name in instance_list:
2578 instance = self.cfg.GetInstanceInfo(instance_name)
2579 if node.name in instance.all_nodes:
2580 raise errors.OpPrereqError("Instance %s is still running on the node,"
2581 " please remove first." % instance_name,
2583 self.op.node_name = node.name
2586 def Exec(self, feedback_fn):
2587 """Removes the node from the cluster.
2591 logging.info("Stopping the node daemon and removing configs from node %s",
2594 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2596 # Promote nodes to master candidate as needed
2597 _AdjustCandidatePool(self, exceptions=[node.name])
2598 self.context.RemoveNode(node.name)
2600 # Run post hooks on the node before it's removed
2601 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2603 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2605 # pylint: disable-msg=W0702
2606 self.LogWarning("Errors occurred running hooks on %s" % node.name)
2608 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2609 msg = result.fail_msg
2611 self.LogWarning("Errors encountered on the remote node while leaving"
2612 " the cluster: %s", msg)
2615 class LUQueryNodes(NoHooksLU):
2616 """Logical unit for querying nodes.
2619 # pylint: disable-msg=W0142
2620 _OP_REQP = ["output_fields", "names", "use_locking"]
2623 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2624 "master_candidate", "offline", "drained"]
2626 _FIELDS_DYNAMIC = utils.FieldSet(
2628 "mtotal", "mnode", "mfree",
2630 "ctotal", "cnodes", "csockets",
2633 _FIELDS_STATIC = utils.FieldSet(*[
2634 "pinst_cnt", "sinst_cnt",
2635 "pinst_list", "sinst_list",
2636 "pip", "sip", "tags",
2638 "role"] + _SIMPLE_FIELDS
2641 def ExpandNames(self):
2642 _CheckOutputFields(static=self._FIELDS_STATIC,
2643 dynamic=self._FIELDS_DYNAMIC,
2644 selected=self.op.output_fields)
2646 self.needed_locks = {}
2647 self.share_locks[locking.LEVEL_NODE] = 1
2650 self.wanted = _GetWantedNodes(self, self.op.names)
2652 self.wanted = locking.ALL_SET
2654 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2655 self.do_locking = self.do_node_query and self.op.use_locking
2657 # if we don't request only static fields, we need to lock the nodes
2658 self.needed_locks[locking.LEVEL_NODE] = self.wanted
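# Example of the decision above: requesting only static fields such as
# ["name", "pip"] leaves do_node_query False, so no live RPC data is
# gathered; adding a dynamic field like "mfree" makes do_node_query True
# and, together with use_locking, also triggers node locking.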
2660 def CheckPrereq(self):
2661 """Check prerequisites.
2664 # The validation of the node list is done in _GetWantedNodes
2665 # if it is non-empty; if it is empty, there is no validation to do
2668 def Exec(self, feedback_fn):
2669 """Computes the list of nodes and their attributes.
2672 all_info = self.cfg.GetAllNodesInfo()
2674 nodenames = self.acquired_locks[locking.LEVEL_NODE]
2675 elif self.wanted != locking.ALL_SET:
2676 nodenames = self.wanted
2677 missing = set(nodenames).difference(all_info.keys())
2679 raise errors.OpExecError(
2680 "Some nodes were removed before retrieving their data: %s" % missing)
2682 nodenames = all_info.keys()
2684 nodenames = utils.NiceSort(nodenames)
2685 nodelist = [all_info[name] for name in nodenames]
2687 # begin data gathering
2689 if self.do_node_query:
2691 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2692 self.cfg.GetHypervisorType())
2693 for name in nodenames:
2694 nodeinfo = node_data[name]
2695 if not nodeinfo.fail_msg and nodeinfo.payload:
2696 nodeinfo = nodeinfo.payload
2697 fn = utils.TryConvert
2699 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2700 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2701 "mfree": fn(int, nodeinfo.get('memory_free', None)),
2702 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2703 "dfree": fn(int, nodeinfo.get('vg_free', None)),
2704 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2705 "bootid": nodeinfo.get('bootid', None),
2706 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2707 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2710 live_data[name] = {}
2712 live_data = dict.fromkeys(nodenames, {})
2714 node_to_primary = dict([(name, set()) for name in nodenames])
2715 node_to_secondary = dict([(name, set()) for name in nodenames])
2717 inst_fields = frozenset(("pinst_cnt", "pinst_list",
2718 "sinst_cnt", "sinst_list"))
2719 if inst_fields & frozenset(self.op.output_fields):
2720 inst_data = self.cfg.GetAllInstancesInfo()
2722 for inst in inst_data.values():
2723 if inst.primary_node in node_to_primary:
2724 node_to_primary[inst.primary_node].add(inst.name)
2725 for secnode in inst.secondary_nodes:
2726 if secnode in node_to_secondary:
2727 node_to_secondary[secnode].add(inst.name)
2729 master_node = self.cfg.GetMasterNode()
2731 # end data gathering
2734 for node in nodelist:
2736 for field in self.op.output_fields:
2737 if field in self._SIMPLE_FIELDS:
2738 val = getattr(node, field)
2739 elif field == "pinst_list":
2740 val = list(node_to_primary[node.name])
2741 elif field == "sinst_list":
2742 val = list(node_to_secondary[node.name])
2743 elif field == "pinst_cnt":
2744 val = len(node_to_primary[node.name])
2745 elif field == "sinst_cnt":
2746 val = len(node_to_secondary[node.name])
2747 elif field == "pip":
2748 val = node.primary_ip
2749 elif field == "sip":
2750 val = node.secondary_ip
2751 elif field == "tags":
2752 val = list(node.GetTags())
2753 elif field == "master":
2754 val = node.name == master_node
2755 elif self._FIELDS_DYNAMIC.Matches(field):
2756 val = live_data[node.name].get(field, None)
2757 elif field == "role":
2758 if node.name == master_node:
2760 elif node.master_candidate:
2769 raise errors.ParameterError(field)
2770 node_output.append(val)
2771 output.append(node_output)
2776 class LUQueryNodeVolumes(NoHooksLU):
2777 """Logical unit for getting volumes on node(s).
2780 _OP_REQP = ["nodes", "output_fields"]
2782 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2783 _FIELDS_STATIC = utils.FieldSet("node")
2785 def ExpandNames(self):
2786 _CheckOutputFields(static=self._FIELDS_STATIC,
2787 dynamic=self._FIELDS_DYNAMIC,
2788 selected=self.op.output_fields)
2790 self.needed_locks = {}
2791 self.share_locks[locking.LEVEL_NODE] = 1
2792 if not self.op.nodes:
2793 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2795 self.needed_locks[locking.LEVEL_NODE] = \
2796 _GetWantedNodes(self, self.op.nodes)
2798 def CheckPrereq(self):
2799 """Check prerequisites.
2801 This checks that the fields required are valid output fields.
2804 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2806 def Exec(self, feedback_fn):
2807 """Computes the list of nodes and their attributes.
2810 nodenames = self.nodes
2811 volumes = self.rpc.call_node_volumes(nodenames)
2813 ilist = [self.cfg.GetInstanceInfo(iname) for iname
2814 in self.cfg.GetInstanceList()]
2816 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2819 for node in nodenames:
2820 nresult = volumes[node]
2823 msg = nresult.fail_msg
2825 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2828 node_vols = nresult.payload[:]
2829 node_vols.sort(key=lambda vol: vol['dev'])
2831 for vol in node_vols:
2833 for field in self.op.output_fields:
2836 elif field == "phys":
2840 elif field == "name":
2842 elif field == "size":
2843 val = int(float(vol['size']))
2844 elif field == "instance":
2846 if node not in lv_by_node[inst]:
2848 if vol['name'] in lv_by_node[inst][node]:
2854 raise errors.ParameterError(field)
2855 node_output.append(str(val))
2857 output.append(node_output)
2862 class LUQueryNodeStorage(NoHooksLU):
2863 """Logical unit for getting information on storage units on node(s).
2866 _OP_REQP = ["nodes", "storage_type", "output_fields"]
2868 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
2870 def ExpandNames(self):
2871 storage_type = self.op.storage_type
2873 if storage_type not in constants.VALID_STORAGE_TYPES:
2874 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
2877 _CheckOutputFields(static=self._FIELDS_STATIC,
2878 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
2879 selected=self.op.output_fields)
2881 self.needed_locks = {}
2882 self.share_locks[locking.LEVEL_NODE] = 1
2885 self.needed_locks[locking.LEVEL_NODE] = \
2886 _GetWantedNodes(self, self.op.nodes)
2888 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2890 def CheckPrereq(self):
2891 """Check prerequisites.
2893 This checks that the fields required are valid output fields.
2896 self.op.name = getattr(self.op, "name", None)
2898 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2900 def Exec(self, feedback_fn):
2901 """Computes the list of nodes and their attributes.
2904 # Always get name to sort by
2905 if constants.SF_NAME in self.op.output_fields:
2906 fields = self.op.output_fields[:]
2908 fields = [constants.SF_NAME] + self.op.output_fields
2910 # Never ask for node or type as it's only known to the LU
2911 for extra in [constants.SF_NODE, constants.SF_TYPE]:
2912 while extra in fields:
2913 fields.remove(extra)
2915 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
2916 name_idx = field_idx[constants.SF_NAME]
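# Example (illustrative output_fields): asking for ["used", "free"] makes the
# code above query fields = ["name", "used", "free"], giving
# field_idx = {"name": 0, "used": 1, "free": 2} and name_idx = 0, so result
# rows can be sorted and looked up by the storage unit's name.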
2918 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2919 data = self.rpc.call_storage_list(self.nodes,
2920 self.op.storage_type, st_args,
2921 self.op.name, fields)
2925 for node in utils.NiceSort(self.nodes):
2926 nresult = data[node]
2930 msg = nresult.fail_msg
2932 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
2935 rows = dict([(row[name_idx], row) for row in nresult.payload])
2937 for name in utils.NiceSort(rows.keys()):
2942 for field in self.op.output_fields:
2943 if field == constants.SF_NODE:
2945 elif field == constants.SF_TYPE:
2946 val = self.op.storage_type
2947 elif field in field_idx:
2948 val = row[field_idx[field]]
2950 raise errors.ParameterError(field)
2959 class LUModifyNodeStorage(NoHooksLU):
2960 """Logical unit for modifying a storage volume on a node.
2963 _OP_REQP = ["node_name", "storage_type", "name", "changes"]
2966 def CheckArguments(self):
2967 self.opnode_name = _ExpandNodeName(self.cfg, self.op.node_name)
2969 storage_type = self.op.storage_type
2970 if storage_type not in constants.VALID_STORAGE_TYPES:
2971 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
2974 def ExpandNames(self):
2975 self.needed_locks = {
2976 locking.LEVEL_NODE: self.op.node_name,
2979 def CheckPrereq(self):
2980 """Check prerequisites.
2983 storage_type = self.op.storage_type
2986 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
2988 raise errors.OpPrereqError("Storage units of type '%s' can not be"
2989 " modified" % storage_type,
2992 diff = set(self.op.changes.keys()) - modifiable
2994 raise errors.OpPrereqError("The following fields can not be modified for"
2995 " storage units of type '%s': %r" %
2996 (storage_type, list(diff)),
2999 def Exec(self, feedback_fn):
3000 """Computes the list of nodes and their attributes.
3003 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3004 result = self.rpc.call_storage_modify(self.op.node_name,
3005 self.op.storage_type, st_args,
3006 self.op.name, self.op.changes)
3007 result.Raise("Failed to modify storage unit '%s' on %s" %
3008 (self.op.name, self.op.node_name))
3011 class LUAddNode(LogicalUnit):
3012 """Logical unit for adding node to the cluster.
3016 HTYPE = constants.HTYPE_NODE
3017 _OP_REQP = ["node_name"]
3019 def CheckArguments(self):
3020 # validate/normalize the node name
3021 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3023 def BuildHooksEnv(self):
3026 This will run on all nodes before, and on all nodes + the new node after.
3030 "OP_TARGET": self.op.node_name,
3031 "NODE_NAME": self.op.node_name,
3032 "NODE_PIP": self.op.primary_ip,
3033 "NODE_SIP": self.op.secondary_ip,
3035 nodes_0 = self.cfg.GetNodeList()
3036 nodes_1 = nodes_0 + [self.op.node_name, ]
3037 return env, nodes_0, nodes_1
3039 def CheckPrereq(self):
3040 """Check prerequisites.
3043 - the new node is not already in the config
3045 - its parameters (single/dual homed) match the cluster
3047 Any errors are signaled by raising errors.OpPrereqError.
3050 node_name = self.op.node_name
3053 dns_data = utils.GetHostInfo(node_name)
3055 node = dns_data.name
3056 primary_ip = self.op.primary_ip = dns_data.ip
3057 secondary_ip = getattr(self.op, "secondary_ip", None)
3058 if secondary_ip is None:
3059 secondary_ip = primary_ip
3060 if not utils.IsValidIP(secondary_ip):
3061 raise errors.OpPrereqError("Invalid secondary IP given",
3063 self.op.secondary_ip = secondary_ip
3065 node_list = cfg.GetNodeList()
3066 if not self.op.readd and node in node_list:
3067 raise errors.OpPrereqError("Node %s is already in the configuration" %
3068 node, errors.ECODE_EXISTS)
3069 elif self.op.readd and node not in node_list:
3070 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3073 for existing_node_name in node_list:
3074 existing_node = cfg.GetNodeInfo(existing_node_name)
3076 if self.op.readd and node == existing_node_name:
3077 if (existing_node.primary_ip != primary_ip or
3078 existing_node.secondary_ip != secondary_ip):
3079 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3080 " address configuration as before",
3084 if (existing_node.primary_ip == primary_ip or
3085 existing_node.secondary_ip == primary_ip or
3086 existing_node.primary_ip == secondary_ip or
3087 existing_node.secondary_ip == secondary_ip):
3088 raise errors.OpPrereqError("New node ip address(es) conflict with"
3089 " existing node %s" % existing_node.name,
3090 errors.ECODE_NOTUNIQUE)
3092 # check that the type of the node (single versus dual homed) is the
3093 # same as for the master
3094 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3095 master_singlehomed = myself.secondary_ip == myself.primary_ip
3096 newbie_singlehomed = secondary_ip == primary_ip
3097 if master_singlehomed != newbie_singlehomed:
3098 if master_singlehomed:
3099 raise errors.OpPrereqError("The master has no private ip but the"
3100 " new node has one",
3103 raise errors.OpPrereqError("The master has a private ip but the"
3104 " new node doesn't have one",
3107 # checks reachability
3108 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3109 raise errors.OpPrereqError("Node not reachable by ping",
3110 errors.ECODE_ENVIRON)
3112 if not newbie_singlehomed:
3113 # check reachability from my secondary ip to newbie's secondary ip
3114 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3115 source=myself.secondary_ip):
3116 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3117 " based ping to noded port",
3118 errors.ECODE_ENVIRON)
3125 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3128 self.new_node = self.cfg.GetNodeInfo(node)
3129 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3131 self.new_node = objects.Node(name=node,
3132 primary_ip=primary_ip,
3133 secondary_ip=secondary_ip,
3134 master_candidate=self.master_candidate,
3135 offline=False, drained=False)
3137 def Exec(self, feedback_fn):
3138 """Adds the new node to the cluster.
3141 new_node = self.new_node
3142 node = new_node.name
3144 # for re-adds, reset the offline/drained/master-candidate flags;
3145 # we need to reset here, otherwise offline would prevent RPC calls
3146 # later in the procedure; this also means that if the re-add
3147 # fails, we are left with a non-offlined, broken node
3149 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3150 self.LogInfo("Readding a node, the offline/drained flags were reset")
3151 # if we demote the node, we do cleanup later in the procedure
3152 new_node.master_candidate = self.master_candidate
3154 # notify the user about any possible mc promotion
3155 if new_node.master_candidate:
3156 self.LogInfo("Node will be a master candidate")
3158 # check connectivity
3159 result = self.rpc.call_version([node])[node]
3160 result.Raise("Can't get version information from node %s" % node)
3161 if constants.PROTOCOL_VERSION == result.payload:
3162 logging.info("Communication to node %s fine, sw version %s match",
3163 node, result.payload)
3165 raise errors.OpExecError("Version mismatch master version %s,"
3166 " node version %s" %
3167 (constants.PROTOCOL_VERSION, result.payload))
3170 if self.cfg.GetClusterInfo().modify_ssh_setup:
3171 logging.info("Copy ssh key to node %s", node)
3172 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3174 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3175 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3179 keyarray.append(utils.ReadFile(i))
3181 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3182 keyarray[2], keyarray[3], keyarray[4],
3184 result.Raise("Cannot transfer ssh keys to the new node")
3186 # Add node to our /etc/hosts, and add key to known_hosts
3187 if self.cfg.GetClusterInfo().modify_etc_hosts:
3188 utils.AddHostToEtcHosts(new_node.name)
3190 if new_node.secondary_ip != new_node.primary_ip:
3191 result = self.rpc.call_node_has_ip_address(new_node.name,
3192 new_node.secondary_ip)
3193 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3194 prereq=True, ecode=errors.ECODE_ENVIRON)
3195 if not result.payload:
3196 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3197 " you gave (%s). Please fix and re-run this"
3198 " command." % new_node.secondary_ip)
3200 node_verify_list = [self.cfg.GetMasterNode()]
3201 node_verify_param = {
3202 constants.NV_NODELIST: [node],
3203 # TODO: do a node-net-test as well?
3206 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3207 self.cfg.GetClusterName())
3208 for verifier in node_verify_list:
3209 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3210 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3212 for failed in nl_payload:
3213 feedback_fn("ssh/hostname verification failed"
3214 " (checking from %s): %s" %
3215 (verifier, nl_payload[failed]))
3216 raise errors.OpExecError("ssh/hostname verification failed.")
3219 _RedistributeAncillaryFiles(self)
3220 self.context.ReaddNode(new_node)
3221 # make sure we redistribute the config
3222 self.cfg.Update(new_node, feedback_fn)
3223 # and make sure the new node will not have old files around
3224 if not new_node.master_candidate:
3225 result = self.rpc.call_node_demote_from_mc(new_node.name)
3226 msg = result.fail_msg
3228 self.LogWarning("Node failed to demote itself from master"
3229 " candidate status: %s" % msg)
3231 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3232 self.context.AddNode(new_node, self.proc.GetECId())
3235 class LUSetNodeParams(LogicalUnit):
3236 """Modifies the parameters of a node.
3239 HPATH = "node-modify"
3240 HTYPE = constants.HTYPE_NODE
3241 _OP_REQP = ["node_name"]
3244 def CheckArguments(self):
3245 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3246 _CheckBooleanOpField(self.op, 'master_candidate')
3247 _CheckBooleanOpField(self.op, 'offline')
3248 _CheckBooleanOpField(self.op, 'drained')
3249 _CheckBooleanOpField(self.op, 'auto_promote')
3250 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3251 if all_mods.count(None) == 3:
3252 raise errors.OpPrereqError("Please pass at least one modification",
3254 if all_mods.count(True) > 1:
3255 raise errors.OpPrereqError("Can't set the node into more than one"
3256 " state at the same time",
3259 # Boolean value that tells us whether we're offlining or draining the node
3260 self.offline_or_drain = (self.op.offline == True or
3261 self.op.drained == True)
3262 self.deoffline_or_drain = (self.op.offline == False or
3263 self.op.drained == False)
3264 self.might_demote = (self.op.master_candidate == False or
3265 self.offline_or_drain)
3267 self.lock_all = self.op.auto_promote and self.might_demote
3270 def ExpandNames(self):
3272 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3274 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3276 def BuildHooksEnv(self):
3279 This runs on the master node.
3283 "OP_TARGET": self.op.node_name,
3284 "MASTER_CANDIDATE": str(self.op.master_candidate),
3285 "OFFLINE": str(self.op.offline),
3286 "DRAINED": str(self.op.drained),
3288 nl = [self.cfg.GetMasterNode(),
3292 def CheckPrereq(self):
3293 """Check prerequisites.
3295 This checks the requested flag changes against the node's current state.
3298 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3300 if (self.op.master_candidate is not None or
3301 self.op.drained is not None or
3302 self.op.offline is not None):
3303 # we can't change the master's node flags
3304 if self.op.node_name == self.cfg.GetMasterNode():
3305 raise errors.OpPrereqError("The master role can be changed"
3306 " only via masterfailover",
3310 if node.master_candidate and self.might_demote and not self.lock_all:
3311 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3312 # check if, after removing the current node, we would be missing master candidates
3314 (mc_remaining, mc_should, _) = \
3315 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3316 if mc_remaining != mc_should:
3317 raise errors.OpPrereqError("Not enough master candidates, please"
3318 " pass auto_promote to allow promotion",
3321 if (self.op.master_candidate == True and
3322 ((node.offline and not self.op.offline == False) or
3323 (node.drained and not self.op.drained == False))):
3324 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3325 " to master_candidate" % node.name,
3328 # If we're being deofflined/drained, we'll MC ourself if needed
3329 if (self.deoffline_or_drain and not self.offline_or_drain and not
3330 self.op.master_candidate == True and not node.master_candidate):
3331 self.op.master_candidate = _DecideSelfPromotion(self)
3332 if self.op.master_candidate:
3333 self.LogInfo("Autopromoting node to master candidate")
3337 def Exec(self, feedback_fn):
3346 if self.op.offline is not None:
3347 node.offline = self.op.offline
3348 result.append(("offline", str(self.op.offline)))
3349 if self.op.offline == True:
3350 if node.master_candidate:
3351 node.master_candidate = False
3353 result.append(("master_candidate", "auto-demotion due to offline"))
3355 node.drained = False
3356 result.append(("drained", "clear drained status due to offline"))
3358 if self.op.master_candidate is not None:
3359 node.master_candidate = self.op.master_candidate
3361 result.append(("master_candidate", str(self.op.master_candidate)))
3362 if self.op.master_candidate == False:
3363 rrc = self.rpc.call_node_demote_from_mc(node.name)
3366 self.LogWarning("Node failed to demote itself: %s" % msg)
3368 if self.op.drained is not None:
3369 node.drained = self.op.drained
3370 result.append(("drained", str(self.op.drained)))
3371 if self.op.drained == True:
3372 if node.master_candidate:
3373 node.master_candidate = False
3375 result.append(("master_candidate", "auto-demotion due to drain"))
3376 rrc = self.rpc.call_node_demote_from_mc(node.name)
3379 self.LogWarning("Node failed to demote itself: %s" % msg)
3381 node.offline = False
3382 result.append(("offline", "clear offline status due to drain"))
3384 # if we locked all nodes, adjust the candidate pool before updating this node
3386 _AdjustCandidatePool(self, [node.name])
3388 # this will trigger configuration file update, if needed
3389 self.cfg.Update(node, feedback_fn)
3391 # this will trigger job queue propagation or cleanup
3393 self.context.ReaddNode(node)
3398 class LUPowercycleNode(NoHooksLU):
3399 """Powercycles a node.
3402 _OP_REQP = ["node_name", "force"]
3405 def CheckArguments(self):
3406 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3407 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3408 raise errors.OpPrereqError("The node is the master and the force"
3409 " parameter was not set",
3412 def ExpandNames(self):
3413 """Locking for PowercycleNode.
3415 This is a last-resort option and shouldn't block on other
3416 jobs. Therefore, we grab no locks.
3419 self.needed_locks = {}
3421 def CheckPrereq(self):
3422 """Check prerequisites.
3424 This LU has no prereqs.
3429 def Exec(self, feedback_fn):
3433 result = self.rpc.call_node_powercycle(self.op.node_name,
3434 self.cfg.GetHypervisorType())
3435 result.Raise("Failed to schedule the reboot")
3436 return result.payload
3439 class LUQueryClusterInfo(NoHooksLU):
3440 """Query cluster configuration.
3446 def ExpandNames(self):
3447 self.needed_locks = {}
3449 def CheckPrereq(self):
3450 """No prerequsites needed for this LU.
3455 def Exec(self, feedback_fn):
3456 """Return cluster config.
3459 cluster = self.cfg.GetClusterInfo()
3462 # Filter just for enabled hypervisors
3463 for os_name, hv_dict in cluster.os_hvp.items():
3464 os_hvp[os_name] = {}
3465 for hv_name, hv_params in hv_dict.items():
3466 if hv_name in cluster.enabled_hypervisors:
3467 os_hvp[os_name][hv_name] = hv_params
3470 "software_version": constants.RELEASE_VERSION,
3471 "protocol_version": constants.PROTOCOL_VERSION,
3472 "config_version": constants.CONFIG_VERSION,
3473 "os_api_version": max(constants.OS_API_VERSIONS),
3474 "export_version": constants.EXPORT_VERSION,
3475 "architecture": (platform.architecture()[0], platform.machine()),
3476 "name": cluster.cluster_name,
3477 "master": cluster.master_node,
3478 "default_hypervisor": cluster.enabled_hypervisors[0],
3479 "enabled_hypervisors": cluster.enabled_hypervisors,
3480 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3481 for hypervisor_name in cluster.enabled_hypervisors]),
3483 "beparams": cluster.beparams,
3484 "nicparams": cluster.nicparams,
3485 "candidate_pool_size": cluster.candidate_pool_size,
3486 "master_netdev": cluster.master_netdev,
3487 "volume_group_name": cluster.volume_group_name,
3488 "file_storage_dir": cluster.file_storage_dir,
3489 "ctime": cluster.ctime,
3490 "mtime": cluster.mtime,
3491 "uuid": cluster.uuid,
3492 "tags": list(cluster.GetTags()),
3498 class LUQueryConfigValues(NoHooksLU):
3499 """Return configuration values.
3504 _FIELDS_DYNAMIC = utils.FieldSet()
3505 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3508 def ExpandNames(self):
3509 self.needed_locks = {}
3511 _CheckOutputFields(static=self._FIELDS_STATIC,
3512 dynamic=self._FIELDS_DYNAMIC,
3513 selected=self.op.output_fields)
3515 def CheckPrereq(self):
3516 """No prerequisites.
3521 def Exec(self, feedback_fn):
3522 """Dump a representation of the cluster config to the standard output.
3526 for field in self.op.output_fields:
3527 if field == "cluster_name":
3528 entry = self.cfg.GetClusterName()
3529 elif field == "master_node":
3530 entry = self.cfg.GetMasterNode()
3531 elif field == "drain_flag":
3532 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3533 elif field == "watcher_pause":
3534 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3536 raise errors.ParameterError(field)
3537 values.append(entry)
3541 class LUActivateInstanceDisks(NoHooksLU):
3542 """Bring up an instance's disks.
3545 _OP_REQP = ["instance_name"]
3548 def ExpandNames(self):
3549 self._ExpandAndLockInstance()
3550 self.needed_locks[locking.LEVEL_NODE] = []
3551 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3553 def DeclareLocks(self, level):
3554 if level == locking.LEVEL_NODE:
3555 self._LockInstancesNodes()
3557 def CheckPrereq(self):
3558 """Check prerequisites.
3560 This checks that the instance is in the cluster.
3563 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3564 assert self.instance is not None, \
3565 "Cannot retrieve locked instance %s" % self.op.instance_name
3566 _CheckNodeOnline(self, self.instance.primary_node)
3567 if not hasattr(self.op, "ignore_size"):
3568 self.op.ignore_size = False
3570 def Exec(self, feedback_fn):
3571 """Activate the disks.
3574 disks_ok, disks_info = \
3575 _AssembleInstanceDisks(self, self.instance,
3576 ignore_size=self.op.ignore_size)
3578 raise errors.OpExecError("Cannot activate block devices")
3583 def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3585 """Prepare the block devices for an instance.
3587 This sets up the block devices on all nodes.
3589 @type lu: L{LogicalUnit}
3590 @param lu: the logical unit on whose behalf we execute
3591 @type instance: L{objects.Instance}
3592 @param instance: the instance for whose disks we assemble
3593 @type ignore_secondaries: boolean
3594 @param ignore_secondaries: if true, errors on secondary nodes
3595 won't result in an error return from the function
3596 @type ignore_size: boolean
3597 @param ignore_size: if true, the current known size of the disk
3598 will not be used during the disk activation, useful for cases
3599 when the size is wrong
3600 @return: False if the operation failed, otherwise a list of
3601 (host, instance_visible_name, node_visible_name)
3602 with the mapping from node devices to instance devices
3607 iname = instance.name
3608 # With the two-pass mechanism we try to reduce the window of
3609 # opportunity for the race condition of switching DRBD to primary
3610 # before handshaking occurred, but we do not eliminate it
3612 # The proper fix would be to wait (with some limits) until the
3613 # connection has been made and drbd transitions from WFConnection
3614 # into any other network-connected state (Connected, SyncTarget,
3617 # 1st pass, assemble on all nodes in secondary mode
3618 for inst_disk in instance.disks:
3619 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3621 node_disk = node_disk.Copy()
3622 node_disk.UnsetSize()
3623 lu.cfg.SetDiskID(node_disk, node)
3624 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3625 msg = result.fail_msg
3627 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3628 " (is_primary=False, pass=1): %s",
3629 inst_disk.iv_name, node, msg)
3630 if not ignore_secondaries:
3633 # FIXME: race condition on drbd migration to primary
3635 # 2nd pass, do only the primary node
3636 for inst_disk in instance.disks:
3639 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3640 if node != instance.primary_node:
3643 node_disk = node_disk.Copy()
3644 node_disk.UnsetSize()
3645 lu.cfg.SetDiskID(node_disk, node)
3646 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3647 msg = result.fail_msg
3649 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3650 " (is_primary=True, pass=2): %s",
3651 inst_disk.iv_name, node, msg)
3654 dev_path = result.payload
3656 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3658 # leave the disks configured for the primary node
3659 # this is a workaround that would be fixed better by
3660 # improving the logical/physical id handling
3661 for disk in instance.disks:
3662 lu.cfg.SetDiskID(disk, instance.primary_node)
3664 return disks_ok, device_info
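# Sketch of the returned values (hypothetical names): on success disks_ok is
# True and device_info could look like
#   [("node1.example.com", "disk/0", "/dev/drbd0")]
# i.e. (primary node, instance-visible disk name, node-visible device path)
# as described in the docstring above.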
3667 def _StartInstanceDisks(lu, instance, force):
3668 """Start the disks of an instance.
3671 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3672 ignore_secondaries=force)
3674 _ShutdownInstanceDisks(lu, instance)
3675 if force is not None and not force:
3676 lu.proc.LogWarning("", hint="If the message above refers to a"
3678 " you can retry the operation using '--force'.")
3679 raise errors.OpExecError("Disk consistency error")
3682 class LUDeactivateInstanceDisks(NoHooksLU):
3683 """Shutdown an instance's disks.
3686 _OP_REQP = ["instance_name"]
3689 def ExpandNames(self):
3690 self._ExpandAndLockInstance()
3691 self.needed_locks[locking.LEVEL_NODE] = []
3692 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3694 def DeclareLocks(self, level):
3695 if level == locking.LEVEL_NODE:
3696 self._LockInstancesNodes()
3698 def CheckPrereq(self):
3699 """Check prerequisites.
3701 This checks that the instance is in the cluster.
3704 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3705 assert self.instance is not None, \
3706 "Cannot retrieve locked instance %s" % self.op.instance_name
3708 def Exec(self, feedback_fn):
3709 """Deactivate the disks
3712 instance = self.instance
3713 _SafeShutdownInstanceDisks(self, instance)
3716 def _SafeShutdownInstanceDisks(lu, instance):
3717 """Shutdown block devices of an instance.
3719 This function checks if an instance is running before calling
3720 _ShutdownInstanceDisks.
3723 pnode = instance.primary_node
3724 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
3725 ins_l.Raise("Can't contact node %s" % pnode)
3727 if instance.name in ins_l.payload:
3728 raise errors.OpExecError("Instance is running, can't shutdown"
3731 _ShutdownInstanceDisks(lu, instance)
3734 def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3735 """Shutdown block devices of an instance.
3737 This does the shutdown on all nodes of the instance.
3739 If ignore_primary is false, errors on the primary node also cause the shutdown to be reported as failed; otherwise they are ignored.
3744 for disk in instance.disks:
3745 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3746 lu.cfg.SetDiskID(top_disk, node)
3747 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3748 msg = result.fail_msg
3750 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3751 disk.iv_name, node, msg)
3752 if not ignore_primary or node != instance.primary_node:
3757 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3758 """Checks if a node has enough free memory.
3760 This function checks if a given node has the needed amount of free
3761 memory. In case the node has less memory or we cannot get the
3762 information from the node, this function raises an OpPrereqError
3765 @type lu: C{LogicalUnit}
3766 @param lu: a logical unit from which we get configuration data
3768 @param node: the node to check
3769 @type reason: C{str}
3770 @param reason: string to use in the error message
3771 @type requested: C{int}
3772 @param requested: the amount of memory in MiB to check for
3773 @type hypervisor_name: C{str}
3774 @param hypervisor_name: the hypervisor to ask for memory stats
3775 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3776 we cannot check the node
3779 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
3780 nodeinfo[node].Raise("Can't get data from node %s" % node,
3781 prereq=True, ecode=errors.ECODE_ENVIRON)
3782 free_mem = nodeinfo[node].payload.get('memory_free', None)
3783 if not isinstance(free_mem, int):
3784 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3785 " was '%s'" % (node, free_mem),
3786 errors.ECODE_ENVIRON)
3787 if requested > free_mem:
3788 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3789 " needed %s MiB, available %s MiB" %
3790 (node, reason, requested, free_mem),
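# Usage sketch (values are hypothetical): a caller such as
# LUStartupInstance.CheckPrereq asks for the instance's BE_MEMORY, e.g.
#   _CheckNodeFreeMemory(self, pnode, "starting instance inst1", 512, "xen-pvm")
# and if the node reports memory_free below 512 MiB this raises
# OpPrereqError("Not enough memory on node ...").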
3794 class LUStartupInstance(LogicalUnit):
3795 """Starts an instance.
3798 HPATH = "instance-start"
3799 HTYPE = constants.HTYPE_INSTANCE
3800 _OP_REQP = ["instance_name", "force"]
3803 def ExpandNames(self):
3804 self._ExpandAndLockInstance()
3806 def BuildHooksEnv(self):
3809 This runs on master, primary and secondary nodes of the instance.
3813 "FORCE": self.op.force,
3815 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3816 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3819 def CheckPrereq(self):
3820 """Check prerequisites.
3822 This checks that the instance is in the cluster.
3825 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3826 assert self.instance is not None, \
3827 "Cannot retrieve locked instance %s" % self.op.instance_name
3830 self.beparams = getattr(self.op, "beparams", {})
3832 if not isinstance(self.beparams, dict):
3833 raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3834 " dict" % (type(self.beparams), ),
3836 # fill the beparams dict
3837 utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3838 self.op.beparams = self.beparams
3841 self.hvparams = getattr(self.op, "hvparams", {})
3843 if not isinstance(self.hvparams, dict):
3844 raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3845 " dict" % (type(self.hvparams), ),
3848 # check hypervisor parameter syntax (locally)
3849 cluster = self.cfg.GetClusterInfo()
3850 utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3851 filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3853 filled_hvp.update(self.hvparams)
3854 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3855 hv_type.CheckParameterSyntax(filled_hvp)
3856 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3857 self.op.hvparams = self.hvparams
3859 _CheckNodeOnline(self, instance.primary_node)
3861 bep = self.cfg.GetClusterInfo().FillBE(instance)
3862 # check bridges existence
3863 _CheckInstanceBridgesExist(self, instance)
3865 remote_info = self.rpc.call_instance_info(instance.primary_node,
3867 instance.hypervisor)
3868 remote_info.Raise("Error checking node %s" % instance.primary_node,
3869 prereq=True, ecode=errors.ECODE_ENVIRON)
3870 if not remote_info.payload: # not running already
3871 _CheckNodeFreeMemory(self, instance.primary_node,
3872 "starting instance %s" % instance.name,
3873 bep[constants.BE_MEMORY], instance.hypervisor)
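# Illustrative sketch of the parameter layering in CheckPrereq above (the
# hypervisor name and values are assumptions, not taken from this file):
# cluster-level defaults form the base dict and the per-opcode overrides are
# layered on top before syntax checking:
#   cluster.hvparams['xen-pvm'] = {'kernel_path': '/boot/vmlinuz',
#                                  'root_path': '/dev/sda1'}
#   self.hvparams               = {'root_path': '/dev/xvda1'}
#   filled_hvp                  = {'kernel_path': '/boot/vmlinuz',
#                                  'root_path': '/dev/xvda1'}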
3875 def Exec(self, feedback_fn):
3876 """Start the instance.
3879 instance = self.instance
3880 force = self.op.force
3882 self.cfg.MarkInstanceUp(instance.name)
3884 node_current = instance.primary_node
3886 _StartInstanceDisks(self, instance, force)
3888 result = self.rpc.call_instance_start(node_current, instance,
3889 self.hvparams, self.beparams)
3890 msg = result.fail_msg
3892 _ShutdownInstanceDisks(self, instance)
3893 raise errors.OpExecError("Could not start instance: %s" % msg)
3896 class LURebootInstance(LogicalUnit):
3897 """Reboot an instance.
3900 HPATH = "instance-reboot"
3901 HTYPE = constants.HTYPE_INSTANCE
3902 _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3905 def CheckArguments(self):
3906 """Check the arguments.
3909 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
3910 constants.DEFAULT_SHUTDOWN_TIMEOUT)
3912 def ExpandNames(self):
3913 if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3914 constants.INSTANCE_REBOOT_HARD,
3915 constants.INSTANCE_REBOOT_FULL]:
3916 raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3917 (constants.INSTANCE_REBOOT_SOFT,
3918 constants.INSTANCE_REBOOT_HARD,
3919 constants.INSTANCE_REBOOT_FULL))
3920 self._ExpandAndLockInstance()
3922 def BuildHooksEnv(self):
3925 This runs on master, primary and secondary nodes of the instance.
3929 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3930 "REBOOT_TYPE": self.op.reboot_type,
3931 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
3933 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3934 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3937 def CheckPrereq(self):
3938 """Check prerequisites.
3940 This checks that the instance is in the cluster.
3943 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3944 assert self.instance is not None, \
3945 "Cannot retrieve locked instance %s" % self.op.instance_name
3947 _CheckNodeOnline(self, instance.primary_node)
3949 # check bridges existence
3950 _CheckInstanceBridgesExist(self, instance)
3952 def Exec(self, feedback_fn):
3953 """Reboot the instance.
3956 instance = self.instance
3957 ignore_secondaries = self.op.ignore_secondaries
3958 reboot_type = self.op.reboot_type
3960 node_current = instance.primary_node
3962 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3963 constants.INSTANCE_REBOOT_HARD]:
3964 for disk in instance.disks:
3965 self.cfg.SetDiskID(disk, node_current)
3966 result = self.rpc.call_instance_reboot(node_current, instance,
3968 self.shutdown_timeout)
3969 result.Raise("Could not reboot instance")
3971 result = self.rpc.call_instance_shutdown(node_current, instance,
3972 self.shutdown_timeout)
3973 result.Raise("Could not shutdown instance for full reboot")
3974 _ShutdownInstanceDisks(self, instance)
3975 _StartInstanceDisks(self, instance, ignore_secondaries)
3976 result = self.rpc.call_instance_start(node_current, instance, None, None)
3977 msg = result.fail_msg
3979 _ShutdownInstanceDisks(self, instance)
3980 raise errors.OpExecError("Could not start instance for"
3981 " full reboot: %s" % msg)
3983 self.cfg.MarkInstanceUp(instance.name)
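# Illustrative summary of the reboot handling above: soft and hard reboots are
# delegated to the node daemon in a single call_instance_reboot() call, while a
# full reboot is emulated by this LU as a shutdown/start cycle:
#   INSTANCE_REBOOT_SOFT, INSTANCE_REBOOT_HARD
#     -> call_instance_reboot(node_current, ...)
#   INSTANCE_REBOOT_FULL
#     -> call_instance_shutdown() + _ShutdownInstanceDisks()
#        + _StartInstanceDisks() + call_instance_start()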
3986 class LUShutdownInstance(LogicalUnit):
3987 """Shutdown an instance.
3990 HPATH = "instance-stop"
3991 HTYPE = constants.HTYPE_INSTANCE
3992 _OP_REQP = ["instance_name"]
3995 def CheckArguments(self):
3996 """Check the arguments.
3999 self.timeout = getattr(self.op, "timeout",
4000 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4002 def ExpandNames(self):
4003 self._ExpandAndLockInstance()
4005 def BuildHooksEnv(self):
4008 This runs on master, primary and secondary nodes of the instance.
4011 env = _BuildInstanceHookEnvByObject(self, self.instance)
4012 env["TIMEOUT"] = self.timeout
4013 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4016 def CheckPrereq(self):
4017 """Check prerequisites.
4019 This checks that the instance is in the cluster.
4022 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4023 assert self.instance is not None, \
4024 "Cannot retrieve locked instance %s" % self.op.instance_name
4025 _CheckNodeOnline(self, self.instance.primary_node)
4027 def Exec(self, feedback_fn):
4028 """Shutdown the instance.
4031 instance = self.instance
4032 node_current = instance.primary_node
4033 timeout = self.timeout
4034 self.cfg.MarkInstanceDown(instance.name)
4035 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4036 msg = result.fail_msg
4038 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4040 _ShutdownInstanceDisks(self, instance)
4043 class LUReinstallInstance(LogicalUnit):
4044 """Reinstall an instance.
4047 HPATH = "instance-reinstall"
4048 HTYPE = constants.HTYPE_INSTANCE
4049 _OP_REQP = ["instance_name"]
4052 def ExpandNames(self):
4053 self._ExpandAndLockInstance()
4055 def BuildHooksEnv(self):
4058 This runs on master, primary and secondary nodes of the instance.
4061 env = _BuildInstanceHookEnvByObject(self, self.instance)
4062 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4065 def CheckPrereq(self):
4066 """Check prerequisites.
4068 This checks that the instance is in the cluster and is not running.
4071 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4072 assert instance is not None, \
4073 "Cannot retrieve locked instance %s" % self.op.instance_name
4074 _CheckNodeOnline(self, instance.primary_node)
4076 if instance.disk_template == constants.DT_DISKLESS:
4077 raise errors.OpPrereqError("Instance '%s' has no disks" %
4078 self.op.instance_name,
4080 if instance.admin_up:
4081 raise errors.OpPrereqError("Instance '%s' is marked to be up" %
4082 self.op.instance_name,
4084 remote_info = self.rpc.call_instance_info(instance.primary_node,
4086 instance.hypervisor)
4087 remote_info.Raise("Error checking node %s" % instance.primary_node,
4088 prereq=True, ecode=errors.ECODE_ENVIRON)
4089 if remote_info.payload:
4090 raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
4091 (self.op.instance_name,
4092 instance.primary_node),
4095 self.op.os_type = getattr(self.op, "os_type", None)
4096 self.op.force_variant = getattr(self.op, "force_variant", False)
4097 if self.op.os_type is not None:
4099 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4100 result = self.rpc.call_os_get(pnode, self.op.os_type)
4101 result.Raise("OS '%s' not in supported OS list for primary node %s" %
4102 (self.op.os_type, pnode),
4103 prereq=True, ecode=errors.ECODE_INVAL)
4104 if not self.op.force_variant:
4105 _CheckOSVariant(result.payload, self.op.os_type)
4107 self.instance = instance
4109 def Exec(self, feedback_fn):
4110 """Reinstall the instance.
4113 inst = self.instance
4115 if self.op.os_type is not None:
4116 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4117 inst.os = self.op.os_type
4118 self.cfg.Update(inst, feedback_fn)
4120 _StartInstanceDisks(self, inst, None)
4122 feedback_fn("Running the instance OS create scripts...")
4123 # FIXME: pass debug option from opcode to backend
4124 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4125 self.op.debug_level)
4126 result.Raise("Could not install OS for instance %s on node %s" %
4127 (inst.name, inst.primary_node))
4129 _ShutdownInstanceDisks(self, inst)
4132 class LURecreateInstanceDisks(LogicalUnit):
4133 """Recreate an instance's missing disks.
4136 HPATH = "instance-recreate-disks"
4137 HTYPE = constants.HTYPE_INSTANCE
4138 _OP_REQP = ["instance_name", "disks"]
4141 def CheckArguments(self):
4142 """Check the arguments.
4145 if not isinstance(self.op.disks, list):
4146 raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4147 for item in self.op.disks:
4148 if (not isinstance(item, int) or
4150 raise errors.OpPrereqError("Invalid disk specification '%s'" %
4151 str(item), errors.ECODE_INVAL)
4153 def ExpandNames(self):
4154 self._ExpandAndLockInstance()
4156 def BuildHooksEnv(self):
4159 This runs on master, primary and secondary nodes of the instance.
4162 env = _BuildInstanceHookEnvByObject(self, self.instance)
4163 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4166 def CheckPrereq(self):
4167 """Check prerequisites.
4169 This checks that the instance is in the cluster and is not running.
4172 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4173 assert instance is not None, \
4174 "Cannot retrieve locked instance %s" % self.op.instance_name
4175 _CheckNodeOnline(self, instance.primary_node)
4177 if instance.disk_template == constants.DT_DISKLESS:
4178 raise errors.OpPrereqError("Instance '%s' has no disks" %
4179 self.op.instance_name, errors.ECODE_INVAL)
4180 if instance.admin_up:
4181 raise errors.OpPrereqError("Instance '%s' is marked to be up" %
4182 self.op.instance_name, errors.ECODE_STATE)
4183 remote_info = self.rpc.call_instance_info(instance.primary_node,
4185 instance.hypervisor)
4186 remote_info.Raise("Error checking node %s" % instance.primary_node,
4187 prereq=True, ecode=errors.ECODE_ENVIRON)
4188 if remote_info.payload:
4189 raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
4190 (self.op.instance_name,
4191 instance.primary_node), errors.ECODE_STATE)
4193 if not self.op.disks:
4194 self.op.disks = range(len(instance.disks))
4196 for idx in self.op.disks:
4197 if idx >= len(instance.disks):
4198 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4201 self.instance = instance
4203 def Exec(self, feedback_fn):
4204 """Recreate the disks.
4207 to_skip = []
4208 for idx, _ in enumerate(self.instance.disks):
4209 if idx not in self.op.disks: # disk idx has not been passed in
4210 to_skip.append(idx)
4213 _CreateDisks(self, self.instance, to_skip=to_skip)
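# Illustrative example: for an instance with three disks and an opcode asking
# only for disk 1, the loop above skips indices 0 and 2 so only the requested
# disk is recreated:
#   self.op.disks = [1]  ->  to_skip = [0, 2]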
4216 class LURenameInstance(LogicalUnit):
4217 """Rename an instance.
4220 HPATH = "instance-rename"
4221 HTYPE = constants.HTYPE_INSTANCE
4222 _OP_REQP = ["instance_name", "new_name"]
4224 def BuildHooksEnv(self):
4227 This runs on master, primary and secondary nodes of the instance.
4230 env = _BuildInstanceHookEnvByObject(self, self.instance)
4231 env["INSTANCE_NEW_NAME"] = self.op.new_name
4232 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4235 def CheckPrereq(self):
4236 """Check prerequisites.
4238 This checks that the instance is in the cluster and is not running.
4241 self.op.instance_name = _ExpandInstanceName(self.cfg,
4242 self.op.instance_name)
4243 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4244 assert instance is not None
4245 _CheckNodeOnline(self, instance.primary_node)
4247 if instance.admin_up:
4248 raise errors.OpPrereqError("Instance '%s' is marked to be up" %
4249 self.op.instance_name, errors.ECODE_STATE)
4250 remote_info = self.rpc.call_instance_info(instance.primary_node,
4252 instance.hypervisor)
4253 remote_info.Raise("Error checking node %s" % instance.primary_node,
4254 prereq=True, ecode=errors.ECODE_ENVIRON)
4255 if remote_info.payload:
4256 raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
4257 (self.op.instance_name,
4258 instance.primary_node), errors.ECODE_STATE)
4259 self.instance = instance
4261 # new name verification
4262 name_info = utils.GetHostInfo(self.op.new_name)
4264 self.op.new_name = new_name = name_info.name
4265 instance_list = self.cfg.GetInstanceList()
4266 if new_name in instance_list:
4267 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4268 new_name, errors.ECODE_EXISTS)
4270 if not getattr(self.op, "ignore_ip", False):
4271 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4272 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4273 (name_info.ip, new_name),
4274 errors.ECODE_NOTUNIQUE)
4277 def Exec(self, feedback_fn):
4278 """Reinstall the instance.
4281 inst = self.instance
4282 old_name = inst.name
4284 if inst.disk_template == constants.DT_FILE:
4285 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4287 self.cfg.RenameInstance(inst.name, self.op.new_name)
4288 # Change the instance lock. This is definitely safe while we hold the BGL
4289 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4290 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4292 # re-read the instance from the configuration after rename
4293 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4295 if inst.disk_template == constants.DT_FILE:
4296 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4297 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4298 old_file_storage_dir,
4299 new_file_storage_dir)
4300 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4301 " (but the instance has been renamed in Ganeti)" %
4302 (inst.primary_node, old_file_storage_dir,
4303 new_file_storage_dir))
4305 _StartInstanceDisks(self, inst, None)
4307 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4308 old_name, self.op.debug_level)
4309 msg = result.fail_msg
4311 msg = ("Could not run OS rename script for instance %s on node %s"
4312 " (but the instance has been renamed in Ganeti): %s" %
4313 (inst.name, inst.primary_node, msg))
4314 self.proc.LogWarning(msg)
4316 _ShutdownInstanceDisks(self, inst)
4319 class LURemoveInstance(LogicalUnit):
4320 """Remove an instance.
4323 HPATH = "instance-remove"
4324 HTYPE = constants.HTYPE_INSTANCE
4325 _OP_REQP = ["instance_name", "ignore_failures"]
4328 def CheckArguments(self):
4329 """Check the arguments.
4332 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4333 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4335 def ExpandNames(self):
4336 self._ExpandAndLockInstance()
4337 self.needed_locks[locking.LEVEL_NODE] = []
4338 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4340 def DeclareLocks(self, level):
4341 if level == locking.LEVEL_NODE:
4342 self._LockInstancesNodes()
4344 def BuildHooksEnv(self):
4347 This runs on master, primary and secondary nodes of the instance.
4350 env = _BuildInstanceHookEnvByObject(self, self.instance)
4351 env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4352 nl = [self.cfg.GetMasterNode()]
4353 nl_post = list(self.instance.all_nodes) + nl
4354 return env, nl, nl_post
4356 def CheckPrereq(self):
4357 """Check prerequisites.
4359 This checks that the instance is in the cluster.
4362 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4363 assert self.instance is not None, \
4364 "Cannot retrieve locked instance %s" % self.op.instance_name
4366 def Exec(self, feedback_fn):
4367 """Remove the instance.
4370 instance = self.instance
4371 logging.info("Shutting down instance %s on node %s",
4372 instance.name, instance.primary_node)
4374 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4375 self.shutdown_timeout)
4376 msg = result.fail_msg
4378 if self.op.ignore_failures:
4379 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4381 raise errors.OpExecError("Could not shutdown instance %s on"
4383 (instance.name, instance.primary_node, msg))
4385 logging.info("Removing block devices for instance %s", instance.name)
4387 if not _RemoveDisks(self, instance):
4388 if self.op.ignore_failures:
4389 feedback_fn("Warning: can't remove instance's disks")
4391 raise errors.OpExecError("Can't remove instance's disks")
4393 logging.info("Removing instance %s out of cluster config", instance.name)
4395 self.cfg.RemoveInstance(instance.name)
4396 self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4399 class LUQueryInstances(NoHooksLU):
4400 """Logical unit for querying instances.
4403 # pylint: disable-msg=W0142
4404 _OP_REQP = ["output_fields", "names", "use_locking"]
4406 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4407 "serial_no", "ctime", "mtime", "uuid"]
4408 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4410 "disk_template", "ip", "mac", "bridge",
4411 "nic_mode", "nic_link",
4412 "sda_size", "sdb_size", "vcpus", "tags",
4413 "network_port", "beparams",
4414 r"(disk)\.(size)/([0-9]+)",
4415 r"(disk)\.(sizes)", "disk_usage",
4416 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4417 r"(nic)\.(bridge)/([0-9]+)",
4418 r"(nic)\.(macs|ips|modes|links|bridges)",
4419 r"(disk|nic)\.(count)",
4421 ] + _SIMPLE_FIELDS +
4423 for name in constants.HVS_PARAMETERS
4424 if name not in constants.HVC_GLOBALS] +
4426 for name in constants.BES_PARAMETERS])
4427 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
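# Illustrative sketch (assuming utils.FieldSet matches these entries as
# anchored regular expressions): parametrised fields such as "disk.size/0" or
# "nic.mac/1" are split into groups which Exec() below dispatches on:
#   re.match(r"(disk)\.(size)/([0-9]+)", "disk.size/0").groups()
#     -> ('disk', 'size', '0')
#   re.match(r"(nic)\.(mac|ip|mode|link)/([0-9]+)", "nic.mac/1").groups()
#     -> ('nic', 'mac', '1')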
4430 def ExpandNames(self):
4431 _CheckOutputFields(static=self._FIELDS_STATIC,
4432 dynamic=self._FIELDS_DYNAMIC,
4433 selected=self.op.output_fields)
4435 self.needed_locks = {}
4436 self.share_locks[locking.LEVEL_INSTANCE] = 1
4437 self.share_locks[locking.LEVEL_NODE] = 1
4440 self.wanted = _GetWantedInstances(self, self.op.names)
4442 self.wanted = locking.ALL_SET
4444 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4445 self.do_locking = self.do_node_query and self.op.use_locking
4447 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4448 self.needed_locks[locking.LEVEL_NODE] = []
4449 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4451 def DeclareLocks(self, level):
4452 if level == locking.LEVEL_NODE and self.do_locking:
4453 self._LockInstancesNodes()
4455 def CheckPrereq(self):
4456 """Check prerequisites.
4461 def Exec(self, feedback_fn):
4462 """Computes the list of nodes and their attributes.
4465 # pylint: disable-msg=R0912
4466 # way too many branches here
4467 all_info = self.cfg.GetAllInstancesInfo()
4468 if self.wanted == locking.ALL_SET:
4469 # caller didn't specify instance names, so ordering is not important
4471 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4473 instance_names = all_info.keys()
4474 instance_names = utils.NiceSort(instance_names)
4476 # caller did specify names, so we must keep the ordering
4478 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4480 tgt_set = all_info.keys()
4481 missing = set(self.wanted).difference(tgt_set)
4483 raise errors.OpExecError("Some instances were removed before"
4484 " retrieving their data: %s" % missing)
4485 instance_names = self.wanted
4487 instance_list = [all_info[iname] for iname in instance_names]
4489 # begin data gathering
4491 nodes = frozenset([inst.primary_node for inst in instance_list])
4492 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4496 if self.do_node_query:
4498 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4500 result = node_data[name]
4502 # offline nodes will be in both lists
4503 off_nodes.append(name)
4505 bad_nodes.append(name)
4508 live_data.update(result.payload)
4509 # else no instance is alive
4511 live_data = dict([(name, {}) for name in instance_names])
4513 # end data gathering
4518 cluster = self.cfg.GetClusterInfo()
4519 for instance in instance_list:
4521 i_hv = cluster.FillHV(instance, skip_globals=True)
4522 i_be = cluster.FillBE(instance)
4523 i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4524 nic.nicparams) for nic in instance.nics]
4525 for field in self.op.output_fields:
4526 st_match = self._FIELDS_STATIC.Matches(field)
4527 if field in self._SIMPLE_FIELDS:
4528 val = getattr(instance, field)
4529 elif field == "pnode":
4530 val = instance.primary_node
4531 elif field == "snodes":
4532 val = list(instance.secondary_nodes)
4533 elif field == "admin_state":
4534 val = instance.admin_up
4535 elif field == "oper_state":
4536 if instance.primary_node in bad_nodes:
4539 val = bool(live_data.get(instance.name))
4540 elif field == "status":
4541 if instance.primary_node in off_nodes:
4542 val = "ERROR_nodeoffline"
4543 elif instance.primary_node in bad_nodes:
4544 val = "ERROR_nodedown"
4546 running = bool(live_data.get(instance.name))
4548 if instance.admin_up:
4553 if instance.admin_up:
4557 elif field == "oper_ram":
4558 if instance.primary_node in bad_nodes:
4560 elif instance.name in live_data:
4561 val = live_data[instance.name].get("memory", "?")
4564 elif field == "vcpus":
4565 val = i_be[constants.BE_VCPUS]
4566 elif field == "disk_template":
4567 val = instance.disk_template
4570 val = instance.nics[0].ip
4573 elif field == "nic_mode":
4575 val = i_nicp[0][constants.NIC_MODE]
4578 elif field == "nic_link":
4580 val = i_nicp[0][constants.NIC_LINK]
4583 elif field == "bridge":
4584 if (instance.nics and
4585 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4586 val = i_nicp[0][constants.NIC_LINK]
4589 elif field == "mac":
4591 val = instance.nics[0].mac
4594 elif field == "sda_size" or field == "sdb_size":
4595 idx = ord(field[2]) - ord('a')
4597 val = instance.FindDisk(idx).size
4598 except errors.OpPrereqError:
4600 elif field == "disk_usage": # total disk usage per node
4601 disk_sizes = [{'size': disk.size} for disk in instance.disks]
4602 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4603 elif field == "tags":
4604 val = list(instance.GetTags())
4605 elif field == "hvparams":
4607 elif (field.startswith(HVPREFIX) and
4608 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4609 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4610 val = i_hv.get(field[len(HVPREFIX):], None)
4611 elif field == "beparams":
4613 elif (field.startswith(BEPREFIX) and
4614 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4615 val = i_be.get(field[len(BEPREFIX):], None)
4616 elif st_match and st_match.groups():
4617 # matches a variable list
4618 st_groups = st_match.groups()
4619 if st_groups and st_groups[0] == "disk":
4620 if st_groups[1] == "count":
4621 val = len(instance.disks)
4622 elif st_groups[1] == "sizes":
4623 val = [disk.size for disk in instance.disks]
4624 elif st_groups[1] == "size":
4626 val = instance.FindDisk(st_groups[2]).size
4627 except errors.OpPrereqError:
4630 assert False, "Unhandled disk parameter"
4631 elif st_groups[0] == "nic":
4632 if st_groups[1] == "count":
4633 val = len(instance.nics)
4634 elif st_groups[1] == "macs":
4635 val = [nic.mac for nic in instance.nics]
4636 elif st_groups[1] == "ips":
4637 val = [nic.ip for nic in instance.nics]
4638 elif st_groups[1] == "modes":
4639 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4640 elif st_groups[1] == "links":
4641 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4642 elif st_groups[1] == "bridges":
4645 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4646 val.append(nicp[constants.NIC_LINK])
4651 nic_idx = int(st_groups[2])
4652 if nic_idx >= len(instance.nics):
4655 if st_groups[1] == "mac":
4656 val = instance.nics[nic_idx].mac
4657 elif st_groups[1] == "ip":
4658 val = instance.nics[nic_idx].ip
4659 elif st_groups[1] == "mode":
4660 val = i_nicp[nic_idx][constants.NIC_MODE]
4661 elif st_groups[1] == "link":
4662 val = i_nicp[nic_idx][constants.NIC_LINK]
4663 elif st_groups[1] == "bridge":
4664 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4665 if nic_mode == constants.NIC_MODE_BRIDGED:
4666 val = i_nicp[nic_idx][constants.NIC_LINK]
4670 assert False, "Unhandled NIC parameter"
4672 assert False, ("Declared but unhandled variable parameter '%s'" %
4675 assert False, "Declared but unhandled parameter '%s'" % field
4682 class LUFailoverInstance(LogicalUnit):
4683 """Failover an instance.
4686 HPATH = "instance-failover"
4687 HTYPE = constants.HTYPE_INSTANCE
4688 _OP_REQP = ["instance_name", "ignore_consistency"]
4691 def CheckArguments(self):
4692 """Check the arguments.
4695 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4696 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4698 def ExpandNames(self):
4699 self._ExpandAndLockInstance()
4700 self.needed_locks[locking.LEVEL_NODE] = []
4701 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4703 def DeclareLocks(self, level):
4704 if level == locking.LEVEL_NODE:
4705 self._LockInstancesNodes()
4707 def BuildHooksEnv(self):
4710 This runs on master, primary and secondary nodes of the instance.
4713 instance = self.instance
4714 source_node = instance.primary_node
4715 target_node = instance.secondary_nodes[0]
4717 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4718 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4719 "OLD_PRIMARY": source_node,
4720 "OLD_SECONDARY": target_node,
4721 "NEW_PRIMARY": target_node,
4722 "NEW_SECONDARY": source_node,
4724 env.update(_BuildInstanceHookEnvByObject(self, instance))
4725 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4727 nl_post.append(source_node)
4728 return env, nl, nl_post
4730 def CheckPrereq(self):
4731 """Check prerequisites.
4733 This checks that the instance is in the cluster.
4736 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4737 assert self.instance is not None, \
4738 "Cannot retrieve locked instance %s" % self.op.instance_name
4740 bep = self.cfg.GetClusterInfo().FillBE(instance)
4741 if instance.disk_template not in constants.DTS_NET_MIRROR:
4742 raise errors.OpPrereqError("Instance's disk layout is not"
4743 " network mirrored, cannot failover.",
4746 secondary_nodes = instance.secondary_nodes
4747 if not secondary_nodes:
4748 raise errors.ProgrammerError("no secondary node but using "
4749 "a mirrored disk template")
4751 target_node = secondary_nodes[0]
4752 _CheckNodeOnline(self, target_node)
4753 _CheckNodeNotDrained(self, target_node)
4754 if instance.admin_up:
4755 # check memory requirements on the secondary node
4756 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4757 instance.name, bep[constants.BE_MEMORY],
4758 instance.hypervisor)
4760 self.LogInfo("Not checking memory on the secondary node as"
4761 " instance will not be started")
4763 # check bridge existence
4764 _CheckInstanceBridgesExist(self, instance, node=target_node)
4766 def Exec(self, feedback_fn):
4767 """Failover an instance.
4769 The failover is done by shutting it down on its present node and
4770 starting it on the secondary.
4773 instance = self.instance
4775 source_node = instance.primary_node
4776 target_node = instance.secondary_nodes[0]
4778 if instance.admin_up:
4779 feedback_fn("* checking disk consistency between source and target")
4780 for dev in instance.disks:
4781 # for drbd, these are drbd over lvm
4782 if not _CheckDiskConsistency(self, dev, target_node, False):
4783 if not self.op.ignore_consistency:
4784 raise errors.OpExecError("Disk %s is degraded on target node,"
4785 " aborting failover." % dev.iv_name)
4787 feedback_fn("* not checking disk consistency as instance is not running")
4789 feedback_fn("* shutting down instance on source node")
4790 logging.info("Shutting down instance %s on node %s",
4791 instance.name, source_node)
4793 result = self.rpc.call_instance_shutdown(source_node, instance,
4794 self.shutdown_timeout)
4795 msg = result.fail_msg
4797 if self.op.ignore_consistency:
4798 self.proc.LogWarning("Could not shutdown instance %s on node %s."
4799 " Proceeding anyway. Please make sure node"
4800 " %s is down. Error details: %s",
4801 instance.name, source_node, source_node, msg)
4803 raise errors.OpExecError("Could not shutdown instance %s on"
4805 (instance.name, source_node, msg))
4807 feedback_fn("* deactivating the instance's disks on source node")
4808 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
4809 raise errors.OpExecError("Can't shut down the instance's disks.")
4811 instance.primary_node = target_node
4812 # distribute new instance config to the other nodes
4813 self.cfg.Update(instance, feedback_fn)
4815 # Only start the instance if it's marked as up
4816 if instance.admin_up:
4817 feedback_fn("* activating the instance's disks on target node")
4818 logging.info("Starting instance %s on node %s",
4819 instance.name, target_node)
4821 disks_ok, _ = _AssembleInstanceDisks(self, instance,
4822 ignore_secondaries=True)
4824 _ShutdownInstanceDisks(self, instance)
4825 raise errors.OpExecError("Can't activate the instance's disks")
4827 feedback_fn("* starting the instance on the target node")
4828 result = self.rpc.call_instance_start(target_node, instance, None, None)
4829 msg = result.fail_msg
4831 _ShutdownInstanceDisks(self, instance)
4832 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4833 (instance.name, target_node, msg))
4836 class LUMigrateInstance(LogicalUnit):
4837 """Migrate an instance.
4839 This is migration without shutting down, compared to the failover,
4840 which is done with shutdown.
4843 HPATH = "instance-migrate"
4844 HTYPE = constants.HTYPE_INSTANCE
4845 _OP_REQP = ["instance_name", "live", "cleanup"]
4849 def ExpandNames(self):
4850 self._ExpandAndLockInstance()
4852 self.needed_locks[locking.LEVEL_NODE] = []
4853 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4855 self._migrater = TLMigrateInstance(self, self.op.instance_name,
4856 self.op.live, self.op.cleanup)
4857 self.tasklets = [self._migrater]
4859 def DeclareLocks(self, level):
4860 if level == locking.LEVEL_NODE:
4861 self._LockInstancesNodes()
4863 def BuildHooksEnv(self):
4866 This runs on master, primary and secondary nodes of the instance.
4869 instance = self._migrater.instance
4870 source_node = instance.primary_node
4871 target_node = instance.secondary_nodes[0]
4872 env = _BuildInstanceHookEnvByObject(self, instance)
4873 env["MIGRATE_LIVE"] = self.op.live
4874 env["MIGRATE_CLEANUP"] = self.op.cleanup
4876 "OLD_PRIMARY": source_node,
4877 "OLD_SECONDARY": target_node,
4878 "NEW_PRIMARY": target_node,
4879 "NEW_SECONDARY": source_node,
4881 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4883 nl_post.append(source_node)
4884 return env, nl, nl_post
4887 class LUMoveInstance(LogicalUnit):
4888 """Move an instance by data-copying.
4891 HPATH = "instance-move"
4892 HTYPE = constants.HTYPE_INSTANCE
4893 _OP_REQP = ["instance_name", "target_node"]
4896 def CheckArguments(self):
4897 """Check the arguments.
4900 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4901 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4903 def ExpandNames(self):
4904 self._ExpandAndLockInstance()
4905 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
4906 self.op.target_node = target_node
4907 self.needed_locks[locking.LEVEL_NODE] = [target_node]
4908 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4910 def DeclareLocks(self, level):
4911 if level == locking.LEVEL_NODE:
4912 self._LockInstancesNodes(primary_only=True)
4914 def BuildHooksEnv(self):
4917 This runs on master, primary and secondary nodes of the instance.
4921 "TARGET_NODE": self.op.target_node,
4922 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4924 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4925 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
4926 self.op.target_node]
4929 def CheckPrereq(self):
4930 """Check prerequisites.
4932 This checks that the instance is in the cluster.
4935 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4936 assert self.instance is not None, \
4937 "Cannot retrieve locked instance %s" % self.op.instance_name
4939 node = self.cfg.GetNodeInfo(self.op.target_node)
4940 assert node is not None, \
4941 "Cannot retrieve locked node %s" % self.op.target_node
4943 self.target_node = target_node = node.name
4945 if target_node == instance.primary_node:
4946 raise errors.OpPrereqError("Instance %s is already on the node %s" %
4947 (instance.name, target_node),
4950 bep = self.cfg.GetClusterInfo().FillBE(instance)
4952 for idx, dsk in enumerate(instance.disks):
4953 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
4954 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
4955 " cannot copy" % idx, errors.ECODE_STATE)
4957 _CheckNodeOnline(self, target_node)
4958 _CheckNodeNotDrained(self, target_node)
4960 if instance.admin_up:
4961 # check memory requirements on the secondary node
4962 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4963 instance.name, bep[constants.BE_MEMORY],
4964 instance.hypervisor)
4966 self.LogInfo("Not checking memory on the secondary node as"
4967 " instance will not be started")
4969 # check bridge existence
4970 _CheckInstanceBridgesExist(self, instance, node=target_node)
4972 def Exec(self, feedback_fn):
4973 """Move an instance.
4975 The move is done by shutting it down on its present node, copying
4976 the data over (slow) and starting it on the new node.
4979 instance = self.instance
4981 source_node = instance.primary_node
4982 target_node = self.target_node
4984 self.LogInfo("Shutting down instance %s on source node %s",
4985 instance.name, source_node)
4987 result = self.rpc.call_instance_shutdown(source_node, instance,
4988 self.shutdown_timeout)
4989 msg = result.fail_msg
4991 if self.op.ignore_consistency:
4992 self.proc.LogWarning("Could not shutdown instance %s on node %s."
4993 " Proceeding anyway. Please make sure node"
4994 " %s is down. Error details: %s",
4995 instance.name, source_node, source_node, msg)
4997 raise errors.OpExecError("Could not shutdown instance %s on"
4999 (instance.name, source_node, msg))
5001 # create the target disks
5003 _CreateDisks(self, instance, target_node=target_node)
5004 except errors.OpExecError:
5005 self.LogWarning("Device creation failed, reverting...")
5007 _RemoveDisks(self, instance, target_node=target_node)
5009 self.cfg.ReleaseDRBDMinors(instance.name)
5012 cluster_name = self.cfg.GetClusterInfo().cluster_name
5015 # activate, get path, copy the data over
5016 for idx, disk in enumerate(instance.disks):
5017 self.LogInfo("Copying data for disk %d", idx)
5018 result = self.rpc.call_blockdev_assemble(target_node, disk,
5019 instance.name, True)
5021 self.LogWarning("Can't assemble newly created disk %d: %s",
5022 idx, result.fail_msg)
5023 errs.append(result.fail_msg)
5025 dev_path = result.payload
5026 result = self.rpc.call_blockdev_export(source_node, disk,
5027 target_node, dev_path,
5030 self.LogWarning("Can't copy data over for disk %d: %s",
5031 idx, result.fail_msg)
5032 errs.append(result.fail_msg)
5036 self.LogWarning("Some disks failed to copy, aborting")
5038 _RemoveDisks(self, instance, target_node=target_node)
5040 self.cfg.ReleaseDRBDMinors(instance.name)
5041 raise errors.OpExecError("Errors during disk copy: %s" %
5044 instance.primary_node = target_node
5045 self.cfg.Update(instance, feedback_fn)
5047 self.LogInfo("Removing the disks on the original node")
5048 _RemoveDisks(self, instance, target_node=source_node)
5050 # Only start the instance if it's marked as up
5051 if instance.admin_up:
5052 self.LogInfo("Starting instance %s on node %s",
5053 instance.name, target_node)
5055 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5056 ignore_secondaries=True)
5058 _ShutdownInstanceDisks(self, instance)
5059 raise errors.OpExecError("Can't activate the instance's disks")
5061 result = self.rpc.call_instance_start(target_node, instance, None, None)
5062 msg = result.fail_msg
5064 _ShutdownInstanceDisks(self, instance)
5065 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5066 (instance.name, target_node, msg))
5069 class LUMigrateNode(LogicalUnit):
5070 """Migrate all instances from a node.
5073 HPATH = "node-migrate"
5074 HTYPE = constants.HTYPE_NODE
5075 _OP_REQP = ["node_name", "live"]
5078 def ExpandNames(self):
5079 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5081 self.needed_locks = {
5082 locking.LEVEL_NODE: [self.op.node_name],
5085 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5087 # Create tasklets for migrating instances for all instances on this node
5091 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5092 logging.debug("Migrating instance %s", inst.name)
5093 names.append(inst.name)
5095 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5097 self.tasklets = tasklets
5099 # Declare instance locks
5100 self.needed_locks[locking.LEVEL_INSTANCE] = names
5102 def DeclareLocks(self, level):
5103 if level == locking.LEVEL_NODE:
5104 self._LockInstancesNodes()
5106 def BuildHooksEnv(self):
5109 This runs on the master, the primary and all the secondaries.
5113 "NODE_NAME": self.op.node_name,
5116 nl = [self.cfg.GetMasterNode()]
5118 return (env, nl, nl)
5121 class TLMigrateInstance(Tasklet):
5122 def __init__(self, lu, instance_name, live, cleanup):
5123 """Initializes this class.
5126 Tasklet.__init__(self, lu)
5129 self.instance_name = instance_name
5130 self.live = live
5131 self.cleanup = cleanup
5133 def CheckPrereq(self):
5134 """Check prerequisites.
5136 This checks that the instance is in the cluster.
5139 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5140 instance = self.cfg.GetInstanceInfo(instance_name)
5141 assert instance is not None
5143 if instance.disk_template != constants.DT_DRBD8:
5144 raise errors.OpPrereqError("Instance's disk layout is not"
5145 " drbd8, cannot migrate.", errors.ECODE_STATE)
5147 secondary_nodes = instance.secondary_nodes
5148 if not secondary_nodes:
5149 raise errors.ConfigurationError("No secondary node but using"
5150 " drbd8 disk template")
5152 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5154 target_node = secondary_nodes[0]
5155 # check memory requirements on the secondary node
5156 _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5157 instance.name, i_be[constants.BE_MEMORY],
5158 instance.hypervisor)
5160 # check bridge existence
5161 _CheckInstanceBridgesExist(self, instance, node=target_node)
5163 if not self.cleanup:
5164 _CheckNodeNotDrained(self, target_node)
5165 result = self.rpc.call_instance_migratable(instance.primary_node,
5167 result.Raise("Can't migrate, please use failover",
5168 prereq=True, ecode=errors.ECODE_STATE)
5170 self.instance = instance
5172 def _WaitUntilSync(self):
5173 """Poll with custom rpc for disk sync.
5175 This uses our own step-based rpc call.
5178 self.feedback_fn("* wait until resync is done")
5182 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5184 self.instance.disks)
5186 for node, nres in result.items():
5187 nres.Raise("Cannot resync disks on node %s" % node)
5188 node_done, node_percent = nres.payload
5189 all_done = all_done and node_done
5190 if node_percent is not None:
5191 min_percent = min(min_percent, node_percent)
5193 if min_percent < 100:
5194 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5197 def _EnsureSecondary(self, node):
5198 """Demote a node to secondary.
5201 self.feedback_fn("* switching node %s to secondary mode" % node)
5203 for dev in self.instance.disks:
5204 self.cfg.SetDiskID(dev, node)
5206 result = self.rpc.call_blockdev_close(node, self.instance.name,
5207 self.instance.disks)
5208 result.Raise("Cannot change disk to secondary on node %s" % node)
5210 def _GoStandalone(self):
5211 """Disconnect from the network.
5214 self.feedback_fn("* changing into standalone mode")
5215 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5216 self.instance.disks)
5217 for node, nres in result.items():
5218 nres.Raise("Cannot disconnect disks node %s" % node)
5220 def _GoReconnect(self, multimaster):
5221 """Reconnect to the network.
5227 msg = "single-master"
5228 self.feedback_fn("* changing disks into %s mode" % msg)
5229 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5230 self.instance.disks,
5231 self.instance.name, multimaster)
5232 for node, nres in result.items():
5233 nres.Raise("Cannot change disks config on node %s" % node)
5235 def _ExecCleanup(self):
5236 """Try to cleanup after a failed migration.
5238 The cleanup is done by:
5239 - check that the instance is running only on one node
5240 (and update the config if needed)
5241 - change disks on its secondary node to secondary
5242 - wait until disks are fully synchronized
5243 - disconnect from the network
5244 - change disks into single-master mode
5245 - wait again until disks are fully synchronized
5248 instance = self.instance
5249 target_node = self.target_node
5250 source_node = self.source_node
5252 # check running on only one node
5253 self.feedback_fn("* checking where the instance actually runs"
5254 " (if this hangs, the hypervisor might be in"
5256 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5257 for node, result in ins_l.items():
5258 result.Raise("Can't contact node %s" % node)
5260 runningon_source = instance.name in ins_l[source_node].payload
5261 runningon_target = instance.name in ins_l[target_node].payload
5263 if runningon_source and runningon_target:
5264 raise errors.OpExecError("Instance seems to be running on two nodes,"
5265 " or the hypervisor is confused. You will have"
5266 " to ensure manually that it runs only on one"
5267 " and restart this operation.")
5269 if not (runningon_source or runningon_target):
5270 raise errors.OpExecError("Instance does not seem to be running at all."
5271 " In this case, it's safer to repair by"
5272 " running 'gnt-instance stop' to ensure disk"
5273 " shutdown, and then restarting it.")
5275 if runningon_target:
5276 # the migration has actually succeeded, we need to update the config
5277 self.feedback_fn("* instance running on secondary node (%s),"
5278 " updating config" % target_node)
5279 instance.primary_node = target_node
5280 self.cfg.Update(instance, self.feedback_fn)
5281 demoted_node = source_node
5283 self.feedback_fn("* instance confirmed to be running on its"
5284 " primary node (%s)" % source_node)
5285 demoted_node = target_node
5287 self._EnsureSecondary(demoted_node)
5289 self._WaitUntilSync()
5290 except errors.OpExecError:
5291 # we ignore here errors, since if the device is standalone, it
5292 # won't be able to sync
5294 self._GoStandalone()
5295 self._GoReconnect(False)
5296 self._WaitUntilSync()
5298 self.feedback_fn("* done")
5300 def _RevertDiskStatus(self):
5301 """Try to revert the disk status after a failed migration.
5304 target_node = self.target_node
5306 self._EnsureSecondary(target_node)
5307 self._GoStandalone()
5308 self._GoReconnect(False)
5309 self._WaitUntilSync()
5310 except errors.OpExecError, err:
5311 self.lu.LogWarning("Migration failed and I can't reconnect the"
5312 " drives: error '%s'\n"
5313 "Please look and recover the instance status" %
5316 def _AbortMigration(self):
5317 """Call the hypervisor code to abort a started migration.
5320 instance = self.instance
5321 target_node = self.target_node
5322 migration_info = self.migration_info
5324 abort_result = self.rpc.call_finalize_migration(target_node,
5328 abort_msg = abort_result.fail_msg
5330 logging.error("Aborting migration failed on target node %s: %s",
5331 target_node, abort_msg)
5332 # Don't raise an exception here, as we still have to try to revert the
5333 # disk status, even if this step failed.
5335 def _ExecMigration(self):
5336 """Migrate an instance.
5338 The migrate is done by:
5339 - change the disks into dual-master mode
5340 - wait until disks are fully synchronized again
5341 - migrate the instance
5342 - change disks on the new secondary node (the old primary) to secondary
5343 - wait until disks are fully synchronized
5344 - change disks into single-master mode
5347 instance = self.instance
5348 target_node = self.target_node
5349 source_node = self.source_node
5351 self.feedback_fn("* checking disk consistency between source and target")
5352 for dev in instance.disks:
5353 if not _CheckDiskConsistency(self, dev, target_node, False):
5354 raise errors.OpExecError("Disk %s is degraded or not fully"
5355 " synchronized on target node,"
5356 " aborting migrate." % dev.iv_name)
5358 # First get the migration information from the remote node
5359 result = self.rpc.call_migration_info(source_node, instance)
5360 msg = result.fail_msg
5362 log_err = ("Failed fetching source migration information from %s: %s" %
5364 logging.error(log_err)
5365 raise errors.OpExecError(log_err)
5367 self.migration_info = migration_info = result.payload
5369 # Then switch the disks to master/master mode
5370 self._EnsureSecondary(target_node)
5371 self._GoStandalone()
5372 self._GoReconnect(True)
5373 self._WaitUntilSync()
5375 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5376 result = self.rpc.call_accept_instance(target_node,
5379 self.nodes_ip[target_node])
5381 msg = result.fail_msg
5383 logging.error("Instance pre-migration failed, trying to revert"
5384 " disk status: %s", msg)
5385 self.feedback_fn("Pre-migration failed, aborting")
5386 self._AbortMigration()
5387 self._RevertDiskStatus()
5388 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5389 (instance.name, msg))
5391 self.feedback_fn("* migrating instance to %s" % target_node)
5393 result = self.rpc.call_instance_migrate(source_node, instance,
5394 self.nodes_ip[target_node],
5396 msg = result.fail_msg
5398 logging.error("Instance migration failed, trying to revert"
5399 " disk status: %s", msg)
5400 self.feedback_fn("Migration failed, aborting")
5401 self._AbortMigration()
5402 self._RevertDiskStatus()
5403 raise errors.OpExecError("Could not migrate instance %s: %s" %
5404 (instance.name, msg))
5407 instance.primary_node = target_node
5408 # distribute new instance config to the other nodes
5409 self.cfg.Update(instance, self.feedback_fn)
5411 result = self.rpc.call_finalize_migration(target_node,
5415 msg = result.fail_msg
5417 logging.error("Instance migration succeeded, but finalization failed:"
5419 raise errors.OpExecError("Could not finalize instance migration: %s" %
5422 self._EnsureSecondary(source_node)
5423 self._WaitUntilSync()
5424 self._GoStandalone()
5425 self._GoReconnect(False)
5426 self._WaitUntilSync()
5428 self.feedback_fn("* done")
5430 def Exec(self, feedback_fn):
5431 """Perform the migration.
5434 feedback_fn("Migrating instance %s" % self.instance.name)
5436 self.feedback_fn = feedback_fn
5438 self.source_node = self.instance.primary_node
5439 self.target_node = self.instance.secondary_nodes[0]
5440 self.all_nodes = [self.source_node, self.target_node]
5441 self.nodes_ip = {
5442 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5443 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5444 }
5446 if self.cleanup:
5447 return self._ExecCleanup()
5448 else:
5449 return self._ExecMigration()
5452 def _CreateBlockDev(lu, node, instance, device, force_create,
5454 """Create a tree of block devices on a given node.
5456 If this device type has to be created on secondaries, create it and
5459 If not, just recurse to children keeping the same 'force' value.
5461 @param lu: the lu on whose behalf we execute
5462 @param node: the node on which to create the device
5463 @type instance: L{objects.Instance}
5464 @param instance: the instance which owns the device
5465 @type device: L{objects.Disk}
5466 @param device: the device to create
5467 @type force_create: boolean
5468 @param force_create: whether to force creation of this device; this
5469 will be changed to True whenever we find a device which has the
5470 CreateOnSecondary() attribute
5471 @param info: the extra 'metadata' we should attach to the device
5472 (this will be represented as a LVM tag)
5473 @type force_open: boolean
5474 @param force_open: this parameter will be passed to the
5475 L{backend.BlockdevCreate} function where it specifies
5476 whether we run on primary or not, and it affects both
5477 the child assembly and the device's own Open() execution
5480 if device.CreateOnSecondary():
5484 for child in device.children:
5485 _CreateBlockDev(lu, node, instance, child, force_create,
5488 if not force_create:
5491 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5494 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5495 """Create a single block device on a given node.
5497 This will not recurse over children of the device, so they must be created in advance.
5500 @param lu: the lu on whose behalf we execute
5501 @param node: the node on which to create the device
5502 @type instance: L{objects.Instance}
5503 @param instance: the instance which owns the device
5504 @type device: L{objects.Disk}
5505 @param device: the device to create
5506 @param info: the extra 'metadata' we should attach to the device
5507 (this will be represented as a LVM tag)
5508 @type force_open: boolean
5509 @param force_open: this parameter will be passed to the
5510 L{backend.BlockdevCreate} function where it specifies
5511 whether we run on primary or not, and it affects both
5512 the child assembly and the device's own Open() execution
5515 lu.cfg.SetDiskID(device, node)
5516 result = lu.rpc.call_blockdev_create(node, device, device.size,
5517 instance.name, force_open, info)
5518 result.Raise("Can't create block device %s on"
5519 " node %s for instance %s" % (device, node, instance.name))
5520 if device.physical_id is None:
5521 device.physical_id = result.payload
5524 def _GenerateUniqueNames(lu, exts):
5525 """Generate a suitable LV name.
5527 This will generate a logical volume name for the given instance.
5532 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5533 results.append("%s%s" % (new_id, val))
5537 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5539 """Generate a drbd8 device complete with its children.
5542 port = lu.cfg.AllocatePort()
5543 vgname = lu.cfg.GetVGName()
5544 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5545 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5546 logical_id=(vgname, names[0]))
5547 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5548 logical_id=(vgname, names[1]))
5549 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5550 logical_id=(primary, secondary, port,
5553 children=[dev_data, dev_meta],
5558 def _GenerateDiskTemplate(lu, template_name,
5559 instance_name, primary_node,
5560 secondary_nodes, disk_info,
5561 file_storage_dir, file_driver,
5563 """Generate the entire disk layout for a given template type.
5566 #TODO: compute space requirements
5568 vgname = lu.cfg.GetVGName()
5569 disk_count = len(disk_info)
5571 if template_name == constants.DT_DISKLESS:
5573 elif template_name == constants.DT_PLAIN:
5574 if len(secondary_nodes) != 0:
5575 raise errors.ProgrammerError("Wrong template configuration")
5577 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5578 for i in range(disk_count)])
5579 for idx, disk in enumerate(disk_info):
5580 disk_index = idx + base_index
5581 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5582 logical_id=(vgname, names[idx]),
5583 iv_name="disk/%d" % disk_index,
5585 disks.append(disk_dev)
5586 elif template_name == constants.DT_DRBD8:
5587 if len(secondary_nodes) != 1:
5588 raise errors.ProgrammerError("Wrong template configuration")
5589 remote_node = secondary_nodes[0]
5590 minors = lu.cfg.AllocateDRBDMinor(
5591 [primary_node, remote_node] * len(disk_info), instance_name)
5594 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5595 for i in range(disk_count)]):
5596 names.append(lv_prefix + "_data")
5597 names.append(lv_prefix + "_meta")
5598 for idx, disk in enumerate(disk_info):
5599 disk_index = idx + base_index
5600 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5601 disk["size"], names[idx*2:idx*2+2],
5602 "disk/%d" % disk_index,
5603 minors[idx*2], minors[idx*2+1])
5604 disk_dev.mode = disk["mode"]
5605 disks.append(disk_dev)
5606 elif template_name == constants.DT_FILE:
5607 if len(secondary_nodes) != 0:
5608 raise errors.ProgrammerError("Wrong template configuration")
5610 for idx, disk in enumerate(disk_info):
5611 disk_index = idx + base_index
5612 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5613 iv_name="disk/%d" % disk_index,
5614 logical_id=(file_driver,
5615 "%s/disk%d" % (file_storage_dir,
5618 disks.append(disk_dev)
5620 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
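# Illustrative sketch (hypothetical two-disk instance) of how the DRBD8 branch
# above pairs resources per disk: AllocateDRBDMinor() is asked for
# [primary, secondary] * num_disks, and each generated LV prefix yields a
# "_data"/"_meta" name pair, so disk idx uses minors[idx*2], minors[idx*2+1]
# and names[idx*2:idx*2+2]:
#   minors = [m_p0, m_s0, m_p1, m_s1]
#   disk 0 -> (minors[0], minors[1]), names[0:2]
#   disk 1 -> (minors[2], minors[3]), names[2:4]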
5624 def _GetInstanceInfoText(instance):
5625 """Compute that text that should be added to the disk's metadata.
5628 return "originstname+%s" % instance.name
5631 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5632 """Create all disks for an instance.
5634 This abstracts away some work from AddInstance.
5636 @type lu: L{LogicalUnit}
5637 @param lu: the logical unit on whose behalf we execute
5638 @type instance: L{objects.Instance}
5639 @param instance: the instance whose disks we should create
5641 @param to_skip: list of indices to skip
5642 @type target_node: string
5643 @param target_node: if passed, overrides the target node for creation
5645 @return: the success of the creation
5648 info = _GetInstanceInfoText(instance)
5649 if target_node is None:
5650 pnode = instance.primary_node
5651 all_nodes = instance.all_nodes
5656 if instance.disk_template == constants.DT_FILE:
5657 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5658 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5660 result.Raise("Failed to create directory '%s' on"
5661 " node %s" % (file_storage_dir, pnode))
5663 # Note: this needs to be kept in sync with adding of disks in
5664 # LUSetInstanceParams
5665 for idx, device in enumerate(instance.disks):
5666 if to_skip and idx in to_skip:
5668 logging.info("Creating volume %s for instance %s",
5669 device.iv_name, instance.name)
5671 for node in all_nodes:
5672 f_create = node == pnode
5673 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
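# Illustrative note on the creation loop above: f_create is used both as
# force_create and force_open, so devices are unconditionally created and
# opened only on the primary node; on other nodes _CreateBlockDev() creates
# them only when CreateOnSecondary() asks for it:
#   node == pnode  ->  f_create = True  (force creation and open)
#   node != pnode  ->  f_create = False (defer to CreateOnSecondary())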
5676 def _RemoveDisks(lu, instance, target_node=None):
5677 """Remove all disks for an instance.
5679 This abstracts away some work from `AddInstance()` and
5680 `RemoveInstance()`. Note that in case some of the devices couldn't
5681 be removed, the removal will continue with the other ones (compare
5682 with `_CreateDisks()`).
5684 @type lu: L{LogicalUnit}
5685 @param lu: the logical unit on whose behalf we execute
5686 @type instance: L{objects.Instance}
5687 @param instance: the instance whose disks we should remove
5688 @type target_node: string
5689 @param target_node: used to override the node on which to remove the disks
5691 @return: the success of the removal
5694 logging.info("Removing block devices for instance %s", instance.name)
5697 for device in instance.disks:
5699 edata = [(target_node, device)]
5701 edata = device.ComputeNodeTree(instance.primary_node)
5702 for node, disk in edata:
5703 lu.cfg.SetDiskID(disk, node)
5704 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5706 lu.LogWarning("Could not remove block device %s on node %s,"
5707 " continuing anyway: %s", device.iv_name, node, msg)
5710 if instance.disk_template == constants.DT_FILE:
5711 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5715 tgt = instance.primary_node
5716 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5718 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5719 file_storage_dir, instance.primary_node, result.fail_msg)
5725 def _ComputeDiskSize(disk_template, disks):
5726 """Compute disk size requirements in the volume group
5729 # Required free disk space as a function of disk and swap space
5731 constants.DT_DISKLESS: None,
5732 constants.DT_PLAIN: sum(d["size"] for d in disks),
5733 # 128 MB are added for drbd metadata for each disk
5734 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
5735 constants.DT_FILE: None,
5738 if disk_template not in req_size_dict:
5739 raise errors.ProgrammerError("Disk template '%s' size requirement"
5740 " is unknown" % disk_template)
5742 return req_size_dict[disk_template]
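# Worked example (illustrative): for a DRBD8 instance with disks of 1024 MiB
# and 512 MiB, 128 MiB of DRBD metadata is added per disk, so the volume group
# must provide (1024 + 128) + (512 + 128) = 1792 MiB:
#   _ComputeDiskSize(constants.DT_DRBD8, [{"size": 1024}, {"size": 512}]) == 1792
# while DT_DISKLESS and DT_FILE instances need no volume group space (None).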
5745 def _CheckHVParams(lu, nodenames, hvname, hvparams):
5746 """Hypervisor parameter validation.
5748 This function abstracts the hypervisor parameter validation to be
5749 used in both instance create and instance modify.
5751 @type lu: L{LogicalUnit}
5752 @param lu: the logical unit for which we check
5753 @type nodenames: list
5754 @param nodenames: the list of nodes on which we should check
5755 @type hvname: string
5756 @param hvname: the name of the hypervisor we should use
5757 @type hvparams: dict
5758 @param hvparams: the parameters which we need to check
5759 @raise errors.OpPrereqError: if the parameters are not valid
5762 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5765 for node in nodenames:
5769 info.Raise("Hypervisor parameter validation failed on node %s" % node)
5772 class LUCreateInstance(LogicalUnit):
5773 """Create an instance.
5776 HPATH = "instance-add"
5777 HTYPE = constants.HTYPE_INSTANCE
5778 _OP_REQP = ["instance_name", "disks", "disk_template",
5780 "wait_for_sync", "ip_check", "nics",
5781 "hvparams", "beparams"]
5784 def CheckArguments(self):
5788 # set optional parameters to none if they don't exist
5789 for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
5790 if not hasattr(self.op, attr):
5791 setattr(self.op, attr, None)
5793 # do not require name_check to ease forward/backward compatibility
5795 if not hasattr(self.op, "name_check"):
5796 self.op.name_check = True
5797 # validate/normalize the instance name
5798 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
5799 if self.op.ip_check and not self.op.name_check:
5800 # TODO: make the ip check more flexible and not depend on the name check
5801 raise errors.OpPrereqError("Cannot do ip checks without a name check",
5803 if (self.op.disk_template == constants.DT_FILE and
5804 not constants.ENABLE_FILE_STORAGE):
5805 raise errors.OpPrereqError("File storage disabled at configure time",
5807 # check disk information: either all adopt, or no adopt
5808 has_adopt = has_no_adopt = False
5809 for disk in self.op.disks:
if "adopt" in disk:
has_adopt = True
else:
has_no_adopt = True
5814 if has_adopt and has_no_adopt:
5815 raise errors.OpPrereqError("Either all disks are adopted or none is",
errors.ECODE_INVAL)
if has_adopt:
5818 if self.op.disk_template != constants.DT_PLAIN:
5819 raise errors.OpPrereqError("Disk adoption is only supported for the"
5820 " 'plain' disk template",
5822 if self.op.iallocator is not None:
5823 raise errors.OpPrereqError("Disk adoption not allowed with an"
5824 " iallocator script", errors.ECODE_INVAL)
5825 if self.op.mode == constants.INSTANCE_IMPORT:
5826 raise errors.OpPrereqError("Disk adoption not allowed for"
5827 " instance import", errors.ECODE_INVAL)
5829 self.adopt_disks = has_adopt
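# Illustrative example of the all-or-nothing adoption rule enforced above
# (names are made up): a request such as
#   disks=[{"size": 1024, "adopt": "existing-lv-1"},
#          {"size": 1024, "adopt": "existing-lv-2"}]
# adopts every disk, whereas mixing entries with and without "adopt" is
# rejected, as is adoption combined with a non-plain disk template, an
# iallocator script or an instance import.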
5831 def ExpandNames(self):
5832 """ExpandNames for CreateInstance.
5834 Figure out the right locks for instance creation.
5837 self.needed_locks = {}
5839 # cheap checks, mostly valid constants given
5841 # verify creation mode
5842 if self.op.mode not in (constants.INSTANCE_CREATE,
5843 constants.INSTANCE_IMPORT):
5844 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
5845 self.op.mode, errors.ECODE_INVAL)
5847 # disk template and mirror node verification
5848 if self.op.disk_template not in constants.DISK_TEMPLATES:
5849 raise errors.OpPrereqError("Invalid disk template name",
5852 if self.op.hypervisor is None:
5853 self.op.hypervisor = self.cfg.GetHypervisorType()
5855 cluster = self.cfg.GetClusterInfo()
5856 enabled_hvs = cluster.enabled_hypervisors
5857 if self.op.hypervisor not in enabled_hvs:
5858 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
5859 " cluster (%s)" % (self.op.hypervisor,
5860 ",".join(enabled_hvs)),
5863 # check hypervisor parameter syntax (locally)
5864 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5865 filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
self.op.hvparams)
5867 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
5868 hv_type.CheckParameterSyntax(filled_hvp)
5869 self.hv_full = filled_hvp
5870 # check that we don't specify global parameters on an instance
5871 _CheckGlobalHvParams(self.op.hvparams)
5873 # fill and remember the beparams dict
5874 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5875 self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
self.op.beparams)
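# Illustrative sketch of the parameter layering done here (values made up):
# with cluster defaults {"memory": 128, "vcpus": 1} and opcode beparams
# {"memory": 512}, objects.FillDict returns {"memory": 512, "vcpus": 1};
# the hvparams above are filled the same way before the syntax check.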
5878 #### instance parameters check
5880 # instance name verification
5881 if self.op.name_check:
5882 hostname1 = utils.GetHostInfo(self.op.instance_name)
5883 self.op.instance_name = instance_name = hostname1.name
5884 # used in CheckPrereq for ip ping check
5885 self.check_ip = hostname1.ip
else:
5887 instance_name = self.op.instance_name
5888 self.check_ip = None
5890 # this is just a preventive check, but someone might still add this
5891 # instance in the meantime, and creation will fail at lock-add time
5892 if instance_name in self.cfg.GetInstanceList():
5893 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5894 instance_name, errors.ECODE_EXISTS)
5896 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
self.nics = []
5900 for idx, nic in enumerate(self.op.nics):
5901 nic_mode_req = nic.get("mode", None)
5902 nic_mode = nic_mode_req
5903 if nic_mode is None:
5904 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5906 # in routed mode, for the first nic, the default ip is 'auto'
5907 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
5908 default_ip_mode = constants.VALUE_AUTO
5910 default_ip_mode = constants.VALUE_NONE
5912 # ip validity checks
5913 ip = nic.get("ip", default_ip_mode)
5914 if ip is None or ip.lower() == constants.VALUE_NONE:
nic_ip = None
5916 elif ip.lower() == constants.VALUE_AUTO:
5917 if not self.op.name_check:
5918 raise errors.OpPrereqError("IP address set to auto but name checks"
5919 " have been skipped. Aborting.",
errors.ECODE_INVAL)
5921 nic_ip = hostname1.ip
else:
5923 if not utils.IsValidIP(ip):
5924 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
5925 " like a valid IP" % ip,
errors.ECODE_INVAL)
nic_ip = ip
5929 # TODO: check the ip address for uniqueness
5930 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
5931 raise errors.OpPrereqError("Routed nic mode requires an ip address",
5934 # MAC address verification
5935 mac = nic.get("mac", constants.VALUE_AUTO)
5936 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5937 mac = utils.NormalizeAndValidateMac(mac)
try:
5940 self.cfg.ReserveMAC(mac, self.proc.GetECId())
5941 except errors.ReservationError:
5942 raise errors.OpPrereqError("MAC address %s already in use"
5943 " in cluster" % mac,
5944 errors.ECODE_NOTUNIQUE)
5946 # bridge verification
5947 bridge = nic.get("bridge", None)
5948 link = nic.get("link", None)
if bridge and link:
5950 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
5951 " at the same time", errors.ECODE_INVAL)
5952 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
5953 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
errors.ECODE_INVAL)
elif bridge:
link = bridge
nicparams = {}
if nic_mode_req:
5960 nicparams[constants.NIC_MODE] = nic_mode_req
if link:
5962 nicparams[constants.NIC_LINK] = link
5964 check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
nicparams)
5966 objects.NIC.CheckParameterSyntax(check_params)
5967 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
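# Illustrative example of the normalization done in this loop (names made
# up): an opcode nic spec like {"mode": "bridged", "link": "br0",
# "mac": "auto"} ends up as an objects.NIC with a generated and reserved
# MAC, no IP, and nicparams containing NIC_MODE and NIC_LINK, after the
# merged parameters passed the CheckParameterSyntax call above.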
5969 # disk checks/pre-build
self.disks = []
5971 for disk in self.op.disks:
5972 mode = disk.get("mode", constants.DISK_RDWR)
5973 if mode not in constants.DISK_ACCESS_SET:
5974 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
5975 mode, errors.ECODE_INVAL)
5976 size = disk.get("size", None)
if size is None:
5978 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
try:
size = int(size)
5981 except (TypeError, ValueError):
5982 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
errors.ECODE_INVAL)
5984 new_disk = {"size": size, "mode": mode}
if "adopt" in disk:
5986 new_disk["adopt"] = disk["adopt"]
5987 self.disks.append(new_disk)
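# Illustrative example of the disk pre-build above (values made up): an
# opcode entry {"size": "2048"} becomes {"size": 2048, "mode":
# constants.DISK_RDWR} in self.disks, with the size coerced to an integer
# and the access mode defaulted.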
5989 # file storage checks
5990 if (self.op.file_driver and
5991 not self.op.file_driver in constants.FILE_DRIVER):
5992 raise errors.OpPrereqError("Invalid file driver name '%s'" %
5993 self.op.file_driver, errors.ECODE_INVAL)
5995 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
5996 raise errors.OpPrereqError("File storage directory path not absolute",
5999 ### Node/iallocator related checks
6000 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6001 raise errors.OpPrereqError("One and only one of iallocator and primary"
6002 " node must be given",
6005 if self.op.iallocator:
6006 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6008 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6009 nodelist = [self.op.pnode]
6010 if self.op.snode is not None:
6011 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6012 nodelist.append(self.op.snode)
6013 self.needed_locks[locking.LEVEL_NODE] = nodelist
6015 # in case of import lock the source node too
6016 if self.op.mode == constants.INSTANCE_IMPORT:
6017 src_node = getattr(self.op, "src_node", None)
6018 src_path = getattr(self.op, "src_path", None)
6020 if src_path is None:
6021 self.op.src_path = src_path = self.op.instance_name
6023 if src_node is None:
6024 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6025 self.op.src_node = None
6026 if os.path.isabs(src_path):
6027 raise errors.OpPrereqError("Importing an instance from an absolute"
6028 " path requires a source node option.",
6031 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6032 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6033 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6034 if not os.path.isabs(src_path):
6035 self.op.src_path = src_path = \
6036 utils.PathJoin(constants.EXPORT_DIR, src_path)
6038 # On import force_variant must be True, because if we forced it at
6039 # initial install, our only chance when importing it back is that it
6041 self.op.force_variant = True
6043 else: # INSTANCE_CREATE
6044 if getattr(self.op, "os_type", None) is None:
6045 raise errors.OpPrereqError("No guest OS specified",
6047 self.op.force_variant = getattr(self.op, "force_variant", False)
6049 def _RunAllocator(self):
6050 """Run the allocator based on input opcode.
6053 nics = [n.ToDict() for n in self.nics]
6054 ial = IAllocator(self.cfg, self.rpc,
6055 mode=constants.IALLOCATOR_MODE_ALLOC,
6056 name=self.op.instance_name,
6057 disk_template=self.op.disk_template,
6060 vcpus=self.be_full[constants.BE_VCPUS],
6061 mem_size=self.be_full[constants.BE_MEMORY],
6064 hypervisor=self.op.hypervisor,
6067 ial.Run(self.op.iallocator)
if not ial.success:
6070 raise errors.OpPrereqError("Can't compute nodes using"
6071 " iallocator '%s': %s" %
6072 (self.op.iallocator, ial.info),
6074 if len(ial.result) != ial.required_nodes:
6075 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6076 " of nodes (%s), required %s" %
6077 (self.op.iallocator, len(ial.result),
6078 ial.required_nodes), errors.ECODE_FAULT)
6079 self.op.pnode = ial.result[0]
6080 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6081 self.op.instance_name, self.op.iallocator,
6082 utils.CommaJoin(ial.result))
6083 if ial.required_nodes == 2:
6084 self.op.snode = ial.result[1]
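# Illustrative example (node names made up): for a DRBD8 instance the
# allocator must return two nodes, so ial.result could look like
# ["node1.example.com", "node2.example.com"], which become the primary and
# secondary selected above; a plain instance needs only the first entry.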
6086 def BuildHooksEnv(self):
6089 This runs on master, primary and secondary nodes of the instance.
6093 "ADD_MODE": self.op.mode,
6095 if self.op.mode == constants.INSTANCE_IMPORT:
6096 env["SRC_NODE"] = self.op.src_node
6097 env["SRC_PATH"] = self.op.src_path
6098 env["SRC_IMAGES"] = self.src_images
6100 env.update(_BuildInstanceHookEnv(
6101 name=self.op.instance_name,
6102 primary_node=self.op.pnode,
6103 secondary_nodes=self.secondaries,
6104 status=self.op.start,
6105 os_type=self.op.os_type,
6106 memory=self.be_full[constants.BE_MEMORY],
6107 vcpus=self.be_full[constants.BE_VCPUS],
6108 nics=_NICListToTuple(self, self.nics),
6109 disk_template=self.op.disk_template,
6110 disks=[(d["size"], d["mode"]) for d in self.disks],
6113 hypervisor_name=self.op.hypervisor,
6116 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6121 def CheckPrereq(self):
6122 """Check prerequisites.
6125 if (not self.cfg.GetVGName() and
6126 self.op.disk_template not in constants.DTS_NOT_LVM):
6127 raise errors.OpPrereqError("Cluster does not support lvm-based"
6128 " instances", errors.ECODE_STATE)
6130 if self.op.mode == constants.INSTANCE_IMPORT:
6131 src_node = self.op.src_node
6132 src_path = self.op.src_path
6134 if src_node is None:
6135 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6136 exp_list = self.rpc.call_export_list(locked_nodes)
6138 for node in exp_list:
6139 if exp_list[node].fail_msg:
6141 if src_path in exp_list[node].payload:
6143 self.op.src_node = src_node = node
6144 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6148 raise errors.OpPrereqError("No export found for relative path %s" %
6149 src_path, errors.ECODE_INVAL)
6151 _CheckNodeOnline(self, src_node)
6152 result = self.rpc.call_export_info(src_node, src_path)
6153 result.Raise("No export or invalid export found in dir %s" % src_path)
6155 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6156 if not export_info.has_section(constants.INISECT_EXP):
6157 raise errors.ProgrammerError("Corrupted export config",
6158 errors.ECODE_ENVIRON)
6160 ei_version = export_info.get(constants.INISECT_EXP, 'version')
6161 if (int(ei_version) != constants.EXPORT_VERSION):
6162 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6163 (ei_version, constants.EXPORT_VERSION),
6164 errors.ECODE_ENVIRON)
6166 # Check that the new instance doesn't have less disks than the export
6167 instance_disks = len(self.disks)
6168 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6169 if instance_disks < export_disks:
6170 raise errors.OpPrereqError("Not enough disks to import."
6171 " (instance: %d, export: %d)" %
6172 (instance_disks, export_disks),
6175 self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
6177 for idx in range(export_disks):
6178 option = 'disk%d_dump' % idx
6179 if export_info.has_option(constants.INISECT_INS, option):
6180 # FIXME: are the old os-es, disk sizes, etc. useful?
6181 export_name = export_info.get(constants.INISECT_INS, option)
6182 image = utils.PathJoin(src_path, export_name)
6183 disk_images.append(image)
6185 disk_images.append(False)
6187 self.src_images = disk_images
6189 old_name = export_info.get(constants.INISECT_INS, 'name')
6190 # FIXME: int() here could throw a ValueError on broken exports
6191 exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
6192 if self.op.instance_name == old_name:
6193 for idx, nic in enumerate(self.nics):
6194 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6195 nic_mac_ini = 'nic%d_mac' % idx
6196 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6198 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6200 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6201 if self.op.ip_check:
6202 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6203 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6204 (self.check_ip, self.op.instance_name),
6205 errors.ECODE_NOTUNIQUE)
6207 #### mac address generation
6208 # By generating here the mac address both the allocator and the hooks get
6209 # the real final mac address rather than the 'auto' or 'generate' value.
6210 # There is a race condition between the generation and the instance object
6211 # creation, which means that we know the mac is valid now, but we're not
6212 # sure it will be when we actually add the instance. If things go bad
6213 # adding the instance will abort because of a duplicate mac, and the
6214 # creation job will fail.
6215 for nic in self.nics:
6216 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6217 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6221 if self.op.iallocator is not None:
6222 self._RunAllocator()
6224 #### node related checks
6226 # check primary node
6227 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6228 assert self.pnode is not None, \
6229 "Cannot retrieve locked node %s" % self.op.pnode
if pnode.offline:
6231 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6232 pnode.name, errors.ECODE_STATE)
if pnode.drained:
6234 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6235 pnode.name, errors.ECODE_STATE)
6237 self.secondaries = []
6239 # mirror node verification
6240 if self.op.disk_template in constants.DTS_NET_MIRROR:
6241 if self.op.snode is None:
6242 raise errors.OpPrereqError("The networked disk templates need"
6243 " a mirror node", errors.ECODE_INVAL)
6244 if self.op.snode == pnode.name:
6245 raise errors.OpPrereqError("The secondary node cannot be the"
6246 " primary node.", errors.ECODE_INVAL)
6247 _CheckNodeOnline(self, self.op.snode)
6248 _CheckNodeNotDrained(self, self.op.snode)
6249 self.secondaries.append(self.op.snode)
6251 nodenames = [pnode.name] + self.secondaries
6253 req_size = _ComputeDiskSize(self.op.disk_template,
6256 # Check lv size requirements, if not adopting
6257 if req_size is not None and not self.adopt_disks:
6258 nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
self.op.hypervisor)
6260 for node in nodenames:
6261 info = nodeinfo[node]
6262 info.Raise("Cannot get current information from node %s" % node)
info = info.payload
6264 vg_free = info.get('vg_free', None)
6265 if not isinstance(vg_free, int):
6266 raise errors.OpPrereqError("Can't compute free disk space on"
6267 " node %s" % node, errors.ECODE_ENVIRON)
6268 if req_size > vg_free:
6269 raise errors.OpPrereqError("Not enough disk space on target node %s."
6270 " %d MB available, %d MB required" %
6271 (node, vg_free, req_size),
6274 if self.adopt_disks: # instead, we must check the adoption data
6275 all_lvs = set([i["adopt"] for i in self.disks])
6276 if len(all_lvs) != len(self.disks):
6277 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6279 for lv_name in all_lvs:
6281 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6282 except errors.ReservationError:
6283 raise errors.OpPrereqError("LV named %s used by another instance" %
6284 lv_name, errors.ECODE_NOTUNIQUE)
6286 node_lvs = self.rpc.call_lv_list([pnode.name],
6287 self.cfg.GetVGName())[pnode.name]
6288 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6289 node_lvs = node_lvs.payload
6290 delta = all_lvs.difference(node_lvs.keys())
if delta:
6292 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6293 utils.CommaJoin(delta),
errors.ECODE_INVAL)
6295 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
if online_lvs:
6297 raise errors.OpPrereqError("Online logical volumes found, cannot"
6298 " adopt: %s" % utils.CommaJoin(online_lvs),
6300 # update the size of disk based on what is found
6301 for dsk in self.disks:
6302 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6304 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6307 result = self.rpc.call_os_get(pnode.name, self.op.os_type)
6308 result.Raise("OS '%s' not in supported os list for primary node %s" %
6309 (self.op.os_type, pnode.name),
6310 prereq=True, ecode=errors.ECODE_INVAL)
6311 if not self.op.force_variant:
6312 _CheckOSVariant(result.payload, self.op.os_type)
6314 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6316 # memory check on primary node
6318 _CheckNodeFreeMemory(self, self.pnode.name,
6319 "creating instance %s" % self.op.instance_name,
6320 self.be_full[constants.BE_MEMORY],
6323 self.dry_run_result = list(nodenames)
6325 def Exec(self, feedback_fn):
6326 """Create and add the instance to the cluster.
6329 instance = self.op.instance_name
6330 pnode_name = self.pnode.name
6332 ht_kind = self.op.hypervisor
6333 if ht_kind in constants.HTS_REQ_PORT:
6334 network_port = self.cfg.AllocatePort()
else:
network_port = None
6338 ##if self.op.vnc_bind_address is None:
6339 ## self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
6341 # this is needed because os.path.join does not accept None arguments
6342 if self.op.file_storage_dir is None:
6343 string_file_storage_dir = ""
else:
6345 string_file_storage_dir = self.op.file_storage_dir
6347 # build the full file storage dir path
6348 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6349 string_file_storage_dir, instance)
6352 disks = _GenerateDiskTemplate(self,
6353 self.op.disk_template,
6354 instance, pnode_name,
6358 self.op.file_driver,
6361 iobj = objects.Instance(name=instance, os=self.op.os_type,
6362 primary_node=pnode_name,
6363 nics=self.nics, disks=disks,
6364 disk_template=self.op.disk_template,
admin_up=False,
6366 network_port=network_port,
6367 beparams=self.op.beparams,
6368 hvparams=self.op.hvparams,
6369 hypervisor=self.op.hypervisor,
)
6372 if self.adopt_disks:
6373 # rename LVs to the newly-generated names; we need to construct
6374 # 'fake' LV disks with the old data, plus the new unique_id
6375 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
rename_to = []
6377 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
6378 rename_to.append(t_dsk.logical_id)
6379 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6380 self.cfg.SetDiskID(t_dsk, pnode_name)
6381 result = self.rpc.call_blockdev_rename(pnode_name,
6382 zip(tmp_disks, rename_to))
6383 result.Raise("Failed to rename adopted LVs")
6385 feedback_fn("* creating instance disks...")
try:
6387 _CreateDisks(self, iobj)
6388 except errors.OpExecError:
6389 self.LogWarning("Device creation failed, reverting...")
try:
6391 _RemoveDisks(self, iobj)
finally:
6393 self.cfg.ReleaseDRBDMinors(instance)
raise
6396 feedback_fn("adding instance %s to cluster config" % instance)
6398 self.cfg.AddInstance(iobj, self.proc.GetECId())
6400 # Declare that we don't want to remove the instance lock anymore, as we've
6401 # added the instance to the config
6402 del self.remove_locks[locking.LEVEL_INSTANCE]
6403 # Unlock all the nodes
6404 if self.op.mode == constants.INSTANCE_IMPORT:
6405 nodes_keep = [self.op.src_node]
6406 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6407 if node != self.op.src_node]
6408 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6409 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
else:
6411 self.context.glm.release(locking.LEVEL_NODE)
6412 del self.acquired_locks[locking.LEVEL_NODE]
6414 if self.op.wait_for_sync:
6415 disk_abort = not _WaitForSync(self, iobj)
6416 elif iobj.disk_template in constants.DTS_NET_MIRROR:
6417 # make sure the disks are not degraded (still sync-ing is ok)
6419 feedback_fn("* checking mirrors status")
6420 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
else:
disk_abort = False
if disk_abort:
6425 _RemoveDisks(self, iobj)
6426 self.cfg.RemoveInstance(iobj.name)
6427 # Make sure the instance lock gets removed
6428 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6429 raise errors.OpExecError("There are some degraded disks for"
6432 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6433 if self.op.mode == constants.INSTANCE_CREATE:
6434 feedback_fn("* running the instance OS create scripts...")
6435 # FIXME: pass debug option from opcode to backend
6436 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6437 self.op.debug_level)
6438 result.Raise("Could not add os for instance %s"
6439 " on node %s" % (instance, pnode_name))
6441 elif self.op.mode == constants.INSTANCE_IMPORT:
6442 feedback_fn("* running the instance OS import scripts...")
6443 src_node = self.op.src_node
6444 src_images = self.src_images
6445 cluster_name = self.cfg.GetClusterName()
6446 # FIXME: pass debug option from opcode to backend
6447 import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6448 src_node, src_images,
6450 self.op.debug_level)
6451 msg = import_result.fail_msg
if msg:
6453 self.LogWarning("Error while importing the disk images for instance"
6454 " %s on node %s: %s" % (instance, pnode_name, msg))
else:
6456 # also checked in the prereq part
6457 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
% self.op.mode)
if self.op.start:
6461 iobj.admin_up = True
6462 self.cfg.Update(iobj, feedback_fn)
6463 logging.info("Starting instance %s on node %s", instance, pnode_name)
6464 feedback_fn("* starting instance...")
6465 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6466 result.Raise("Could not start instance")
6468 return list(iobj.all_nodes)
6471 class LUConnectConsole(NoHooksLU):
6472 """Connect to an instance's console.
6474 This is somewhat special in that it returns the command line that
6475 you need to run on the master node in order to connect to the
6479 _OP_REQP = ["instance_name"]
6482 def ExpandNames(self):
6483 self._ExpandAndLockInstance()
6485 def CheckPrereq(self):
6486 """Check prerequisites.
6488 This checks that the instance is in the cluster.
6491 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6492 assert self.instance is not None, \
6493 "Cannot retrieve locked instance %s" % self.op.instance_name
6494 _CheckNodeOnline(self, self.instance.primary_node)
6496 def Exec(self, feedback_fn):
6497 """Connect to the console of an instance
6500 instance = self.instance
6501 node = instance.primary_node
6503 node_insts = self.rpc.call_instance_list([node],
6504 [instance.hypervisor])[node]
6505 node_insts.Raise("Can't get node information from %s" % node)
6507 if instance.name not in node_insts.payload:
6508 raise errors.OpExecError("Instance %s is not running." % instance.name)
6510 logging.debug("Connecting to console of %s on %s", instance.name, node)
6512 hyper = hypervisor.GetHypervisor(instance.hypervisor)
6513 cluster = self.cfg.GetClusterInfo()
6514 # beparams and hvparams are passed separately, to avoid editing the
6515 # instance and then saving the defaults in the instance itself.
6516 hvparams = cluster.FillHV(instance)
6517 beparams = cluster.FillBE(instance)
6518 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6521 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
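# Illustrative example of the returned value (host names and the hypervisor
# console command are made up): for a Xen instance this is typically an ssh
# invocation built by SshRunner, roughly
#   ssh -t root@node1.example.com "xm console instance1.example.com"
# which the calling client then executes on the master node.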
6524 class LUReplaceDisks(LogicalUnit):
6525 """Replace the disks of an instance.
6528 HPATH = "mirrors-replace"
6529 HTYPE = constants.HTYPE_INSTANCE
6530 _OP_REQP = ["instance_name", "mode", "disks"]
6533 def CheckArguments(self):
6534 if not hasattr(self.op, "remote_node"):
6535 self.op.remote_node = None
6536 if not hasattr(self.op, "iallocator"):
6537 self.op.iallocator = None
6538 if not hasattr(self.op, "early_release"):
6539 self.op.early_release = False
6541 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6544 def ExpandNames(self):
6545 self._ExpandAndLockInstance()
6547 if self.op.iallocator is not None:
6548 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6550 elif self.op.remote_node is not None:
6551 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6552 self.op.remote_node = remote_node
6554 # Warning: do not remove the locking of the new secondary here
6555 # unless DRBD8.AddChildren is changed to work in parallel;
6556 # currently it doesn't since parallel invocations of
6557 # FindUnusedMinor will conflict
6558 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6559 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6562 self.needed_locks[locking.LEVEL_NODE] = []
6563 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6565 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6566 self.op.iallocator, self.op.remote_node,
6567 self.op.disks, False, self.op.early_release)
6569 self.tasklets = [self.replacer]
6571 def DeclareLocks(self, level):
6572 # If we're not already locking all nodes in the set we have to declare the
6573 # instance's primary/secondary nodes.
6574 if (level == locking.LEVEL_NODE and
6575 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6576 self._LockInstancesNodes()
6578 def BuildHooksEnv(self):
6581 This runs on the master, the primary and all the secondaries.
6584 instance = self.replacer.instance
env = {
6586 "MODE": self.op.mode,
6587 "NEW_SECONDARY": self.op.remote_node,
6588 "OLD_SECONDARY": instance.secondary_nodes[0],
}
6590 env.update(_BuildInstanceHookEnvByObject(self, instance))
nl = [
6592 self.cfg.GetMasterNode(),
6593 instance.primary_node,
]
6595 if self.op.remote_node is not None:
6596 nl.append(self.op.remote_node)
return env, nl, nl
6600 class LUEvacuateNode(LogicalUnit):
6601 """Relocate the secondary instances from a node.
6604 HPATH = "node-evacuate"
6605 HTYPE = constants.HTYPE_NODE
6606 _OP_REQP = ["node_name"]
6609 def CheckArguments(self):
6610 if not hasattr(self.op, "remote_node"):
6611 self.op.remote_node = None
6612 if not hasattr(self.op, "iallocator"):
6613 self.op.iallocator = None
6614 if not hasattr(self.op, "early_release"):
6615 self.op.early_release = False
6617 TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6618 self.op.remote_node,
6621 def ExpandNames(self):
6622 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6624 self.needed_locks = {}
6626 # Declare node locks
6627 if self.op.iallocator is not None:
6628 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6630 elif self.op.remote_node is not None:
6631 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6633 # Warning: do not remove the locking of the new secondary here
6634 # unless DRBD8.AddChildren is changed to work in parallel;
6635 # currently it doesn't since parallel invocations of
6636 # FindUnusedMinor will conflict
6637 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
6638 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
else:
6641 raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
6643 # Create tasklets for replacing disks for all secondary instances on this
# node
names = []
tasklets = []
6648 for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
6649 logging.debug("Replacing disks for instance %s", inst.name)
6650 names.append(inst.name)
6652 replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
6653 self.op.iallocator, self.op.remote_node, [],
6654 True, self.op.early_release)
6655 tasklets.append(replacer)
6657 self.tasklets = tasklets
6658 self.instance_names = names
6660 # Declare instance locks
6661 self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
6663 def DeclareLocks(self, level):
6664 # If we're not already locking all nodes in the set we have to declare the
6665 # instance's primary/secondary nodes.
6666 if (level == locking.LEVEL_NODE and
6667 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6668 self._LockInstancesNodes()
6670 def BuildHooksEnv(self):
6673 This runs on the master, the primary and all the secondaries.
env = {
6677 "NODE_NAME": self.op.node_name,
}
6680 nl = [self.cfg.GetMasterNode()]
6682 if self.op.remote_node is not None:
6683 env["NEW_SECONDARY"] = self.op.remote_node
6684 nl.append(self.op.remote_node)
6686 return (env, nl, nl)
6689 class TLReplaceDisks(Tasklet):
6690 """Replaces disks for an instance.
6692 Note: Locking is not within the scope of this class.
6695 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
6696 disks, delay_iallocator, early_release):
6697 """Initializes this class.
6700 Tasklet.__init__(self, lu)
# Parameters
6703 self.instance_name = instance_name
self.mode = mode
6705 self.iallocator_name = iallocator_name
6706 self.remote_node = remote_node
self.disks = disks
6708 self.delay_iallocator = delay_iallocator
6709 self.early_release = early_release
6712 self.instance = None
6713 self.new_node = None
6714 self.target_node = None
6715 self.other_node = None
6716 self.remote_node_info = None
6717 self.node_secondary_ip = None
@staticmethod
6720 def CheckArguments(mode, remote_node, iallocator):
6721 """Helper function for users of this class.
6724 # check for valid parameter combination
6725 if mode == constants.REPLACE_DISK_CHG:
6726 if remote_node is None and iallocator is None:
6727 raise errors.OpPrereqError("When changing the secondary either an"
6728 " iallocator script must be used or the"
6729 " new node given", errors.ECODE_INVAL)
6731 if remote_node is not None and iallocator is not None:
6732 raise errors.OpPrereqError("Give either the iallocator or the new"
6733 " secondary, not both", errors.ECODE_INVAL)
6735 elif remote_node is not None or iallocator is not None:
6736 # Not replacing the secondary
6737 raise errors.OpPrereqError("The iallocator and new node options can"
6738 " only be used when changing the"
6739 " secondary node", errors.ECODE_INVAL)
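# Illustrative summary of the combinations accepted above:
#   mode=REPLACE_DISK_CHG + remote_node       -> ok (explicit new secondary)
#   mode=REPLACE_DISK_CHG + iallocator        -> ok (allocator picks the node)
#   mode=REPLACE_DISK_CHG + both or neither   -> rejected
#   any other mode + remote_node/iallocator   -> rejected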
@staticmethod
6742 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
6743 """Compute a new secondary node using an IAllocator.
6746 ial = IAllocator(lu.cfg, lu.rpc,
6747 mode=constants.IALLOCATOR_MODE_RELOC,
name=instance_name,
6749 relocate_from=relocate_from)
6751 ial.Run(iallocator_name)
if not ial.success:
6754 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
6755 " %s" % (iallocator_name, ial.info),
6758 if len(ial.result) != ial.required_nodes:
6759 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6760 " of nodes (%s), required %s" %
6762 len(ial.result), ial.required_nodes),
6765 remote_node_name = ial.result[0]
6767 lu.LogInfo("Selected new secondary for instance '%s': %s",
6768 instance_name, remote_node_name)
6770 return remote_node_name
6772 def _FindFaultyDisks(self, node_name):
6773 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
node_name)
6776 def CheckPrereq(self):
6777 """Check prerequisites.
6779 This checks that the instance is in the cluster.
6782 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
6783 assert instance is not None, \
6784 "Cannot retrieve locked instance %s" % self.instance_name
6786 if instance.disk_template != constants.DT_DRBD8:
6787 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
6788 " instances", errors.ECODE_INVAL)
6790 if len(instance.secondary_nodes) != 1:
6791 raise errors.OpPrereqError("The instance has a strange layout,"
6792 " expected one secondary but found %d" %
6793 len(instance.secondary_nodes),
6796 if not self.delay_iallocator:
6797 self._CheckPrereq2()
6799 def _CheckPrereq2(self):
6800 """Check prerequisites, second part.
6802 This function should always be part of CheckPrereq. It was separated and is
6803 now called from Exec because during node evacuation iallocator was only
6804 called with an unmodified cluster model, not taking planned changes into
6808 instance = self.instance
6809 secondary_node = instance.secondary_nodes[0]
6811 if self.iallocator_name is None:
6812 remote_node = self.remote_node
else:
6814 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
6815 instance.name, instance.secondary_nodes)
6817 if remote_node is not None:
6818 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
6819 assert self.remote_node_info is not None, \
6820 "Cannot retrieve locked node %s" % remote_node
else:
6822 self.remote_node_info = None
6824 if remote_node == self.instance.primary_node:
6825 raise errors.OpPrereqError("The specified node is the primary node of"
6826 " the instance.", errors.ECODE_INVAL)
6828 if remote_node == secondary_node:
6829 raise errors.OpPrereqError("The specified node is already the"
6830 " secondary node of the instance.",
6833 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
6834 constants.REPLACE_DISK_CHG):
6835 raise errors.OpPrereqError("Cannot specify disks to be replaced",
6838 if self.mode == constants.REPLACE_DISK_AUTO:
6839 faulty_primary = self._FindFaultyDisks(instance.primary_node)
6840 faulty_secondary = self._FindFaultyDisks(secondary_node)
6842 if faulty_primary and faulty_secondary:
6843 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
6844 " one node and can not be repaired"
6845 " automatically" % self.instance_name,
errors.ECODE_STATE)
if faulty_primary:
6849 self.disks = faulty_primary
6850 self.target_node = instance.primary_node
6851 self.other_node = secondary_node
6852 check_nodes = [self.target_node, self.other_node]
6853 elif faulty_secondary:
6854 self.disks = faulty_secondary
6855 self.target_node = secondary_node
6856 self.other_node = instance.primary_node
6857 check_nodes = [self.target_node, self.other_node]
else:
self.disks = []
check_nodes = []
else:
6863 # Non-automatic modes
6864 if self.mode == constants.REPLACE_DISK_PRI:
6865 self.target_node = instance.primary_node
6866 self.other_node = secondary_node
6867 check_nodes = [self.target_node, self.other_node]
6869 elif self.mode == constants.REPLACE_DISK_SEC:
6870 self.target_node = secondary_node
6871 self.other_node = instance.primary_node
6872 check_nodes = [self.target_node, self.other_node]
6874 elif self.mode == constants.REPLACE_DISK_CHG:
6875 self.new_node = remote_node
6876 self.other_node = instance.primary_node
6877 self.target_node = secondary_node
6878 check_nodes = [self.new_node, self.other_node]
6880 _CheckNodeNotDrained(self.lu, remote_node)
6882 old_node_info = self.cfg.GetNodeInfo(secondary_node)
6883 assert old_node_info is not None
6884 if old_node_info.offline and not self.early_release:
6885 # doesn't make sense to delay the release
6886 self.early_release = True
6887 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
6888 " early-release mode", secondary_node)
6891 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
self.mode)
6894 # If not specified all disks should be replaced
if not self.disks:
6896 self.disks = range(len(self.instance.disks))
6898 for node in check_nodes:
6899 _CheckNodeOnline(self.lu, node)
6901 # Check whether disks are valid
6902 for disk_idx in self.disks:
6903 instance.FindDisk(disk_idx)
6905 # Get secondary node IP addresses
node_2nd_ip = {}
6908 for node_name in [self.target_node, self.other_node, self.new_node]:
6909 if node_name is not None:
6910 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
6912 self.node_secondary_ip = node_2nd_ip
6914 def Exec(self, feedback_fn):
6915 """Execute disk replacement.
6917 This dispatches the disk replacement to the appropriate handler.
6920 if self.delay_iallocator:
6921 self._CheckPrereq2()
if not self.disks:
6924 feedback_fn("No disks need replacement")
return
6927 feedback_fn("Replacing disk(s) %s for %s" %
6928 (utils.CommaJoin(self.disks), self.instance.name))
6930 activate_disks = (not self.instance.admin_up)
6932 # Activate the instance disks if we're replacing them on a down instance
if activate_disks:
6934 _StartInstanceDisks(self.lu, self.instance, True)
try:
6937 # Should we replace the secondary node?
6938 if self.new_node is not None:
6939 fn = self._ExecDrbd8Secondary
else:
6941 fn = self._ExecDrbd8DiskOnly
6943 return fn(feedback_fn)
finally:
6946 # Deactivate the instance disks if we're replacing them on a
# down instance
if activate_disks:
6949 _SafeShutdownInstanceDisks(self.lu, self.instance)
6951 def _CheckVolumeGroup(self, nodes):
6952 self.lu.LogInfo("Checking volume groups")
6954 vgname = self.cfg.GetVGName()
6956 # Make sure volume group exists on all involved nodes
6957 results = self.rpc.call_vg_list(nodes)
if not results:
6959 raise errors.OpExecError("Can't list volume groups on the nodes")
for node in nodes:
res = results[node]
6963 res.Raise("Error checking node %s" % node)
6964 if vgname not in res.payload:
6965 raise errors.OpExecError("Volume group '%s' not found on node %s" %
(vgname, node))
6968 def _CheckDisksExistence(self, nodes):
6969 # Check disk existence
6970 for idx, dev in enumerate(self.instance.disks):
6971 if idx not in self.disks:
continue
for node in nodes:
6975 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
6976 self.cfg.SetDiskID(dev, node)
6978 result = self.rpc.call_blockdev_find(node, dev)
6980 msg = result.fail_msg
6981 if msg or not result.payload:
if not msg:
6983 msg = "disk not found"
6984 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
(idx, node, msg))
6987 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
6988 for idx, dev in enumerate(self.instance.disks):
6989 if idx not in self.disks:
continue
6992 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
(idx, node_name))
6995 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
ldisk=ldisk):
6997 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
6998 " replace disks for instance %s" %
6999 (node_name, self.instance.name))
7001 def _CreateNewStorage(self, node_name):
7002 vgname = self.cfg.GetVGName()
iv_names = {}
7005 for idx, dev in enumerate(self.instance.disks):
7006 if idx not in self.disks:
continue
7009 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7011 self.cfg.SetDiskID(dev, node_name)
7013 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7014 names = _GenerateUniqueNames(self.lu, lv_names)
7016 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7017 logical_id=(vgname, names[0]))
7018 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7019 logical_id=(vgname, names[1]))
7021 new_lvs = [lv_data, lv_meta]
7022 old_lvs = dev.children
7023 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7025 # we pass force_create=True to force the LVM creation
7026 for new_lv in new_lvs:
7027 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7028 _GetInstanceInfoText(self.instance), False)
return iv_names
7032 def _CheckDevices(self, node_name, iv_names):
7033 for name, (dev, _, _) in iv_names.iteritems():
7034 self.cfg.SetDiskID(dev, node_name)
7036 result = self.rpc.call_blockdev_find(node_name, dev)
7038 msg = result.fail_msg
7039 if msg or not result.payload:
if not msg:
7041 msg = "disk not found"
7042 raise errors.OpExecError("Can't find DRBD device %s: %s" %
(name, msg))
7045 if result.payload.is_degraded:
7046 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7048 def _RemoveOldStorage(self, node_name, iv_names):
7049 for name, (_, old_lvs, _) in iv_names.iteritems():
7050 self.lu.LogInfo("Remove logical volumes for %s" % name)
for lv in old_lvs:
7053 self.cfg.SetDiskID(lv, node_name)
7055 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
if msg:
7057 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7058 hint="remove unused LVs manually")
7060 def _ReleaseNodeLock(self, node_name):
7061 """Releases the lock for a given node."""
7062 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7064 def _ExecDrbd8DiskOnly(self, feedback_fn):
7065 """Replace a disk on the primary or secondary for DRBD 8.
7067 The algorithm for replace is quite complicated:
7069 1. for each disk to be replaced:
7071 1. create new LVs on the target node with unique names
7072 1. detach old LVs from the drbd device
7073 1. rename old LVs to name_replaced.<time_t>
7074 1. rename new LVs to old LVs
7075 1. attach the new LVs (with the old names now) to the drbd device
7077 1. wait for sync across all devices
7079 1. for each modified disk:
7081 1. remove old LVs (which have the name name_replaces.<time_t>)
7083 Failures are not very well handled.
steps_total = 6
7088 # Step: check device activation
7089 self.lu.LogStep(1, steps_total, "Check device existence")
7090 self._CheckDisksExistence([self.other_node, self.target_node])
7091 self._CheckVolumeGroup([self.target_node, self.other_node])
7093 # Step: check other node consistency
7094 self.lu.LogStep(2, steps_total, "Check peer consistency")
7095 self._CheckDisksConsistency(self.other_node,
7096 self.other_node == self.instance.primary_node,
7099 # Step: create new storage
7100 self.lu.LogStep(3, steps_total, "Allocate new storage")
7101 iv_names = self._CreateNewStorage(self.target_node)
7103 # Step: for each lv, detach+rename*2+attach
7104 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7105 for dev, old_lvs, new_lvs in iv_names.itervalues():
7106 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7108 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
old_lvs)
7110 result.Raise("Can't detach drbd from local storage on node"
7111 " %s for device %s" % (self.target_node, dev.iv_name))
7113 #cfg.Update(instance)
7115 # ok, we created the new LVs, so now we know we have the needed
7116 # storage; as such, we proceed on the target node to rename
7117 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7118 # using the assumption that logical_id == physical_id (which in
7119 # turn is the unique_id on that node)
7121 # FIXME(iustin): use a better name for the replaced LVs
7122 temp_suffix = int(time.time())
7123 ren_fn = lambda d, suff: (d.physical_id[0],
7124 d.physical_id[1] + "_replaced-%s" % suff)
7126 # Build the rename list based on what LVs exist on the node
7127 rename_old_to_new = []
7128 for to_ren in old_lvs:
7129 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7130 if not result.fail_msg and result.payload:
7132 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7134 self.lu.LogInfo("Renaming the old LVs on the target node")
7135 result = self.rpc.call_blockdev_rename(self.target_node,
rename_old_to_new)
7137 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7139 # Now we rename the new LVs to the old LVs
7140 self.lu.LogInfo("Renaming the new LVs on the target node")
7141 rename_new_to_old = [(new, old.physical_id)
7142 for old, new in zip(old_lvs, new_lvs)]
7143 result = self.rpc.call_blockdev_rename(self.target_node,
rename_new_to_old)
7145 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7147 for old, new in zip(old_lvs, new_lvs):
7148 new.logical_id = old.logical_id
7149 self.cfg.SetDiskID(new, self.target_node)
7151 for disk in old_lvs:
7152 disk.logical_id = ren_fn(disk, temp_suffix)
7153 self.cfg.SetDiskID(disk, self.target_node)
7155 # Now that the new lvs have the old name, we can add them to the device
7156 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7157 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
new_lvs)
7159 msg = result.fail_msg
if msg:
7161 for new_lv in new_lvs:
7162 msg2 = self.rpc.call_blockdev_remove(self.target_node,
new_lv).fail_msg
if msg2:
7165 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7166 hint=("cleanup manually the unused logical"
" volumes"))
7168 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7170 dev.children = new_lvs
7172 self.cfg.Update(self.instance, feedback_fn)
cstep = 5
7175 if self.early_release:
7176 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7178 self._RemoveOldStorage(self.target_node, iv_names)
7179 # WARNING: we release both node locks here, do not do other RPCs
7180 # than WaitForSync to the primary node
7181 self._ReleaseNodeLock([self.target_node, self.other_node])
7184 # This can fail as the old devices are degraded and _WaitForSync
7185 # does a combined result over all disks, so we don't check its return value
7186 self.lu.LogStep(cstep, steps_total, "Sync devices")
7188 _WaitForSync(self.lu, self.instance)
7190 # Check all devices manually
7191 self._CheckDevices(self.instance.primary_node, iv_names)
7193 # Step: remove old storage
7194 if not self.early_release:
7195 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7197 self._RemoveOldStorage(self.target_node, iv_names)
7199 def _ExecDrbd8Secondary(self, feedback_fn):
7200 """Replace the secondary node for DRBD 8.
7202 The algorithm for replace is quite complicated:
7203 - for all disks of the instance:
7204 - create new LVs on the new node with same names
7205 - shutdown the drbd device on the old secondary
7206 - disconnect the drbd network on the primary
7207 - create the drbd device on the new secondary
7208 - network attach the drbd on the primary, using an artifice:
7209 the drbd code for Attach() will connect to the network if it
7210 finds a device which is connected to the good local disks but
7212 - wait for sync across all devices
7213 - remove all disks from the old secondary
7215 Failures are not very well handled.
steps_total = 6
7220 # Step: check device activation
7221 self.lu.LogStep(1, steps_total, "Check device existence")
7222 self._CheckDisksExistence([self.instance.primary_node])
7223 self._CheckVolumeGroup([self.instance.primary_node])
7225 # Step: check other node consistency
7226 self.lu.LogStep(2, steps_total, "Check peer consistency")
7227 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7229 # Step: create new storage
7230 self.lu.LogStep(3, steps_total, "Allocate new storage")
7231 for idx, dev in enumerate(self.instance.disks):
7232 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7233 (self.new_node, idx))
7234 # we pass force_create=True to force LVM creation
7235 for new_lv in dev.children:
7236 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7237 _GetInstanceInfoText(self.instance), False)
7239 # Step 4: dbrd minors and drbd setups changes
7240 # after this, we must manually remove the drbd minors on both the
7241 # error and the success paths
7242 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7243 minors = self.cfg.AllocateDRBDMinor([self.new_node
7244 for dev in self.instance.disks],
self.instance.name)
7246 logging.debug("Allocated minors %r", minors)
iv_names = {}
7249 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7250 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7251 (self.new_node, idx))
7252 # create new devices on new_node; note that we create two IDs:
7253 # one without port, so the drbd will be activated without
7254 # networking information on the new node at this stage, and one
7255 # with network, for the latter activation in step 4
7256 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7257 if self.instance.primary_node == o_node1:
p_minor = o_minor1
else:
7260 assert self.instance.primary_node == o_node2, "Three-node instance?"
p_minor = o_minor2
7263 new_alone_id = (self.instance.primary_node, self.new_node, None,
7264 p_minor, new_minor, o_secret)
7265 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7266 p_minor, new_minor, o_secret)
7268 iv_names[idx] = (dev, dev.children, new_net_id)
7269 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
new_net_id)
7271 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7272 logical_id=new_alone_id,
7273 children=dev.children,
size=dev.size)
try:
7276 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7277 _GetInstanceInfoText(self.instance), False)
7278 except errors.GenericError:
7279 self.cfg.ReleaseDRBDMinors(self.instance.name)
raise
7282 # We have new devices, shutdown the drbd on the old secondary
7283 for idx, dev in enumerate(self.instance.disks):
7284 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7285 self.cfg.SetDiskID(dev, self.target_node)
7286 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
if msg:
7288 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7289 " node: %s" % (idx, msg),
7290 hint=("Please cleanup this device manually as"
7291 " soon as possible"))
7293 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7294 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7295 self.node_secondary_ip,
7296 self.instance.disks)\
7297 [self.instance.primary_node]
7299 msg = result.fail_msg
if msg:
7301 # detaches didn't succeed (unlikely)
7302 self.cfg.ReleaseDRBDMinors(self.instance.name)
7303 raise errors.OpExecError("Can't detach the disks from the network on"
7304 " old node: %s" % (msg,))
7306 # if we managed to detach at least one, we update all the disks of
7307 # the instance to point to the new secondary
7308 self.lu.LogInfo("Updating instance configuration")
7309 for dev, _, new_logical_id in iv_names.itervalues():
7310 dev.logical_id = new_logical_id
7311 self.cfg.SetDiskID(dev, self.instance.primary_node)
7313 self.cfg.Update(self.instance, feedback_fn)
7315 # and now perform the drbd attach
7316 self.lu.LogInfo("Attaching primary drbds to new secondary"
7317 " (standalone => connected)")
7318 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
self.new_node],
7320 self.node_secondary_ip,
7321 self.instance.disks,
self.instance.name,
False)
7324 for to_node, to_result in result.items():
7325 msg = to_result.fail_msg
if msg:
7327 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
to_node, msg,
7329 hint=("please do a gnt-instance info to see the"
7330 " status of disks"))
cstep = 5
7332 if self.early_release:
7333 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7335 self._RemoveOldStorage(self.target_node, iv_names)
7336 # WARNING: we release all node locks here, do not do other RPCs
7337 # than WaitForSync to the primary node
7338 self._ReleaseNodeLock([self.instance.primary_node,
self.target_node,
self.new_node])
7343 # This can fail as the old devices are degraded and _WaitForSync
7344 # does a combined result over all disks, so we don't check its return value
7345 self.lu.LogStep(cstep, steps_total, "Sync devices")
7347 _WaitForSync(self.lu, self.instance)
7349 # Check all devices manually
7350 self._CheckDevices(self.instance.primary_node, iv_names)
7352 # Step: remove old storage
7353 if not self.early_release:
7354 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7355 self._RemoveOldStorage(self.target_node, iv_names)
7358 class LURepairNodeStorage(NoHooksLU):
7359 """Repairs the volume group on a node.
7362 _OP_REQP = ["node_name"]
7365 def CheckArguments(self):
7366 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7368 def ExpandNames(self):
7369 self.needed_locks = {
7370 locking.LEVEL_NODE: [self.op.node_name],
7373 def _CheckFaultyDisks(self, instance, node_name):
7374 """Ensure faulty disks abort the opcode or at least warn."""
7376 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7378 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7379 " node '%s'" % (instance.name, node_name),
7381 except errors.OpPrereqError, err:
7382 if self.op.ignore_consistency:
7383 self.proc.LogWarning(str(err.args[0]))
else:
raise
7387 def CheckPrereq(self):
7388 """Check prerequisites.
7391 storage_type = self.op.storage_type
7393 if (constants.SO_FIX_CONSISTENCY not in
7394 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7395 raise errors.OpPrereqError("Storage units of type '%s' can not be"
7396 " repaired" % storage_type,
7399 # Check whether any instance on this node has faulty disks
7400 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7401 if not inst.admin_up:
continue
7403 check_nodes = set(inst.all_nodes)
7404 check_nodes.discard(self.op.node_name)
7405 for inst_node_name in check_nodes:
7406 self._CheckFaultyDisks(inst, inst_node_name)
7408 def Exec(self, feedback_fn):
7409 feedback_fn("Repairing storage unit '%s' on %s ..." %
7410 (self.op.name, self.op.node_name))
7412 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7413 result = self.rpc.call_storage_execute(self.op.node_name,
7414 self.op.storage_type, st_args,
7416 constants.SO_FIX_CONSISTENCY)
7417 result.Raise("Failed to repair storage unit '%s' on %s" %
7418 (self.op.name, self.op.node_name))
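# Illustrative usage (node and volume group names made up): this LU normally
# backs a command along the lines of
#   gnt-node repair-storage node1.example.com lvm-vg xenvg
# which asks the node to run the storage-type specific consistency fix on
# the named storage unit.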
7421 class LUNodeEvacuationStrategy(NoHooksLU):
7422 """Computes the node evacuation strategy.
7425 _OP_REQP = ["nodes"]
7428 def CheckArguments(self):
7429 if not hasattr(self.op, "remote_node"):
7430 self.op.remote_node = None
7431 if not hasattr(self.op, "iallocator"):
7432 self.op.iallocator = None
7433 if self.op.remote_node is not None and self.op.iallocator is not None:
7434 raise errors.OpPrereqError("Give either the iallocator or the new"
7435 " secondary, not both", errors.ECODE_INVAL)
7437 def ExpandNames(self):
7438 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7439 self.needed_locks = locks = {}
7440 if self.op.remote_node is None:
7441 locks[locking.LEVEL_NODE] = locking.ALL_SET
7443 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7444 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7446 def CheckPrereq(self):
7449 def Exec(self, feedback_fn):
7450 if self.op.remote_node is not None:
instances = []
7452 for node in self.op.nodes:
7453 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
result = []
for i in instances:
7456 if i.primary_node == self.op.remote_node:
7457 raise errors.OpPrereqError("Node %s is the primary node of"
7458 " instance %s, cannot use it as"
7460 (self.op.remote_node, i.name),
7462 result.append([i.name, self.op.remote_node])
else:
7464 ial = IAllocator(self.cfg, self.rpc,
7465 mode=constants.IALLOCATOR_MODE_MEVAC,
7466 evac_nodes=self.op.nodes)
7467 ial.Run(self.op.iallocator, validate=True)
if not ial.success:
7469 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
errors.ECODE_NORES)
result = ial.result
return result
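# Illustrative example of the result shape (instance and node names made
# up): in the explicit remote_node branch the result is a list of
# [instance_name, new_secondary] pairs, e.g.
#   [["inst1.example.com", "node3.example.com"],
#    ["inst2.example.com", "node3.example.com"]]
# while the iallocator branch returns the allocator's own relocation list.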
7475 class LUGrowDisk(LogicalUnit):
7476 """Grow a disk of an instance.
HPATH = "disk-grow"
7480 HTYPE = constants.HTYPE_INSTANCE
7481 _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7484 def ExpandNames(self):
7485 self._ExpandAndLockInstance()
7486 self.needed_locks[locking.LEVEL_NODE] = []
7487 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7489 def DeclareLocks(self, level):
7490 if level == locking.LEVEL_NODE:
7491 self._LockInstancesNodes()
7493 def BuildHooksEnv(self):
7496 This runs on the master, the primary and all the secondaries.
env = {
7500 "DISK": self.op.disk,
7501 "AMOUNT": self.op.amount,
}
7503 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7504 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return env, nl, nl
7507 def CheckPrereq(self):
7508 """Check prerequisites.
7510 This checks that the instance is in the cluster.
7513 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7514 assert instance is not None, \
7515 "Cannot retrieve locked instance %s" % self.op.instance_name
7516 nodenames = list(instance.all_nodes)
7517 for node in nodenames:
7518 _CheckNodeOnline(self, node)
7521 self.instance = instance
7523 if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
7524 raise errors.OpPrereqError("Instance's disk layout does not support"
7525 " growing.", errors.ECODE_INVAL)
7527 self.disk = instance.FindDisk(self.op.disk)
7529 nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
7530 instance.hypervisor)
7531 for node in nodenames:
7532 info = nodeinfo[node]
7533 info.Raise("Cannot get current information from node %s" % node)
7534 vg_free = info.payload.get('vg_free', None)
7535 if not isinstance(vg_free, int):
7536 raise errors.OpPrereqError("Can't compute free disk space on"
7537 " node %s" % node, errors.ECODE_ENVIRON)
7538 if self.op.amount > vg_free:
7539 raise errors.OpPrereqError("Not enough disk space on target node %s:"
7540 " %d MiB available, %d MiB required" %
7541 (node, vg_free, self.op.amount),
7544 def Exec(self, feedback_fn):
7545 """Execute disk grow.
7548 instance = self.instance
disk = self.disk
7550 for node in instance.all_nodes:
7551 self.cfg.SetDiskID(disk, node)
7552 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7553 result.Raise("Grow request failed to node %s" % node)
7555 # TODO: Rewrite code to work properly
7556 # DRBD goes into sync mode for a short amount of time after executing the
7557 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7558 # calling "resize" in sync mode fails. Sleeping for a short amount of
7559 # time is a work-around.
7562 disk.RecordGrow(self.op.amount)
7563 self.cfg.Update(instance, feedback_fn)
7564 if self.op.wait_for_sync:
7565 disk_abort = not _WaitForSync(self, instance)
7567 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7568 " status.\nPlease check the instance.")
7571 class LUQueryInstanceData(NoHooksLU):
7572 """Query runtime instance data.
7575 _OP_REQP = ["instances", "static"]
7578 def ExpandNames(self):
7579 self.needed_locks = {}
7580 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7582 if not isinstance(self.op.instances, list):
7583 raise errors.OpPrereqError("Invalid argument type 'instances'",
7586 if self.op.instances:
7587 self.wanted_names = []
7588 for name in self.op.instances:
7589 full_name = _ExpandInstanceName(self.cfg, name)
7590 self.wanted_names.append(full_name)
7591 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7593 self.wanted_names = None
7594 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7596 self.needed_locks[locking.LEVEL_NODE] = []
7597 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7599 def DeclareLocks(self, level):
7600 if level == locking.LEVEL_NODE:
7601 self._LockInstancesNodes()
7603 def CheckPrereq(self):
7604 """Check prerequisites.
7606 This only checks the optional instance list against the existing names.
7609 if self.wanted_names is None:
7610 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7612 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7613 in self.wanted_names]
7616 def _ComputeBlockdevStatus(self, node, instance_name, dev):
7617 """Returns the status of a block device
7620 if self.op.static or not node:
7623 self.cfg.SetDiskID(dev, node)
7625 result = self.rpc.call_blockdev_find(node, dev)
7629 result.Raise("Can't compute disk status for %s" % instance_name)
7631 status = result.payload
7635 return (status.dev_path, status.major, status.minor,
7636 status.sync_percent, status.estimated_time,
7637 status.is_degraded, status.ldisk_status)
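# Note: the per-device status built above is a positional tuple
# (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
# ldisk_status); when self.op.static is set or no node is given, the live
# fields are not queried. _ComputeDiskStatus below embeds this tuple as the
# "pstatus"/"sstatus" values of each disk.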
7639 def _ComputeDiskStatus(self, instance, snode, dev):
7640 """Compute block device status.
7643 if dev.dev_type in constants.LDS_DRBD:
7644 # we change the snode then (otherwise we use the one passed in)
7645 if dev.logical_id[0] == instance.primary_node:
7646 snode = dev.logical_id[1]
7648 snode = dev.logical_id[0]
7650 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
7652 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
7655 dev_children = [self._ComputeDiskStatus(instance, snode, child)
7656 for child in dev.children]
7661 "iv_name": dev.iv_name,
7662 "dev_type": dev.dev_type,
7663 "logical_id": dev.logical_id,
7664 "physical_id": dev.physical_id,
7665 "pstatus": dev_pstatus,
7666 "sstatus": dev_sstatus,
7667 "children": dev_children,
7674 def Exec(self, feedback_fn):
7675 """Gather and return data"""
7678 cluster = self.cfg.GetClusterInfo()
7680 for instance in self.wanted_instances:
7681 if not self.op.static:
7682 remote_info = self.rpc.call_instance_info(instance.primary_node,
7684 instance.hypervisor)
7685 remote_info.Raise("Error checking node %s" % instance.primary_node)
7686 remote_info = remote_info.payload
7687 if remote_info and "state" in remote_info:
7690 remote_state = "down"
7693 if instance.admin_up:
7696 config_state = "down"
7698 disks = [self._ComputeDiskStatus(instance, None, device)
7699 for device in instance.disks]
7702 "name": instance.name,
7703 "config_state": config_state,
7704 "run_state": remote_state,
7705 "pnode": instance.primary_node,
7706 "snodes": instance.secondary_nodes,
7708 # this happens to be the same format used for hooks
7709 "nics": _NICListToTuple(self, instance.nics),
7711 "hypervisor": instance.hypervisor,
7712 "network_port": instance.network_port,
7713 "hv_instance": instance.hvparams,
7714 "hv_actual": cluster.FillHV(instance, skip_globals=True),
7715 "be_instance": instance.beparams,
7716 "be_actual": cluster.FillBE(instance),
7717 "serial_no": instance.serial_no,
7718 "mtime": instance.mtime,
7719 "ctime": instance.ctime,
7720 "uuid": instance.uuid,
7723 result[instance.name] = idict
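# Exec collects one idict per instance and returns them keyed by instance
# name, roughly (sketch, abbreviated):
#
#   {"inst1.example.com": {"name": "inst1.example.com", "config_state": "up",
#                          "run_state": "up", "pnode": "node1.example.com",
#                          "snodes": [...], "nics": [...], "disks": [...],
#                          ...}}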
7728 class LUSetInstanceParams(LogicalUnit):
7729 """Modifies an instances's parameters.
7732 HPATH = "instance-modify"
7733 HTYPE = constants.HTYPE_INSTANCE
7734 _OP_REQP = ["instance_name"]
7737 def CheckArguments(self):
7738 if not hasattr(self.op, 'nics'):
7740 if not hasattr(self.op, 'disks'):
7742 if not hasattr(self.op, 'beparams'):
7743 self.op.beparams = {}
7744 if not hasattr(self.op, 'hvparams'):
7745 self.op.hvparams = {}
7746 self.op.force = getattr(self.op, "force", False)
7747 if not (self.op.nics or self.op.disks or
7748 self.op.hvparams or self.op.beparams):
7749 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
7751 if self.op.hvparams:
7752 _CheckGlobalHvParams(self.op.hvparams)
7756 for disk_op, disk_dict in self.op.disks:
7757 if disk_op == constants.DDM_REMOVE:
7760 elif disk_op == constants.DDM_ADD:
7763 if not isinstance(disk_op, int):
7764 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
7765 if not isinstance(disk_dict, dict):
7766 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
7767 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7769 if disk_op == constants.DDM_ADD:
7770 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
7771 if mode not in constants.DISK_ACCESS_SET:
7772 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
7774 size = disk_dict.get('size', None)
7776 raise errors.OpPrereqError("Required disk parameter size missing",
7780 except (TypeError, ValueError), err:
7781 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
7782 str(err), errors.ECODE_INVAL)
7783 disk_dict['size'] = size
7785 # modification of disk
7786 if 'size' in disk_dict:
7787 raise errors.OpPrereqError("Disk size change not possible, use"
7788 " grow-disk", errors.ECODE_INVAL)
7790 if disk_addremove > 1:
7791 raise errors.OpPrereqError("Only one disk add or remove operation"
7792 " supported at a time", errors.ECODE_INVAL)
7796 for nic_op, nic_dict in self.op.nics:
7797 if nic_op == constants.DDM_REMOVE:
7800 elif nic_op == constants.DDM_ADD:
7803 if not isinstance(nic_op, int):
7804 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
7805 if not isinstance(nic_dict, dict):
7806 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
7807 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7809 # nic_dict should be a dict
7810 nic_ip = nic_dict.get('ip', None)
7811 if nic_ip is not None:
7812 if nic_ip.lower() == constants.VALUE_NONE:
7813 nic_dict['ip'] = None
7815 if not utils.IsValidIP(nic_ip):
7816 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
7819 nic_bridge = nic_dict.get('bridge', None)
7820 nic_link = nic_dict.get('link', None)
7821 if nic_bridge and nic_link:
7822 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7823 " at the same time", errors.ECODE_INVAL)
7824 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
7825 nic_dict['bridge'] = None
7826 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
7827 nic_dict['link'] = None
7829 if nic_op == constants.DDM_ADD:
7830 nic_mac = nic_dict.get('mac', None)
7832 nic_dict['mac'] = constants.VALUE_AUTO
7834 if 'mac' in nic_dict:
7835 nic_mac = nic_dict['mac']
7836 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7837 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
7839 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
7840 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
7841 " modifying an existing nic",
7844 if nic_addremove > 1:
7845 raise errors.OpPrereqError("Only one NIC add or remove operation"
7846 " supported at a time", errors.ECODE_INVAL)
7848 def ExpandNames(self):
7849 self._ExpandAndLockInstance()
7850 self.needed_locks[locking.LEVEL_NODE] = []
7851 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7853 def DeclareLocks(self, level):
7854 if level == locking.LEVEL_NODE:
7855 self._LockInstancesNodes()
7857 def BuildHooksEnv(self):
7860 This runs on the master, primary and secondaries.
7864 if constants.BE_MEMORY in self.be_new:
7865 args['memory'] = self.be_new[constants.BE_MEMORY]
7866 if constants.BE_VCPUS in self.be_new:
7867 args['vcpus'] = self.be_new[constants.BE_VCPUS]
7868 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
7869 # information at all.
7872 nic_override = dict(self.op.nics)
7873 c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
7874 for idx, nic in enumerate(self.instance.nics):
7875 if idx in nic_override:
7876 this_nic_override = nic_override[idx]
7878 this_nic_override = {}
7879 if 'ip' in this_nic_override:
7880 ip = this_nic_override['ip']
7883 if 'mac' in this_nic_override:
7884 mac = this_nic_override['mac']
7887 if idx in self.nic_pnew:
7888 nicparams = self.nic_pnew[idx]
7890 nicparams = objects.FillDict(c_nicparams, nic.nicparams)
7891 mode = nicparams[constants.NIC_MODE]
7892 link = nicparams[constants.NIC_LINK]
7893 args['nics'].append((ip, mac, mode, link))
7894 if constants.DDM_ADD in nic_override:
7895 ip = nic_override[constants.DDM_ADD].get('ip', None)
7896 mac = nic_override[constants.DDM_ADD]['mac']
7897 nicparams = self.nic_pnew[constants.DDM_ADD]
7898 mode = nicparams[constants.NIC_MODE]
7899 link = nicparams[constants.NIC_LINK]
7900 args['nics'].append((ip, mac, mode, link))
7901 elif constants.DDM_REMOVE in nic_override:
7902 del args['nics'][-1]
7904 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
7905 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7909 def _GetUpdatedParams(old_params, update_dict,
7910 default_values, parameter_types):
7911 """Return the new params dict for the given params.
7913 @type old_params: dict
7914 @param old_params: old parameters
7915 @type update_dict: dict
7916 @param update_dict: dict containing new parameter values,
7917 or constants.VALUE_DEFAULT to reset the
7918 parameter to its default value
7919 @type default_values: dict
7920 @param default_values: default values for the filled parameters
7921 @type parameter_types: dict
7922 @param parameter_types: dict mapping target dict keys to types
7923 in constants.ENFORCEABLE_TYPES
7924 @rtype: (dict, dict)
7925 @return: (new_parameters, filled_parameters)
7928 params_copy = copy.deepcopy(old_params)
7929 for key, val in update_dict.iteritems():
7930 if val == constants.VALUE_DEFAULT:
7932 del params_copy[key]
7936 params_copy[key] = val
7937 utils.ForceDictType(params_copy, parameter_types)
7938 params_filled = objects.FillDict(default_values, params_copy)
7939 return (params_copy, params_filled)
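# Example (sketch) for _GetUpdatedParams: constants.VALUE_DEFAULT drops a key
# so the supplied default shows through in the filled dict. With a matching
# parameter_types mapping:
#
#   old = {"memory": 512, "vcpus": 2}
#   update = {"memory": constants.VALUE_DEFAULT, "vcpus": 4}
#   defaults = {"memory": 128, "vcpus": 1}
#   new, filled = self._GetUpdatedParams(old, update, defaults, types)
#   # new == {"vcpus": 4}, filled == {"memory": 128, "vcpus": 4}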
7941 def CheckPrereq(self):
7942 """Check prerequisites.
7944 This only checks the instance list against the existing names.
7947 self.force = self.op.force
7949 # checking the new params on the primary/secondary nodes
7951 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7952 cluster = self.cluster = self.cfg.GetClusterInfo()
7953 assert self.instance is not None, \
7954 "Cannot retrieve locked instance %s" % self.op.instance_name
7955 pnode = instance.primary_node
7956 nodelist = list(instance.all_nodes)
7958 # hvparams processing
7959 if self.op.hvparams:
7960 i_hvdict, hv_new = self._GetUpdatedParams(
7961 instance.hvparams, self.op.hvparams,
7962 cluster.hvparams[instance.hypervisor],
7963 constants.HVS_PARAMETER_TYPES)
7965 hypervisor.GetHypervisor(
7966 instance.hypervisor).CheckParameterSyntax(hv_new)
7967 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
7968 self.hv_new = hv_new # the new actual values
7969 self.hv_inst = i_hvdict # the new dict (without defaults)
7971 self.hv_new = self.hv_inst = {}
7973 # beparams processing
7974 if self.op.beparams:
7975 i_bedict, be_new = self._GetUpdatedParams(
7976 instance.beparams, self.op.beparams,
7977 cluster.beparams[constants.PP_DEFAULT],
7978 constants.BES_PARAMETER_TYPES)
7979 self.be_new = be_new # the new actual values
7980 self.be_inst = i_bedict # the new dict (without defaults)
7982 self.be_new = self.be_inst = {}
7986 if constants.BE_MEMORY in self.op.beparams and not self.force:
7987 mem_check_list = [pnode]
7988 if be_new[constants.BE_AUTO_BALANCE]:
7989 # either we changed auto_balance to yes or it was from before
7990 mem_check_list.extend(instance.secondary_nodes)
7991 instance_info = self.rpc.call_instance_info(pnode, instance.name,
7992 instance.hypervisor)
7993 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
7994 instance.hypervisor)
7995 pninfo = nodeinfo[pnode]
7996 msg = pninfo.fail_msg
7998 # Assume the primary node is unreachable and go ahead
7999 self.warn.append("Can't get info from primary node %s: %s" %
8001 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8002 self.warn.append("Node data from primary node %s doesn't contain"
8003 " free memory information" % pnode)
8004 elif instance_info.fail_msg:
8005 self.warn.append("Can't get instance runtime information: %s" %
8006 instance_info.fail_msg)
8008 if instance_info.payload:
8009 current_mem = int(instance_info.payload['memory'])
8011 # Assume instance not running
8012 # (there is a slight race condition here, but it's not very probable,
8013 # and we have no other way to check)
8015 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8016 pninfo.payload['memory_free'])
8018 raise errors.OpPrereqError("This change will prevent the instance"
8019 " from starting, due to %d MB of memory"
8020 " missing on its primary node" % miss_mem,
8023 if be_new[constants.BE_AUTO_BALANCE]:
8024 for node, nres in nodeinfo.items():
8025 if node not in instance.secondary_nodes:
8029 self.warn.append("Can't get info from secondary node %s: %s" %
8031 elif not isinstance(nres.payload.get('memory_free', None), int):
8032 self.warn.append("Secondary node %s didn't return free"
8033 " memory information" % node)
8034 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8035 self.warn.append("Not enough memory to failover instance to"
8036 " secondary node %s" % node)
8041 for nic_op, nic_dict in self.op.nics:
8042 if nic_op == constants.DDM_REMOVE:
8043 if not instance.nics:
8044 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8047 if nic_op != constants.DDM_ADD:
8049 if not instance.nics:
8050 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8051 " no NICs" % nic_op,
8053 if nic_op < 0 or nic_op >= len(instance.nics):
8054 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8056 (nic_op, len(instance.nics) - 1),
8058 old_nic_params = instance.nics[nic_op].nicparams
8059 old_nic_ip = instance.nics[nic_op].ip
8064 update_params_dict = dict([(key, nic_dict[key])
8065 for key in constants.NICS_PARAMETERS
8066 if key in nic_dict])
8068 if 'bridge' in nic_dict:
8069 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8071 new_nic_params, new_filled_nic_params = \
8072 self._GetUpdatedParams(old_nic_params, update_params_dict,
8073 cluster.nicparams[constants.PP_DEFAULT],
8074 constants.NICS_PARAMETER_TYPES)
8075 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8076 self.nic_pinst[nic_op] = new_nic_params
8077 self.nic_pnew[nic_op] = new_filled_nic_params
8078 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8080 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8081 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8082 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8084 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8086 self.warn.append(msg)
8088 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8089 if new_nic_mode == constants.NIC_MODE_ROUTED:
8090 if 'ip' in nic_dict:
8091 nic_ip = nic_dict['ip']
8095 raise errors.OpPrereqError('Cannot set the nic ip to None'
8096 ' on a routed nic', errors.ECODE_INVAL)
8097 if 'mac' in nic_dict:
8098 nic_mac = nic_dict['mac']
8100 raise errors.OpPrereqError('Cannot set the nic mac to None',
8102 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8103 # otherwise generate the mac
8104 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8106 # or validate/reserve the current one
8108 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8109 except errors.ReservationError:
8110 raise errors.OpPrereqError("MAC address %s already in use"
8111 " in cluster" % nic_mac,
8112 errors.ECODE_NOTUNIQUE)
8115 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8116 raise errors.OpPrereqError("Disk operations not supported for"
8117 " diskless instances",
8119 for disk_op, _ in self.op.disks:
8120 if disk_op == constants.DDM_REMOVE:
8121 if len(instance.disks) == 1:
8122 raise errors.OpPrereqError("Cannot remove the last disk of"
8125 ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
8126 ins_l = ins_l[pnode]
8127 msg = ins_l.fail_msg
8129 raise errors.OpPrereqError("Can't contact node %s: %s" %
8130 (pnode, msg), errors.ECODE_ENVIRON)
8131 if instance.name in ins_l.payload:
8132 raise errors.OpPrereqError("Instance is running, can't remove"
8133 " disks.", errors.ECODE_STATE)
8135 if (disk_op == constants.DDM_ADD and
8136 len(instance.disks) >= constants.MAX_DISKS):
8137 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8138 " add more" % constants.MAX_DISKS,
8140 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8142 if disk_op < 0 or disk_op >= len(instance.disks):
8143 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8145 (disk_op, len(instance.disks)),
8150 def Exec(self, feedback_fn):
8151 """Modifies an instance.
8153 All parameters take effect only at the next restart of the instance.
8156 # Process here the warnings from CheckPrereq, as we don't have a
8157 # feedback_fn there.
8158 for warn in self.warn:
8159 feedback_fn("WARNING: %s" % warn)
8162 instance = self.instance
8164 for disk_op, disk_dict in self.op.disks:
8165 if disk_op == constants.DDM_REMOVE:
8166 # remove the last disk
8167 device = instance.disks.pop()
8168 device_idx = len(instance.disks)
8169 for node, disk in device.ComputeNodeTree(instance.primary_node):
8170 self.cfg.SetDiskID(disk, node)
8171 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8173 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8174 " continuing anyway", device_idx, node, msg)
8175 result.append(("disk/%d" % device_idx, "remove"))
8176 elif disk_op == constants.DDM_ADD:
8178 if instance.disk_template == constants.DT_FILE:
8179 file_driver, file_path = instance.disks[0].logical_id
8180 file_path = os.path.dirname(file_path)
8182 file_driver = file_path = None
8183 disk_idx_base = len(instance.disks)
8184 new_disk = _GenerateDiskTemplate(self,
8185 instance.disk_template,
8186 instance.name, instance.primary_node,
8187 instance.secondary_nodes,
8192 instance.disks.append(new_disk)
8193 info = _GetInstanceInfoText(instance)
8195 logging.info("Creating volume %s for instance %s",
8196 new_disk.iv_name, instance.name)
8197 # Note: this needs to be kept in sync with _CreateDisks
8199 for node in instance.all_nodes:
8200 f_create = node == instance.primary_node
8202 _CreateBlockDev(self, node, instance, new_disk,
8203 f_create, info, f_create)
8204 except errors.OpExecError, err:
8205 self.LogWarning("Failed to create volume %s (%s) on"
8207 new_disk.iv_name, new_disk, node, err)
8208 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8209 (new_disk.size, new_disk.mode)))
8211 # change a given disk
8212 instance.disks[disk_op].mode = disk_dict['mode']
8213 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8215 for nic_op, nic_dict in self.op.nics:
8216 if nic_op == constants.DDM_REMOVE:
8217 # remove the last nic
8218 del instance.nics[-1]
8219 result.append(("nic.%d" % len(instance.nics), "remove"))
8220 elif nic_op == constants.DDM_ADD:
8221 # mac and bridge should be set by now
8222 mac = nic_dict['mac']
8223 ip = nic_dict.get('ip', None)
8224 nicparams = self.nic_pinst[constants.DDM_ADD]
8225 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8226 instance.nics.append(new_nic)
8227 result.append(("nic.%d" % (len(instance.nics) - 1),
8228 "add:mac=%s,ip=%s,mode=%s,link=%s" %
8229 (new_nic.mac, new_nic.ip,
8230 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8231 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8234 for key in 'mac', 'ip':
8236 setattr(instance.nics[nic_op], key, nic_dict[key])
8237 if nic_op in self.nic_pinst:
8238 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8239 for key, val in nic_dict.iteritems():
8240 result.append(("nic.%s/%d" % (key, nic_op), val))
8243 if self.op.hvparams:
8244 instance.hvparams = self.hv_inst
8245 for key, val in self.op.hvparams.iteritems():
8246 result.append(("hv/%s" % key, val))
8249 if self.op.beparams:
8250 instance.beparams = self.be_inst
8251 for key, val in self.op.beparams.iteritems():
8252 result.append(("be/%s" % key, val))
8254 self.cfg.Update(instance, feedback_fn)
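# The (parameter, new value) pairs accumulated in `result` above (for example
# ("be/memory", 512)) are what this LU reports back to the caller, one entry
# per applied change.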
8259 class LUQueryExports(NoHooksLU):
8260 """Query the exports list
8263 _OP_REQP = ['nodes']
8266 def ExpandNames(self):
8267 self.needed_locks = {}
8268 self.share_locks[locking.LEVEL_NODE] = 1
8269 if not self.op.nodes:
8270 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8272 self.needed_locks[locking.LEVEL_NODE] = \
8273 _GetWantedNodes(self, self.op.nodes)
8275 def CheckPrereq(self):
8276 """Check prerequisites.
8279 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8281 def Exec(self, feedback_fn):
8282 """Compute the list of all the exported system images.
8285 @return: a dictionary with the structure node->(export-list)
8286 where export-list is a list of the instances exported on
8290 rpcresult = self.rpc.call_export_list(self.nodes)
8292 for node in rpcresult:
8293 if rpcresult[node].fail_msg:
8294 result[node] = False
8296 result[node] = rpcresult[node].payload
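# Example (sketch) of the structure returned by Exec: export names per node,
# with False marking nodes that could not be queried:
#
#   {"node1.example.com": ["inst1.example.com", "inst2.example.com"],
#    "node2.example.com": False}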
8301 class LUExportInstance(LogicalUnit):
8302 """Export an instance to an image in the cluster.
8305 HPATH = "instance-export"
8306 HTYPE = constants.HTYPE_INSTANCE
8307 _OP_REQP = ["instance_name", "target_node", "shutdown"]
8310 def CheckArguments(self):
8311 """Check the arguments.
8314 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
8315 constants.DEFAULT_SHUTDOWN_TIMEOUT)
8317 def ExpandNames(self):
8318 self._ExpandAndLockInstance()
8319 # FIXME: lock only instance primary and destination node
8321 # Sad but true, for now we have to lock all nodes, as we don't know where
8322 # the previous export might be, and in this LU we search for it and
8323 # remove it from its current node. In the future we could fix this by:
8324 # - making a tasklet to search (share-lock all), then create the new one,
8325 # then one to remove, after
8326 # - removing the removal operation altogether
8327 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8329 def DeclareLocks(self, level):
8330 """Last minute lock declaration."""
8331 # All nodes are locked anyway, so nothing to do here.
8333 def BuildHooksEnv(self):
8336 This will run on the master, primary node and target node.
8340 "EXPORT_NODE": self.op.target_node,
8341 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
8342 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
8344 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8345 nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
8346 self.op.target_node]
8349 def CheckPrereq(self):
8350 """Check prerequisites.
8352 This checks that the instance and node names are valid.
8355 instance_name = self.op.instance_name
8356 self.instance = self.cfg.GetInstanceInfo(instance_name)
8357 assert self.instance is not None, \
8358 "Cannot retrieve locked instance %s" % self.op.instance_name
8359 _CheckNodeOnline(self, self.instance.primary_node)
8361 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8362 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
8363 assert self.dst_node is not None
8365 _CheckNodeOnline(self, self.dst_node.name)
8366 _CheckNodeNotDrained(self, self.dst_node.name)
8368 # instance disk type verification
8369 for disk in self.instance.disks:
8370 if disk.dev_type == constants.LD_FILE:
8371 raise errors.OpPrereqError("Export not supported for instances with"
8372 " file-based disks", errors.ECODE_INVAL)
8374 def Exec(self, feedback_fn):
8375 """Export an instance to an image in the cluster.
8378 instance = self.instance
8379 dst_node = self.dst_node
8380 src_node = instance.primary_node
8382 if self.op.shutdown:
8383 # shutdown the instance, but not the disks
8384 feedback_fn("Shutting down instance %s" % instance.name)
8385 result = self.rpc.call_instance_shutdown(src_node, instance,
8386 self.shutdown_timeout)
8387 result.Raise("Could not shutdown instance %s on"
8388 " node %s" % (instance.name, src_node))
8390 vgname = self.cfg.GetVGName()
8394 # set the disks ID correctly since call_instance_start needs the
8395 # correct drbd minor to create the symlinks
8396 for disk in instance.disks:
8397 self.cfg.SetDiskID(disk, src_node)
8399 activate_disks = (not instance.admin_up)
8402 # Activate the instance disks if we're exporting a stopped instance
8403 feedback_fn("Activating disks for %s" % instance.name)
8404 _StartInstanceDisks(self, instance, None)
8410 for idx, disk in enumerate(instance.disks):
8411 feedback_fn("Creating a snapshot of disk/%s on node %s" %
8414 # result.payload will be a snapshot of an lvm leaf of the one we passed in
8416 result = self.rpc.call_blockdev_snapshot(src_node, disk)
8417 msg = result.fail_msg
8419 self.LogWarning("Could not snapshot disk/%s on node %s: %s",
8421 snap_disks.append(False)
8423 disk_id = (vgname, result.payload)
8424 new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
8425 logical_id=disk_id, physical_id=disk_id,
8426 iv_name=disk.iv_name)
8427 snap_disks.append(new_dev)
8430 if self.op.shutdown and instance.admin_up:
8431 feedback_fn("Starting instance %s" % instance.name)
8432 result = self.rpc.call_instance_start(src_node, instance, None, None)
8433 msg = result.fail_msg
8435 _ShutdownInstanceDisks(self, instance)
8436 raise errors.OpExecError("Could not start instance: %s" % msg)
8438 # TODO: check for size
8440 cluster_name = self.cfg.GetClusterName()
8441 for idx, dev in enumerate(snap_disks):
8442 feedback_fn("Exporting snapshot %s from %s to %s" %
8443 (idx, src_node, dst_node.name))
8445 # FIXME: pass debug from opcode to backend
8446 result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
8447 instance, cluster_name,
8448 idx, self.op.debug_level)
8449 msg = result.fail_msg
8451 self.LogWarning("Could not export disk/%s from node %s to"
8452 " node %s: %s", idx, src_node, dst_node.name, msg)
8453 dresults.append(False)
8455 dresults.append(True)
8456 msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
8458 self.LogWarning("Could not remove snapshot for disk/%d from node"
8459 " %s: %s", idx, src_node, msg)
8461 dresults.append(False)
8463 feedback_fn("Finalizing export on %s" % dst_node.name)
8464 result = self.rpc.call_finalize_export(dst_node.name, instance,
8467 msg = result.fail_msg
8469 self.LogWarning("Could not finalize export for instance %s"
8470 " on node %s: %s", instance.name, dst_node.name, msg)
8475 feedback_fn("Deactivating disks for %s" % instance.name)
8476 _ShutdownInstanceDisks(self, instance)
8478 nodelist = self.cfg.GetNodeList()
8479 nodelist.remove(dst_node.name)
8481 # on one-node clusters nodelist will be empty after the removal;
8482 # if we proceeded, the backup would be removed because OpQueryExports
8483 # substitutes an empty list with the full cluster node list.
8484 iname = instance.name
8486 feedback_fn("Removing old exports for instance %s" % iname)
8487 exportlist = self.rpc.call_export_list(nodelist)
8488 for node in exportlist:
8489 if exportlist[node].fail_msg:
8491 if iname in exportlist[node].payload:
8492 msg = self.rpc.call_export_remove(node, iname).fail_msg
8494 self.LogWarning("Could not remove older export for instance %s"
8495 " on node %s: %s", iname, node, msg)
8496 return fin_resu, dresults
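# The return value pairs the finalize status with one boolean per disk:
# fin_resu is intended to reflect whether call_finalize_export succeeded on
# the destination node, and dresults records, for each disk, whether its
# snapshot was exported (and the snapshot removed) successfully, so callers
# can distinguish partial from complete exports.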
8499 class LURemoveExport(NoHooksLU):
8500 """Remove exports related to the named instance.
8503 _OP_REQP = ["instance_name"]
8506 def ExpandNames(self):
8507 self.needed_locks = {}
8508 # We need all nodes to be locked in order for RemoveExport to work, but we
8509 # don't need to lock the instance itself, as nothing will happen to it (and
8510 # we can remove exports also for a removed instance)
8511 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8513 def CheckPrereq(self):
8514 """Check prerequisites.
8518 def Exec(self, feedback_fn):
8519 """Remove any export.
8522 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
8523 # If the instance was not found we'll try with the name that was passed in.
8524 # This will only work if it was an FQDN, though.
8526 if not instance_name:
8528 instance_name = self.op.instance_name
8530 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
8531 exportlist = self.rpc.call_export_list(locked_nodes)
8533 for node in exportlist:
8534 msg = exportlist[node].fail_msg
8536 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
8538 if instance_name in exportlist[node].payload:
8540 result = self.rpc.call_export_remove(node, instance_name)
8541 msg = result.fail_msg
8543 logging.error("Could not remove export for instance %s"
8544 " on node %s: %s", instance_name, node, msg)
8546 if fqdn_warn and not found:
8547 feedback_fn("Export not found. If trying to remove an export belonging"
8548 " to a deleted instance please use its Fully Qualified"
8552 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
8555 This is an abstract class which is the parent of all the other tags LUs.
8559 def ExpandNames(self):
8560 self.needed_locks = {}
8561 if self.op.kind == constants.TAG_NODE:
8562 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
8563 self.needed_locks[locking.LEVEL_NODE] = self.op.name
8564 elif self.op.kind == constants.TAG_INSTANCE:
8565 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
8566 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
8568 def CheckPrereq(self):
8569 """Check prerequisites.
8572 if self.op.kind == constants.TAG_CLUSTER:
8573 self.target = self.cfg.GetClusterInfo()
8574 elif self.op.kind == constants.TAG_NODE:
8575 self.target = self.cfg.GetNodeInfo(self.op.name)
8576 elif self.op.kind == constants.TAG_INSTANCE:
8577 self.target = self.cfg.GetInstanceInfo(self.op.name)
8579 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
8580 str(self.op.kind), errors.ECODE_INVAL)
8583 class LUGetTags(TagsLU):
8584 """Returns the tags of a given object.
8587 _OP_REQP = ["kind", "name"]
8590 def Exec(self, feedback_fn):
8591 """Returns the tag list.
8594 return list(self.target.GetTags())
8597 class LUSearchTags(NoHooksLU):
8598 """Searches the tags for a given pattern.
8601 _OP_REQP = ["pattern"]
8604 def ExpandNames(self):
8605 self.needed_locks = {}
8607 def CheckPrereq(self):
8608 """Check prerequisites.
8610 This checks the pattern passed for validity by compiling it.
8614 self.re = re.compile(self.op.pattern)
8615 except re.error, err:
8616 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
8617 (self.op.pattern, err), errors.ECODE_INVAL)
8619 def Exec(self, feedback_fn):
8620 """Returns the tag list.
8624 tgts = [("/cluster", cfg.GetClusterInfo())]
8625 ilist = cfg.GetAllInstancesInfo().values()
8626 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
8627 nlist = cfg.GetAllNodesInfo().values()
8628 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
8630 for path, target in tgts:
8631 for tag in target.GetTags():
8632 if self.re.search(tag):
8633 results.append((path, tag))
8637 class LUAddTags(TagsLU):
8638 """Sets a tag on a given object.
8641 _OP_REQP = ["kind", "name", "tags"]
8644 def CheckPrereq(self):
8645 """Check prerequisites.
8647 This checks the type and length of the tag name and value.
8650 TagsLU.CheckPrereq(self)
8651 for tag in self.op.tags:
8652 objects.TaggableObject.ValidateTag(tag)
8654 def Exec(self, feedback_fn):
8659 for tag in self.op.tags:
8660 self.target.AddTag(tag)
8661 except errors.TagError, err:
8662 raise errors.OpExecError("Error while setting tag: %s" % str(err))
8663 self.cfg.Update(self.target, feedback_fn)
8666 class LUDelTags(TagsLU):
8667 """Delete a list of tags from a given object.
8670 _OP_REQP = ["kind", "name", "tags"]
8673 def CheckPrereq(self):
8674 """Check prerequisites.
8676 This checks that we have the given tag.
8679 TagsLU.CheckPrereq(self)
8680 for tag in self.op.tags:
8681 objects.TaggableObject.ValidateTag(tag)
8682 del_tags = frozenset(self.op.tags)
8683 cur_tags = self.target.GetTags()
8684 if not del_tags <= cur_tags:
8685 diff_tags = del_tags - cur_tags
8686 diff_names = ["'%s'" % tag for tag in diff_tags]
8688 raise errors.OpPrereqError("Tag(s) %s not found" %
8689 (",".join(diff_names)), errors.ECODE_NOENT)
8691 def Exec(self, feedback_fn):
8692 """Remove the tag from the object.
8695 for tag in self.op.tags:
8696 self.target.RemoveTag(tag)
8697 self.cfg.Update(self.target, feedback_fn)
8700 class LUTestDelay(NoHooksLU):
8701 """Sleep for a specified amount of time.
8703 This LU sleeps on the master and/or nodes for a specified amount of time.
8707 _OP_REQP = ["duration", "on_master", "on_nodes"]
8710 def ExpandNames(self):
8711 """Expand names and set required locks.
8713 This expands the node list, if any.
8716 self.needed_locks = {}
8717 if self.op.on_nodes:
8718 # _GetWantedNodes can be used here, but is not always appropriate to use
8719 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
8721 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
8722 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
8724 def CheckPrereq(self):
8725 """Check prerequisites.
8729 def Exec(self, feedback_fn):
8730 """Do the actual sleep.
8733 if self.op.on_master:
8734 if not utils.TestDelay(self.op.duration):
8735 raise errors.OpExecError("Error during master delay test")
8736 if self.op.on_nodes:
8737 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
8738 for node, node_result in result.items():
8739 node_result.Raise("Failure during rpc call to node %s" % node)
8742 class IAllocator(object):
8743 """IAllocator framework.
8745 An IAllocator instance has four sets of attributes:
8746 - cfg that is needed to query the cluster
8747 - input data (all members of the _KEYS class attribute are required)
8748 - four buffer attributes (in|out_data|text), that represent the
8749 input (to the external script) in text and data structure format,
8750 and the output from it, again in two formats
8751 - the result variables from the script (success, info, result) for easy usage
8755 # pylint: disable-msg=R0902
8756 # lots of instance attributes
8758 "name", "mem_size", "disks", "disk_template",
8759 "os", "tags", "nics", "vcpus", "hypervisor",
8762 "name", "relocate_from",
8768 def __init__(self, cfg, rpc, mode, **kwargs):
8771 # init buffer variables
8772 self.in_text = self.out_text = self.in_data = self.out_data = None
8773 # init all input fields so that pylint is happy
8775 self.mem_size = self.disks = self.disk_template = None
8776 self.os = self.tags = self.nics = self.vcpus = None
8777 self.hypervisor = None
8778 self.relocate_from = None
8780 self.evac_nodes = None
8782 self.required_nodes = None
8783 # init result fields
8784 self.success = self.info = self.result = None
8785 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8786 keyset = self._ALLO_KEYS
8787 fn = self._AddNewInstance
8788 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8789 keyset = self._RELO_KEYS
8790 fn = self._AddRelocateInstance
8791 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
8792 keyset = self._EVAC_KEYS
8793 fn = self._AddEvacuateNodes
8795 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
8796 " IAllocator" % self.mode)
8798 if key not in keyset:
8799 raise errors.ProgrammerError("Invalid input parameter '%s' to"
8800 " IAllocator" % key)
8801 setattr(self, key, kwargs[key])
8804 if key not in kwargs:
8805 raise errors.ProgrammerError("Missing input parameter '%s' to"
8806 " IAllocator" % key)
8807 self._BuildInputData(fn)
8809 def _ComputeClusterData(self):
8810 """Compute the generic allocator input data.
8812 This is the data that is independent of the actual operation.
8816 cluster_info = cfg.GetClusterInfo()
8819 "version": constants.IALLOCATOR_VERSION,
8820 "cluster_name": cfg.GetClusterName(),
8821 "cluster_tags": list(cluster_info.GetTags()),
8822 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
8823 # we don't have job IDs
8825 iinfo = cfg.GetAllInstancesInfo().values()
8826 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
8830 node_list = cfg.GetNodeList()
8832 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8833 hypervisor_name = self.hypervisor
8834 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8835 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
8836 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
8837 hypervisor_name = cluster_info.enabled_hypervisors[0]
8839 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
8842 self.rpc.call_all_instances_info(node_list,
8843 cluster_info.enabled_hypervisors)
8844 for nname, nresult in node_data.items():
8845 # first fill in static (config-based) values
8846 ninfo = cfg.GetNodeInfo(nname)
8848 "tags": list(ninfo.GetTags()),
8849 "primary_ip": ninfo.primary_ip,
8850 "secondary_ip": ninfo.secondary_ip,
8851 "offline": ninfo.offline,
8852 "drained": ninfo.drained,
8853 "master_candidate": ninfo.master_candidate,
8856 if not (ninfo.offline or ninfo.drained):
8857 nresult.Raise("Can't get data for node %s" % nname)
8858 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
8860 remote_info = nresult.payload
8862 for attr in ['memory_total', 'memory_free', 'memory_dom0',
8863 'vg_size', 'vg_free', 'cpu_total']:
8864 if attr not in remote_info:
8865 raise errors.OpExecError("Node '%s' didn't return attribute"
8866 " '%s'" % (nname, attr))
8867 if not isinstance(remote_info[attr], int):
8868 raise errors.OpExecError("Node '%s' returned invalid value"
8870 (nname, attr, remote_info[attr]))
8871 # compute memory used by primary instances
8872 i_p_mem = i_p_up_mem = 0
8873 for iinfo, beinfo in i_list:
8874 if iinfo.primary_node == nname:
8875 i_p_mem += beinfo[constants.BE_MEMORY]
8876 if iinfo.name not in node_iinfo[nname].payload:
8879 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
8880 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
8881 remote_info['memory_free'] -= max(0, i_mem_diff)
8884 i_p_up_mem += beinfo[constants.BE_MEMORY]
8886 # compute memory used by instances
8888 "total_memory": remote_info['memory_total'],
8889 "reserved_memory": remote_info['memory_dom0'],
8890 "free_memory": remote_info['memory_free'],
8891 "total_disk": remote_info['vg_size'],
8892 "free_disk": remote_info['vg_free'],
8893 "total_cpus": remote_info['cpu_total'],
8894 "i_pri_memory": i_p_mem,
8895 "i_pri_up_memory": i_p_up_mem,
8899 node_results[nname] = pnr
8900 data["nodes"] = node_results
8904 for iinfo, beinfo in i_list:
8906 for nic in iinfo.nics:
8907 filled_params = objects.FillDict(
8908 cluster_info.nicparams[constants.PP_DEFAULT],
8910 nic_dict = {"mac": nic.mac,
8912 "mode": filled_params[constants.NIC_MODE],
8913 "link": filled_params[constants.NIC_LINK],
8915 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
8916 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
8917 nic_data.append(nic_dict)
8919 "tags": list(iinfo.GetTags()),
8920 "admin_up": iinfo.admin_up,
8921 "vcpus": beinfo[constants.BE_VCPUS],
8922 "memory": beinfo[constants.BE_MEMORY],
8924 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
8926 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
8927 "disk_template": iinfo.disk_template,
8928 "hypervisor": iinfo.hypervisor,
8930 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
8932 instance_data[iinfo.name] = pir
8934 data["instances"] = instance_data
8938 def _AddNewInstance(self):
8939 """Add new instance data to allocator structure.
8941 This in combination with _ComputeClusterData will create the
8942 correct structure needed as input for the allocator.
8944 The checks for the completeness of the opcode must have already been done.
8948 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
8950 if self.disk_template in constants.DTS_NET_MIRROR:
8951 self.required_nodes = 2
8953 self.required_nodes = 1
8956 "disk_template": self.disk_template,
8959 "vcpus": self.vcpus,
8960 "memory": self.mem_size,
8961 "disks": self.disks,
8962 "disk_space_total": disk_space,
8964 "required_nodes": self.required_nodes,
8968 def _AddRelocateInstance(self):
8969 """Add relocate instance data to allocator structure.
8971 This in combination with _ComputeClusterData will create the
8972 correct structure needed as input for the allocator.
8974 The checks for the completeness of the opcode must have already been done.
8978 instance = self.cfg.GetInstanceInfo(self.name)
8979 if instance is None:
8980 raise errors.ProgrammerError("Unknown instance '%s' passed to"
8981 " IAllocator" % self.name)
8983 if instance.disk_template not in constants.DTS_NET_MIRROR:
8984 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
8987 if len(instance.secondary_nodes) != 1:
8988 raise errors.OpPrereqError("Instance does not have exactly one secondary node",
8991 self.required_nodes = 1
8992 disk_sizes = [{'size': disk.size} for disk in instance.disks]
8993 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
8997 "disk_space_total": disk_space,
8998 "required_nodes": self.required_nodes,
8999 "relocate_from": self.relocate_from,
9003 def _AddEvacuateNodes(self):
9004 """Add evacuate nodes data to allocator structure.
9008 "evac_nodes": self.evac_nodes
9012 def _BuildInputData(self, fn):
9013 """Build input data structures.
9016 self._ComputeClusterData()
9019 request["type"] = self.mode
9020 self.in_data["request"] = request
9022 self.in_text = serializer.Dump(self.in_data)
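# Sketch of the top-level structure serialized into in_text above; the node
# and instance dicts are built by _ComputeClusterData:
#
#   {"version": constants.IALLOCATOR_VERSION,
#    "cluster_name": "...", "cluster_tags": [...],
#    "enabled_hypervisors": [...],
#    "nodes": {node_name: {...}}, "instances": {instance_name: {...}},
#    "request": {"type": self.mode, ...}}   # plus the mode-specific keys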
9024 def Run(self, name, validate=True, call_fn=None):
9025 """Run an instance allocator and return the results.
9029 call_fn = self.rpc.call_iallocator_runner
9031 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9032 result.Raise("Failure while running the iallocator script")
9034 self.out_text = result.payload
9036 self._ValidateResult()
9038 def _ValidateResult(self):
9039 """Process the allocator results.
9041 This will process and, if successful, save the result in
9042 self.out_data and the other parameters.
9046 rdict = serializer.Load(self.out_text)
9047 except Exception, err:
9048 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9050 if not isinstance(rdict, dict):
9051 raise errors.OpExecError("Can't parse iallocator results: not a dict")
9053 # TODO: remove backwards compatibility in later versions
9054 if "nodes" in rdict and "result" not in rdict:
9055 rdict["result"] = rdict["nodes"]
9058 for key in "success", "info", "result":
9059 if key not in rdict:
9060 raise errors.OpExecError("Can't parse iallocator results:"
9061 " missing key '%s'" % key)
9062 setattr(self, key, rdict[key])
9064 if not isinstance(rdict["result"], list):
9065 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9067 self.out_data = rdict
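# Example (sketch) of a reply that passes _ValidateResult: an object with the
# three mandatory keys, "result" being a list whose contents depend on the
# request mode:
#
#   {"success": true, "info": "allocation successful", "result": [...]}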
9070 class LUTestAllocator(NoHooksLU):
9071 """Run allocator tests.
9073 This LU runs the allocator tests
9076 _OP_REQP = ["direction", "mode", "name"]
9078 def CheckPrereq(self):
9079 """Check prerequisites.
9081 This checks the opcode parameters depending on the direction and mode of the test.
9084 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9085 for attr in ["name", "mem_size", "disks", "disk_template",
9086 "os", "tags", "nics", "vcpus"]:
9087 if not hasattr(self.op, attr):
9088 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
9089 attr, errors.ECODE_INVAL)
9090 iname = self.cfg.ExpandInstanceName(self.op.name)
9091 if iname is not None:
9092 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
9093 iname, errors.ECODE_EXISTS)
9094 if not isinstance(self.op.nics, list):
9095 raise errors.OpPrereqError("Invalid parameter 'nics'",
9097 for row in self.op.nics:
9098 if (not isinstance(row, dict) or
9101 "bridge" not in row):
9102 raise errors.OpPrereqError("Invalid contents of the 'nics'"
9103 " parameter", errors.ECODE_INVAL)
9104 if not isinstance(self.op.disks, list):
9105 raise errors.OpPrereqError("Invalid parameter 'disks'",
9107 for row in self.op.disks:
9108 if (not isinstance(row, dict) or
9109 "size" not in row or
9110 not isinstance(row["size"], int) or
9111 "mode" not in row or
9112 row["mode"] not in ['r', 'w']):
9113 raise errors.OpPrereqError("Invalid contents of the 'disks'"
9114 " parameter", errors.ECODE_INVAL)
9115 if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
9116 self.op.hypervisor = self.cfg.GetHypervisorType()
9117 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9118 if not hasattr(self.op, "name"):
9119 raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
9121 fname = _ExpandInstanceName(self.cfg, self.op.name)
9122 self.op.name = fname
9123 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
9124 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9125 if not hasattr(self.op, "evac_nodes"):
9126 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
9127 " opcode input", errors.ECODE_INVAL)
9129 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
9130 self.op.mode, errors.ECODE_INVAL)
9132 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
9133 if not hasattr(self.op, "allocator") or self.op.allocator is None:
9134 raise errors.OpPrereqError("Missing allocator name",
9136 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
9137 raise errors.OpPrereqError("Wrong allocator test '%s'" %
9138 self.op.direction, errors.ECODE_INVAL)
9140 def Exec(self, feedback_fn):
9141 """Run the allocator test.
9144 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9145 ial = IAllocator(self.cfg, self.rpc,
9148 mem_size=self.op.mem_size,
9149 disks=self.op.disks,
9150 disk_template=self.op.disk_template,
9154 vcpus=self.op.vcpus,
9155 hypervisor=self.op.hypervisor,
9157 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9158 ial = IAllocator(self.cfg, self.rpc,
9161 relocate_from=list(self.relocate_from),
9163 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9164 ial = IAllocator(self.cfg, self.rpc,
9166 evac_nodes=self.op.evac_nodes)
9168 raise errors.ProgrammerError("Unhandled mode %s in"
9169 " LUTestAllocator.Exec" % self.op.mode)
9171 if self.op.direction == constants.IALLOCATOR_DIR_IN:
9172 result = ial.in_text
9174 ial.Run(self.op.allocator, validate=False)
9175 result = ial.out_text