# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.

22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
import logging
import re
import time

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by the opcode dry_run parameter)

  """
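
  # A minimal sketch of the contract described above; the LU name, hook
  # path and opcode are purely illustrative, not part of this module:
  #
  #   class LUDoNothing(LogicalUnit):
  #     HPATH = "do-nothing"
  #     HTYPE = constants.HTYPE_CLUSTER
  #     _OP_REQP = []
  #
  #     def ExpandNames(self):
  #       self.needed_locks = {}
  #
  #     def CheckPrereq(self):
  #       pass
  #
  #     def Exec(self, feedback_fn):
  #       feedback_fn("nothing to do")
  #
  #     def BuildHooksEnv(self):
  #       return {}, [], []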

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.cfg = context.cfg
    self.context = context
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()
116 """Returns the SshRunner object
120 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
123 ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    An empty node list should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # The API must be kept, thus we ignore the unused-argument and
    # "could be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we really have been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
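
  # Typical usage of the two helpers above in a concrete LU, as a sketch
  # (the LU body is hypothetical; the pattern mirrors the docstrings):
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     # node locks are computed later, once the instance lock is held
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()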


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"
378 """Tasklet base class.
380 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
381 they can mix legacy code with tasklets. Locking needs to be done in the LU,
382 tasklets know nothing about locks.
384 Subclasses must follow these rules:
385 - Implement CheckPrereq
389 def __init__(self, lu):

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
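
# A sketch of how an LU might use tasklets instead of defining its own
# CheckPrereq and Exec; the tasklet class shown here is purely illustrative:
#
#   class _IllustrativeTasklet(Tasklet):
#     def CheckPrereq(self):
#       pass  # verify preconditions, raise errors.OpPrereqError otherwise
#
#     def Exec(self, feedback_fn):
#       feedback_fn("doing one self-contained piece of work")
#
# and, inside some LU's ExpandNames:
#
#   self.tasklets = [_IllustrativeTasklet(self)]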


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @return: the list of nodes, sorted
  @raise errors.OpPrereqError: if the nodes parameter is of a wrong type
  @raise errors.ProgrammerError: if called with an empty list of nodes

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
                                 " non-empty list of nodes whose name is to be"
                                 " expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())

  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)), errors.ECODE_INVAL)
  setattr(op, name, val)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
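
# Example use of the expansion wrappers (the short and full names are
# illustrative): a user-supplied "node1" is canonicalized before locking:
#
#   self.op.node_name = _ExpandNodeName(self.cfg, "node1")
#   # -> e.g. "node1.example.com", or OpPrereqError if unknown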


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @param memory: the memory size of the instance
  @param vcpus: the count of VCPUs the instance has
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @param disks: the list of (size, mode) pairs
  @param bep: the backend parameters for the instance
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @return: the hook environment for this instance

  """
626 "INSTANCE_NAME": name,
627 "INSTANCE_PRIMARY": primary_node,
628 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
629 "INSTANCE_OS_TYPE": os_type,
630 "INSTANCE_STATUS": str_status,
631 "INSTANCE_MEMORY": memory,
632 "INSTANCE_VCPUS": vcpus,
633 "INSTANCE_DISK_TEMPLATE": disk_template,
634 "INSTANCE_HYPERVISOR": hypervisor_name,

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
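
# Worked example for the decision above: with candidate_pool_size = 10 and
# GetMasterCandidateStats reporting mc_now = 3, mc_should = 3, the adjusted
# target is min(3 + 1, 10) = 4, so 3 < 4 and the new node promotes itself;
# with a full pool (cp_size = 3, mc_now = mc_should = 3) the target stays 3
# and no promotion happens.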


def _CheckNicsBridgesExist(lu, target_nics, target_node,
                           profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _FormatTimestamp(secs):
  """Formats a Unix timestamp with the local timezone.

  """
  return time.strftime("%F %T %Z", time.gmtime(secs))


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
                            warn_days=constants.SSL_CERT_EXPIRATION_WARN,
                            error_days=constants.SSL_CERT_EXPIRATION_ERROR):
  """Verifies certificate details for LUVerifyCluster.

  """
  if expired:
    msg = "Certificate %s is expired" % filename

    if not_before is not None and not_after is not None:
      msg += (" (valid from %s to %s)" %
              (_FormatTimestamp(not_before),
               _FormatTimestamp(not_after)))
    elif not_before is not None:
      msg += " (valid from %s)" % _FormatTimestamp(not_before)
    elif not_after is not None:
      msg += " (valid until %s)" % _FormatTimestamp(not_after)

    return (LUVerifyCluster.ETYPE_ERROR, msg)

  elif not_before is not None and not_before > now:
    return (LUVerifyCluster.ETYPE_WARNING,
            "Certificate %s not yet valid (valid from %s)" %
            (filename, _FormatTimestamp(not_before)))

  elif not_after is not None:
    remaining_days = int((not_after - now) / (24 * 3600))

    msg = ("Certificate %s expires in %d days" % (filename, remaining_days))

    if remaining_days <= error_days:
      return (LUVerifyCluster.ETYPE_ERROR, msg)

    if remaining_days <= warn_days:
      return (LUVerifyCluster.ETYPE_WARNING, msg)

  return (None, None)
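
# Worked example for the expiry thresholds above: a certificate still valid
# for 10 more days gives remaining_days = 10; with illustrative thresholds
# error_days = 7 and warn_days = 30 this skips the error branch (10 > 7)
# but returns the ETYPE_WARNING tuple (10 <= 30).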


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  # Depending on the pyOpenSSL version, this can just return (None, None)
  (not_before, not_after) = utils.GetX509CertValidity(cert)

  return _VerifyCertificateInner(filename, cert.has_expired(),
                                 not_before, not_after, time.time())


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
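
  # Sample output of the two formats produced by _Error above (node and
  # message are illustrative): with op.error_codes set, a line like
  #   ERROR:ENODELVM:node:node1.example.com:volume group missing
  # is emitted; without it, the simpler form
  #   ERROR: node node1.example.com: volume group missing
  # is used, both prefixed with "  - " via self._feedback_fn.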

  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, master_files, drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param master_files: list of files that only masters should have
    @param drbd_map: the used DRBD minors for this node, in
        form of minor: (instance, must_exist) which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, node_result should be a non-empty dict
    test = not node_result or not isinstance(node_result, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    # checks vg existence and size > 20G
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      test = not vglist
      _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
      if not test:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # checks config file checksum
    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if not test:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have = (file_name not in master_files) or node_is_mc
        test1 = file_name not in remote_cksum
        test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
        test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
        _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' missing", file_name)
        _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' has wrong checksum", file_name)
        # not candidate and this is not a must-have file
        _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist on non master"
                 " candidates (and the file is outdated)", file_name)
        # all good, except non-master/non-must have combination
        _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist"
                 " on non master candidates", file_name)

    test = constants.NV_NODELIST not in node_result
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if node_result[constants.NV_NODELIST]:
        for a_node, a_msg in node_result[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in node_result
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if node_result[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, node_result[constants.NV_NODENETTEST][anode])

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      test = not isinstance(used_minors, (tuple, list))
      _ErrorIf(test, self.ENODEDRBD, node,
               "cannot parse drbd status file: %s", str(used_minors))

      for minor, (iname, must_exist) in drbd_map.items():
        test = minor not in used_minors and must_exist
        _ErrorIf(test, self.ENODEDRBD, node,
                 "drbd minor %d of instance %s is not active",
                 minor, iname)
      for minor in used_minors:
        test = minor not in drbd_map
        _ErrorIf(test, self.ENODEDRBD, node,
                 "unallocated drbd minor %d is in use", minor)

    test = node_result.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    if vg_name is not None:
      pvlist = node_result.get(constants.NV_PVLIST, None)
      test = pvlist is None
      _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
      if not test:
        # check that ':' is not present in PV names, since it's a
        # special character for lvcreate (denotes the range of PEs to
        # use on the PV)
        for _, pvname, owner_vg in pvlist:
          test = ":" in pvname
          _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                   " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, n_offline):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      if node in n_offline:
        # ignore missing volumes on offline nodes
        continue
      for volume in node_vol_should[node]:
        test = node not in node_vol_is or volume not in node_vol_is[node]
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      test = ((node_current not in node_instance or
               not instance in node_instance[node_current]) and
              node_current not in n_offline)
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node in node_instance:
      if node != node_current:
        test = instance in node_instance[node]
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    for node in node_vol_is:
      for volume in node_vol_is[node]:
        test = (node not in node_vol_should or
                volume not in node_vol_should[node])
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_instance):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node in node_instance:
      for o_inst in node_instance[node]:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    """
    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all instances it is supposed to should a single
      # other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = nodeinfo['mfree'] < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should peer node %s fail", prinode)

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just run in the post phase and their failure makes
    the output be logged in the verify output and the verification to fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
    }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = [] # List of offline nodes
    n_drained = [] # List of nodes being drained
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)
1428 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1429 node_verify_param = {
1430 constants.NV_FILELIST: file_names,
1431 constants.NV_NODELIST: [node.name for node in nodeinfo
1432 if not node.offline],
1433 constants.NV_HYPERVISOR: hypervisors,
1434 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1435 node.secondary_ip) for node in nodeinfo
1436 if not node.offline],
1437 constants.NV_INSTANCELIST: hypervisors,
1438 constants.NV_VERSION: None,
1439 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1440 constants.NV_NODESETUP: None,
1441 constants.NV_TIME: None,
1444 if vg_name is not None:
1445 node_verify_param[constants.NV_VGLIST] = None
1446 node_verify_param[constants.NV_LVLIST] = vg_name
1447 node_verify_param[constants.NV_PVLIST] = [vg_name]
1448 node_verify_param[constants.NV_DRBDLIST] = None

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline.append(node)
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained.append(node)
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        continue

      nresult = all_nvinfo[node].payload
      node_drbd = {}
      for minor, instance in all_drbd_map[node].items():
        test = instance not in instanceinfo
        _ErrorIf(test, self.ECLUSTERCFG, None,
                 "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
        if test:
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)
      self._VerifyNode(node_i, file_names, local_checksums,
                       nresult, master_files, node_drbd, vg_name)

      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
      if vg_name is None:
        node_volume[node] = {}
      elif isinstance(lvdata, basestring):
        _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
                 utils.SafeEncode(lvdata))
        node_volume[node] = {}
      elif not isinstance(lvdata, dict):
        _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
        continue
      else:
        node_volume[node] = lvdata

      idata = nresult.get(constants.NV_INSTANCELIST, None)
      test = not isinstance(idata, list)
      _ErrorIf(test, self.ENODEHV, node,
               "rpc call to node failed (instancelist): %s",
               utils.SafeEncode(str(idata)))
      if test:
        continue

      node_instance[node] = idata

      nodeinfo = nresult.get(constants.NV_HVINFO, None)
      test = not isinstance(nodeinfo, dict)
      _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
      if test:
        continue

      ntime = nresult.get(constants.NV_TIME, None)
      try:
        ntime_merged = utils.MergeTime(ntime)
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
        continue

      if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
        ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
      elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
        ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
      else:
        ntime_diff = None

      _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
               "Node time diverges by at least %s from master node time",
               ntime_diff)

      if ntime_diff is not None:
        continue
1562 "mfree": int(nodeinfo['memory_free']),
1565 # dictionary holding all instances this node is secondary for,
1566 # grouped by their primary node. Each key is a cluster node, and each
1567 # value is a list of instances which have the key as primary and the
1568 # current node as secondary. this is handy to calculate N+1 memory
1569 # availability if you can only failover from a primary to its
1571 "sinst-by-pnode": {},
1573 # FIXME: devise a free space model for file based instances as well
1574 if vg_name is not None:
1575 test = (constants.NV_VGLIST not in nresult or
1576 vg_name not in nresult[constants.NV_VGLIST])
1577 _ErrorIf(test, self.ENODELVM, node,
1578 "node didn't return data for the volume group '%s'"
1579 " - it is either missing or broken", vg_name)
1582 node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1583 except (ValueError, KeyError):
1584 _ErrorIf(True, self.ENODERPC, node,
1585 "node returned invalid nodeinfo, check lvm/hypervisor")

    node_vol_should = {}

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_volume,
                           node_instance, n_offline)
      inst_nodes_offline = []

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      _ErrorIf(pnode not in node_info and pnode not in n_offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)

      if pnode in n_offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               self.EINSTANCELAYOUT, instance,
               "instance has multiple secondary nodes", code="WARNING")

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        _ErrorIf(snode not in node_info and snode not in n_offline,
                 self.ENODERPC, snode,
                 "instance %s, connection to secondary node"
                 " failed", instance)

        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)

        if snode in n_offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
1647 feedback_fn("* Verifying orphan volumes")
1648 self._VerifyOrphanVolumes(node_vol_should, node_volume)
1650 feedback_fn("* Verifying remaining instances")
1651 self._VerifyOrphanInstances(instancelist, node_instance)
1653 if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1654 feedback_fn("* Verifying N+1 Memory redundancy")
1655 self._VerifyNPlusOneMemory(node_info, instance_cfg)
1657 feedback_fn("* Other Notes")
1659 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
1660 % len(i_non_redundant))
1662 if i_non_a_balanced:
1663 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
1664 % len(i_non_a_balanced))
1667 feedback_fn(" - NOTICE: %d offline node(s) found." % len(n_offline))
1670 feedback_fn(" - NOTICE: %d drained node(s) found." % len(n_drained))

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result.

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result
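
  # Sketch of a possible return value from Exec above (all names are
  # illustrative): one unreachable node, one instance needing
  # activate-disks, one instance with a missing volume:
  #
  #   ({"node3.example.com": "rpc timeout"},
  #    ["instance1.example.com"],
  #    {"instance2.example.com": [("node2.example.com", "xenvg/lv1")]})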


class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
      }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
      }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
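
  # Example of the fix above (sizes in MiB, values illustrative): a DRBD8
  # disk recorded with size 10240 whose data child reports 10112 gets the
  # child bumped to 10240, and the method returns True so the caller knows
  # the configuration must be updated.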
1873 def Exec(self, feedback_fn):
1874 """Verify the size of cluster disks.
1877 # TODO: check child disks too
1878 # TODO: check differences in size between primary/secondary nodes
1879 per_node_disks = {}
1880 for instance in self.wanted_instances:
1881 pnode = instance.primary_node
1882 if pnode not in per_node_disks:
1883 per_node_disks[pnode] = []
1884 for idx, disk in enumerate(instance.disks):
1885 per_node_disks[pnode].append((instance, idx, disk))
1887 changed = []
1888 for node, dskl in per_node_disks.items():
1889 newl = [v[2].Copy() for v in dskl]
1890 for dsk in newl:
1891 self.cfg.SetDiskID(dsk, node)
1892 result = self.rpc.call_blockdev_getsizes(node, newl)
1893 if result.fail_msg:
1894 self.LogWarning("Failure in blockdev_getsizes call to node"
1895 " %s, ignoring", node)
1896 continue
1897 if len(result.data) != len(dskl):
1898 self.LogWarning("Invalid result from node %s, ignoring node results",
1899 node)
1900 continue
1901 for ((instance, idx, disk), size) in zip(dskl, result.data):
1902 if size is None:
1903 self.LogWarning("Disk %d of instance %s did not return size"
1904 " information, ignoring", idx, instance.name)
1905 continue
1906 if not isinstance(size, (int, long)):
1907 self.LogWarning("Disk %d of instance %s did not return valid"
1908 " size information, ignoring", idx, instance.name)
1909 continue
1910 size = size >> 20
1911 if size != disk.size:
1912 self.LogInfo("Disk %d of instance %s has mismatched size,"
1913 " correcting: recorded %d, actual %d", idx,
1914 instance.name, disk.size, size)
1915 disk.size = size
1916 self.cfg.Update(instance, feedback_fn)
1917 changed.append((instance.name, idx, size))
1918 if self._EnsureChildSizes(disk):
1919 self.cfg.Update(instance, feedback_fn)
1920 changed.append((instance.name, idx, disk.size))
1921 return changed
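# A note on the ">> 20" in the reconstruction above: it assumes the node RPC
# reports sizes in bytes while the configuration stores MiB, e.g. a 10 GiB
# volume comes back as 10737418240, and 10737418240 >> 20 == 10240.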
1924 class LURenameCluster(LogicalUnit):
1925 """Rename the cluster.
1928 HPATH = "cluster-rename"
1929 HTYPE = constants.HTYPE_CLUSTER
1932 def BuildHooksEnv(self):
1937 "OP_TARGET": self.cfg.GetClusterName(),
1938 "NEW_NAME": self.op.name,
1940 mn = self.cfg.GetMasterNode()
1941 all_nodes = self.cfg.GetNodeList()
1942 return env, [mn], all_nodes
1944 def CheckPrereq(self):
1945 """Verify that the passed name is a valid one.
1948 hostname = utils.GetHostInfo(self.op.name)
1950 new_name = hostname.name
1951 self.ip = new_ip = hostname.ip
1952 old_name = self.cfg.GetClusterName()
1953 old_ip = self.cfg.GetMasterIP()
1954 if new_name == old_name and new_ip == old_ip:
1955 raise errors.OpPrereqError("Neither the name nor the IP address of the"
1956 " cluster has changed",
1958 if new_ip != old_ip:
1959 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1960 raise errors.OpPrereqError("The given cluster IP address (%s) is"
1961 " reachable on the network. Aborting." %
1962 new_ip, errors.ECODE_NOTUNIQUE)
1964 self.op.name = new_name
1966 def Exec(self, feedback_fn):
1967 """Rename the cluster.
1970 clustername = self.op.name
1973 # shutdown the master IP
1974 master = self.cfg.GetMasterNode()
1975 result = self.rpc.call_node_stop_master(master, False)
1976 result.Raise("Could not disable the master role")
1979 cluster = self.cfg.GetClusterInfo()
1980 cluster.cluster_name = clustername
1981 cluster.master_ip = ip
1982 self.cfg.Update(cluster, feedback_fn)
1984 # update the known hosts file
1985 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1986 node_list = self.cfg.GetNodeList()
1988 node_list.remove(master)
1991 result = self.rpc.call_upload_file(node_list,
1992 constants.SSH_KNOWN_HOSTS_FILE)
1993 for to_node, to_result in result.iteritems():
1994 msg = to_result.fail_msg
1995 if msg:
1996 msg = ("Copy of file %s to node %s failed: %s" %
1997 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
1998 self.proc.LogWarning(msg)
2001 result = self.rpc.call_node_start_master(master, False, False)
2002 msg = result.fail_msg
2004 self.LogWarning("Could not re-enable the master role on"
2005 " the master, please restart manually: %s", msg)
2008 def _RecursiveCheckIfLVMBased(disk):
2009 """Check if the given disk or its children are lvm-based.
2011 @type disk: L{objects.Disk}
2012 @param disk: the disk to check
2014 @return: boolean indicating whether a LD_LV dev_type was found or not
2018 for chdisk in disk.children:
2019 if _RecursiveCheckIfLVMBased(chdisk):
2020 return True
2021 return disk.dev_type == constants.LD_LV
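# For illustration (not part of the original flow): the recursion above is a
# depth-first "any" over the disk tree; a minimal stand-in object shows the
# shape it expects (a dev_type plus a children list). The string values are
# assumed stand-ins for constants.LD_DRBD8 / constants.LD_LV.
class _DemoDisk(object):
  def __init__(self, dev_type, children=None):
    self.dev_type = dev_type
    self.children = children or []
# A DRBD8 device backed by LV children is reported as lvm-based, because the
# check descends into the children before looking at the device itself:
#   _RecursiveCheckIfLVMBased(_DemoDisk("drbd8", [_DemoDisk("lvm")])) -> True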
2024 class LUSetClusterParams(LogicalUnit):
2025 """Change the parameters of the cluster.
2028 HPATH = "cluster-modify"
2029 HTYPE = constants.HTYPE_CLUSTER
2033 def CheckArguments(self):
2037 if not hasattr(self.op, "candidate_pool_size"):
2038 self.op.candidate_pool_size = None
2039 if self.op.candidate_pool_size is not None:
2040 try:
2041 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2042 except (ValueError, TypeError), err:
2043 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2044 str(err), errors.ECODE_INVAL)
2045 if self.op.candidate_pool_size < 1:
2046 raise errors.OpPrereqError("At least one master candidate needed",
2049 def ExpandNames(self):
2050 # FIXME: in the future maybe other cluster params won't require checking on
2051 # all nodes to be modified.
2052 self.needed_locks = {
2053 locking.LEVEL_NODE: locking.ALL_SET,
2055 self.share_locks[locking.LEVEL_NODE] = 1
2057 def BuildHooksEnv(self):
2062 "OP_TARGET": self.cfg.GetClusterName(),
2063 "NEW_VG_NAME": self.op.vg_name,
2065 mn = self.cfg.GetMasterNode()
2066 return env, [mn], [mn]
2068 def CheckPrereq(self):
2069 """Check prerequisites.
2071 This checks whether the given params don't conflict and
2072 if the given volume group is valid.
2075 if self.op.vg_name is not None and not self.op.vg_name:
2076 instances = self.cfg.GetAllInstancesInfo().values()
2077 for inst in instances:
2078 for disk in inst.disks:
2079 if _RecursiveCheckIfLVMBased(disk):
2080 raise errors.OpPrereqError("Cannot disable lvm storage while"
2081 " lvm-based instances exist",
2084 node_list = self.acquired_locks[locking.LEVEL_NODE]
2086 # if vg_name not None, checks given volume group on all nodes
2088 vglist = self.rpc.call_vg_list(node_list)
2089 for node in node_list:
2090 msg = vglist[node].fail_msg
2092 # ignoring down node
2093 self.LogWarning("Error while gathering data on node %s"
2094 " (ignoring node): %s", node, msg)
2096 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2098 constants.MIN_VG_SIZE)
2100 raise errors.OpPrereqError("Error on node '%s': %s" %
2101 (node, vgstatus), errors.ECODE_ENVIRON)
2103 self.cluster = cluster = self.cfg.GetClusterInfo()
2104 # validate params changes
2105 if self.op.beparams:
2106 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2107 self.new_beparams = objects.FillDict(
2108 cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2110 if self.op.nicparams:
2111 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2112 self.new_nicparams = objects.FillDict(
2113 cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2114 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2117 # check all instances for consistency
2118 for instance in self.cfg.GetAllInstancesInfo().values():
2119 for nic_idx, nic in enumerate(instance.nics):
2120 params_copy = copy.deepcopy(nic.nicparams)
2121 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2123 # check parameter syntax
2125 objects.NIC.CheckParameterSyntax(params_filled)
2126 except errors.ConfigurationError, err:
2127 nic_errors.append("Instance %s, nic/%d: %s" %
2128 (instance.name, nic_idx, err))
2130 # if we're moving instances to routed, check that they have an ip
2131 target_mode = params_filled[constants.NIC_MODE]
2132 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2133 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2134 (instance.name, nic_idx))
2136 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2137 "\n".join(nic_errors))
2139 # hypervisor list/parameters
2140 self.new_hvparams = objects.FillDict(cluster.hvparams, {})
2141 if self.op.hvparams:
2142 if not isinstance(self.op.hvparams, dict):
2143 raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2145 for hv_name, hv_dict in self.op.hvparams.items():
2146 if hv_name not in self.new_hvparams:
2147 self.new_hvparams[hv_name] = hv_dict
2149 self.new_hvparams[hv_name].update(hv_dict)
2151 # os hypervisor parameters
2152 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2154 if not isinstance(self.op.os_hvp, dict):
2155 raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2157 for os_name, hvs in self.op.os_hvp.items():
2158 if not isinstance(hvs, dict):
2159 raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2160 " input"), errors.ECODE_INVAL)
2161 if os_name not in self.new_os_hvp:
2162 self.new_os_hvp[os_name] = hvs
2164 for hv_name, hv_dict in hvs.items():
2165 if hv_name not in self.new_os_hvp[os_name]:
2166 self.new_os_hvp[os_name][hv_name] = hv_dict
2168 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2170 if self.op.enabled_hypervisors is not None:
2171 self.hv_list = self.op.enabled_hypervisors
2172 if not self.hv_list:
2173 raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2174 " least one member",
2176 invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2177 if invalid_hvs:
2178 raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2179 " entries: %s" %
2180 utils.CommaJoin(invalid_hvs),
2181 errors.ECODE_INVAL)
2182 else:
2183 self.hv_list = cluster.enabled_hypervisors
2185 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2186 # either the enabled list has changed, or the parameters have, validate
2187 for hv_name, hv_params in self.new_hvparams.items():
2188 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2189 (self.op.enabled_hypervisors and
2190 hv_name in self.op.enabled_hypervisors)):
2191 # either this is a new hypervisor, or its parameters have changed
2192 hv_class = hypervisor.GetHypervisor(hv_name)
2193 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2194 hv_class.CheckParameterSyntax(hv_params)
2195 _CheckHVParams(self, node_list, hv_name, hv_params)
2198 # no need to check any newly-enabled hypervisors, since the
2199 # defaults have already been checked in the above code-block
2200 for os_name, os_hvp in self.new_os_hvp.items():
2201 for hv_name, hv_params in os_hvp.items():
2202 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2203 # we need to fill in the new os_hvp on top of the actual hv_p
2204 cluster_defaults = self.new_hvparams.get(hv_name, {})
2205 new_osp = objects.FillDict(cluster_defaults, hv_params)
2206 hv_class = hypervisor.GetHypervisor(hv_name)
2207 hv_class.CheckParameterSyntax(new_osp)
2208 _CheckHVParams(self, node_list, hv_name, new_osp)
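# A worked example for the layering above (hypothetical values): with cluster
# defaults {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda1"} and an
# OS-level override {"root_path": "/dev/xvda1"}, the filled result is
# {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/xvda1"}; the OS
# hypervisor parameters are thus always validated as complete dicts, layered
# on top of the (possibly also updated) cluster-wide hypervisor defaults.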
2211 def Exec(self, feedback_fn):
2212 """Change the parameters of the cluster.
2215 if self.op.vg_name is not None:
2216 new_volume = self.op.vg_name
2219 if new_volume != self.cfg.GetVGName():
2220 self.cfg.SetVGName(new_volume)
2222 feedback_fn("Cluster LVM configuration already in desired"
2223 " state, not changing")
2224 if self.op.hvparams:
2225 self.cluster.hvparams = self.new_hvparams
2227 self.cluster.os_hvp = self.new_os_hvp
2228 if self.op.enabled_hypervisors is not None:
2229 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2230 if self.op.beparams:
2231 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2232 if self.op.nicparams:
2233 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2235 if self.op.candidate_pool_size is not None:
2236 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2237 # we need to update the pool size here, otherwise the save will fail
2238 _AdjustCandidatePool(self, [])
2240 self.cfg.Update(self.cluster, feedback_fn)
2243 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2244 """Distribute additional files which are part of the cluster configuration.
2246 ConfigWriter takes care of distributing the config and ssconf files, but
2247 there are more files which should be distributed to all nodes. This function
2248 makes sure those are copied.
2250 @param lu: calling logical unit
2251 @param additional_nodes: list of nodes not in the config to distribute to
2254 # 1. Gather target nodes
2255 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2256 dist_nodes = lu.cfg.GetOnlineNodeList()
2257 if additional_nodes is not None:
2258 dist_nodes.extend(additional_nodes)
2259 if myself.name in dist_nodes:
2260 dist_nodes.remove(myself.name)
2262 # 2. Gather files to distribute
2263 dist_files = set([constants.ETC_HOSTS,
2264 constants.SSH_KNOWN_HOSTS_FILE,
2265 constants.RAPI_CERT_FILE,
2266 constants.RAPI_USERS_FILE,
2267 constants.HMAC_CLUSTER_KEY,
2270 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2271 for hv_name in enabled_hypervisors:
2272 hv_class = hypervisor.GetHypervisor(hv_name)
2273 dist_files.update(hv_class.GetAncillaryFiles())
2275 # 3. Perform the files upload
2276 for fname in dist_files:
2277 if os.path.exists(fname):
2278 result = lu.rpc.call_upload_file(dist_nodes, fname)
2279 for to_node, to_result in result.items():
2280 msg = to_result.fail_msg
2282 msg = ("Copy of file %s to node %s failed: %s" %
2283 (fname, to_node, msg))
2284 lu.proc.LogWarning(msg)
2287 class LURedistributeConfig(NoHooksLU):
2288 """Force the redistribution of cluster configuration.
2290 This is a very simple LU.
2296 def ExpandNames(self):
2297 self.needed_locks = {
2298 locking.LEVEL_NODE: locking.ALL_SET,
2300 self.share_locks[locking.LEVEL_NODE] = 1
2302 def CheckPrereq(self):
2303 """Check prerequisites.
2307 def Exec(self, feedback_fn):
2308 """Redistribute the configuration.
2311 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2312 _RedistributeAncillaryFiles(self)
2315 def _WaitForSync(lu, instance, oneshot=False):
2316 """Sleep and poll for an instance's disk to sync.
2319 if not instance.disks:
2323 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2325 node = instance.primary_node
2327 for dev in instance.disks:
2328 lu.cfg.SetDiskID(dev, node)
2330 # TODO: Convert to utils.Retry
2332 retries = 0
2333 degr_retries = 10 # in seconds, as we sleep 1 second each time
2334 while True:
2335 max_time = 0
2336 done = True
2337 cumul_degraded = False
2338 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2339 msg = rstats.fail_msg
2340 if msg:
2341 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2342 retries += 1
2343 if retries >= 10:
2344 raise errors.RemoteError("Can't contact node %s for mirror data,"
2345 " aborting." % node)
2346 time.sleep(6)
2347 continue
2348 rstats = rstats.payload
2350 for i, mstat in enumerate(rstats):
2351 if mstat is None:
2352 lu.LogWarning("Can't compute data for node %s/%s",
2353 node, instance.disks[i].iv_name)
2356 cumul_degraded = (cumul_degraded or
2357 (mstat.is_degraded and mstat.sync_percent is None))
2358 if mstat.sync_percent is not None:
2360 if mstat.estimated_time is not None:
2361 rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2362 max_time = mstat.estimated_time
2363 else:
2364 rem_time = "no time estimate"
2365 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2366 (instance.disks[i].iv_name, mstat.sync_percent,
2369 # if we're done but degraded, let's do a few small retries, to
2370 # make sure we see a stable and not transient situation; therefore
2371 # we force restart of the loop
2372 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2373 logging.info("Degraded disks found, %d retries left", degr_retries)
2374 degr_retries -= 1
2375 time.sleep(1)
2376 continue
2378 if done or oneshot:
2379 break
2381 time.sleep(min(60, max_time))
2384 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2385 return not cumul_degraded
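# For illustration (a reduced sketch, not part of the original flow): the
# waiting loop above boils down to this polling pattern, with a hypothetical
# is_synced callable returning (done, degraded, eta_seconds):
def _DemoPollUntilSynced(is_synced, sleep_fn, max_sleep=60):
  degraded_retries = 10
  while True:
    done, degraded, eta = is_synced()
    if done and degraded and degraded_retries > 0:
      degraded_retries -= 1
      sleep_fn(1)  # short sleep: the degraded state may be transient
      continue
    if done:
      return not degraded
    sleep_fn(min(max_sleep, eta or max_sleep))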
2388 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2389 """Check that mirrors are not degraded.
2391 The ldisk parameter, if True, will change the test from the
2392 is_degraded attribute (which represents overall non-ok status for
2393 the device(s)) to the ldisk (representing the local storage status).
2396 lu.cfg.SetDiskID(dev, node)
2398 result = True
2400 if on_primary or dev.AssembleOnSecondary():
2401 rstats = lu.rpc.call_blockdev_find(node, dev)
2402 msg = rstats.fail_msg
2403 if msg:
2404 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2405 result = False
2406 elif not rstats.payload:
2407 lu.LogWarning("Can't find disk on node %s", node)
2408 result = False
2409 else:
2410 if ldisk:
2411 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2412 else:
2413 result = result and not rstats.payload.is_degraded
2415 if dev.children:
2416 for child in dev.children:
2417 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2419 return result
2422 class LUDiagnoseOS(NoHooksLU):
2423 """Logical unit for OS diagnose/query.
2426 _OP_REQP = ["output_fields", "names"]
2428 _FIELDS_STATIC = utils.FieldSet()
2429 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2430 # Fields that need calculation of global os validity
2431 _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2433 def ExpandNames(self):
2434 if self.op.names:
2435 raise errors.OpPrereqError("Selective OS query not supported",
2438 _CheckOutputFields(static=self._FIELDS_STATIC,
2439 dynamic=self._FIELDS_DYNAMIC,
2440 selected=self.op.output_fields)
2442 # Lock all nodes, in shared mode
2443 # Temporary removal of locks, should be reverted later
2444 # TODO: reintroduce locks when they are lighter-weight
2445 self.needed_locks = {}
2446 #self.share_locks[locking.LEVEL_NODE] = 1
2447 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2449 def CheckPrereq(self):
2450 """Check prerequisites.
2455 def _DiagnoseByOS(rlist):
2456 """Remaps a per-node return list into an a per-os per-node dictionary
2458 @param rlist: a map with node names as keys and OS objects as values
2461 @return: a dictionary with osnames as keys and as value another map, with
2462 nodes as keys and tuples of (path, status, diagnose) as values, eg::
2464 {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2465 (/srv/..., False, "invalid api")],
2466 "node2": [(/srv/..., True, "")]}
2471 # we build here the list of nodes that didn't fail the RPC (at RPC
2472 # level), so that nodes with a non-responding node daemon don't
2473 # make all OSes invalid
2474 good_nodes = [node_name for node_name in rlist
2475 if not rlist[node_name].fail_msg]
2476 for node_name, nr in rlist.items():
2477 if nr.fail_msg or not nr.payload:
2478 continue
2479 for name, path, status, diagnose, variants in nr.payload:
2480 if name not in all_os:
2481 # build a list of nodes for this os containing empty lists
2482 # for each node in node_list
2483 all_os[name] = {}
2484 for nname in good_nodes:
2485 all_os[name][nname] = []
2486 all_os[name][node_name].append((path, status, diagnose, variants))
2487 return all_os
2489 def Exec(self, feedback_fn):
2490 """Compute the list of OSes.
2493 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2494 node_data = self.rpc.call_os_diagnose(valid_nodes)
2495 pol = self._DiagnoseByOS(node_data)
2497 calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2498 calc_variants = "variants" in self.op.output_fields
2500 for os_name, os_data in pol.items():
2505 for osl in os_data.values():
2506 valid = valid and osl and osl[0][1]
2511 node_variants = osl[0][3]
2512 if variants is None:
2513 variants = node_variants
2514 else:
2515 variants = [v for v in variants if v in node_variants]
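# A worked example for the intersection above: if node1 reports variants
# ["etch", "lenny"] and node2 reports ["lenny", "squeeze"], the successive
# list comprehension narrows variants to ["lenny"], so only variants
# available on every node survive into the query output.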
2517 for field in self.op.output_fields:
2518 if field == "name":
2519 val = os_name
2520 elif field == "valid":
2521 val = valid
2522 elif field == "node_status":
2523 # this is just a copy of the dict
2524 val = {}
2525 for node_name, nos_list in os_data.items():
2526 val[node_name] = nos_list
2527 elif field == "variants":
2528 val = variants
2529 else:
2530 raise errors.ParameterError(field)
2531 row.append(val)
2533 output.append(row)
2535 return output
2537 class LURemoveNode(LogicalUnit):
2538 """Logical unit for removing a node.
2541 HPATH = "node-remove"
2542 HTYPE = constants.HTYPE_NODE
2543 _OP_REQP = ["node_name"]
2545 def BuildHooksEnv(self):
2548 This doesn't run on the target node in the pre phase as a failed
2549 node would then be impossible to remove.
2553 "OP_TARGET": self.op.node_name,
2554 "NODE_NAME": self.op.node_name,
2556 all_nodes = self.cfg.GetNodeList()
2558 all_nodes.remove(self.op.node_name)
2560 logging.warning("Node %s which is about to be removed not found"
2561 " in the all nodes list", self.op.node_name)
2562 return env, all_nodes, all_nodes
2564 def CheckPrereq(self):
2565 """Check prerequisites.
2568 - the node exists in the configuration
2569 - it does not have primary or secondary instances
2570 - it's not the master
2572 Any errors are signaled by raising errors.OpPrereqError.
2575 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2576 node = self.cfg.GetNodeInfo(self.op.node_name)
2577 assert node is not None
2579 instance_list = self.cfg.GetInstanceList()
2581 masternode = self.cfg.GetMasterNode()
2582 if node.name == masternode:
2583 raise errors.OpPrereqError("Node is the master node,"
2584 " you need to failover first.",
2587 for instance_name in instance_list:
2588 instance = self.cfg.GetInstanceInfo(instance_name)
2589 if node.name in instance.all_nodes:
2590 raise errors.OpPrereqError("Instance %s is still running on the node,"
2591 " please remove first." % instance_name,
2593 self.op.node_name = node.name
2596 def Exec(self, feedback_fn):
2597 """Removes the node from the cluster.
2601 logging.info("Stopping the node daemon and removing configs from node %s",
2604 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2606 # Promote nodes to master candidate as needed
2607 _AdjustCandidatePool(self, exceptions=[node.name])
2608 self.context.RemoveNode(node.name)
2610 # Run post hooks on the node before it's removed
2611 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2612 try:
2613 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2614 except:
2615 # pylint: disable-msg=W0702
2616 self.LogWarning("Errors occurred running hooks on %s" % node.name)
2618 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2619 msg = result.fail_msg
2621 self.LogWarning("Errors encountered on the remote node while leaving"
2622 " the cluster: %s", msg)
2625 class LUQueryNodes(NoHooksLU):
2626 """Logical unit for querying nodes.
2629 # pylint: disable-msg=W0142
2630 _OP_REQP = ["output_fields", "names", "use_locking"]
2633 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2634 "master_candidate", "offline", "drained"]
2636 _FIELDS_DYNAMIC = utils.FieldSet(
2638 "mtotal", "mnode", "mfree",
2640 "ctotal", "cnodes", "csockets",
2643 _FIELDS_STATIC = utils.FieldSet(*[
2644 "pinst_cnt", "sinst_cnt",
2645 "pinst_list", "sinst_list",
2646 "pip", "sip", "tags",
2648 "role"] + _SIMPLE_FIELDS
2651 def ExpandNames(self):
2652 _CheckOutputFields(static=self._FIELDS_STATIC,
2653 dynamic=self._FIELDS_DYNAMIC,
2654 selected=self.op.output_fields)
2656 self.needed_locks = {}
2657 self.share_locks[locking.LEVEL_NODE] = 1
2660 self.wanted = _GetWantedNodes(self, self.op.names)
2662 self.wanted = locking.ALL_SET
2664 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2665 self.do_locking = self.do_node_query and self.op.use_locking
2667 # if we don't request only static fields, we need to lock the nodes
2668 self.needed_locks[locking.LEVEL_NODE] = self.wanted
2670 def CheckPrereq(self):
2671 """Check prerequisites.
2674 # The validation of the node list is done in _GetWantedNodes if the
2675 # list is non-empty; if it's empty, there's nothing to validate
2678 def Exec(self, feedback_fn):
2679 """Computes the list of nodes and their attributes.
2682 all_info = self.cfg.GetAllNodesInfo()
2684 nodenames = self.acquired_locks[locking.LEVEL_NODE]
2685 elif self.wanted != locking.ALL_SET:
2686 nodenames = self.wanted
2687 missing = set(nodenames).difference(all_info.keys())
2689 raise errors.OpExecError(
2690 "Some nodes were removed before retrieving their data: %s" % missing)
2692 nodenames = all_info.keys()
2694 nodenames = utils.NiceSort(nodenames)
2695 nodelist = [all_info[name] for name in nodenames]
2697 # begin data gathering
2699 if self.do_node_query:
2701 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2702 self.cfg.GetHypervisorType())
2703 for name in nodenames:
2704 nodeinfo = node_data[name]
2705 if not nodeinfo.fail_msg and nodeinfo.payload:
2706 nodeinfo = nodeinfo.payload
2707 fn = utils.TryConvert
2709 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2710 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2711 "mfree": fn(int, nodeinfo.get('memory_free', None)),
2712 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2713 "dfree": fn(int, nodeinfo.get('vg_free', None)),
2714 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2715 "bootid": nodeinfo.get('bootid', None),
2716 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2717 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2720 live_data[name] = {}
2722 live_data = dict.fromkeys(nodenames, {})
2724 node_to_primary = dict([(name, set()) for name in nodenames])
2725 node_to_secondary = dict([(name, set()) for name in nodenames])
2727 inst_fields = frozenset(("pinst_cnt", "pinst_list",
2728 "sinst_cnt", "sinst_list"))
2729 if inst_fields & frozenset(self.op.output_fields):
2730 inst_data = self.cfg.GetAllInstancesInfo()
2732 for inst in inst_data.values():
2733 if inst.primary_node in node_to_primary:
2734 node_to_primary[inst.primary_node].add(inst.name)
2735 for secnode in inst.secondary_nodes:
2736 if secnode in node_to_secondary:
2737 node_to_secondary[secnode].add(inst.name)
2739 master_node = self.cfg.GetMasterNode()
2741 # end data gathering
2743 output = []
2744 for node in nodelist:
2745 node_output = []
2746 for field in self.op.output_fields:
2747 if field in self._SIMPLE_FIELDS:
2748 val = getattr(node, field)
2749 elif field == "pinst_list":
2750 val = list(node_to_primary[node.name])
2751 elif field == "sinst_list":
2752 val = list(node_to_secondary[node.name])
2753 elif field == "pinst_cnt":
2754 val = len(node_to_primary[node.name])
2755 elif field == "sinst_cnt":
2756 val = len(node_to_secondary[node.name])
2757 elif field == "pip":
2758 val = node.primary_ip
2759 elif field == "sip":
2760 val = node.secondary_ip
2761 elif field == "tags":
2762 val = list(node.GetTags())
2763 elif field == "master":
2764 val = node.name == master_node
2765 elif self._FIELDS_DYNAMIC.Matches(field):
2766 val = live_data[node.name].get(field, None)
2767 elif field == "role":
2768 if node.name == master_node:
2770 elif node.master_candidate:
2779 raise errors.ParameterError(field)
2780 node_output.append(val)
2781 output.append(node_output)
2783 return output
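# An aside, illustrative only: the if/elif ladder above is this module's
# idiom for field dispatch; an equivalent table-driven sketch, shown here for
# a few of the static fields that need no live data:
_DEMO_NODE_FIELD_FN = {
  "pip": lambda node: node.primary_ip,
  "sip": lambda node: node.secondary_ip,
  "tags": lambda node: list(node.GetTags()),
}
# val = _DEMO_NODE_FIELD_FN[field](node) would then replace three branches.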
2786 class LUQueryNodeVolumes(NoHooksLU):
2787 """Logical unit for getting volumes on node(s).
2790 _OP_REQP = ["nodes", "output_fields"]
2792 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2793 _FIELDS_STATIC = utils.FieldSet("node")
2795 def ExpandNames(self):
2796 _CheckOutputFields(static=self._FIELDS_STATIC,
2797 dynamic=self._FIELDS_DYNAMIC,
2798 selected=self.op.output_fields)
2800 self.needed_locks = {}
2801 self.share_locks[locking.LEVEL_NODE] = 1
2802 if not self.op.nodes:
2803 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2805 self.needed_locks[locking.LEVEL_NODE] = \
2806 _GetWantedNodes(self, self.op.nodes)
2808 def CheckPrereq(self):
2809 """Check prerequisites.
2811 This checks that the fields required are valid output fields.
2814 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2816 def Exec(self, feedback_fn):
2817 """Computes the list of nodes and their attributes.
2820 nodenames = self.nodes
2821 volumes = self.rpc.call_node_volumes(nodenames)
2823 ilist = [self.cfg.GetInstanceInfo(iname) for iname
2824 in self.cfg.GetInstanceList()]
2826 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2828 output = []
2829 for node in nodenames:
2830 nresult = volumes[node]
2833 msg = nresult.fail_msg
2835 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2838 node_vols = nresult.payload[:]
2839 node_vols.sort(key=lambda vol: vol['dev'])
2841 for vol in node_vols:
2842 node_output = []
2843 for field in self.op.output_fields:
2846 elif field == "phys":
2850 elif field == "name":
2852 elif field == "size":
2853 val = int(float(vol['size']))
2854 elif field == "instance":
2856 if node not in lv_by_node[inst]:
2858 if vol['name'] in lv_by_node[inst][node]:
2864 raise errors.ParameterError(field)
2865 node_output.append(str(val))
2867 output.append(node_output)
2869 return output
2872 class LUQueryNodeStorage(NoHooksLU):
2873 """Logical unit for getting information on storage units on node(s).
2876 _OP_REQP = ["nodes", "storage_type", "output_fields"]
2878 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
2880 def ExpandNames(self):
2881 storage_type = self.op.storage_type
2883 if storage_type not in constants.VALID_STORAGE_TYPES:
2884 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
2887 _CheckOutputFields(static=self._FIELDS_STATIC,
2888 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
2889 selected=self.op.output_fields)
2891 self.needed_locks = {}
2892 self.share_locks[locking.LEVEL_NODE] = 1
2895 self.needed_locks[locking.LEVEL_NODE] = \
2896 _GetWantedNodes(self, self.op.nodes)
2898 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2900 def CheckPrereq(self):
2901 """Check prerequisites.
2903 This checks that the fields required are valid output fields.
2906 self.op.name = getattr(self.op, "name", None)
2908 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2910 def Exec(self, feedback_fn):
2911 """Computes the list of nodes and their attributes.
2914 # Always get name to sort by
2915 if constants.SF_NAME in self.op.output_fields:
2916 fields = self.op.output_fields[:]
2918 fields = [constants.SF_NAME] + self.op.output_fields
2920 # Never ask for node or type as it's only known to the LU
2921 for extra in [constants.SF_NODE, constants.SF_TYPE]:
2922 while extra in fields:
2923 fields.remove(extra)
2925 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
2926 name_idx = field_idx[constants.SF_NAME]
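# A worked example (hypothetical values): for output_fields == ["size"] the
# code above yields fields == ["name", "size"], hence
# field_idx == {"name": 0, "size": 1} and name_idx == 0; the name column is
# always fetched, even when the caller did not ask for it, purely so the
# result rows can be keyed and sorted by name below.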
2928 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2929 data = self.rpc.call_storage_list(self.nodes,
2930 self.op.storage_type, st_args,
2931 self.op.name, fields)
2935 for node in utils.NiceSort(self.nodes):
2936 nresult = data[node]
2940 msg = nresult.fail_msg
2942 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
2945 rows = dict([(row[name_idx], row) for row in nresult.payload])
2947 for name in utils.NiceSort(rows.keys()):
2952 for field in self.op.output_fields:
2953 if field == constants.SF_NODE:
2955 elif field == constants.SF_TYPE:
2956 val = self.op.storage_type
2957 elif field in field_idx:
2958 val = row[field_idx[field]]
2960 raise errors.ParameterError(field)
2969 class LUModifyNodeStorage(NoHooksLU):
2970 """Logical unit for modifying a storage volume on a node.
2973 _OP_REQP = ["node_name", "storage_type", "name", "changes"]
2976 def CheckArguments(self):
2977 self.opnode_name = _ExpandNodeName(self.cfg, self.op.node_name)
2979 storage_type = self.op.storage_type
2980 if storage_type not in constants.VALID_STORAGE_TYPES:
2981 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
2984 def ExpandNames(self):
2985 self.needed_locks = {
2986 locking.LEVEL_NODE: self.op.node_name,
2989 def CheckPrereq(self):
2990 """Check prerequisites.
2993 storage_type = self.op.storage_type
2996 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
2998 raise errors.OpPrereqError("Storage units of type '%s' can not be"
2999 " modified" % storage_type,
3002 diff = set(self.op.changes.keys()) - modifiable
3004 raise errors.OpPrereqError("The following fields can not be modified for"
3005 " storage units of type '%s': %r" %
3006 (storage_type, list(diff)),
3009 def Exec(self, feedback_fn):
3010 """Computes the list of nodes and their attributes.
3013 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3014 result = self.rpc.call_storage_modify(self.op.node_name,
3015 self.op.storage_type, st_args,
3016 self.op.name, self.op.changes)
3017 result.Raise("Failed to modify storage unit '%s' on %s" %
3018 (self.op.name, self.op.node_name))
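# A usage note (assumed values, for illustration): a typical opcode for this
# LU would carry storage_type == constants.ST_LVM_PV and something like
# changes == {constants.SF_ALLOCATABLE: True}; any field outside the
# MODIFIABLE_STORAGE_FIELDS whitelist for that storage type is rejected in
# CheckPrereq before the RPC is ever issued.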
3021 class LUAddNode(LogicalUnit):
3022 """Logical unit for adding node to the cluster.
3026 HTYPE = constants.HTYPE_NODE
3027 _OP_REQP = ["node_name"]
3029 def CheckArguments(self):
3030 # validate/normalize the node name
3031 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3033 def BuildHooksEnv(self):
3036 This will run on all nodes before, and on all nodes + the new node after.
3040 "OP_TARGET": self.op.node_name,
3041 "NODE_NAME": self.op.node_name,
3042 "NODE_PIP": self.op.primary_ip,
3043 "NODE_SIP": self.op.secondary_ip,
3045 nodes_0 = self.cfg.GetNodeList()
3046 nodes_1 = nodes_0 + [self.op.node_name, ]
3047 return env, nodes_0, nodes_1
3049 def CheckPrereq(self):
3050 """Check prerequisites.
3053 - the new node is not already in the config
3055 - its parameters (single/dual homed) matches the cluster
3057 Any errors are signaled by raising errors.OpPrereqError.
3060 node_name = self.op.node_name
3063 dns_data = utils.GetHostInfo(node_name)
3065 node = dns_data.name
3066 primary_ip = self.op.primary_ip = dns_data.ip
3067 secondary_ip = getattr(self.op, "secondary_ip", None)
3068 if secondary_ip is None:
3069 secondary_ip = primary_ip
3070 if not utils.IsValidIP(secondary_ip):
3071 raise errors.OpPrereqError("Invalid secondary IP given",
3073 self.op.secondary_ip = secondary_ip
3075 node_list = cfg.GetNodeList()
3076 if not self.op.readd and node in node_list:
3077 raise errors.OpPrereqError("Node %s is already in the configuration" %
3078 node, errors.ECODE_EXISTS)
3079 elif self.op.readd and node not in node_list:
3080 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3083 for existing_node_name in node_list:
3084 existing_node = cfg.GetNodeInfo(existing_node_name)
3086 if self.op.readd and node == existing_node_name:
3087 if (existing_node.primary_ip != primary_ip or
3088 existing_node.secondary_ip != secondary_ip):
3089 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3090 " address configuration as before",
3094 if (existing_node.primary_ip == primary_ip or
3095 existing_node.secondary_ip == primary_ip or
3096 existing_node.primary_ip == secondary_ip or
3097 existing_node.secondary_ip == secondary_ip):
3098 raise errors.OpPrereqError("New node ip address(es) conflict with"
3099 " existing node %s" % existing_node.name,
3100 errors.ECODE_NOTUNIQUE)
3102 # check that the type of the node (single versus dual homed) is the
3103 # same as for the master
3104 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3105 master_singlehomed = myself.secondary_ip == myself.primary_ip
3106 newbie_singlehomed = secondary_ip == primary_ip
3107 if master_singlehomed != newbie_singlehomed:
3108 if master_singlehomed:
3109 raise errors.OpPrereqError("The master has no private ip but the"
3110 " new node has one",
3113 raise errors.OpPrereqError("The master has a private ip but the"
3114 " new node doesn't have one",
3117 # checks reachability
3118 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3119 raise errors.OpPrereqError("Node not reachable by ping",
3120 errors.ECODE_ENVIRON)
3122 if not newbie_singlehomed:
3123 # check reachability from my secondary ip to newbie's secondary ip
3124 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3125 source=myself.secondary_ip):
3126 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3127 " based ping to noded port",
3128 errors.ECODE_ENVIRON)
3135 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3138 self.new_node = self.cfg.GetNodeInfo(node)
3139 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3141 self.new_node = objects.Node(name=node,
3142 primary_ip=primary_ip,
3143 secondary_ip=secondary_ip,
3144 master_candidate=self.master_candidate,
3145 offline=False, drained=False)
3147 def Exec(self, feedback_fn):
3148 """Adds the new node to the cluster.
3151 new_node = self.new_node
3152 node = new_node.name
3154 # for re-adds, reset the offline/drained/master-candidate flags;
3155 # we need to reset here, otherwise offline would prevent RPC calls
3156 # later in the procedure; this also means that if the re-add
3157 # fails, we are left with a non-offlined, broken node
3159 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3160 self.LogInfo("Readding a node, the offline/drained flags were reset")
3161 # if we demote the node, we do cleanup later in the procedure
3162 new_node.master_candidate = self.master_candidate
3164 # notify the user about any possible mc promotion
3165 if new_node.master_candidate:
3166 self.LogInfo("Node will be a master candidate")
3168 # check connectivity
3169 result = self.rpc.call_version([node])[node]
3170 result.Raise("Can't get version information from node %s" % node)
3171 if constants.PROTOCOL_VERSION == result.payload:
3172 logging.info("Communication to node %s fine, sw version %s match",
3173 node, result.payload)
3175 raise errors.OpExecError("Version mismatch master version %s,"
3176 " node version %s" %
3177 (constants.PROTOCOL_VERSION, result.payload))
3180 if self.cfg.GetClusterInfo().modify_ssh_setup:
3181 logging.info("Copy ssh key to node %s", node)
3182 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3184 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3185 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3186 priv_key, pub_key]
3187 keyarray = []
3188 for i in keyfiles:
3189 keyarray.append(utils.ReadFile(i))
3191 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3192 keyarray[2], keyarray[3], keyarray[4],
3194 result.Raise("Cannot transfer ssh keys to the new node")
3196 # Add node to our /etc/hosts, and add key to known_hosts
3197 if self.cfg.GetClusterInfo().modify_etc_hosts:
3198 utils.AddHostToEtcHosts(new_node.name)
3200 if new_node.secondary_ip != new_node.primary_ip:
3201 result = self.rpc.call_node_has_ip_address(new_node.name,
3202 new_node.secondary_ip)
3203 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3204 prereq=True, ecode=errors.ECODE_ENVIRON)
3205 if not result.payload:
3206 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3207 " you gave (%s). Please fix and re-run this"
3208 " command." % new_node.secondary_ip)
3210 node_verify_list = [self.cfg.GetMasterNode()]
3211 node_verify_param = {
3212 constants.NV_NODELIST: [node],
3213 # TODO: do a node-net-test as well?
3216 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3217 self.cfg.GetClusterName())
3218 for verifier in node_verify_list:
3219 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3220 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3221 if nl_payload:
3222 for failed in nl_payload:
3223 feedback_fn("ssh/hostname verification failed"
3224 " (checking from %s): %s" %
3225 (verifier, nl_payload[failed]))
3226 raise errors.OpExecError("ssh/hostname verification failed.")
3229 _RedistributeAncillaryFiles(self)
3230 self.context.ReaddNode(new_node)
3231 # make sure we redistribute the config
3232 self.cfg.Update(new_node, feedback_fn)
3233 # and make sure the new node will not have old files around
3234 if not new_node.master_candidate:
3235 result = self.rpc.call_node_demote_from_mc(new_node.name)
3236 msg = result.fail_msg
3238 self.LogWarning("Node failed to demote itself from master"
3239 " candidate status: %s" % msg)
3241 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3242 self.context.AddNode(new_node, self.proc.GetECId())
3245 class LUSetNodeParams(LogicalUnit):
3246 """Modifies the parameters of a node.
3249 HPATH = "node-modify"
3250 HTYPE = constants.HTYPE_NODE
3251 _OP_REQP = ["node_name"]
3254 def CheckArguments(self):
3255 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3256 _CheckBooleanOpField(self.op, 'master_candidate')
3257 _CheckBooleanOpField(self.op, 'offline')
3258 _CheckBooleanOpField(self.op, 'drained')
3259 _CheckBooleanOpField(self.op, 'auto_promote')
3260 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3261 if all_mods.count(None) == 3:
3262 raise errors.OpPrereqError("Please pass at least one modification",
3264 if all_mods.count(True) > 1:
3265 raise errors.OpPrereqError("Can't set the node into more than one"
3266 " state at the same time",
3269 # Boolean value that tells us whether we're offlining or draining the node
3270 self.offline_or_drain = (self.op.offline == True or
3271 self.op.drained == True)
3272 self.deoffline_or_drain = (self.op.offline == False or
3273 self.op.drained == False)
3274 self.might_demote = (self.op.master_candidate == False or
3275 self.offline_or_drain)
3277 self.lock_all = self.op.auto_promote and self.might_demote
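# A worked example for the flags above: demoting a master candidate
# (master_candidate == False, offline and drained left as None) gives
# offline_or_drain == False but might_demote == True, so all node locks are
# taken only if auto_promote was also passed; without it, CheckPrereq below
# fails early when the candidate pool would become too small.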
3280 def ExpandNames(self):
3282 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3284 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3286 def BuildHooksEnv(self):
3289 This runs on the master node.
3293 "OP_TARGET": self.op.node_name,
3294 "MASTER_CANDIDATE": str(self.op.master_candidate),
3295 "OFFLINE": str(self.op.offline),
3296 "DRAINED": str(self.op.drained),
3298 nl = [self.cfg.GetMasterNode(),
3302 def CheckPrereq(self):
3303 """Check prerequisites.
3305 This checks the node's current state against the requested flag changes.
3308 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3310 if (self.op.master_candidate is not None or
3311 self.op.drained is not None or
3312 self.op.offline is not None):
3313 # we can't change the master's node flags
3314 if self.op.node_name == self.cfg.GetMasterNode():
3315 raise errors.OpPrereqError("The master role can be changed"
3316 " only via masterfailover",
3320 if node.master_candidate and self.might_demote and not self.lock_all:
3321 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3322 # check if after removing the current node, we're missing master
3324 (mc_remaining, mc_should, _) = \
3325 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3326 if mc_remaining != mc_should:
3327 raise errors.OpPrereqError("Not enough master candidates, please"
3328 " pass auto_promote to allow promotion",
3331 if (self.op.master_candidate == True and
3332 ((node.offline and not self.op.offline == False) or
3333 (node.drained and not self.op.drained == False))):
3334 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3335 " to master_candidate" % node.name,
3338 # If we're being deofflined/drained, we'll MC ourself if needed
3339 if (self.deoffline_or_drain and not self.offline_or_drain and not
3340 self.op.master_candidate == True and not node.master_candidate):
3341 self.op.master_candidate = _DecideSelfPromotion(self)
3342 if self.op.master_candidate:
3343 self.LogInfo("Autopromoting node to master candidate")
3347 def Exec(self, feedback_fn):
3356 if self.op.offline is not None:
3357 node.offline = self.op.offline
3358 result.append(("offline", str(self.op.offline)))
3359 if self.op.offline == True:
3360 if node.master_candidate:
3361 node.master_candidate = False
3363 result.append(("master_candidate", "auto-demotion due to offline"))
3364 if node.drained:
3365 node.drained = False
3366 result.append(("drained", "clear drained status due to offline"))
3368 if self.op.master_candidate is not None:
3369 node.master_candidate = self.op.master_candidate
3371 result.append(("master_candidate", str(self.op.master_candidate)))
3372 if self.op.master_candidate == False:
3373 rrc = self.rpc.call_node_demote_from_mc(node.name)
3374 msg = rrc.fail_msg
3375 if msg:
3376 self.LogWarning("Node failed to demote itself: %s" % msg)
3378 if self.op.drained is not None:
3379 node.drained = self.op.drained
3380 result.append(("drained", str(self.op.drained)))
3381 if self.op.drained == True:
3382 if node.master_candidate:
3383 node.master_candidate = False
3385 result.append(("master_candidate", "auto-demotion due to drain"))
3386 rrc = self.rpc.call_node_demote_from_mc(node.name)
3387 msg = rrc.fail_msg
3388 if msg:
3389 self.LogWarning("Node failed to demote itself: %s" % msg)
3390 if node.offline:
3391 node.offline = False
3392 result.append(("offline", "clear offline status due to drain"))
3394 # we locked all nodes, we adjust the CP before updating this node
3395 if self.lock_all:
3396 _AdjustCandidatePool(self, [node.name])
3398 # this will trigger configuration file update, if needed
3399 self.cfg.Update(node, feedback_fn)
3401 # this will trigger job queue propagation or cleanup
3403 self.context.ReaddNode(node)
3408 class LUPowercycleNode(NoHooksLU):
3409 """Powercycles a node.
3412 _OP_REQP = ["node_name", "force"]
3415 def CheckArguments(self):
3416 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3417 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3418 raise errors.OpPrereqError("The node is the master and the force"
3419 " parameter was not set",
3422 def ExpandNames(self):
3423 """Locking for PowercycleNode.
3425 This is a last-resort option and shouldn't block on other
3426 jobs. Therefore, we grab no locks.
3429 self.needed_locks = {}
3431 def CheckPrereq(self):
3432 """Check prerequisites.
3434 This LU has no prereqs.
3439 def Exec(self, feedback_fn):
3443 result = self.rpc.call_node_powercycle(self.op.node_name,
3444 self.cfg.GetHypervisorType())
3445 result.Raise("Failed to schedule the reboot")
3446 return result.payload
3449 class LUQueryClusterInfo(NoHooksLU):
3450 """Query cluster configuration.
3456 def ExpandNames(self):
3457 self.needed_locks = {}
3459 def CheckPrereq(self):
3460 """No prerequsites needed for this LU.
3465 def Exec(self, feedback_fn):
3466 """Return cluster config.
3469 cluster = self.cfg.GetClusterInfo()
3471 os_hvp = {}
3472 # Filter just for enabled hypervisors
3473 for os_name, hv_dict in cluster.os_hvp.items():
3474 os_hvp[os_name] = {}
3475 for hv_name, hv_params in hv_dict.items():
3476 if hv_name in cluster.enabled_hypervisors:
3477 os_hvp[os_name][hv_name] = hv_params
3479 result = {
3480 "software_version": constants.RELEASE_VERSION,
3481 "protocol_version": constants.PROTOCOL_VERSION,
3482 "config_version": constants.CONFIG_VERSION,
3483 "os_api_version": max(constants.OS_API_VERSIONS),
3484 "export_version": constants.EXPORT_VERSION,
3485 "architecture": (platform.architecture()[0], platform.machine()),
3486 "name": cluster.cluster_name,
3487 "master": cluster.master_node,
3488 "default_hypervisor": cluster.enabled_hypervisors[0],
3489 "enabled_hypervisors": cluster.enabled_hypervisors,
3490 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3491 for hypervisor_name in cluster.enabled_hypervisors]),
3493 "beparams": cluster.beparams,
3494 "nicparams": cluster.nicparams,
3495 "candidate_pool_size": cluster.candidate_pool_size,
3496 "master_netdev": cluster.master_netdev,
3497 "volume_group_name": cluster.volume_group_name,
3498 "file_storage_dir": cluster.file_storage_dir,
3499 "ctime": cluster.ctime,
3500 "mtime": cluster.mtime,
3501 "uuid": cluster.uuid,
3502 "tags": list(cluster.GetTags()),
3508 class LUQueryConfigValues(NoHooksLU):
3509 """Return configuration values.
3514 _FIELDS_DYNAMIC = utils.FieldSet()
3515 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3518 def ExpandNames(self):
3519 self.needed_locks = {}
3521 _CheckOutputFields(static=self._FIELDS_STATIC,
3522 dynamic=self._FIELDS_DYNAMIC,
3523 selected=self.op.output_fields)
3525 def CheckPrereq(self):
3526 """No prerequisites.
3531 def Exec(self, feedback_fn):
3532 """Dump a representation of the cluster config to the standard output.
3536 for field in self.op.output_fields:
3537 if field == "cluster_name":
3538 entry = self.cfg.GetClusterName()
3539 elif field == "master_node":
3540 entry = self.cfg.GetMasterNode()
3541 elif field == "drain_flag":
3542 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3543 elif field == "watcher_pause":
3544 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3546 raise errors.ParameterError(field)
3547 values.append(entry)
3549 return values
3551 class LUActivateInstanceDisks(NoHooksLU):
3552 """Bring up an instance's disks.
3555 _OP_REQP = ["instance_name"]
3558 def ExpandNames(self):
3559 self._ExpandAndLockInstance()
3560 self.needed_locks[locking.LEVEL_NODE] = []
3561 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3563 def DeclareLocks(self, level):
3564 if level == locking.LEVEL_NODE:
3565 self._LockInstancesNodes()
3567 def CheckPrereq(self):
3568 """Check prerequisites.
3570 This checks that the instance is in the cluster.
3573 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3574 assert self.instance is not None, \
3575 "Cannot retrieve locked instance %s" % self.op.instance_name
3576 _CheckNodeOnline(self, self.instance.primary_node)
3577 if not hasattr(self.op, "ignore_size"):
3578 self.op.ignore_size = False
3580 def Exec(self, feedback_fn):
3581 """Activate the disks.
3584 disks_ok, disks_info = \
3585 _AssembleInstanceDisks(self, self.instance,
3586 ignore_size=self.op.ignore_size)
3587 if not disks_ok:
3588 raise errors.OpExecError("Cannot activate block devices")
3590 return disks_info
3593 def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3595 """Prepare the block devices for an instance.
3597 This sets up the block devices on all nodes.
3599 @type lu: L{LogicalUnit}
3600 @param lu: the logical unit on whose behalf we execute
3601 @type instance: L{objects.Instance}
3602 @param instance: the instance for whose disks we assemble
3603 @type ignore_secondaries: boolean
3604 @param ignore_secondaries: if true, errors on secondary nodes
3605 won't result in an error return from the function
3606 @type ignore_size: boolean
3607 @param ignore_size: if true, the current known size of the disk
3608 will not be used during the disk activation, useful for cases
3609 when the size is wrong
3610 @return: False if the operation failed, otherwise a list of
3611 (host, instance_visible_name, node_visible_name)
3612 with the mapping from node devices to instance devices
3615 device_info = []
3616 disks_ok = True
3617 iname = instance.name
3618 # With the two passes mechanism we try to reduce the window of
3619 # opportunity for the race condition of switching DRBD to primary
3620 # before handshaking occurred, but we do not eliminate it
3622 # The proper fix would be to wait (with some limits) until the
3623 # connection has been made and drbd transitions from WFConnection
3624 # into any other network-connected state (Connected, SyncTarget,
3627 # 1st pass, assemble on all nodes in secondary mode
3628 for inst_disk in instance.disks:
3629 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3630 if ignore_size:
3631 node_disk = node_disk.Copy()
3632 node_disk.UnsetSize()
3633 lu.cfg.SetDiskID(node_disk, node)
3634 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3635 msg = result.fail_msg
3636 if msg:
3637 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3638 " (is_primary=False, pass=1): %s",
3639 inst_disk.iv_name, node, msg)
3640 if not ignore_secondaries:
3641 disks_ok = False
3643 # FIXME: race condition on drbd migration to primary
3645 # 2nd pass, do only the primary node
3646 for inst_disk in instance.disks:
3647 dev_path = None
3649 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3650 if node != instance.primary_node:
3651 continue
3652 if ignore_size:
3653 node_disk = node_disk.Copy()
3654 node_disk.UnsetSize()
3655 lu.cfg.SetDiskID(node_disk, node)
3656 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3657 msg = result.fail_msg
3658 if msg:
3659 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3660 " (is_primary=True, pass=2): %s",
3661 inst_disk.iv_name, node, msg)
3662 disks_ok = False
3663 else:
3664 dev_path = result.payload
3666 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3668 # leave the disks configured for the primary node
3669 # this is a workaround that would be fixed better by
3670 # improving the logical/physical id handling
3671 for disk in instance.disks:
3672 lu.cfg.SetDiskID(disk, instance.primary_node)
3674 return disks_ok, device_info
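# For illustration (not part of the original flow): the two-pass scheme
# above, stripped to its essence with a hypothetical assemble_fn(node,
# as_primary) callable. Every node first brings the device up as secondary,
# and only then is it re-assembled as primary on the primary node, so the
# DRBD peers can handshake before any promotion happens.
def _DemoTwoPassAssemble(all_nodes, primary_node, assemble_fn):
  for node in all_nodes:
    assemble_fn(node, False)       # pass 1: secondary everywhere
  assemble_fn(primary_node, True)  # pass 2: promote on the primary only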
3677 def _StartInstanceDisks(lu, instance, force):
3678 """Start the disks of an instance.
3681 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3682 ignore_secondaries=force)
3684 _ShutdownInstanceDisks(lu, instance)
3685 if force is not None and not force:
3686 lu.proc.LogWarning("", hint="If the message above refers to a"
3688 " you can retry the operation using '--force'.")
3689 raise errors.OpExecError("Disk consistency error")
3692 class LUDeactivateInstanceDisks(NoHooksLU):
3693 """Shutdown an instance's disks.
3696 _OP_REQP = ["instance_name"]
3699 def ExpandNames(self):
3700 self._ExpandAndLockInstance()
3701 self.needed_locks[locking.LEVEL_NODE] = []
3702 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3704 def DeclareLocks(self, level):
3705 if level == locking.LEVEL_NODE:
3706 self._LockInstancesNodes()
3708 def CheckPrereq(self):
3709 """Check prerequisites.
3711 This checks that the instance is in the cluster.
3714 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3715 assert self.instance is not None, \
3716 "Cannot retrieve locked instance %s" % self.op.instance_name
3718 def Exec(self, feedback_fn):
3719 """Deactivate the disks
3722 instance = self.instance
3723 _SafeShutdownInstanceDisks(self, instance)
3726 def _SafeShutdownInstanceDisks(lu, instance):
3727 """Shutdown block devices of an instance.
3729 This function checks if an instance is running, before calling
3730 _ShutdownInstanceDisks.
3733 pnode = instance.primary_node
3734 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
3735 ins_l.Raise("Can't contact node %s" % pnode)
3737 if instance.name in ins_l.payload:
3738 raise errors.OpExecError("Instance is running, can't shutdown"
3741 _ShutdownInstanceDisks(lu, instance)
3744 def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3745 """Shutdown block devices of an instance.
3747 This does the shutdown on all nodes of the instance.
3749 If ignore_primary is true, errors on the primary node are
3750 ignored; otherwise they cause the function to return False.
3753 all_result = True
3754 for disk in instance.disks:
3755 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3756 lu.cfg.SetDiskID(top_disk, node)
3757 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3758 msg = result.fail_msg
3760 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3761 disk.iv_name, node, msg)
3762 if not ignore_primary or node != instance.primary_node:
3763 all_result = False
3765 return all_result
3767 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3768 """Checks if a node has enough free memory.
3770 This function checks if a given node has the needed amount of free
3771 memory. If the node has less memory, or if we cannot get the
3772 information from the node, this function raises an OpPrereqError.
3775 @type lu: C{LogicalUnit}
3776 @param lu: a logical unit from which we get configuration data
3778 @param node: the node to check
3779 @type reason: C{str}
3780 @param reason: string to use in the error message
3781 @type requested: C{int}
3782 @param requested: the amount of memory in MiB to check for
3783 @type hypervisor_name: C{str}
3784 @param hypervisor_name: the hypervisor to ask for memory stats
3785 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3786 we cannot check the node
3789 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
3790 nodeinfo[node].Raise("Can't get data from node %s" % node,
3791 prereq=True, ecode=errors.ECODE_ENVIRON)
3792 free_mem = nodeinfo[node].payload.get('memory_free', None)
3793 if not isinstance(free_mem, int):
3794 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3795 " was '%s'" % (node, free_mem),
3796 errors.ECODE_ENVIRON)
3797 if requested > free_mem:
3798 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3799 " needed %s MiB, available %s MiB" %
3800 (node, reason, requested, free_mem),
3801 errors.ECODE_NORES)
3804 def _CheckNodesFreeDisk(lu, nodenames, requested):
3805 """Checks if nodes have enough free disk space in the default VG.
3807 This function checks if all given nodes have the needed amount of
3808 free disk. If any node has less disk, or if we cannot get the
3809 information from the node, this function raises an OpPrereqError.
3812 @type lu: C{LogicalUnit}
3813 @param lu: a logical unit from which we get configuration data
3814 @type nodenames: C{list}
3815 @param nodenames: the list of node names to check
3816 @type requested: C{int}
3817 @param requested: the amount of disk in MiB to check for
3818 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
3819 we cannot check the node
3822 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
3823 lu.cfg.GetHypervisorType())
3824 for node in nodenames:
3825 info = nodeinfo[node]
3826 info.Raise("Cannot get current information from node %s" % node,
3827 prereq=True, ecode=errors.ECODE_ENVIRON)
3828 vg_free = info.payload.get("vg_free", None)
3829 if not isinstance(vg_free, int):
3830 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
3831 " result was '%s'" % (node, vg_free),
3832 errors.ECODE_ENVIRON)
3833 if requested > vg_free:
3834 raise errors.OpPrereqError("Not enough disk space on target node %s:"
3835 " required %d MiB, available %d MiB" %
3836 (node, requested, vg_free),
3837 errors.ECODE_NORES)
3840 class LUStartupInstance(LogicalUnit):
3841 """Starts an instance.
3844 HPATH = "instance-start"
3845 HTYPE = constants.HTYPE_INSTANCE
3846 _OP_REQP = ["instance_name", "force"]
3849 def ExpandNames(self):
3850 self._ExpandAndLockInstance()
3852 def BuildHooksEnv(self):
3855 This runs on master, primary and secondary nodes of the instance.
3859 "FORCE": self.op.force,
3861 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3862 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3865 def CheckPrereq(self):
3866 """Check prerequisites.
3868 This checks that the instance is in the cluster.
3871 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3872 assert self.instance is not None, \
3873 "Cannot retrieve locked instance %s" % self.op.instance_name
3876 self.beparams = getattr(self.op, "beparams", {})
3878 if not isinstance(self.beparams, dict):
3879 raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3880 " dict" % (type(self.beparams), ),
3882 # fill the beparams dict
3883 utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3884 self.op.beparams = self.beparams
3887 self.hvparams = getattr(self.op, "hvparams", {})
3889 if not isinstance(self.hvparams, dict):
3890 raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3891 " dict" % (type(self.hvparams), ),
3894 # check hypervisor parameter syntax (locally)
3895 cluster = self.cfg.GetClusterInfo()
3896 utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3897     filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3898                                   instance.hvparams)
3899 filled_hvp.update(self.hvparams)
3900 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3901 hv_type.CheckParameterSyntax(filled_hvp)
3902 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3903 self.op.hvparams = self.hvparams
3905 _CheckNodeOnline(self, instance.primary_node)
3907 bep = self.cfg.GetClusterInfo().FillBE(instance)
3908 # check bridges existence
3909 _CheckInstanceBridgesExist(self, instance)
3911     remote_info = self.rpc.call_instance_info(instance.primary_node,
3912                                               instance.name,
3913 instance.hypervisor)
3914 remote_info.Raise("Error checking node %s" % instance.primary_node,
3915 prereq=True, ecode=errors.ECODE_ENVIRON)
3916 if not remote_info.payload: # not running already
3917 _CheckNodeFreeMemory(self, instance.primary_node,
3918 "starting instance %s" % instance.name,
3919 bep[constants.BE_MEMORY], instance.hypervisor)
3921 def Exec(self, feedback_fn):
3922 """Start the instance.
3925 instance = self.instance
3926 force = self.op.force
3928 self.cfg.MarkInstanceUp(instance.name)
3930 node_current = instance.primary_node
3932 _StartInstanceDisks(self, instance, force)
3934 result = self.rpc.call_instance_start(node_current, instance,
3935 self.hvparams, self.beparams)
3936     msg = result.fail_msg
3937     if msg:
3938       _ShutdownInstanceDisks(self, instance)
3939       raise errors.OpExecError("Could not start instance: %s" % msg)
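# A minimal sketch of driving this LU through the standard opcodes module
# (not imported here; the instance name is hypothetical and the parameter
# names mirror _OP_REQP above):
#
#   op = opcodes.OpStartupInstance(instance_name="inst1.example.com",
#                                  force=False)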
3942 class LURebootInstance(LogicalUnit):
3943 """Reboot an instance.
3946 HPATH = "instance-reboot"
3947 HTYPE = constants.HTYPE_INSTANCE
3948 _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3951 def CheckArguments(self):
3952 """Check the arguments.
3955 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
3956 constants.DEFAULT_SHUTDOWN_TIMEOUT)
3958 def ExpandNames(self):
3959 if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3960 constants.INSTANCE_REBOOT_HARD,
3961 constants.INSTANCE_REBOOT_FULL]:
3962 raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3963 (constants.INSTANCE_REBOOT_SOFT,
3964 constants.INSTANCE_REBOOT_HARD,
3965 constants.INSTANCE_REBOOT_FULL))
3966 self._ExpandAndLockInstance()
3968 def BuildHooksEnv(self):
3971 This runs on master, primary and secondary nodes of the instance.
3975 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3976 "REBOOT_TYPE": self.op.reboot_type,
3977 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
3979 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3980     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3981     return env, nl, nl
3983 def CheckPrereq(self):
3984 """Check prerequisites.
3986 This checks that the instance is in the cluster.
3989 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3990 assert self.instance is not None, \
3991 "Cannot retrieve locked instance %s" % self.op.instance_name
3993 _CheckNodeOnline(self, instance.primary_node)
3995 # check bridges existence
3996 _CheckInstanceBridgesExist(self, instance)
3998 def Exec(self, feedback_fn):
3999 """Reboot the instance.
4002 instance = self.instance
4003 ignore_secondaries = self.op.ignore_secondaries
4004 reboot_type = self.op.reboot_type
4006 node_current = instance.primary_node
4008 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4009 constants.INSTANCE_REBOOT_HARD]:
4010 for disk in instance.disks:
4011 self.cfg.SetDiskID(disk, node_current)
4012       result = self.rpc.call_instance_reboot(node_current, instance,
4013                                              reboot_type,
4014                                              self.shutdown_timeout)
4015       result.Raise("Could not reboot instance")
4016     else:
4017       result = self.rpc.call_instance_shutdown(node_current, instance,
4018 self.shutdown_timeout)
4019 result.Raise("Could not shutdown instance for full reboot")
4020 _ShutdownInstanceDisks(self, instance)
4021 _StartInstanceDisks(self, instance, ignore_secondaries)
4022 result = self.rpc.call_instance_start(node_current, instance, None, None)
4023       msg = result.fail_msg
4024       if msg:
4025         _ShutdownInstanceDisks(self, instance)
4026         raise errors.OpExecError("Could not start instance for"
4027                                  " full reboot: %s" % msg)
4029 self.cfg.MarkInstanceUp(instance.name)
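# Opcode-level sketch for this LU (hypothetical instance name; the
# reboot_type must be one of the three INSTANCE_REBOOT_* constants
# validated in ExpandNames above):
#
#   op = opcodes.OpRebootInstance(instance_name="inst1.example.com",
#                                 reboot_type=constants.INSTANCE_REBOOT_SOFT,
#                                 ignore_secondaries=False)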
4032 class LUShutdownInstance(LogicalUnit):
4033 """Shutdown an instance.
4036 HPATH = "instance-stop"
4037 HTYPE = constants.HTYPE_INSTANCE
4038 _OP_REQP = ["instance_name"]
4041 def CheckArguments(self):
4042 """Check the arguments.
4045 self.timeout = getattr(self.op, "timeout",
4046 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4048 def ExpandNames(self):
4049 self._ExpandAndLockInstance()
4051 def BuildHooksEnv(self):
4054 This runs on master, primary and secondary nodes of the instance.
4057 env = _BuildInstanceHookEnvByObject(self, self.instance)
4058 env["TIMEOUT"] = self.timeout
4059     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4060     return env, nl, nl
4062 def CheckPrereq(self):
4063 """Check prerequisites.
4065 This checks that the instance is in the cluster.
4068 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4069 assert self.instance is not None, \
4070 "Cannot retrieve locked instance %s" % self.op.instance_name
4071 _CheckNodeOnline(self, self.instance.primary_node)
4073 def Exec(self, feedback_fn):
4074 """Shutdown the instance.
4077 instance = self.instance
4078 node_current = instance.primary_node
4079 timeout = self.timeout
4080 self.cfg.MarkInstanceDown(instance.name)
4081 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4082     msg = result.fail_msg
4083     if msg:
4084       self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4086 _ShutdownInstanceDisks(self, instance)
4089 class LUReinstallInstance(LogicalUnit):
4090 """Reinstall an instance.
4093 HPATH = "instance-reinstall"
4094 HTYPE = constants.HTYPE_INSTANCE
4095 _OP_REQP = ["instance_name"]
4098 def ExpandNames(self):
4099 self._ExpandAndLockInstance()
4101 def BuildHooksEnv(self):
4104 This runs on master, primary and secondary nodes of the instance.
4107 env = _BuildInstanceHookEnvByObject(self, self.instance)
4108     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4109     return env, nl, nl
4111 def CheckPrereq(self):
4112 """Check prerequisites.
4114 This checks that the instance is in the cluster and is not running.
4117 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4118 assert instance is not None, \
4119 "Cannot retrieve locked instance %s" % self.op.instance_name
4120 _CheckNodeOnline(self, instance.primary_node)
4122 if instance.disk_template == constants.DT_DISKLESS:
4123 raise errors.OpPrereqError("Instance '%s' has no disks" %
4124                                  self.op.instance_name,
4125                                  errors.ECODE_INVAL)
4126 if instance.admin_up:
4127 raise errors.OpPrereqError("Instance '%s' is marked to be up" %
4128                                  self.op.instance_name,
4129                                  errors.ECODE_STATE)
4130     remote_info = self.rpc.call_instance_info(instance.primary_node,
4131                                               instance.name,
4132 instance.hypervisor)
4133 remote_info.Raise("Error checking node %s" % instance.primary_node,
4134 prereq=True, ecode=errors.ECODE_ENVIRON)
4135 if remote_info.payload:
4136 raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
4137 (self.op.instance_name,
4138                                  instance.primary_node),
4139                                  errors.ECODE_STATE)
4141 self.op.os_type = getattr(self.op, "os_type", None)
4142 self.op.force_variant = getattr(self.op, "force_variant", False)
4143 if self.op.os_type is not None:
4145 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4146 result = self.rpc.call_os_get(pnode, self.op.os_type)
4147 result.Raise("OS '%s' not in supported OS list for primary node %s" %
4148 (self.op.os_type, pnode),
4149 prereq=True, ecode=errors.ECODE_INVAL)
4150 if not self.op.force_variant:
4151 _CheckOSVariant(result.payload, self.op.os_type)
4153 self.instance = instance
4155 def Exec(self, feedback_fn):
4156 """Reinstall the instance.
4159 inst = self.instance
4161 if self.op.os_type is not None:
4162 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4163 inst.os = self.op.os_type
4164 self.cfg.Update(inst, feedback_fn)
4166     _StartInstanceDisks(self, inst, None)
4167     try:
4168       feedback_fn("Running the instance OS create scripts...")
4169 # FIXME: pass debug option from opcode to backend
4170 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4171 self.op.debug_level)
4172 result.Raise("Could not install OS for instance %s on node %s" %
4173                    (inst.name, inst.primary_node))
4174     finally:
4175       _ShutdownInstanceDisks(self, inst)
4178 class LURecreateInstanceDisks(LogicalUnit):
4179 """Recreate an instance's missing disks.
4182 HPATH = "instance-recreate-disks"
4183 HTYPE = constants.HTYPE_INSTANCE
4184 _OP_REQP = ["instance_name", "disks"]
4187 def CheckArguments(self):
4188 """Check the arguments.
4191 if not isinstance(self.op.disks, list):
4192 raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4193 for item in self.op.disks:
4194       if (not isinstance(item, int) or
4195           item < 0):
4196         raise errors.OpPrereqError("Invalid disk specification '%s'" %
4197                                    str(item), errors.ECODE_INVAL)
4199 def ExpandNames(self):
4200 self._ExpandAndLockInstance()
4202 def BuildHooksEnv(self):
4205 This runs on master, primary and secondary nodes of the instance.
4208 env = _BuildInstanceHookEnvByObject(self, self.instance)
4209     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4210     return env, nl, nl
4212 def CheckPrereq(self):
4213 """Check prerequisites.
4215 This checks that the instance is in the cluster and is not running.
4218 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4219 assert instance is not None, \
4220 "Cannot retrieve locked instance %s" % self.op.instance_name
4221 _CheckNodeOnline(self, instance.primary_node)
4223 if instance.disk_template == constants.DT_DISKLESS:
4224 raise errors.OpPrereqError("Instance '%s' has no disks" %
4225 self.op.instance_name, errors.ECODE_INVAL)
4226 if instance.admin_up:
4227 raise errors.OpPrereqError("Instance '%s' is marked to be up" %
4228 self.op.instance_name, errors.ECODE_STATE)
4229     remote_info = self.rpc.call_instance_info(instance.primary_node,
4230                                               instance.name,
4231 instance.hypervisor)
4232 remote_info.Raise("Error checking node %s" % instance.primary_node,
4233 prereq=True, ecode=errors.ECODE_ENVIRON)
4234 if remote_info.payload:
4235 raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
4236 (self.op.instance_name,
4237 instance.primary_node), errors.ECODE_STATE)
4239 if not self.op.disks:
4240 self.op.disks = range(len(instance.disks))
4242 for idx in self.op.disks:
4243 if idx >= len(instance.disks):
4244         raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4245                                    errors.ECODE_INVAL)
4247 self.instance = instance
4249 def Exec(self, feedback_fn):
4250 """Recreate the disks.
4253     to_skip = []
4254     for idx, _ in enumerate(self.instance.disks):
4255       if idx not in self.op.disks: # disk idx has not been passed in
4256         to_skip.append(idx)
4259 _CreateDisks(self, self.instance, to_skip=to_skip)
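# Worked example for the skip computation above: on an instance with
# three disks, self.op.disks == [0] yields to_skip == [1, 2], so only
# disk 0 is recreated by _CreateDisks.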
4262 class LURenameInstance(LogicalUnit):
4263 """Rename an instance.
4266 HPATH = "instance-rename"
4267 HTYPE = constants.HTYPE_INSTANCE
4268 _OP_REQP = ["instance_name", "new_name"]
4270 def BuildHooksEnv(self):
4273 This runs on master, primary and secondary nodes of the instance.
4276 env = _BuildInstanceHookEnvByObject(self, self.instance)
4277 env["INSTANCE_NEW_NAME"] = self.op.new_name
4278     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4279     return env, nl, nl
4281 def CheckPrereq(self):
4282 """Check prerequisites.
4284 This checks that the instance is in the cluster and is not running.
4287 self.op.instance_name = _ExpandInstanceName(self.cfg,
4288 self.op.instance_name)
4289 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4290 assert instance is not None
4291 _CheckNodeOnline(self, instance.primary_node)
4293 if instance.admin_up:
4294 raise errors.OpPrereqError("Instance '%s' is marked to be up" %
4295 self.op.instance_name, errors.ECODE_STATE)
4296     remote_info = self.rpc.call_instance_info(instance.primary_node,
4297                                               instance.name,
4298 instance.hypervisor)
4299 remote_info.Raise("Error checking node %s" % instance.primary_node,
4300 prereq=True, ecode=errors.ECODE_ENVIRON)
4301 if remote_info.payload:
4302 raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
4303 (self.op.instance_name,
4304 instance.primary_node), errors.ECODE_STATE)
4305 self.instance = instance
4307 # new name verification
4308 name_info = utils.GetHostInfo(self.op.new_name)
4310 self.op.new_name = new_name = name_info.name
4311 instance_list = self.cfg.GetInstanceList()
4312 if new_name in instance_list:
4313 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4314 new_name, errors.ECODE_EXISTS)
4316 if not getattr(self.op, "ignore_ip", False):
4317 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4318 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4319 (name_info.ip, new_name),
4320 errors.ECODE_NOTUNIQUE)
4323 def Exec(self, feedback_fn):
4324 """Reinstall the instance.
4327 inst = self.instance
4328 old_name = inst.name
4330 if inst.disk_template == constants.DT_FILE:
4331 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4333 self.cfg.RenameInstance(inst.name, self.op.new_name)
4334 # Change the instance lock. This is definitely safe while we hold the BGL
4335 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4336 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4338 # re-read the instance from the configuration after rename
4339 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4341 if inst.disk_template == constants.DT_FILE:
4342 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4343 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4344 old_file_storage_dir,
4345 new_file_storage_dir)
4346 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4347 " (but the instance has been renamed in Ganeti)" %
4348 (inst.primary_node, old_file_storage_dir,
4349 new_file_storage_dir))
4351     _StartInstanceDisks(self, inst, None)
4352     try:
4353       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4354 old_name, self.op.debug_level)
4355       msg = result.fail_msg
4356       if msg:
4357         msg = ("Could not run OS rename script for instance %s on node %s"
4358                " (but the instance has been renamed in Ganeti): %s" %
4359                (inst.name, inst.primary_node, msg))
4360         self.proc.LogWarning(msg)
4361     finally:
4362       _ShutdownInstanceDisks(self, inst)
4365 class LURemoveInstance(LogicalUnit):
4366 """Remove an instance.
4369 HPATH = "instance-remove"
4370 HTYPE = constants.HTYPE_INSTANCE
4371 _OP_REQP = ["instance_name", "ignore_failures"]
4374 def CheckArguments(self):
4375 """Check the arguments.
4378 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4379 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4381 def ExpandNames(self):
4382 self._ExpandAndLockInstance()
4383 self.needed_locks[locking.LEVEL_NODE] = []
4384 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4386 def DeclareLocks(self, level):
4387 if level == locking.LEVEL_NODE:
4388 self._LockInstancesNodes()
4390 def BuildHooksEnv(self):
4393 This runs on master, primary and secondary nodes of the instance.
4396 env = _BuildInstanceHookEnvByObject(self, self.instance)
4397 env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4398 nl = [self.cfg.GetMasterNode()]
4399 nl_post = list(self.instance.all_nodes) + nl
4400 return env, nl, nl_post
4402 def CheckPrereq(self):
4403 """Check prerequisites.
4405 This checks that the instance is in the cluster.
4408 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4409 assert self.instance is not None, \
4410 "Cannot retrieve locked instance %s" % self.op.instance_name
4412 def Exec(self, feedback_fn):
4413 """Remove the instance.
4416 instance = self.instance
4417 logging.info("Shutting down instance %s on node %s",
4418 instance.name, instance.primary_node)
4420 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4421 self.shutdown_timeout)
4422     msg = result.fail_msg
4423     if msg:
4424       if self.op.ignore_failures:
4425         feedback_fn("Warning: can't shutdown instance: %s" % msg)
4426       else:
4427         raise errors.OpExecError("Could not shutdown instance %s on"
4428                                  " node %s: %s" %
4429                                  (instance.name, instance.primary_node, msg))
4431 logging.info("Removing block devices for instance %s", instance.name)
4433 if not _RemoveDisks(self, instance):
4434 if self.op.ignore_failures:
4435         feedback_fn("Warning: can't remove instance's disks")
4436       else:
4437         raise errors.OpExecError("Can't remove instance's disks")
4439 logging.info("Removing instance %s out of cluster config", instance.name)
4441 self.cfg.RemoveInstance(instance.name)
4442 self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4445 class LUQueryInstances(NoHooksLU):
4446 """Logical unit for querying instances.
4449 # pylint: disable-msg=W0142
4450 _OP_REQP = ["output_fields", "names", "use_locking"]
4452 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4453 "serial_no", "ctime", "mtime", "uuid"]
4454   _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4455                                     "admin_state",
4456 "disk_template", "ip", "mac", "bridge",
4457 "nic_mode", "nic_link",
4458 "sda_size", "sdb_size", "vcpus", "tags",
4459 "network_port", "beparams",
4460 r"(disk)\.(size)/([0-9]+)",
4461 r"(disk)\.(sizes)", "disk_usage",
4462 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4463 r"(nic)\.(bridge)/([0-9]+)",
4464 r"(nic)\.(macs|ips|modes|links|bridges)",
4465 r"(disk|nic)\.(count)",
4467                                     ] + _SIMPLE_FIELDS +
4468                                    ["hv/%s" % name
4469                                     for name in constants.HVS_PARAMETERS
4470                                     if name not in constants.HVC_GLOBALS] +
4471                                    ["be/%s" % name
4472                                     for name in constants.BES_PARAMETERS])
4473 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4476 def ExpandNames(self):
4477 _CheckOutputFields(static=self._FIELDS_STATIC,
4478 dynamic=self._FIELDS_DYNAMIC,
4479 selected=self.op.output_fields)
4481 self.needed_locks = {}
4482 self.share_locks[locking.LEVEL_INSTANCE] = 1
4483 self.share_locks[locking.LEVEL_NODE] = 1
4485     if self.op.names:
4486       self.wanted = _GetWantedInstances(self, self.op.names)
4487     else:
4488       self.wanted = locking.ALL_SET
4490 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4491 self.do_locking = self.do_node_query and self.op.use_locking
4492     if self.do_locking:
4493       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4494 self.needed_locks[locking.LEVEL_NODE] = []
4495 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4497 def DeclareLocks(self, level):
4498 if level == locking.LEVEL_NODE and self.do_locking:
4499 self._LockInstancesNodes()
4501 def CheckPrereq(self):
4502 """Check prerequisites.
4507 def Exec(self, feedback_fn):
4508 """Computes the list of nodes and their attributes.
4511 # pylint: disable-msg=R0912
4512 # way too many branches here
4513 all_info = self.cfg.GetAllInstancesInfo()
4514     if self.wanted == locking.ALL_SET:
4515       # caller didn't specify instance names, so ordering is not important
4516       if self.do_locking:
4517         instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4518       else:
4519         instance_names = all_info.keys()
4520       instance_names = utils.NiceSort(instance_names)
4521     else:
4522       # caller did specify names, so we must keep the ordering
4523       if self.do_locking:
4524         tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4525       else:
4526         tgt_set = all_info.keys()
4527       missing = set(self.wanted).difference(tgt_set)
4528       if missing:
4529         raise errors.OpExecError("Some instances were removed before"
4530                                  " retrieving their data: %s" % missing)
4531       instance_names = self.wanted
4533 instance_list = [all_info[iname] for iname in instance_names]
4535 # begin data gathering
4537 nodes = frozenset([inst.primary_node for inst in instance_list])
4538     hv_list = list(set([inst.hypervisor for inst in instance_list]))
4540     bad_nodes = []
4541     off_nodes = []
4542     if self.do_node_query:
4543       live_data = {}
4544       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4545       for name in nodes:
4546         result = node_data[name]
4547         if result.offline:
4548           # offline nodes will be in both lists
4549           off_nodes.append(name)
4550         if result.fail_msg:
4551           bad_nodes.append(name)
4552         else:
4553           if result.payload:
4554             live_data.update(result.payload)
4555           # else no instance is alive
4556     else:
4557       live_data = dict([(name, {}) for name in instance_names])
4559     # end data gathering
4561     HVPREFIX = "hv/"
4562     BEPREFIX = "be/"
4563     output = []
4564     cluster = self.cfg.GetClusterInfo()
4565     for instance in instance_list:
4566       iout = []
4567 i_hv = cluster.FillHV(instance, skip_globals=True)
4568 i_be = cluster.FillBE(instance)
4569 i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4570 nic.nicparams) for nic in instance.nics]
4571 for field in self.op.output_fields:
4572 st_match = self._FIELDS_STATIC.Matches(field)
4573 if field in self._SIMPLE_FIELDS:
4574 val = getattr(instance, field)
4575 elif field == "pnode":
4576 val = instance.primary_node
4577 elif field == "snodes":
4578 val = list(instance.secondary_nodes)
4579 elif field == "admin_state":
4580 val = instance.admin_up
4581         elif field == "oper_state":
4582           if instance.primary_node in bad_nodes:
4583             val = None
4584           else:
4585             val = bool(live_data.get(instance.name))
4586         elif field == "status":
4587           if instance.primary_node in off_nodes:
4588             val = "ERROR_nodeoffline"
4589           elif instance.primary_node in bad_nodes:
4590             val = "ERROR_nodedown"
4591           else:
4592             running = bool(live_data.get(instance.name))
4593             if running:
4594               if instance.admin_up:
4595                 val = "running"
4596               else:
4597                 val = "ERROR_up"
4598             else:
4599               if instance.admin_up:
4600                 val = "ERROR_down"
4601               else:
4602                 val = "ADMIN_down"
4603         elif field == "oper_ram":
4604           if instance.primary_node in bad_nodes:
4605             val = None
4606           elif instance.name in live_data:
4607             val = live_data[instance.name].get("memory", "?")
4608           else:
4609             val = "-"
4610 elif field == "vcpus":
4611 val = i_be[constants.BE_VCPUS]
4612 elif field == "disk_template":
4613           val = instance.disk_template
4614         elif field == "ip":
4615           if instance.nics:
4616             val = instance.nics[0].ip
4617           else:
4618             val = None
4619         elif field == "nic_mode":
4620           if instance.nics:
4621             val = i_nicp[0][constants.NIC_MODE]
4622           else:
4623             val = None
4624         elif field == "nic_link":
4625           if instance.nics:
4626             val = i_nicp[0][constants.NIC_LINK]
4627           else:
4628             val = None
4629         elif field == "bridge":
4630           if (instance.nics and
4631               i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4632             val = i_nicp[0][constants.NIC_LINK]
4633           else:
4634             val = None
4635         elif field == "mac":
4636           if instance.nics:
4637             val = instance.nics[0].mac
4638           else:
4639             val = None
4640         elif field == "sda_size" or field == "sdb_size":
4641           idx = ord(field[2]) - ord('a')
4642           try:
4643             val = instance.FindDisk(idx).size
4644           except errors.OpPrereqError:
4645             val = None
4646 elif field == "disk_usage": # total disk usage per node
4647 disk_sizes = [{'size': disk.size} for disk in instance.disks]
4648 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4649 elif field == "tags":
4650 val = list(instance.GetTags())
4651         elif field == "hvparams":
4652           val = i_hv
4653 elif (field.startswith(HVPREFIX) and
4654 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4655 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4656 val = i_hv.get(field[len(HVPREFIX):], None)
4657         elif field == "beparams":
4658           val = i_be
4659 elif (field.startswith(BEPREFIX) and
4660 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4661 val = i_be.get(field[len(BEPREFIX):], None)
4662 elif st_match and st_match.groups():
4663 # matches a variable list
4664 st_groups = st_match.groups()
4665 if st_groups and st_groups[0] == "disk":
4666 if st_groups[1] == "count":
4667 val = len(instance.disks)
4668 elif st_groups[1] == "sizes":
4669 val = [disk.size for disk in instance.disks]
4670           elif st_groups[1] == "size":
4671             try:
4672               val = instance.FindDisk(st_groups[2]).size
4673             except errors.OpPrereqError:
4674               val = None
4675           else:
4676             assert False, "Unhandled disk parameter"
4677 elif st_groups[0] == "nic":
4678 if st_groups[1] == "count":
4679 val = len(instance.nics)
4680 elif st_groups[1] == "macs":
4681 val = [nic.mac for nic in instance.nics]
4682 elif st_groups[1] == "ips":
4683 val = [nic.ip for nic in instance.nics]
4684 elif st_groups[1] == "modes":
4685 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4686 elif st_groups[1] == "links":
4687 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4688             elif st_groups[1] == "bridges":
4689               val = []
4690               for nicp in i_nicp:
4691                 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4692                   val.append(nicp[constants.NIC_LINK])
4693                 else:
4694                   val.append(None)
4695             else:
4696               # index-based items
4697               nic_idx = int(st_groups[2])
4698               if nic_idx >= len(instance.nics):
4699                 val = None
4700               else:
4701                 if st_groups[1] == "mac":
4702 val = instance.nics[nic_idx].mac
4703 elif st_groups[1] == "ip":
4704 val = instance.nics[nic_idx].ip
4705 elif st_groups[1] == "mode":
4706 val = i_nicp[nic_idx][constants.NIC_MODE]
4707 elif st_groups[1] == "link":
4708 val = i_nicp[nic_idx][constants.NIC_LINK]
4709                 elif st_groups[1] == "bridge":
4710                   nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4711                   if nic_mode == constants.NIC_MODE_BRIDGED:
4712                     val = i_nicp[nic_idx][constants.NIC_LINK]
4713                   else:
4714                     val = None
4715                 else:
4716                   assert False, "Unhandled NIC parameter"
4717           else:
4718             assert False, ("Declared but unhandled variable parameter '%s'" %
4719                            field)
4720         else:
4721           assert False, "Declared but unhandled parameter '%s'" % field
4722         iout.append(val)
4723       output.append(iout)
4725     return output
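# Example of the result shape (hypothetical data): with output_fields
# ["name", "oper_ram"], two instances yield one row each, one value per
# requested field:
#
#   [["inst1.example.com", 512], ["inst2.example.com", "-"]]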
4728 class LUFailoverInstance(LogicalUnit):
4729 """Failover an instance.
4732 HPATH = "instance-failover"
4733 HTYPE = constants.HTYPE_INSTANCE
4734 _OP_REQP = ["instance_name", "ignore_consistency"]
4737 def CheckArguments(self):
4738 """Check the arguments.
4741 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4742 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4744 def ExpandNames(self):
4745 self._ExpandAndLockInstance()
4746 self.needed_locks[locking.LEVEL_NODE] = []
4747 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4749 def DeclareLocks(self, level):
4750 if level == locking.LEVEL_NODE:
4751 self._LockInstancesNodes()
4753 def BuildHooksEnv(self):
4756 This runs on master, primary and secondary nodes of the instance.
4759 instance = self.instance
4760 source_node = instance.primary_node
4761 target_node = instance.secondary_nodes[0]
4763 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4764 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4765 "OLD_PRIMARY": source_node,
4766 "OLD_SECONDARY": target_node,
4767 "NEW_PRIMARY": target_node,
4768 "NEW_SECONDARY": source_node,
4770 env.update(_BuildInstanceHookEnvByObject(self, instance))
4771     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4772     nl_post = list(nl)
4773     nl_post.append(source_node)
4774 return env, nl, nl_post
4776 def CheckPrereq(self):
4777 """Check prerequisites.
4779 This checks that the instance is in the cluster.
4782 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4783 assert self.instance is not None, \
4784 "Cannot retrieve locked instance %s" % self.op.instance_name
4786 bep = self.cfg.GetClusterInfo().FillBE(instance)
4787 if instance.disk_template not in constants.DTS_NET_MIRROR:
4788 raise errors.OpPrereqError("Instance's disk layout is not"
4789 " network mirrored, cannot failover.",
4792 secondary_nodes = instance.secondary_nodes
4793 if not secondary_nodes:
4794 raise errors.ProgrammerError("no secondary node but using "
4795 "a mirrored disk template")
4797 target_node = secondary_nodes[0]
4798 _CheckNodeOnline(self, target_node)
4799 _CheckNodeNotDrained(self, target_node)
4800 if instance.admin_up:
4801 # check memory requirements on the secondary node
4802 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4803 instance.name, bep[constants.BE_MEMORY],
4804                            instance.hypervisor)
4805     else:
4806       self.LogInfo("Not checking memory on the secondary node as"
4807                    " instance will not be started")
4809     # check bridge existence
4810 _CheckInstanceBridgesExist(self, instance, node=target_node)
4812 def Exec(self, feedback_fn):
4813 """Failover an instance.
4815 The failover is done by shutting it down on its present node and
4816 starting it on the secondary.
4819 instance = self.instance
4821 source_node = instance.primary_node
4822 target_node = instance.secondary_nodes[0]
4824 if instance.admin_up:
4825 feedback_fn("* checking disk consistency between source and target")
4826 for dev in instance.disks:
4827 # for drbd, these are drbd over lvm
4828 if not _CheckDiskConsistency(self, dev, target_node, False):
4829 if not self.op.ignore_consistency:
4830 raise errors.OpExecError("Disk %s is degraded on target node,"
4831 " aborting failover." % dev.iv_name)
4833 feedback_fn("* not checking disk consistency as instance is not running")
4835 feedback_fn("* shutting down instance on source node")
4836 logging.info("Shutting down instance %s on node %s",
4837 instance.name, source_node)
4839 result = self.rpc.call_instance_shutdown(source_node, instance,
4840 self.shutdown_timeout)
4841     msg = result.fail_msg
4842     if msg:
4843       if self.op.ignore_consistency:
4844 self.proc.LogWarning("Could not shutdown instance %s on node %s."
4845 " Proceeding anyway. Please make sure node"
4846 " %s is down. Error details: %s",
4847 instance.name, source_node, source_node, msg)
4849         raise errors.OpExecError("Could not shutdown instance %s on"
4850                                  " node %s: %s" %
4851                                  (instance.name, source_node, msg))
4853 feedback_fn("* deactivating the instance's disks on source node")
4854 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
4855 raise errors.OpExecError("Can't shut down the instance's disks.")
4857 instance.primary_node = target_node
4858 # distribute new instance config to the other nodes
4859 self.cfg.Update(instance, feedback_fn)
4861 # Only start the instance if it's marked as up
4862 if instance.admin_up:
4863 feedback_fn("* activating the instance's disks on target node")
4864 logging.info("Starting instance %s on node %s",
4865 instance.name, target_node)
4867 disks_ok, _ = _AssembleInstanceDisks(self, instance,
4868                                            ignore_secondaries=True)
4869       if not disks_ok:
4870         _ShutdownInstanceDisks(self, instance)
4871         raise errors.OpExecError("Can't activate the instance's disks")
4873 feedback_fn("* starting the instance on the target node")
4874 result = self.rpc.call_instance_start(target_node, instance, None, None)
4875       msg = result.fail_msg
4876       if msg:
4877         _ShutdownInstanceDisks(self, instance)
4878 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4879 (instance.name, target_node, msg))
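# Opcode-level sketch for a failover (hypothetical instance name;
# ignore_consistency bypasses the disk consistency check above):
#
#   op = opcodes.OpFailoverInstance(instance_name="inst1.example.com",
#                                   ignore_consistency=False)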
4882 class LUMigrateInstance(LogicalUnit):
4883 """Migrate an instance.
4885   This is migration without shutting down (live migration), as opposed
4886   to failover, which requires the instance to be shut down.
4889 HPATH = "instance-migrate"
4890 HTYPE = constants.HTYPE_INSTANCE
4891 _OP_REQP = ["instance_name", "live", "cleanup"]
4895 def ExpandNames(self):
4896 self._ExpandAndLockInstance()
4898 self.needed_locks[locking.LEVEL_NODE] = []
4899 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4901 self._migrater = TLMigrateInstance(self, self.op.instance_name,
4902 self.op.live, self.op.cleanup)
4903 self.tasklets = [self._migrater]
4905 def DeclareLocks(self, level):
4906 if level == locking.LEVEL_NODE:
4907 self._LockInstancesNodes()
4909 def BuildHooksEnv(self):
4912 This runs on master, primary and secondary nodes of the instance.
4915 instance = self._migrater.instance
4916 source_node = instance.primary_node
4917 target_node = instance.secondary_nodes[0]
4918 env = _BuildInstanceHookEnvByObject(self, instance)
4919 env["MIGRATE_LIVE"] = self.op.live
4920     env["MIGRATE_CLEANUP"] = self.op.cleanup
4921     env.update({
4922         "OLD_PRIMARY": source_node,
4923         "OLD_SECONDARY": target_node,
4924         "NEW_PRIMARY": target_node,
4925         "NEW_SECONDARY": source_node,
4926         })
4927     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4928     nl_post = list(nl)
4929     nl_post.append(source_node)
4930 return env, nl, nl_post
4933 class LUMoveInstance(LogicalUnit):
4934 """Move an instance by data-copying.
4937 HPATH = "instance-move"
4938 HTYPE = constants.HTYPE_INSTANCE
4939 _OP_REQP = ["instance_name", "target_node"]
4942 def CheckArguments(self):
4943 """Check the arguments.
4946 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4947 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4949 def ExpandNames(self):
4950 self._ExpandAndLockInstance()
4951 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
4952 self.op.target_node = target_node
4953 self.needed_locks[locking.LEVEL_NODE] = [target_node]
4954 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4956 def DeclareLocks(self, level):
4957 if level == locking.LEVEL_NODE:
4958 self._LockInstancesNodes(primary_only=True)
4960 def BuildHooksEnv(self):
4963 This runs on master, primary and secondary nodes of the instance.
4967 "TARGET_NODE": self.op.target_node,
4968 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4970 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4971 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
4972                                        self.op.target_node]
4973     return env, nl, nl
4975 def CheckPrereq(self):
4976 """Check prerequisites.
4978 This checks that the instance is in the cluster.
4981 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4982 assert self.instance is not None, \
4983 "Cannot retrieve locked instance %s" % self.op.instance_name
4985 node = self.cfg.GetNodeInfo(self.op.target_node)
4986 assert node is not None, \
4987 "Cannot retrieve locked node %s" % self.op.target_node
4989 self.target_node = target_node = node.name
4991 if target_node == instance.primary_node:
4992 raise errors.OpPrereqError("Instance %s is already on the node %s" %
4993                                  (instance.name, target_node),
4994                                  errors.ECODE_STATE)
4996 bep = self.cfg.GetClusterInfo().FillBE(instance)
4998 for idx, dsk in enumerate(instance.disks):
4999 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5000 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5001 " cannot copy" % idx, errors.ECODE_STATE)
5003 _CheckNodeOnline(self, target_node)
5004 _CheckNodeNotDrained(self, target_node)
5006 if instance.admin_up:
5007       # check memory requirements on the target node
5008       _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
5009                            instance.name, bep[constants.BE_MEMORY],
5010                            instance.hypervisor)
5011     else:
5012       self.LogInfo("Not checking memory on the target node as"
5013                    " instance will not be started")
5015     # check bridge existence
5016 _CheckInstanceBridgesExist(self, instance, node=target_node)
5018 def Exec(self, feedback_fn):
5019 """Move an instance.
5021 The move is done by shutting it down on its present node, copying
5022 the data over (slow) and starting it on the new node.
5025 instance = self.instance
5027 source_node = instance.primary_node
5028 target_node = self.target_node
5030 self.LogInfo("Shutting down instance %s on source node %s",
5031 instance.name, source_node)
5033 result = self.rpc.call_instance_shutdown(source_node, instance,
5034 self.shutdown_timeout)
5035     msg = result.fail_msg
5036     if msg:
5037       # ignore_consistency is not a declared parameter of this opcode
5038       if getattr(self.op, "ignore_consistency", False):
5038 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5039 " Proceeding anyway. Please make sure node"
5040 " %s is down. Error details: %s",
5041 instance.name, source_node, source_node, msg)
5043         raise errors.OpExecError("Could not shutdown instance %s on"
5044                                  " node %s: %s" %
5045                                  (instance.name, source_node, msg))
5047     # create the target disks
5048     try:
5049       _CreateDisks(self, instance, target_node=target_node)
5050     except errors.OpExecError:
5051       self.LogWarning("Device creation failed, reverting...")
5052       try:
5053         _RemoveDisks(self, instance, target_node=target_node)
5054       finally:
5055         self.cfg.ReleaseDRBDMinors(instance.name)
5056         raise
5058     cluster_name = self.cfg.GetClusterInfo().cluster_name
5060     errs = []
5061     # activate, get path, copy the data over
5062 for idx, disk in enumerate(instance.disks):
5063 self.LogInfo("Copying data for disk %d", idx)
5064 result = self.rpc.call_blockdev_assemble(target_node, disk,
5065                                                instance.name, True)
5066       if result.fail_msg:
5067         self.LogWarning("Can't assemble newly created disk %d: %s",
5068                         idx, result.fail_msg)
5069         errs.append(result.fail_msg)
5070         break
5071       dev_path = result.payload
5072       result = self.rpc.call_blockdev_export(source_node, disk,
5073                                              target_node, dev_path,
5074                                              cluster_name)
5075       if result.fail_msg:
5076         self.LogWarning("Can't copy data over for disk %d: %s",
5077                         idx, result.fail_msg)
5078         errs.append(result.fail_msg)
5079         break
5081     if errs:
5082       self.LogWarning("Some disks failed to copy, aborting")
5083       try:
5084         _RemoveDisks(self, instance, target_node=target_node)
5085       finally:
5086         self.cfg.ReleaseDRBDMinors(instance.name)
5087         raise errors.OpExecError("Errors during disk copy: %s" %
5088                                  (",".join(errs),))
5090 instance.primary_node = target_node
5091 self.cfg.Update(instance, feedback_fn)
5093 self.LogInfo("Removing the disks on the original node")
5094 _RemoveDisks(self, instance, target_node=source_node)
5096 # Only start the instance if it's marked as up
5097 if instance.admin_up:
5098 self.LogInfo("Starting instance %s on node %s",
5099 instance.name, target_node)
5101 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5102                                            ignore_secondaries=True)
5103       if not disks_ok:
5104         _ShutdownInstanceDisks(self, instance)
5105         raise errors.OpExecError("Can't activate the instance's disks")
5107 result = self.rpc.call_instance_start(target_node, instance, None, None)
5108       msg = result.fail_msg
5109       if msg:
5110         _ShutdownInstanceDisks(self, instance)
5111 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5112 (instance.name, target_node, msg))
5115 class LUMigrateNode(LogicalUnit):
5116 """Migrate all instances from a node.
5119 HPATH = "node-migrate"
5120 HTYPE = constants.HTYPE_NODE
5121 _OP_REQP = ["node_name", "live"]
5124 def ExpandNames(self):
5125 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5127 self.needed_locks = {
5128 locking.LEVEL_NODE: [self.op.node_name],
5131 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5133     # Create tasklets for migrating instances for all instances on this node
5134     names = []
5135     tasklets = []
5137 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5138 logging.debug("Migrating instance %s", inst.name)
5139 names.append(inst.name)
5141 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5143 self.tasklets = tasklets
5145 # Declare instance locks
5146 self.needed_locks[locking.LEVEL_INSTANCE] = names
5148 def DeclareLocks(self, level):
5149 if level == locking.LEVEL_NODE:
5150 self._LockInstancesNodes()
5152 def BuildHooksEnv(self):
5155 This runs on the master, the primary and all the secondaries.
5159 "NODE_NAME": self.op.node_name,
5162 nl = [self.cfg.GetMasterNode()]
5164 return (env, nl, nl)
5167 class TLMigrateInstance(Tasklet):
5168 def __init__(self, lu, instance_name, live, cleanup):
5169 """Initializes this class.
5172 Tasklet.__init__(self, lu)
5175     self.instance_name = instance_name
5176     self.live = live
5177     self.cleanup = cleanup
5179 def CheckPrereq(self):
5180 """Check prerequisites.
5182 This checks that the instance is in the cluster.
5185 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5186 instance = self.cfg.GetInstanceInfo(instance_name)
5187 assert instance is not None
5189 if instance.disk_template != constants.DT_DRBD8:
5190 raise errors.OpPrereqError("Instance's disk layout is not"
5191 " drbd8, cannot migrate.", errors.ECODE_STATE)
5193 secondary_nodes = instance.secondary_nodes
5194 if not secondary_nodes:
5195 raise errors.ConfigurationError("No secondary node but using"
5196 " drbd8 disk template")
5198 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5200 target_node = secondary_nodes[0]
5201 # check memory requirements on the secondary node
5202 _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5203 instance.name, i_be[constants.BE_MEMORY],
5204 instance.hypervisor)
5206 # check bridge existance
5207 _CheckInstanceBridgesExist(self, instance, node=target_node)
5209 if not self.cleanup:
5210 _CheckNodeNotDrained(self, target_node)
5211       result = self.rpc.call_instance_migratable(instance.primary_node,
5212                                                  instance)
5213 result.Raise("Can't migrate, please use failover",
5214 prereq=True, ecode=errors.ECODE_STATE)
5216 self.instance = instance
5218 def _WaitUntilSync(self):
5219 """Poll with custom rpc for disk sync.
5221 This uses our own step-based rpc call.
5224     self.feedback_fn("* wait until resync is done")
5225     all_done = False
5226     while not all_done:
5227       all_done = True
5228       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5229                                             self.nodes_ip,
5230                                             self.instance.disks)
5231       min_percent = 100
5232       for node, nres in result.items():
5233         nres.Raise("Cannot resync disks on node %s" % node)
5234         node_done, node_percent = nres.payload
5235         all_done = all_done and node_done
5236         if node_percent is not None:
5237           min_percent = min(min_percent, node_percent)
5238       if not all_done:
5239         if min_percent < 100:
5240           self.feedback_fn("   - progress: %.1f%%" % min_percent)
5241         time.sleep(2)
5243 def _EnsureSecondary(self, node):
5244 """Demote a node to secondary.
5247 self.feedback_fn("* switching node %s to secondary mode" % node)
5249 for dev in self.instance.disks:
5250 self.cfg.SetDiskID(dev, node)
5252 result = self.rpc.call_blockdev_close(node, self.instance.name,
5253 self.instance.disks)
5254 result.Raise("Cannot change disk to secondary on node %s" % node)
5256 def _GoStandalone(self):
5257 """Disconnect from the network.
5260 self.feedback_fn("* changing into standalone mode")
5261 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5262 self.instance.disks)
5263 for node, nres in result.items():
5264 nres.Raise("Cannot disconnect disks node %s" % node)
5266 def _GoReconnect(self, multimaster):
5267 """Reconnect to the network.
5273 msg = "single-master"
5274 self.feedback_fn("* changing disks into %s mode" % msg)
5275 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5276 self.instance.disks,
5277 self.instance.name, multimaster)
5278 for node, nres in result.items():
5279 nres.Raise("Cannot change disks config on node %s" % node)
5281 def _ExecCleanup(self):
5282 """Try to cleanup after a failed migration.
5284 The cleanup is done by:
5285 - check that the instance is running only on one node
5286 (and update the config if needed)
5287 - change disks on its secondary node to secondary
5288 - wait until disks are fully synchronized
5289 - disconnect from the network
5290 - change disks into single-master mode
5291 - wait again until disks are fully synchronized
5294 instance = self.instance
5295 target_node = self.target_node
5296 source_node = self.source_node
5298 # check running on only one node
5299     self.feedback_fn("* checking where the instance actually runs"
5300                      " (if this hangs, the hypervisor might be in"
5301                      " a bad state)")
5302 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5303 for node, result in ins_l.items():
5304 result.Raise("Can't contact node %s" % node)
5306 runningon_source = instance.name in ins_l[source_node].payload
5307 runningon_target = instance.name in ins_l[target_node].payload
5309 if runningon_source and runningon_target:
5310 raise errors.OpExecError("Instance seems to be running on two nodes,"
5311 " or the hypervisor is confused. You will have"
5312 " to ensure manually that it runs only on one"
5313 " and restart this operation.")
5315 if not (runningon_source or runningon_target):
5316 raise errors.OpExecError("Instance does not seem to be running at all."
5317 " In this case, it's safer to repair by"
5318 " running 'gnt-instance stop' to ensure disk"
5319 " shutdown, and then restarting it.")
5321 if runningon_target:
5322 # the migration has actually succeeded, we need to update the config
5323 self.feedback_fn("* instance running on secondary node (%s),"
5324 " updating config" % target_node)
5325 instance.primary_node = target_node
5326 self.cfg.Update(instance, self.feedback_fn)
5327       demoted_node = source_node
5328     else:
5329       self.feedback_fn("* instance confirmed to be running on its"
5330 " primary node (%s)" % source_node)
5331 demoted_node = target_node
5333     self._EnsureSecondary(demoted_node)
5334     try:
5335       self._WaitUntilSync()
5336     except errors.OpExecError:
5337       # we ignore errors here, since if the device is standalone, it
5338       # won't be able to sync
5339       pass
5340     self._GoStandalone()
5341 self._GoReconnect(False)
5342 self._WaitUntilSync()
5344 self.feedback_fn("* done")
5346 def _RevertDiskStatus(self):
5347 """Try to revert the disk status after a failed migration.
5350     target_node = self.target_node
5351     try:
5352       self._EnsureSecondary(target_node)
5353 self._GoStandalone()
5354 self._GoReconnect(False)
5355 self._WaitUntilSync()
5356 except errors.OpExecError, err:
5357 self.lu.LogWarning("Migration failed and I can't reconnect the"
5358 " drives: error '%s'\n"
5359 "Please look and recover the instance status" %
5362 def _AbortMigration(self):
5363 """Call the hypervisor code to abort a started migration.
5366 instance = self.instance
5367 target_node = self.target_node
5368 migration_info = self.migration_info
5370     abort_result = self.rpc.call_finalize_migration(target_node,
5371                                                     instance,
5372                                                     migration_info,
5373                                                     False)
5374     abort_msg = abort_result.fail_msg
5375     if abort_msg:
5376       logging.error("Aborting migration failed on target node %s: %s",
5377 target_node, abort_msg)
5378     # Don't raise an exception here, as we still have to try to revert the
5379     # disk status, even if this step failed.
5381 def _ExecMigration(self):
5382 """Migrate an instance.
5384 The migrate is done by:
5385 - change the disks into dual-master mode
5386 - wait until disks are fully synchronized again
5387 - migrate the instance
5388 - change disks on the new secondary node (the old primary) to secondary
5389 - wait until disks are fully synchronized
5390 - change disks into single-master mode
5393 instance = self.instance
5394 target_node = self.target_node
5395 source_node = self.source_node
5397 self.feedback_fn("* checking disk consistency between source and target")
5398 for dev in instance.disks:
5399 if not _CheckDiskConsistency(self, dev, target_node, False):
5400 raise errors.OpExecError("Disk %s is degraded or not fully"
5401 " synchronized on target node,"
5402 " aborting migrate." % dev.iv_name)
5404 # First get the migration information from the remote node
5405 result = self.rpc.call_migration_info(source_node, instance)
5406     msg = result.fail_msg
5407     if msg:
5408       log_err = ("Failed fetching source migration information from %s: %s" %
5409                  (source_node, msg))
5410       logging.error(log_err)
5411       raise errors.OpExecError(log_err)
5413 self.migration_info = migration_info = result.payload
5415 # Then switch the disks to master/master mode
5416 self._EnsureSecondary(target_node)
5417 self._GoStandalone()
5418 self._GoReconnect(True)
5419 self._WaitUntilSync()
5421 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5422     result = self.rpc.call_accept_instance(target_node,
5423                                            instance,
5424                                            migration_info,
5425                                            self.nodes_ip[target_node])
5427     msg = result.fail_msg
5428     if msg:
5429       logging.error("Instance pre-migration failed, trying to revert"
5430 " disk status: %s", msg)
5431 self.feedback_fn("Pre-migration failed, aborting")
5432 self._AbortMigration()
5433 self._RevertDiskStatus()
5434 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5435 (instance.name, msg))
5437 self.feedback_fn("* migrating instance to %s" % target_node)
5439 result = self.rpc.call_instance_migrate(source_node, instance,
5440                                             self.nodes_ip[target_node],
5441                                             self.live)
5442     msg = result.fail_msg
5443     if msg:
5444       logging.error("Instance migration failed, trying to revert"
5445 " disk status: %s", msg)
5446 self.feedback_fn("Migration failed, aborting")
5447 self._AbortMigration()
5448 self._RevertDiskStatus()
5449 raise errors.OpExecError("Could not migrate instance %s: %s" %
5450 (instance.name, msg))
5453 instance.primary_node = target_node
5454 # distribute new instance config to the other nodes
5455 self.cfg.Update(instance, self.feedback_fn)
5457     result = self.rpc.call_finalize_migration(target_node,
5458                                               instance,
5459                                               migration_info,
5460                                               True)
5461     msg = result.fail_msg
5462     if msg:
5463       logging.error("Instance migration succeeded, but finalization failed:"
5464                     " %s", msg)
5465       raise errors.OpExecError("Could not finalize instance migration: %s" %
5466                                msg)
5468 self._EnsureSecondary(source_node)
5469 self._WaitUntilSync()
5470 self._GoStandalone()
5471 self._GoReconnect(False)
5472 self._WaitUntilSync()
5474 self.feedback_fn("* done")
5476 def Exec(self, feedback_fn):
5477 """Perform the migration.
5480 feedback_fn("Migrating instance %s" % self.instance.name)
5482 self.feedback_fn = feedback_fn
5484 self.source_node = self.instance.primary_node
5485 self.target_node = self.instance.secondary_nodes[0]
5486     self.all_nodes = [self.source_node, self.target_node]
5487     self.nodes_ip = {
5488       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5489       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5490       }
5492     if self.cleanup:
5493       return self._ExecCleanup()
5494     else:
5495       return self._ExecMigration()
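# How this tasklet is wired up by its logical units (mirrors the code in
# LUMigrateInstance above; LUMigrateNode builds one tasklet per primary
# instance of the node):
#
#   self._migrater = TLMigrateInstance(self, self.op.instance_name,
#                                      self.op.live, self.op.cleanup)
#   self.tasklets = [self._migrater]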
5498 def _CreateBlockDev(lu, node, instance, device, force_create,
5499                     info, force_open):
5500 """Create a tree of block devices on a given node.
5502   If this device type has to be created on secondaries, create it and
5503   all of its children.
5505   If not, just recurse to children keeping the same 'force' value.
5507 @param lu: the lu on whose behalf we execute
5508 @param node: the node on which to create the device
5509 @type instance: L{objects.Instance}
5510 @param instance: the instance which owns the device
5511 @type device: L{objects.Disk}
5512 @param device: the device to create
5513 @type force_create: boolean
5514   @param force_create: whether to force creation of this device; this
5515     will be changed to True whenever we find a device which has the
5516     CreateOnSecondary() attribute set
5517 @param info: the extra 'metadata' we should attach to the device
5518 (this will be represented as a LVM tag)
5519 @type force_open: boolean
5520   @param force_open: this parameter will be passed to the
5521     L{backend.BlockdevCreate} function where it specifies
5522     whether we run on primary or not, and it affects both
5523     the child assembly and the device's own Open() execution
5526   if device.CreateOnSecondary():
5527     force_create = True
5529   if device.children:
5530     for child in device.children:
5531       _CreateBlockDev(lu, node, instance, child, force_create,
5532                       info, force_open)
5534   if not force_create:
5535     return
5537   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5540 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5541 """Create a single block device on a given node.
5543   This will not recurse over children of the device, so they must be
5544   created in advance.
5546 @param lu: the lu on whose behalf we execute
5547 @param node: the node on which to create the device
5548 @type instance: L{objects.Instance}
5549 @param instance: the instance which owns the device
5550 @type device: L{objects.Disk}
5551 @param device: the device to create
5552 @param info: the extra 'metadata' we should attach to the device
5553 (this will be represented as a LVM tag)
5554 @type force_open: boolean
5555   @param force_open: this parameter will be passed to the
5556     L{backend.BlockdevCreate} function where it specifies
5557     whether we run on primary or not, and it affects both
5558     the child assembly and the device's own Open() execution
5561 lu.cfg.SetDiskID(device, node)
5562 result = lu.rpc.call_blockdev_create(node, device, device.size,
5563 instance.name, force_open, info)
5564 result.Raise("Can't create block device %s on"
5565 " node %s for instance %s" % (device, node, instance.name))
5566 if device.physical_id is None:
5567 device.physical_id = result.payload
5570 def _GenerateUniqueNames(lu, exts):
5571 """Generate a suitable LV name.
5573   This will generate unique logical volume names, one for each of
5574   the given suffixes.
5575   """
5576   results = []
5577   for val in exts:
5578     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5579     results.append("%s%s" % (new_id, val))
5580   return results
5583 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5584                          p_minor, s_minor):
5585 """Generate a drbd8 device complete with its children.
5588 port = lu.cfg.AllocatePort()
5589 vgname = lu.cfg.GetVGName()
5590 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5591 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5592 logical_id=(vgname, names[0]))
5593 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5594 logical_id=(vgname, names[1]))
5595 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5596                           logical_id=(primary, secondary, port,
5597                                       p_minor, s_minor,
5598                                       shared_secret),
5599                           children=[dev_data, dev_meta],
5600                           iv_name=iv_name)
5601   return drbd_dev
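# Resulting device tree for a single disk (sizes illustrative): the DRBD8
# device mirrors a data LV and carries a fixed 128 MiB metadata LV:
#
#   LD_DRBD8 (size=1024)
#     +- LD_LV data (size=1024)
#     +- LD_LV meta (size=128)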
5604 def _GenerateDiskTemplate(lu, template_name,
5605 instance_name, primary_node,
5606 secondary_nodes, disk_info,
5607                           file_storage_dir, file_driver,
5608                           base_index):
5609 """Generate the entire disk layout for a given template type.
5612 #TODO: compute space requirements
5614 vgname = lu.cfg.GetVGName()
5615   disk_count = len(disk_info)
5616   disks = []
5617   if template_name == constants.DT_DISKLESS:
5618     pass
5619 elif template_name == constants.DT_PLAIN:
5620 if len(secondary_nodes) != 0:
5621 raise errors.ProgrammerError("Wrong template configuration")
5623 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5624 for i in range(disk_count)])
5625 for idx, disk in enumerate(disk_info):
5626 disk_index = idx + base_index
5627 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5628 logical_id=(vgname, names[idx]),
5629                               iv_name="disk/%d" % disk_index,
5630                               mode=disk["mode"])
5631       disks.append(disk_dev)
5632 elif template_name == constants.DT_DRBD8:
5633 if len(secondary_nodes) != 1:
5634 raise errors.ProgrammerError("Wrong template configuration")
5635 remote_node = secondary_nodes[0]
5636 minors = lu.cfg.AllocateDRBDMinor(
5637 [primary_node, remote_node] * len(disk_info), instance_name)
5639     names = []
5640     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5641 for i in range(disk_count)]):
5642 names.append(lv_prefix + "_data")
5643 names.append(lv_prefix + "_meta")
5644 for idx, disk in enumerate(disk_info):
5645 disk_index = idx + base_index
5646 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5647 disk["size"], names[idx*2:idx*2+2],
5648 "disk/%d" % disk_index,
5649 minors[idx*2], minors[idx*2+1])
5650 disk_dev.mode = disk["mode"]
5651 disks.append(disk_dev)
5652 elif template_name == constants.DT_FILE:
5653 if len(secondary_nodes) != 0:
5654 raise errors.ProgrammerError("Wrong template configuration")
5656 for idx, disk in enumerate(disk_info):
5657 disk_index = idx + base_index
5658 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5659 iv_name="disk/%d" % disk_index,
5660 logical_id=(file_driver,
5661 "%s/disk%d" % (file_storage_dir,
5664 disks.append(disk_dev)
5666 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5670 def _GetInstanceInfoText(instance):
5671 """Compute that text that should be added to the disk's metadata.
5674 return "originstname+%s" % instance.name
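# Example (hypothetical instance name): an instance named
# "inst1.example.com" gets the LVM tag "originstname+inst1.example.com".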
5677 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5678 """Create all disks for an instance.
5680 This abstracts away some work from AddInstance.
5682 @type lu: L{LogicalUnit}
5683 @param lu: the logical unit on whose behalf we execute
5684 @type instance: L{objects.Instance}
5685 @param instance: the instance whose disks we should create
5687 @param to_skip: list of indices to skip
5688 @type target_node: string
5689 @param target_node: if passed, overrides the target node for creation
5691 @return: the success of the creation
5694 info = _GetInstanceInfoText(instance)
5695 if target_node is None:
5696 pnode = instance.primary_node
5697     all_nodes = instance.all_nodes
5698   else:
5699     pnode = target_node
5700     all_nodes = [pnode]
5702 if instance.disk_template == constants.DT_FILE:
5703 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5704 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5706 result.Raise("Failed to create directory '%s' on"
5707 " node %s" % (file_storage_dir, pnode))
5709 # Note: this needs to be kept in sync with adding of disks in
5710 # LUSetInstanceParams
5711 for idx, device in enumerate(instance.disks):
5712     if to_skip and idx in to_skip:
5713       continue
5714 logging.info("Creating volume %s for instance %s",
5715 device.iv_name, instance.name)
5717 for node in all_nodes:
5718 f_create = node == pnode
5719 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5722 def _RemoveDisks(lu, instance, target_node=None):
5723 """Remove all disks for an instance.
5725 This abstracts away some work from `AddInstance()` and
5726 `RemoveInstance()`. Note that in case some of the devices couldn't
5727 be removed, the removal will continue with the other ones (compare
5728 with `_CreateDisks()`).
5730 @type lu: L{LogicalUnit}
5731 @param lu: the logical unit on whose behalf we execute
5732 @type instance: L{objects.Instance}
5733 @param instance: the instance whose disks we should remove
5734 @type target_node: string
5735 @param target_node: used to override the node on which to remove the disks
5737 @return: the success of the removal
5740   logging.info("Removing block devices for instance %s", instance.name)
5742   all_result = True
5743   for device in instance.disks:
5744     if target_node:
5745       edata = [(target_node, device)]
5746     else:
5747       edata = device.ComputeNodeTree(instance.primary_node)
5748 for node, disk in edata:
5749 lu.cfg.SetDiskID(disk, node)
5750       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5751       if msg:
5752         lu.LogWarning("Could not remove block device %s on node %s,"
5753                       " continuing anyway: %s", device.iv_name, node, msg)
5754         all_result = False
5756 if instance.disk_template == constants.DT_FILE:
5757     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5758     if target_node:
5759       tgt = target_node
5760     else:
5761       tgt = instance.primary_node
5762     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5763     if result.fail_msg:
5764       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5765                     file_storage_dir, instance.primary_node, result.fail_msg)
5766       all_result = False
5768   return all_result
5771 def _ComputeDiskSize(disk_template, disks):
5772 """Compute disk size requirements in the volume group
5775 # Required free disk space as a function of disk and swap space
5776 req_size_dict = {
5777 constants.DT_DISKLESS: None,
5778 constants.DT_PLAIN: sum(d["size"] for d in disks),
5779 # 128 MB are added for drbd metadata for each disk
5780 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
5781 constants.DT_FILE: None,
5782 }
5784 if disk_template not in req_size_dict:
5785 raise errors.ProgrammerError("Disk template '%s' size requirement"
5786 " is unknown" % disk_template)
5788 return req_size_dict[disk_template]
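# A minimal sketch of the computation above (hypothetical two-disk spec,
# sizes in MB):
#
#   _ComputeDiskSize(constants.DT_PLAIN, [{"size": 512}, {"size": 1024}])
#   # -> 1536 (plain LVs need exactly the sum of the disk sizes)
#   _ComputeDiskSize(constants.DT_DRBD8, [{"size": 512}, {"size": 1024}])
#   # -> 1792 (128 MB of DRBD metadata added per disk)
#   _ComputeDiskSize(constants.DT_DISKLESS, [])
#   # -> None (no volume group space required)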
5791 def _CheckHVParams(lu, nodenames, hvname, hvparams):
5792 """Hypervisor parameter validation.
5794 This function abstracts the hypervisor parameter validation to be
5795 used in both instance create and instance modify.
5797 @type lu: L{LogicalUnit}
5798 @param lu: the logical unit for which we check
5799 @type nodenames: list
5800 @param nodenames: the list of nodes on which we should check
5801 @type hvname: string
5802 @param hvname: the name of the hypervisor we should use
5803 @type hvparams: dict
5804 @param hvparams: the parameters which we need to check
5805 @raise errors.OpPrereqError: if the parameters are not valid
5808 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5809 hvname,
5810 hvparams)
5811 for node in nodenames:
5815 info.Raise("Hypervisor parameter validation failed on node %s" % node)
5818 class LUCreateInstance(LogicalUnit):
5819 """Create an instance.
5822 HPATH = "instance-add"
5823 HTYPE = constants.HTYPE_INSTANCE
5824 _OP_REQP = ["instance_name", "disks", "disk_template",
5826 "wait_for_sync", "ip_check", "nics",
5827 "hvparams", "beparams"]
5830 def CheckArguments(self):
5834 # set optional parameters to None if they don't exist
5835 for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
5836 if not hasattr(self.op, attr):
5837 setattr(self.op, attr, None)
5839 # do not require name_check to ease forward/backward compatibility
5841 if not hasattr(self.op, "name_check"):
5842 self.op.name_check = True
5843 # validate/normalize the instance name
5844 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
5845 if self.op.ip_check and not self.op.name_check:
5846 # TODO: make the ip check more flexible and not depend on the name check
5847 raise errors.OpPrereqError("Cannot do ip checks without a name check",
5849 if (self.op.disk_template == constants.DT_FILE and
5850 not constants.ENABLE_FILE_STORAGE):
5851 raise errors.OpPrereqError("File storage disabled at configure time",
5853 # check disk information: either all adopt, or no adopt
5854 has_adopt = has_no_adopt = False
5855 for disk in self.op.disks:
5856 if "adopt" in disk:
5857 has_adopt = True
5858 else:
5859 has_no_adopt = True
5860 if has_adopt and has_no_adopt:
5861 raise errors.OpPrereqError("Either all disks have are adoped or none is",
5864 if self.op.disk_template != constants.DT_PLAIN:
5865 raise errors.OpPrereqError("Disk adoption is only supported for the"
5866 " 'plain' disk template",
5868 if self.op.iallocator is not None:
5869 raise errors.OpPrereqError("Disk adoption not allowed with an"
5870 " iallocator script", errors.ECODE_INVAL)
5871 if self.op.mode == constants.INSTANCE_IMPORT:
5872 raise errors.OpPrereqError("Disk adoption not allowed for"
5873 " instance import", errors.ECODE_INVAL)
5875 self.adopt_disks = has_adopt
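# Illustrative example (hypothetical values) of the two disk-specification
# styles distinguished above; mixing them in one opcode is rejected:
#
#   disks=[{"size": 1024}, {"size": 2048}]        # all disks newly created
#   disks=[{"adopt": "lv1"}, {"adopt": "lv2"}]    # all disks adopted LVs
#
# Adoption additionally requires the 'plain' disk template, no iallocator
# and no import mode, as checked above.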
5877 def ExpandNames(self):
5878 """ExpandNames for CreateInstance.
5880 Figure out the right locks for instance creation.
5883 self.needed_locks = {}
5885 # cheap checks, mostly valid constants given
5887 # verify creation mode
5888 if self.op.mode not in (constants.INSTANCE_CREATE,
5889 constants.INSTANCE_IMPORT):
5890 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
5891 self.op.mode, errors.ECODE_INVAL)
5893 # disk template and mirror node verification
5894 _CheckDiskTemplate(self.op.disk_template)
5896 if self.op.hypervisor is None:
5897 self.op.hypervisor = self.cfg.GetHypervisorType()
5899 cluster = self.cfg.GetClusterInfo()
5900 enabled_hvs = cluster.enabled_hypervisors
5901 if self.op.hypervisor not in enabled_hvs:
5902 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
5903 " cluster (%s)" % (self.op.hypervisor,
5904 ",".join(enabled_hvs)),
5907 # check hypervisor parameter syntax (locally)
5908 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5909 filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
5910 self.op.hvparams)
5911 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
5912 hv_type.CheckParameterSyntax(filled_hvp)
5913 self.hv_full = filled_hvp
5914 # check that we don't specify global parameters on an instance
5915 _CheckGlobalHvParams(self.op.hvparams)
5917 # fill and remember the beparams dict
5918 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5919 self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
5920 self.op.beparams)
5922 #### instance parameters check
5924 # instance name verification
5925 if self.op.name_check:
5926 hostname1 = utils.GetHostInfo(self.op.instance_name)
5927 self.op.instance_name = instance_name = hostname1.name
5928 # used in CheckPrereq for ip ping check
5929 self.check_ip = hostname1.ip
5930 else:
5931 instance_name = self.op.instance_name
5932 self.check_ip = None
5934 # this is just a preventive check, but someone might still add this
5935 # instance in the meantime, and creation will fail at lock-add time
5936 if instance_name in self.cfg.GetInstanceList():
5937 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5938 instance_name, errors.ECODE_EXISTS)
5940 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
5942 # NIC buildup
5943 self.nics = []
5944 for idx, nic in enumerate(self.op.nics):
5945 nic_mode_req = nic.get("mode", None)
5946 nic_mode = nic_mode_req
5947 if nic_mode is None:
5948 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5950 # in routed mode, for the first nic, the default ip is 'auto'
5951 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
5952 default_ip_mode = constants.VALUE_AUTO
5953 else:
5954 default_ip_mode = constants.VALUE_NONE
5956 # ip validity checks
5957 ip = nic.get("ip", default_ip_mode)
5958 if ip is None or ip.lower() == constants.VALUE_NONE:
5959 nic_ip = None
5960 elif ip.lower() == constants.VALUE_AUTO:
5961 if not self.op.name_check:
5962 raise errors.OpPrereqError("IP address set to auto but name checks"
5963 " have been skipped. Aborting.",
5965 nic_ip = hostname1.ip
5966 else:
5967 if not utils.IsValidIP(ip):
5968 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
5969 " like a valid IP" % ip,
5973 # TODO: check the ip address for uniqueness
5974 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
5975 raise errors.OpPrereqError("Routed nic mode requires an ip address",
5978 # MAC address verification
5979 mac = nic.get("mac", constants.VALUE_AUTO)
5980 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5981 mac = utils.NormalizeAndValidateMac(mac)
5983 try:
5984 self.cfg.ReserveMAC(mac, self.proc.GetECId())
5985 except errors.ReservationError:
5986 raise errors.OpPrereqError("MAC address %s already in use"
5987 " in cluster" % mac,
5988 errors.ECODE_NOTUNIQUE)
5990 # bridge verification
5991 bridge = nic.get("bridge", None)
5992 link = nic.get("link", None)
5994 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
5995 " at the same time", errors.ECODE_INVAL)
5996 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
5997 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6004 nicparams[constants.NIC_MODE] = nic_mode_req
6006 nicparams[constants.NIC_LINK] = link
6008 check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6009 nicparams)
6010 objects.NIC.CheckParameterSyntax(check_params)
6011 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
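# Illustrative nic specification (hypothetical values) as consumed by the
# loop above; every key is optional and cluster defaults fill the gaps:
#
#   nics=[{"mode": constants.NIC_MODE_BRIDGED, "link": "br0",
#          "mac": constants.VALUE_AUTO, "ip": None}]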
6013 # disk checks/pre-build
6014 self.disks = []
6015 for disk in self.op.disks:
6016 mode = disk.get("mode", constants.DISK_RDWR)
6017 if mode not in constants.DISK_ACCESS_SET:
6018 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6019 mode, errors.ECODE_INVAL)
6020 size = disk.get("size", None)
6022 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6023 try:
6024 size = int(size)
6025 except (TypeError, ValueError):
6026 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6027 errors.ECODE_INVAL)
6028 new_disk = {"size": size, "mode": mode}
6030 new_disk["adopt"] = disk["adopt"]
6031 self.disks.append(new_disk)
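# After this loop each element of self.disks is a normalized dict such as
# {"size": 1024, "mode": constants.DISK_RDWR} (plus an "adopt" key when LV
# adoption was requested); sizes are guaranteed to be ints at this point.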
6033 # file storage checks
6034 if (self.op.file_driver and
6035 not self.op.file_driver in constants.FILE_DRIVER):
6036 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6037 self.op.file_driver, errors.ECODE_INVAL)
6039 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6040 raise errors.OpPrereqError("File storage directory path not absolute",
6043 ### Node/iallocator related checks
6044 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6045 raise errors.OpPrereqError("One and only one of iallocator and primary"
6046 " node must be given",
6049 if self.op.iallocator:
6050 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6051 else:
6052 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6053 nodelist = [self.op.pnode]
6054 if self.op.snode is not None:
6055 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6056 nodelist.append(self.op.snode)
6057 self.needed_locks[locking.LEVEL_NODE] = nodelist
6059 # in case of import lock the source node too
6060 if self.op.mode == constants.INSTANCE_IMPORT:
6061 src_node = getattr(self.op, "src_node", None)
6062 src_path = getattr(self.op, "src_path", None)
6064 if src_path is None:
6065 self.op.src_path = src_path = self.op.instance_name
6067 if src_node is None:
6068 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6069 self.op.src_node = None
6070 if os.path.isabs(src_path):
6071 raise errors.OpPrereqError("Importing an instance from an absolute"
6072 " path requires a source node option.",
6075 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6076 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6077 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6078 if not os.path.isabs(src_path):
6079 self.op.src_path = src_path = \
6080 utils.PathJoin(constants.EXPORT_DIR, src_path)
6082 # On import force_variant must be True, because if we forced it at
6083 # initial install, our only chance when importing it back is that it
6084 # works again!
6085 self.op.force_variant = True
6087 else: # INSTANCE_CREATE
6088 if getattr(self.op, "os_type", None) is None:
6089 raise errors.OpPrereqError("No guest OS specified",
6091 self.op.force_variant = getattr(self.op, "force_variant", False)
6093 def _RunAllocator(self):
6094 """Run the allocator based on input opcode.
6097 nics = [n.ToDict() for n in self.nics]
6098 ial = IAllocator(self.cfg, self.rpc,
6099 mode=constants.IALLOCATOR_MODE_ALLOC,
6100 name=self.op.instance_name,
6101 disk_template=self.op.disk_template,
6102 tags=[],
6103 os=self.op.os_type,
6104 vcpus=self.be_full[constants.BE_VCPUS],
6105 mem_size=self.be_full[constants.BE_MEMORY],
6106 disks=self.disks,
6107 nics=nics,
6108 hypervisor=self.op.hypervisor,
6109 )
6111 ial.Run(self.op.iallocator)
6114 raise errors.OpPrereqError("Can't compute nodes using"
6115 " iallocator '%s': %s" %
6116 (self.op.iallocator, ial.info),
6117 errors.ECODE_NORES)
6118 if len(ial.result) != ial.required_nodes:
6119 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6120 " of nodes (%s), required %s" %
6121 (self.op.iallocator, len(ial.result),
6122 ial.required_nodes), errors.ECODE_FAULT)
6123 self.op.pnode = ial.result[0]
6124 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6125 self.op.instance_name, self.op.iallocator,
6126 utils.CommaJoin(ial.result))
6127 if ial.required_nodes == 2:
6128 self.op.snode = ial.result[1]
6130 def BuildHooksEnv(self):
6131 """Build hooks env.
6133 This runs on master, primary and secondary nodes of the instance.
6135 """
6136 env = {
6137 "ADD_MODE": self.op.mode,
6138 }
6139 if self.op.mode == constants.INSTANCE_IMPORT:
6140 env["SRC_NODE"] = self.op.src_node
6141 env["SRC_PATH"] = self.op.src_path
6142 env["SRC_IMAGES"] = self.src_images
6144 env.update(_BuildInstanceHookEnv(
6145 name=self.op.instance_name,
6146 primary_node=self.op.pnode,
6147 secondary_nodes=self.secondaries,
6148 status=self.op.start,
6149 os_type=self.op.os_type,
6150 memory=self.be_full[constants.BE_MEMORY],
6151 vcpus=self.be_full[constants.BE_VCPUS],
6152 nics=_NICListToTuple(self, self.nics),
6153 disk_template=self.op.disk_template,
6154 disks=[(d["size"], d["mode"]) for d in self.disks],
6157 hypervisor_name=self.op.hypervisor,
6160 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6165 def CheckPrereq(self):
6166 """Check prerequisites.
6169 if (not self.cfg.GetVGName() and
6170 self.op.disk_template not in constants.DTS_NOT_LVM):
6171 raise errors.OpPrereqError("Cluster does not support lvm-based"
6172 " instances", errors.ECODE_STATE)
6174 if self.op.mode == constants.INSTANCE_IMPORT:
6175 src_node = self.op.src_node
6176 src_path = self.op.src_path
6178 if src_node is None:
6179 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6180 exp_list = self.rpc.call_export_list(locked_nodes)
6181 found = False
6182 for node in exp_list:
6183 if exp_list[node].fail_msg:
6184 continue
6185 if src_path in exp_list[node].payload:
6186 found = True
6187 self.op.src_node = src_node = node
6188 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6189 src_path)
6190 break
6191 if not found:
6192 raise errors.OpPrereqError("No export found for relative path %s" %
6193 src_path, errors.ECODE_INVAL)
6195 _CheckNodeOnline(self, src_node)
6196 result = self.rpc.call_export_info(src_node, src_path)
6197 result.Raise("No export or invalid export found in dir %s" % src_path)
6199 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6200 if not export_info.has_section(constants.INISECT_EXP):
6201 raise errors.ProgrammerError("Corrupted export config",
6202 errors.ECODE_ENVIRON)
6204 ei_version = export_info.get(constants.INISECT_EXP, 'version')
6205 if (int(ei_version) != constants.EXPORT_VERSION):
6206 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6207 (ei_version, constants.EXPORT_VERSION),
6208 errors.ECODE_ENVIRON)
6210 # Check that the new instance doesn't have less disks than the export
6211 instance_disks = len(self.disks)
6212 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6213 if instance_disks < export_disks:
6214 raise errors.OpPrereqError("Not enough disks to import."
6215 " (instance: %d, export: %d)" %
6216 (instance_disks, export_disks),
6217 errors.ECODE_INVAL)
6219 self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
6220 disk_images = []
6221 for idx in range(export_disks):
6222 option = 'disk%d_dump' % idx
6223 if export_info.has_option(constants.INISECT_INS, option):
6224 # FIXME: are the old os-es, disk sizes, etc. useful?
6225 export_name = export_info.get(constants.INISECT_INS, option)
6226 image = utils.PathJoin(src_path, export_name)
6227 disk_images.append(image)
6228 else:
6229 disk_images.append(False)
6231 self.src_images = disk_images
6233 old_name = export_info.get(constants.INISECT_INS, 'name')
6234 # FIXME: int() here could throw a ValueError on broken exports
6235 exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
6236 if self.op.instance_name == old_name:
6237 for idx, nic in enumerate(self.nics):
6238 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6239 nic_mac_ini = 'nic%d_mac' % idx
6240 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6242 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6244 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6245 if self.op.ip_check:
6246 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6247 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6248 (self.check_ip, self.op.instance_name),
6249 errors.ECODE_NOTUNIQUE)
6251 #### mac address generation
6252 # By generating here the mac address both the allocator and the hooks get
6253 # the real final mac address rather than the 'auto' or 'generate' value.
6254 # There is a race condition between the generation and the instance object
6255 # creation, which means that we know the mac is valid now, but we're not
6256 # sure it will be when we actually add the instance. If things go bad
6257 # adding the instance will abort because of a duplicate mac, and the
6258 # creation job will fail.
6259 for nic in self.nics:
6260 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6261 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6265 if self.op.iallocator is not None:
6266 self._RunAllocator()
6268 #### node related checks
6270 # check primary node
6271 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6272 assert self.pnode is not None, \
6273 "Cannot retrieve locked node %s" % self.op.pnode
6275 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6276 pnode.name, errors.ECODE_STATE)
6278 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6279 pnode.name, errors.ECODE_STATE)
6281 self.secondaries = []
6283 # mirror node verification
6284 if self.op.disk_template in constants.DTS_NET_MIRROR:
6285 if self.op.snode is None:
6286 raise errors.OpPrereqError("The networked disk templates need"
6287 " a mirror node", errors.ECODE_INVAL)
6288 if self.op.snode == pnode.name:
6289 raise errors.OpPrereqError("The secondary node cannot be the"
6290 " primary node.", errors.ECODE_INVAL)
6291 _CheckNodeOnline(self, self.op.snode)
6292 _CheckNodeNotDrained(self, self.op.snode)
6293 self.secondaries.append(self.op.snode)
6295 nodenames = [pnode.name] + self.secondaries
6297 req_size = _ComputeDiskSize(self.op.disk_template,
6298 self.disks)
6300 # Check lv size requirements, if not adopting
6301 if req_size is not None and not self.adopt_disks:
6302 _CheckNodesFreeDisk(self, nodenames, req_size)
6304 if self.adopt_disks: # instead, we must check the adoption data
6305 all_lvs = set([i["adopt"] for i in self.disks])
6306 if len(all_lvs) != len(self.disks):
6307 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6309 for lv_name in all_lvs:
6310 try:
6311 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6312 except errors.ReservationError:
6313 raise errors.OpPrereqError("LV named %s used by another instance" %
6314 lv_name, errors.ECODE_NOTUNIQUE)
6316 node_lvs = self.rpc.call_lv_list([pnode.name],
6317 self.cfg.GetVGName())[pnode.name]
6318 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6319 node_lvs = node_lvs.payload
6320 delta = all_lvs.difference(node_lvs.keys())
6322 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6323 utils.CommaJoin(delta),
6325 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6327 raise errors.OpPrereqError("Online logical volumes found, cannot"
6328 " adopt: %s" % utils.CommaJoin(online_lvs),
6330 # update the size of disk based on what is found
6331 for dsk in self.disks:
6332 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6334 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6337 result = self.rpc.call_os_get(pnode.name, self.op.os_type)
6338 result.Raise("OS '%s' not in supported os list for primary node %s" %
6339 (self.op.os_type, pnode.name),
6340 prereq=True, ecode=errors.ECODE_INVAL)
6341 if not self.op.force_variant:
6342 _CheckOSVariant(result.payload, self.op.os_type)
6344 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6346 # memory check on primary node
6348 _CheckNodeFreeMemory(self, self.pnode.name,
6349 "creating instance %s" % self.op.instance_name,
6350 self.be_full[constants.BE_MEMORY],
6351 self.op.hypervisor)
6353 self.dry_run_result = list(nodenames)
6355 def Exec(self, feedback_fn):
6356 """Create and add the instance to the cluster.
6359 instance = self.op.instance_name
6360 pnode_name = self.pnode.name
6362 ht_kind = self.op.hypervisor
6363 if ht_kind in constants.HTS_REQ_PORT:
6364 network_port = self.cfg.AllocatePort()
6365 else:
6366 network_port = None
6368 ##if self.op.vnc_bind_address is None:
6369 ## self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
6371 # this is needed because os.path.join does not accept None arguments
6372 if self.op.file_storage_dir is None:
6373 string_file_storage_dir = ""
6374 else:
6375 string_file_storage_dir = self.op.file_storage_dir
6377 # build the full file storage dir path
6378 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6379 string_file_storage_dir, instance)
6382 disks = _GenerateDiskTemplate(self,
6383 self.op.disk_template,
6384 instance, pnode_name,
6385 self.secondaries,
6386 self.disks,
6387 file_storage_dir,
6388 self.op.file_driver,
6389 0)
6391 iobj = objects.Instance(name=instance, os=self.op.os_type,
6392 primary_node=pnode_name,
6393 nics=self.nics, disks=disks,
6394 disk_template=self.op.disk_template,
6395 admin_up=False,
6396 network_port=network_port,
6397 beparams=self.op.beparams,
6398 hvparams=self.op.hvparams,
6399 hypervisor=self.op.hypervisor,
6400 )
6402 if self.adopt_disks:
6403 # rename LVs to the newly-generated names; we need to construct
6404 # 'fake' LV disks with the old data, plus the new unique_id
6405 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6406 rename_to = []
6407 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
6408 rename_to.append(t_dsk.logical_id)
6409 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6410 self.cfg.SetDiskID(t_dsk, pnode_name)
6411 result = self.rpc.call_blockdev_rename(pnode_name,
6412 zip(tmp_disks, rename_to))
6413 result.Raise("Failed to rename adoped LVs")
6415 feedback_fn("* creating instance disks...")
6416 try:
6417 _CreateDisks(self, iobj)
6418 except errors.OpExecError:
6419 self.LogWarning("Device creation failed, reverting...")
6420 try:
6421 _RemoveDisks(self, iobj)
6422 finally:
6423 self.cfg.ReleaseDRBDMinors(instance)
6424 raise
6426 feedback_fn("adding instance %s to cluster config" % instance)
6428 self.cfg.AddInstance(iobj, self.proc.GetECId())
6430 # Declare that we don't want to remove the instance lock anymore, as we've
6431 # added the instance to the config
6432 del self.remove_locks[locking.LEVEL_INSTANCE]
6433 # Unlock all the nodes
6434 if self.op.mode == constants.INSTANCE_IMPORT:
6435 nodes_keep = [self.op.src_node]
6436 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6437 if node != self.op.src_node]
6438 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6439 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6440 else:
6441 self.context.glm.release(locking.LEVEL_NODE)
6442 del self.acquired_locks[locking.LEVEL_NODE]
6444 if self.op.wait_for_sync:
6445 disk_abort = not _WaitForSync(self, iobj)
6446 elif iobj.disk_template in constants.DTS_NET_MIRROR:
6447 # make sure the disks are not degraded (still sync-ing is ok)
6449 feedback_fn("* checking mirrors status")
6450 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6451 else:
6452 disk_abort = False
6454 if disk_abort:
6455 _RemoveDisks(self, iobj)
6456 self.cfg.RemoveInstance(iobj.name)
6457 # Make sure the instance lock gets removed
6458 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6459 raise errors.OpExecError("There are some degraded disks for"
6462 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6463 if self.op.mode == constants.INSTANCE_CREATE:
6464 feedback_fn("* running the instance OS create scripts...")
6465 # FIXME: pass debug option from opcode to backend
6466 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6467 self.op.debug_level)
6468 result.Raise("Could not add os for instance %s"
6469 " on node %s" % (instance, pnode_name))
6471 elif self.op.mode == constants.INSTANCE_IMPORT:
6472 feedback_fn("* running the instance OS import scripts...")
6473 src_node = self.op.src_node
6474 src_images = self.src_images
6475 cluster_name = self.cfg.GetClusterName()
6476 # FIXME: pass debug option from opcode to backend
6477 import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6478 src_node, src_images,
6479 cluster_name,
6480 self.op.debug_level)
6481 msg = import_result.fail_msg
6483 self.LogWarning("Error while importing the disk images for instance"
6484 " %s on node %s: %s" % (instance, pnode_name, msg))
6485 else:
6486 # also checked in the prereq part
6487 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6488 % self.op.mode)
6490 if self.op.start:
6491 iobj.admin_up = True
6492 self.cfg.Update(iobj, feedback_fn)
6493 logging.info("Starting instance %s on node %s", instance, pnode_name)
6494 feedback_fn("* starting instance...")
6495 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6496 result.Raise("Could not start instance")
6498 return list(iobj.all_nodes)
6501 class LUConnectConsole(NoHooksLU):
6502 """Connect to an instance's console.
6504 This is somewhat special in that it returns the command line that
6505 you need to run on the master node in order to connect to the
6506 console.
6508 """
6509 _OP_REQP = ["instance_name"]
6512 def ExpandNames(self):
6513 self._ExpandAndLockInstance()
6515 def CheckPrereq(self):
6516 """Check prerequisites.
6518 This checks that the instance is in the cluster.
6521 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6522 assert self.instance is not None, \
6523 "Cannot retrieve locked instance %s" % self.op.instance_name
6524 _CheckNodeOnline(self, self.instance.primary_node)
6526 def Exec(self, feedback_fn):
6527 """Connect to the console of an instance
6530 instance = self.instance
6531 node = instance.primary_node
6533 node_insts = self.rpc.call_instance_list([node],
6534 [instance.hypervisor])[node]
6535 node_insts.Raise("Can't get node information from %s" % node)
6537 if instance.name not in node_insts.payload:
6538 raise errors.OpExecError("Instance %s is not running." % instance.name)
6540 logging.debug("Connecting to console of %s on %s", instance.name, node)
6542 hyper = hypervisor.GetHypervisor(instance.hypervisor)
6543 cluster = self.cfg.GetClusterInfo()
6544 # beparams and hvparams are passed separately, to avoid editing the
6545 # instance and then saving the defaults in the instance itself.
6546 hvparams = cluster.FillHV(instance)
6547 beparams = cluster.FillBE(instance)
6548 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6551 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
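# Editorial note (hedged sketch): the value returned above is a complete
# SSH command line built by SshRunner.BuildCmd; a client such as
# "gnt-instance console" is expected to exec it on the master node rather
# than have the master daemon open the console itself, roughly like
# ["ssh", ..., "node1.example.com", "xm console web1.example.com"]
# (node, instance and hypervisor command here are hypothetical).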
6554 class LUReplaceDisks(LogicalUnit):
6555 """Replace the disks of an instance.
6558 HPATH = "mirrors-replace"
6559 HTYPE = constants.HTYPE_INSTANCE
6560 _OP_REQP = ["instance_name", "mode", "disks"]
6563 def CheckArguments(self):
6564 if not hasattr(self.op, "remote_node"):
6565 self.op.remote_node = None
6566 if not hasattr(self.op, "iallocator"):
6567 self.op.iallocator = None
6568 if not hasattr(self.op, "early_release"):
6569 self.op.early_release = False
6571 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6572 self.op.iallocator)
6574 def ExpandNames(self):
6575 self._ExpandAndLockInstance()
6577 if self.op.iallocator is not None:
6578 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6580 elif self.op.remote_node is not None:
6581 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6582 self.op.remote_node = remote_node
6584 # Warning: do not remove the locking of the new secondary here
6585 # unless DRBD8.AddChildren is changed to work in parallel;
6586 # currently it doesn't since parallel invocations of
6587 # FindUnusedMinor will conflict
6588 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6589 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6591 else:
6592 self.needed_locks[locking.LEVEL_NODE] = []
6593 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6595 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6596 self.op.iallocator, self.op.remote_node,
6597 self.op.disks, False, self.op.early_release)
6599 self.tasklets = [self.replacer]
6601 def DeclareLocks(self, level):
6602 # If we're not already locking all nodes in the set we have to declare the
6603 # instance's primary/secondary nodes.
6604 if (level == locking.LEVEL_NODE and
6605 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6606 self._LockInstancesNodes()
6608 def BuildHooksEnv(self):
6609 """Build hooks env.
6611 This runs on the master, the primary and all the secondaries.
6613 """
6614 instance = self.replacer.instance
6616 "MODE": self.op.mode,
6617 "NEW_SECONDARY": self.op.remote_node,
6618 "OLD_SECONDARY": instance.secondary_nodes[0],
6620 env.update(_BuildInstanceHookEnvByObject(self, instance))
6621 nl = [
6622 self.cfg.GetMasterNode(),
6623 instance.primary_node,
6624 ]
6625 if self.op.remote_node is not None:
6626 nl.append(self.op.remote_node)
6627 return env, nl, nl
6630 class LUEvacuateNode(LogicalUnit):
6631 """Relocate the secondary instances from a node.
6634 HPATH = "node-evacuate"
6635 HTYPE = constants.HTYPE_NODE
6636 _OP_REQP = ["node_name"]
6639 def CheckArguments(self):
6640 if not hasattr(self.op, "remote_node"):
6641 self.op.remote_node = None
6642 if not hasattr(self.op, "iallocator"):
6643 self.op.iallocator = None
6644 if not hasattr(self.op, "early_release"):
6645 self.op.early_release = False
6647 TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6648 self.op.remote_node,
6649 self.op.iallocator)
6651 def ExpandNames(self):
6652 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6654 self.needed_locks = {}
6656 # Declare node locks
6657 if self.op.iallocator is not None:
6658 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6660 elif self.op.remote_node is not None:
6661 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6663 # Warning: do not remove the locking of the new secondary here
6664 # unless DRBD8.AddChildren is changed to work in parallel;
6665 # currently it doesn't since parallel invocations of
6666 # FindUnusedMinor will conflict
6667 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
6668 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6671 raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
6673 # Create tasklets for replacing disks for all secondary instances on this
6674 # node
6675 names = []
6676 tasklets = []
6678 for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
6679 logging.debug("Replacing disks for instance %s", inst.name)
6680 names.append(inst.name)
6682 replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
6683 self.op.iallocator, self.op.remote_node, [],
6684 True, self.op.early_release)
6685 tasklets.append(replacer)
6687 self.tasklets = tasklets
6688 self.instance_names = names
6690 # Declare instance locks
6691 self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
6693 def DeclareLocks(self, level):
6694 # If we're not already locking all nodes in the set we have to declare the
6695 # instance's primary/secondary nodes.
6696 if (level == locking.LEVEL_NODE and
6697 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6698 self._LockInstancesNodes()
6700 def BuildHooksEnv(self):
6701 """Build hooks env.
6703 This runs on the master, the primary and all the secondaries.
6705 """
6706 env = {
6707 "NODE_NAME": self.op.node_name,
6708 }
6710 nl = [self.cfg.GetMasterNode()]
6712 if self.op.remote_node is not None:
6713 env["NEW_SECONDARY"] = self.op.remote_node
6714 nl.append(self.op.remote_node)
6716 return (env, nl, nl)
6719 class TLReplaceDisks(Tasklet):
6720 """Replaces disks for an instance.
6722 Note: Locking is not within the scope of this class.
6725 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
6726 disks, delay_iallocator, early_release):
6727 """Initializes this class.
6730 Tasklet.__init__(self, lu)
6733 self.instance_name = instance_name
6734 self.mode = mode
6735 self.iallocator_name = iallocator_name
6736 self.remote_node = remote_node
6737 self.disks = disks
6738 self.delay_iallocator = delay_iallocator
6739 self.early_release = early_release
6742 self.instance = None
6743 self.new_node = None
6744 self.target_node = None
6745 self.other_node = None
6746 self.remote_node_info = None
6747 self.node_secondary_ip = None
6749 @staticmethod
6750 def CheckArguments(mode, remote_node, iallocator):
6751 """Helper function for users of this class.
6754 # check for valid parameter combination
6755 if mode == constants.REPLACE_DISK_CHG:
6756 if remote_node is None and iallocator is None:
6757 raise errors.OpPrereqError("When changing the secondary either an"
6758 " iallocator script must be used or the"
6759 " new node given", errors.ECODE_INVAL)
6761 if remote_node is not None and iallocator is not None:
6762 raise errors.OpPrereqError("Give either the iallocator or the new"
6763 " secondary, not both", errors.ECODE_INVAL)
6765 elif remote_node is not None or iallocator is not None:
6766 # Not replacing the secondary
6767 raise errors.OpPrereqError("The iallocator and new node options can"
6768 " only be used when changing the"
6769 " secondary node", errors.ECODE_INVAL)
6771 @staticmethod
6772 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
6773 """Compute a new secondary node using an IAllocator.
6776 ial = IAllocator(lu.cfg, lu.rpc,
6777 mode=constants.IALLOCATOR_MODE_RELOC,
6778 name=instance_name,
6779 relocate_from=relocate_from)
6781 ial.Run(iallocator_name)
6784 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
6785 " %s" % (iallocator_name, ial.info),
6788 if len(ial.result) != ial.required_nodes:
6789 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6790 " of nodes (%s), required %s" %
6791 (iallocator_name,
6792 len(ial.result), ial.required_nodes),
6793 errors.ECODE_FAULT)
6795 remote_node_name = ial.result[0]
6797 lu.LogInfo("Selected new secondary for instance '%s': %s",
6798 instance_name, remote_node_name)
6800 return remote_node_name
6802 def _FindFaultyDisks(self, node_name):
6803 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
6804 node_name, True)
6806 def CheckPrereq(self):
6807 """Check prerequisites.
6809 This checks that the instance is in the cluster.
6812 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
6813 assert instance is not None, \
6814 "Cannot retrieve locked instance %s" % self.instance_name
6816 if instance.disk_template != constants.DT_DRBD8:
6817 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
6818 " instances", errors.ECODE_INVAL)
6820 if len(instance.secondary_nodes) != 1:
6821 raise errors.OpPrereqError("The instance has a strange layout,"
6822 " expected one secondary but found %d" %
6823 len(instance.secondary_nodes),
6824 errors.ECODE_FAULT)
6826 if not self.delay_iallocator:
6827 self._CheckPrereq2()
6829 def _CheckPrereq2(self):
6830 """Check prerequisites, second part.
6832 This function should always be part of CheckPrereq. It was separated and is
6833 now called from Exec because during node evacuation iallocator was only
6834 called with an unmodified cluster model, not taking planned changes into
6835 account.
6837 """
6838 instance = self.instance
6839 secondary_node = instance.secondary_nodes[0]
6841 if self.iallocator_name is None:
6842 remote_node = self.remote_node
6843 else:
6844 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
6845 instance.name, instance.secondary_nodes)
6847 if remote_node is not None:
6848 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
6849 assert self.remote_node_info is not None, \
6850 "Cannot retrieve locked node %s" % remote_node
6851 else:
6852 self.remote_node_info = None
6854 if remote_node == self.instance.primary_node:
6855 raise errors.OpPrereqError("The specified node is the primary node of"
6856 " the instance.", errors.ECODE_INVAL)
6858 if remote_node == secondary_node:
6859 raise errors.OpPrereqError("The specified node is already the"
6860 " secondary node of the instance.",
6863 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
6864 constants.REPLACE_DISK_CHG):
6865 raise errors.OpPrereqError("Cannot specify disks to be replaced",
6868 if self.mode == constants.REPLACE_DISK_AUTO:
6869 faulty_primary = self._FindFaultyDisks(instance.primary_node)
6870 faulty_secondary = self._FindFaultyDisks(secondary_node)
6872 if faulty_primary and faulty_secondary:
6873 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
6874 " one node and can not be repaired"
6875 " automatically" % self.instance_name,
6879 self.disks = faulty_primary
6880 self.target_node = instance.primary_node
6881 self.other_node = secondary_node
6882 check_nodes = [self.target_node, self.other_node]
6883 elif faulty_secondary:
6884 self.disks = faulty_secondary
6885 self.target_node = secondary_node
6886 self.other_node = instance.primary_node
6887 check_nodes = [self.target_node, self.other_node]
6888 else:
6889 self.disks = []
6890 check_nodes = []
6892 else:
6893 # Non-automatic modes
6894 if self.mode == constants.REPLACE_DISK_PRI:
6895 self.target_node = instance.primary_node
6896 self.other_node = secondary_node
6897 check_nodes = [self.target_node, self.other_node]
6899 elif self.mode == constants.REPLACE_DISK_SEC:
6900 self.target_node = secondary_node
6901 self.other_node = instance.primary_node
6902 check_nodes = [self.target_node, self.other_node]
6904 elif self.mode == constants.REPLACE_DISK_CHG:
6905 self.new_node = remote_node
6906 self.other_node = instance.primary_node
6907 self.target_node = secondary_node
6908 check_nodes = [self.new_node, self.other_node]
6910 _CheckNodeNotDrained(self.lu, remote_node)
6912 old_node_info = self.cfg.GetNodeInfo(secondary_node)
6913 assert old_node_info is not None
6914 if old_node_info.offline and not self.early_release:
6915 # doesn't make sense to delay the release
6916 self.early_release = True
6917 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
6918 " early-release mode", secondary_node)
6921 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
6924 # If not specified all disks should be replaced
6925 if not self.disks:
6926 self.disks = range(len(self.instance.disks))
6928 for node in check_nodes:
6929 _CheckNodeOnline(self.lu, node)
6931 # Check whether disks are valid
6932 for disk_idx in self.disks:
6933 instance.FindDisk(disk_idx)
6935 # Get secondary node IP addresses
6937 node_2nd_ip = {}
6938 for node_name in [self.target_node, self.other_node, self.new_node]:
6939 if node_name is not None:
6940 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
6942 self.node_secondary_ip = node_2nd_ip
6944 def Exec(self, feedback_fn):
6945 """Execute disk replacement.
6947 This dispatches the disk replacement to the appropriate handler.
6950 if self.delay_iallocator:
6951 self._CheckPrereq2()
6954 feedback_fn("No disks need replacement")
6957 feedback_fn("Replacing disk(s) %s for %s" %
6958 (utils.CommaJoin(self.disks), self.instance.name))
6960 activate_disks = (not self.instance.admin_up)
6962 # Activate the instance disks if we're replacing them on a down instance
6963 if activate_disks:
6964 _StartInstanceDisks(self.lu, self.instance, True)
6966 try:
6967 # Should we replace the secondary node?
6968 if self.new_node is not None:
6969 fn = self._ExecDrbd8Secondary
6970 else:
6971 fn = self._ExecDrbd8DiskOnly
6973 return fn(feedback_fn)
6975 finally:
6976 # Deactivate the instance disks if we're replacing them on a
6977 # down instance
6978 if activate_disks:
6979 _SafeShutdownInstanceDisks(self.lu, self.instance)
6981 def _CheckVolumeGroup(self, nodes):
6982 self.lu.LogInfo("Checking volume groups")
6984 vgname = self.cfg.GetVGName()
6986 # Make sure volume group exists on all involved nodes
6987 results = self.rpc.call_vg_list(nodes)
6989 raise errors.OpExecError("Can't list volume groups on the nodes")
6993 res.Raise("Error checking node %s" % node)
6994 if vgname not in res.payload:
6995 raise errors.OpExecError("Volume group '%s' not found on node %s" %
6998 def _CheckDisksExistence(self, nodes):
6999 # Check disk existence
7000 for idx, dev in enumerate(self.instance.disks):
7001 if idx not in self.disks:
7005 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7006 self.cfg.SetDiskID(dev, node)
7008 result = self.rpc.call_blockdev_find(node, dev)
7010 msg = result.fail_msg
7011 if msg or not result.payload:
7013 msg = "disk not found"
7014 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7017 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7018 for idx, dev in enumerate(self.instance.disks):
7019 if idx not in self.disks:
7022 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7025 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7026 ldisk=ldisk):
7027 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7028 " replace disks for instance %s" %
7029 (node_name, self.instance.name))
7031 def _CreateNewStorage(self, node_name):
7032 vgname = self.cfg.GetVGName()
7034 iv_names = {}
7035 for idx, dev in enumerate(self.instance.disks):
7036 if idx not in self.disks:
7039 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7041 self.cfg.SetDiskID(dev, node_name)
7043 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7044 names = _GenerateUniqueNames(self.lu, lv_names)
7046 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7047 logical_id=(vgname, names[0]))
7048 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7049 logical_id=(vgname, names[1]))
7051 new_lvs = [lv_data, lv_meta]
7052 old_lvs = dev.children
7053 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7055 # we pass force_create=True to force the LVM creation
7056 for new_lv in new_lvs:
7057 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7058 _GetInstanceInfoText(self.instance), False)
7060 return iv_names
7062 def _CheckDevices(self, node_name, iv_names):
7063 for name, (dev, _, _) in iv_names.iteritems():
7064 self.cfg.SetDiskID(dev, node_name)
7066 result = self.rpc.call_blockdev_find(node_name, dev)
7068 msg = result.fail_msg
7069 if msg or not result.payload:
7071 msg = "disk not found"
7072 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7075 if result.payload.is_degraded:
7076 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7078 def _RemoveOldStorage(self, node_name, iv_names):
7079 for name, (_, old_lvs, _) in iv_names.iteritems():
7080 self.lu.LogInfo("Remove logical volumes for %s" % name)
7082 for lv in old_lvs:
7083 self.cfg.SetDiskID(lv, node_name)
7085 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7087 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7088 hint="remove unused LVs manually")
7090 def _ReleaseNodeLock(self, node_name):
7091 """Releases the lock for a given node."""
7092 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7094 def _ExecDrbd8DiskOnly(self, feedback_fn):
7095 """Replace a disk on the primary or secondary for DRBD 8.
7097 The algorithm for replace is quite complicated:
7099 1. for each disk to be replaced:
7101 1. create new LVs on the target node with unique names
7102 1. detach old LVs from the drbd device
7103 1. rename old LVs to name_replaced.<time_t>
7104 1. rename new LVs to old LVs
7105 1. attach the new LVs (with the old names now) to the drbd device
7107 1. wait for sync across all devices
7109 1. for each modified disk:
7111 1. remove old LVs (which have the name name_replaced.<time_t>)
7113 Failures are not very well handled.
7115 """
7116 steps_total = 6
7118 # Step: check device activation
7119 self.lu.LogStep(1, steps_total, "Check device existence")
7120 self._CheckDisksExistence([self.other_node, self.target_node])
7121 self._CheckVolumeGroup([self.target_node, self.other_node])
7123 # Step: check other node consistency
7124 self.lu.LogStep(2, steps_total, "Check peer consistency")
7125 self._CheckDisksConsistency(self.other_node,
7126 self.other_node == self.instance.primary_node,
7127 False)
7129 # Step: create new storage
7130 self.lu.LogStep(3, steps_total, "Allocate new storage")
7131 iv_names = self._CreateNewStorage(self.target_node)
7133 # Step: for each lv, detach+rename*2+attach
7134 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7135 for dev, old_lvs, new_lvs in iv_names.itervalues():
7136 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7138 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7139 old_lvs)
7140 result.Raise("Can't detach drbd from local storage on node"
7141 " %s for device %s" % (self.target_node, dev.iv_name))
7143 #cfg.Update(instance)
7145 # ok, we created the new LVs, so now we know we have the needed
7146 # storage; as such, we proceed on the target node to rename
7147 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7148 # using the assumption that logical_id == physical_id (which in
7149 # turn is the unique_id on that node)
7151 # FIXME(iustin): use a better name for the replaced LVs
7152 temp_suffix = int(time.time())
7153 ren_fn = lambda d, suff: (d.physical_id[0],
7154 d.physical_id[1] + "_replaced-%s" % suff)
7156 # Build the rename list based on what LVs exist on the node
7157 rename_old_to_new = []
7158 for to_ren in old_lvs:
7159 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7160 if not result.fail_msg and result.payload:
7162 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7164 self.lu.LogInfo("Renaming the old LVs on the target node")
7165 result = self.rpc.call_blockdev_rename(self.target_node,
7166 rename_old_to_new)
7167 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7169 # Now we rename the new LVs to the old LVs
7170 self.lu.LogInfo("Renaming the new LVs on the target node")
7171 rename_new_to_old = [(new, old.physical_id)
7172 for old, new in zip(old_lvs, new_lvs)]
7173 result = self.rpc.call_blockdev_rename(self.target_node,
7174 rename_new_to_old)
7175 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7177 for old, new in zip(old_lvs, new_lvs):
7178 new.logical_id = old.logical_id
7179 self.cfg.SetDiskID(new, self.target_node)
7181 for disk in old_lvs:
7182 disk.logical_id = ren_fn(disk, temp_suffix)
7183 self.cfg.SetDiskID(disk, self.target_node)
7185 # Now that the new lvs have the old name, we can add them to the device
7186 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7187 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7188 new_lvs)
7189 msg = result.fail_msg
7190 if msg:
7191 for new_lv in new_lvs:
7192 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7193 new_lv).fail_msg
7194 if msg2:
7195 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7196 hint=("cleanup manually the unused logical"
7198 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7200 dev.children = new_lvs
7202 self.cfg.Update(self.instance, feedback_fn)
7204 cstep = 5
7205 if self.early_release:
7206 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7207 cstep += 1
7208 self._RemoveOldStorage(self.target_node, iv_names)
7209 # WARNING: we release both node locks here, do not do other RPCs
7210 # than WaitForSync to the primary node
7211 self._ReleaseNodeLock([self.target_node, self.other_node])
7213 # Wait for sync
7214 # This can fail as the old devices are degraded and _WaitForSync
7215 # does a combined result over all disks, so we don't check its return value
7216 self.lu.LogStep(cstep, steps_total, "Sync devices")
7217 cstep += 1
7218 _WaitForSync(self.lu, self.instance)
7220 # Check all devices manually
7221 self._CheckDevices(self.instance.primary_node, iv_names)
7223 # Step: remove old storage
7224 if not self.early_release:
7225 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7226 cstep += 1
7227 self._RemoveOldStorage(self.target_node, iv_names)
7229 def _ExecDrbd8Secondary(self, feedback_fn):
7230 """Replace the secondary node for DRBD 8.
7232 The algorithm for replace is quite complicated:
7233 - for all disks of the instance:
7234 - create new LVs on the new node with same names
7235 - shutdown the drbd device on the old secondary
7236 - disconnect the drbd network on the primary
7237 - create the drbd device on the new secondary
7238 - network attach the drbd on the primary, using an artifice:
7239 the drbd code for Attach() will connect to the network if it
7240 finds a device which is connected to the good local disks but
7241 not network enabled
7242 - wait for sync across all devices
7243 - remove all disks from the old secondary
7245 Failures are not very well handled.
7247 """
7248 steps_total = 6
7250 # Step: check device activation
7251 self.lu.LogStep(1, steps_total, "Check device existence")
7252 self._CheckDisksExistence([self.instance.primary_node])
7253 self._CheckVolumeGroup([self.instance.primary_node])
7255 # Step: check other node consistency
7256 self.lu.LogStep(2, steps_total, "Check peer consistency")
7257 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7259 # Step: create new storage
7260 self.lu.LogStep(3, steps_total, "Allocate new storage")
7261 for idx, dev in enumerate(self.instance.disks):
7262 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7263 (self.new_node, idx))
7264 # we pass force_create=True to force LVM creation
7265 for new_lv in dev.children:
7266 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7267 _GetInstanceInfoText(self.instance), False)
7269 # Step 4: dbrd minors and drbd setups changes
7270 # after this, we must manually remove the drbd minors on both the
7271 # error and the success paths
7272 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7273 minors = self.cfg.AllocateDRBDMinor([self.new_node
7274 for dev in self.instance.disks],
7275 self.instance.name)
7276 logging.debug("Allocated minors %r", minors)
7278 iv_names = {}
7279 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7280 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7281 (self.new_node, idx))
7282 # create new devices on new_node; note that we create two IDs:
7283 # one without port, so the drbd will be activated without
7284 # networking information on the new node at this stage, and one
7285 # with network, for the latter activation in step 4
7286 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7287 if self.instance.primary_node == o_node1:
7288 p_minor = o_minor1
7289 else:
7290 assert self.instance.primary_node == o_node2, "Three-node instance?"
7291 p_minor = o_minor2
7293 new_alone_id = (self.instance.primary_node, self.new_node, None,
7294 p_minor, new_minor, o_secret)
7295 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7296 p_minor, new_minor, o_secret)
7298 iv_names[idx] = (dev, dev.children, new_net_id)
7299 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7301 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7302 logical_id=new_alone_id,
7303 children=dev.children,
7304 size=dev.size)
7305 try:
7306 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7307 _GetInstanceInfoText(self.instance), False)
7308 except errors.GenericError:
7309 self.cfg.ReleaseDRBDMinors(self.instance.name)
7310 raise
7312 # We have new devices, shutdown the drbd on the old secondary
7313 for idx, dev in enumerate(self.instance.disks):
7314 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7315 self.cfg.SetDiskID(dev, self.target_node)
7316 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7318 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7319 "node: %s" % (idx, msg),
7320 hint=("Please cleanup this device manually as"
7321 " soon as possible"))
7323 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7324 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7325 self.node_secondary_ip,
7326 self.instance.disks)\
7327 [self.instance.primary_node]
7329 msg = result.fail_msg
7330 if msg:
7331 # detaches didn't succeed (unlikely)
7332 self.cfg.ReleaseDRBDMinors(self.instance.name)
7333 raise errors.OpExecError("Can't detach the disks from the network on"
7334 " old node: %s" % (msg,))
7336 # if we managed to detach at least one, we update all the disks of
7337 # the instance to point to the new secondary
7338 self.lu.LogInfo("Updating instance configuration")
7339 for dev, _, new_logical_id in iv_names.itervalues():
7340 dev.logical_id = new_logical_id
7341 self.cfg.SetDiskID(dev, self.instance.primary_node)
7343 self.cfg.Update(self.instance, feedback_fn)
7345 # and now perform the drbd attach
7346 self.lu.LogInfo("Attaching primary drbds to new secondary"
7347 " (standalone => connected)")
7348 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7349 self.new_node],
7350 self.node_secondary_ip,
7351 self.instance.disks,
7352 self.instance.name,
7353 False)
7354 for to_node, to_result in result.items():
7355 msg = to_result.fail_msg
7357 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7359 hint=("please do a gnt-instance info to see the"
7360 " status of disks"))
7361 cstep = 5
7362 if self.early_release:
7363 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7364 cstep += 1
7365 self._RemoveOldStorage(self.target_node, iv_names)
7366 # WARNING: we release all node locks here, do not do other RPCs
7367 # than WaitForSync to the primary node
7368 self._ReleaseNodeLock([self.instance.primary_node,
7369 self.target_node,
7370 self.new_node])
7372 # Wait for sync
7373 # This can fail as the old devices are degraded and _WaitForSync
7374 # does a combined result over all disks, so we don't check its return value
7375 self.lu.LogStep(cstep, steps_total, "Sync devices")
7376 cstep += 1
7377 _WaitForSync(self.lu, self.instance)
7379 # Check all devices manually
7380 self._CheckDevices(self.instance.primary_node, iv_names)
7382 # Step: remove old storage
7383 if not self.early_release:
7384 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7385 self._RemoveOldStorage(self.target_node, iv_names)
7388 class LURepairNodeStorage(NoHooksLU):
7389 """Repairs the volume group on a node.
7392 _OP_REQP = ["node_name"]
7395 def CheckArguments(self):
7396 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7398 def ExpandNames(self):
7399 self.needed_locks = {
7400 locking.LEVEL_NODE: [self.op.node_name],
7401 }
7403 def _CheckFaultyDisks(self, instance, node_name):
7404 """Ensure faulty disks abort the opcode or at least warn."""
7405 try:
7406 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7407 node_name, True):
7408 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7409 " node '%s'" % (instance.name, node_name),
7411 except errors.OpPrereqError, err:
7412 if self.op.ignore_consistency:
7413 self.proc.LogWarning(str(err.args[0]))
7414 else:
7415 raise
7417 def CheckPrereq(self):
7418 """Check prerequisites.
7421 storage_type = self.op.storage_type
7423 if (constants.SO_FIX_CONSISTENCY not in
7424 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7425 raise errors.OpPrereqError("Storage units of type '%s' can not be"
7426 " repaired" % storage_type,
7429 # Check whether any instance on this node has faulty disks
7430 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7431 if not inst.admin_up:
7432 continue
7433 check_nodes = set(inst.all_nodes)
7434 check_nodes.discard(self.op.node_name)
7435 for inst_node_name in check_nodes:
7436 self._CheckFaultyDisks(inst, inst_node_name)
7438 def Exec(self, feedback_fn):
7439 feedback_fn("Repairing storage unit '%s' on %s ..." %
7440 (self.op.name, self.op.node_name))
7442 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7443 result = self.rpc.call_storage_execute(self.op.node_name,
7444 self.op.storage_type, st_args,
7445 self.op.name,
7446 constants.SO_FIX_CONSISTENCY)
7447 result.Raise("Failed to repair storage unit '%s' on %s" %
7448 (self.op.name, self.op.node_name))
7451 class LUNodeEvacuationStrategy(NoHooksLU):
7452 """Computes the node evacuation strategy.
7455 _OP_REQP = ["nodes"]
7458 def CheckArguments(self):
7459 if not hasattr(self.op, "remote_node"):
7460 self.op.remote_node = None
7461 if not hasattr(self.op, "iallocator"):
7462 self.op.iallocator = None
7463 if self.op.remote_node is not None and self.op.iallocator is not None:
7464 raise errors.OpPrereqError("Give either the iallocator or the new"
7465 " secondary, not both", errors.ECODE_INVAL)
7467 def ExpandNames(self):
7468 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7469 self.needed_locks = locks = {}
7470 if self.op.remote_node is None:
7471 locks[locking.LEVEL_NODE] = locking.ALL_SET
7472 else:
7473 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7474 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7476 def CheckPrereq(self):
7477 pass
7479 def Exec(self, feedback_fn):
7480 if self.op.remote_node is not None:
7481 instances = []
7482 for node in self.op.nodes:
7483 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7484 result = []
7485 for i in instances:
7486 if i.primary_node == self.op.remote_node:
7487 raise errors.OpPrereqError("Node %s is the primary node of"
7488 " instance %s, cannot use it as"
7489 " secondary" %
7490 (self.op.remote_node, i.name),
7491 errors.ECODE_INVAL)
7492 result.append([i.name, self.op.remote_node])
7493 else:
7494 ial = IAllocator(self.cfg, self.rpc,
7495 mode=constants.IALLOCATOR_MODE_MEVAC,
7496 evac_nodes=self.op.nodes)
7497 ial.Run(self.op.iallocator, validate=True)
7499 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
7505 class LUGrowDisk(LogicalUnit):
7506 """Grow a disk of an instance.
7508 """
7509 HPATH = "disk-grow"
7510 HTYPE = constants.HTYPE_INSTANCE
7511 _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7514 def ExpandNames(self):
7515 self._ExpandAndLockInstance()
7516 self.needed_locks[locking.LEVEL_NODE] = []
7517 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7519 def DeclareLocks(self, level):
7520 if level == locking.LEVEL_NODE:
7521 self._LockInstancesNodes()
7523 def BuildHooksEnv(self):
7524 """Build hooks env.
7526 This runs on the master, the primary and all the secondaries.
7528 """
7529 env = {
7530 "DISK": self.op.disk,
7531 "AMOUNT": self.op.amount,
7532 }
7533 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7534 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7535 return env, nl, nl
7537 def CheckPrereq(self):
7538 """Check prerequisites.
7540 This checks that the instance is in the cluster.
7543 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7544 assert instance is not None, \
7545 "Cannot retrieve locked instance %s" % self.op.instance_name
7546 nodenames = list(instance.all_nodes)
7547 for node in nodenames:
7548 _CheckNodeOnline(self, node)
7551 self.instance = instance
7553 if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
7554 raise errors.OpPrereqError("Instance's disk layout does not support"
7555 " growing.", errors.ECODE_INVAL)
7557 self.disk = instance.FindDisk(self.op.disk)
7559 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
7561 def Exec(self, feedback_fn):
7562 """Execute disk grow.
7565 instance = self.instance
7567 for node in instance.all_nodes:
7568 self.cfg.SetDiskID(disk, node)
7569 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7570 result.Raise("Grow request failed on node %s" % node)
7572 # TODO: Rewrite code to work properly
7573 # DRBD goes into sync mode for a short amount of time after executing the
7574 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7575 # calling "resize" in sync mode fails. Sleeping for a short amount of
7576 # time is a work-around.
7579 disk.RecordGrow(self.op.amount)
7580 self.cfg.Update(instance, feedback_fn)
7581 if self.op.wait_for_sync:
7582 disk_abort = not _WaitForSync(self, instance)
7584 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7585 " status.\nPlease check the instance.")
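# Usage sketch (not part of the original code): this LU backs the
# "gnt-instance grow-disk" command, e.g.
#
#   gnt-instance grow-disk inst1.example.com 0 2048
#
# which grows disk index 0 of inst1.example.com; the wait_for_sync opcode
# parameter decides whether we block until the disks have resynced. The
# mebibyte unit for the amount is an assumption based on how disk sizes are
# handled elsewhere in this module.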
7588 class LUQueryInstanceData(NoHooksLU):
7589 """Query runtime instance data.
7592 _OP_REQP = ["instances", "static"]
7595 def ExpandNames(self):
7596 self.needed_locks = {}
7597 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7599 if not isinstance(self.op.instances, list):
7600 raise errors.OpPrereqError("Invalid argument type 'instances'",
7603 if self.op.instances:
7604 self.wanted_names = []
7605 for name in self.op.instances:
7606 full_name = _ExpandInstanceName(self.cfg, name)
7607 self.wanted_names.append(full_name)
7608 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7609 else:
7610 self.wanted_names = None
7611 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7613 self.needed_locks[locking.LEVEL_NODE] = []
7614 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7616 def DeclareLocks(self, level):
7617 if level == locking.LEVEL_NODE:
7618 self._LockInstancesNodes()
7620 def CheckPrereq(self):
7621 """Check prerequisites.
7623 This only checks the optional instance list against the existing names.
7626 if self.wanted_names is None:
7627 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7629 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7630 in self.wanted_names]
7633 def _ComputeBlockdevStatus(self, node, instance_name, dev):
7634 """Returns the status of a block device
7637 if self.op.static or not node:
7640 self.cfg.SetDiskID(dev, node)
7642 result = self.rpc.call_blockdev_find(node, dev)
7646 result.Raise("Can't compute disk status for %s" % instance_name)
7648 status = result.payload
7652 return (status.dev_path, status.major, status.minor,
7653 status.sync_percent, status.estimated_time,
7654 status.is_degraded, status.ldisk_status)
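# Illustrative example (hypothetical values; LDS_OKAY is the assumed name of
# the "healthy" ldisk constant): for a nearly-synced DRBD device the tuple
# above could look like
#
#   ("/dev/drbd0", 147, 0, 99.5, 20, False, constants.LDS_OKAY)
#
# i.e. (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
# ldisk_status); in the static case no RPC is made and the dynamic fields
# are not filled in.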
7656 def _ComputeDiskStatus(self, instance, snode, dev):
7657 """Compute block device status.
7660 if dev.dev_type in constants.LDS_DRBD:
7661 # we change the snode then (otherwise we use the one passed in)
7662 if dev.logical_id[0] == instance.primary_node:
7663 snode = dev.logical_id[1]
7665 snode = dev.logical_id[0]
7667 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
7669 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
7672 dev_children = [self._ComputeDiskStatus(instance, snode, child)
7673 for child in dev.children]
7678 "iv_name": dev.iv_name,
7679 "dev_type": dev.dev_type,
7680 "logical_id": dev.logical_id,
7681 "physical_id": dev.physical_id,
7682 "pstatus": dev_pstatus,
7683 "sstatus": dev_sstatus,
7684 "children": dev_children,
7691 def Exec(self, feedback_fn):
7692 """Gather and return data"""
7695 cluster = self.cfg.GetClusterInfo()
7697 for instance in self.wanted_instances:
7698 if not self.op.static:
7699 remote_info = self.rpc.call_instance_info(instance.primary_node,
7701 instance.hypervisor)
7702 remote_info.Raise("Error checking node %s" % instance.primary_node)
7703 remote_info = remote_info.payload
7704 if remote_info and "state" in remote_info:
7707 remote_state = "down"
7710 if instance.admin_up:
7713 config_state = "down"
7715 disks = [self._ComputeDiskStatus(instance, None, device)
7716 for device in instance.disks]
7719 "name": instance.name,
7720 "config_state": config_state,
7721 "run_state": remote_state,
7722 "pnode": instance.primary_node,
7723 "snodes": instance.secondary_nodes,
7725 # this happens to be the same format used for hooks
7726 "nics": _NICListToTuple(self, instance.nics),
7728 "hypervisor": instance.hypervisor,
7729 "network_port": instance.network_port,
7730 "hv_instance": instance.hvparams,
7731 "hv_actual": cluster.FillHV(instance, skip_globals=True),
7732 "be_instance": instance.beparams,
7733 "be_actual": cluster.FillBE(instance),
7734 "serial_no": instance.serial_no,
7735 "mtime": instance.mtime,
7736 "ctime": instance.ctime,
7737 "uuid": instance.uuid,
7740 result[instance.name] = idict
7745 class LUSetInstanceParams(LogicalUnit):
7746 """Modifies an instance's parameters.
7749 HPATH = "instance-modify"
7750 HTYPE = constants.HTYPE_INSTANCE
7751 _OP_REQP = ["instance_name"]
7754 def CheckArguments(self):
7755 if not hasattr(self.op, 'nics'):
7757 if not hasattr(self.op, 'disks'):
7759 if not hasattr(self.op, 'beparams'):
7760 self.op.beparams = {}
7761 if not hasattr(self.op, 'hvparams'):
7762 self.op.hvparams = {}
7763 self.op.force = getattr(self.op, "force", False)
7764 if not (self.op.nics or self.op.disks or
7765 self.op.hvparams or self.op.beparams):
7766 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
7768 if self.op.hvparams:
7769 _CheckGlobalHvParams(self.op.hvparams)
7773 for disk_op, disk_dict in self.op.disks:
7774 if disk_op == constants.DDM_REMOVE:
7777 elif disk_op == constants.DDM_ADD:
7780 if not isinstance(disk_op, int):
7781 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
7782 if not isinstance(disk_dict, dict):
7783 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
7784 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7786 if disk_op == constants.DDM_ADD:
7787 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
7788 if mode not in constants.DISK_ACCESS_SET:
7789 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
7791 size = disk_dict.get('size', None)
7793 raise errors.OpPrereqError("Required disk parameter size missing",
7797 except (TypeError, ValueError), err:
7798 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
7799 str(err), errors.ECODE_INVAL)
7800 disk_dict['size'] = size
7802 # modification of disk
7803 if 'size' in disk_dict:
7804 raise errors.OpPrereqError("Disk size change not possible, use"
7805 " grow-disk", errors.ECODE_INVAL)
7807 if disk_addremove > 1:
7808 raise errors.OpPrereqError("Only one disk add or remove operation"
7809 " supported at a time", errors.ECODE_INVAL)
7813 for nic_op, nic_dict in self.op.nics:
7814 if nic_op == constants.DDM_REMOVE:
7817 elif nic_op == constants.DDM_ADD:
7820 if not isinstance(nic_op, int):
7821 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
7822 if not isinstance(nic_dict, dict):
7823 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
7824 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7826 # nic_dict should be a dict
7827 nic_ip = nic_dict.get('ip', None)
7828 if nic_ip is not None:
7829 if nic_ip.lower() == constants.VALUE_NONE:
7830 nic_dict['ip'] = None
7832 if not utils.IsValidIP(nic_ip):
7833 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
7836 nic_bridge = nic_dict.get('bridge', None)
7837 nic_link = nic_dict.get('link', None)
7838 if nic_bridge and nic_link:
7839 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7840 " at the same time", errors.ECODE_INVAL)
7841 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
7842 nic_dict['bridge'] = None
7843 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
7844 nic_dict['link'] = None
7846 if nic_op == constants.DDM_ADD:
7847 nic_mac = nic_dict.get('mac', None)
7848 if nic_mac is None:
7849 nic_dict['mac'] = constants.VALUE_AUTO
7851 if 'mac' in nic_dict:
7852 nic_mac = nic_dict['mac']
7853 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7854 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
7856 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
7857 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
7858 " modifying an existing nic",
7861 if nic_addremove > 1:
7862 raise errors.OpPrereqError("Only one NIC add or remove operation"
7863 " supported at a time", errors.ECODE_INVAL)
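# Illustrative opcode input (a sketch, not from the original source): both
# self.op.disks and self.op.nics are lists of (op, params) pairs, where op
# is constants.DDM_ADD, constants.DDM_REMOVE or the integer index of the
# device to modify, e.g.:
#
#   disks=[(constants.DDM_ADD, {"size": 2048, "mode": constants.DISK_RDWR})]
#   disks=[(0, {"mode": constants.DISK_RDONLY})]  # make disk 0 read-only
#   nics=[(constants.DDM_REMOVE, {})]             # drop the last NIC
#   nics=[(1, {"mac": constants.VALUE_GENERATE})] # regenerate NIC 1's MAC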
7865 def ExpandNames(self):
7866 self._ExpandAndLockInstance()
7867 self.needed_locks[locking.LEVEL_NODE] = []
7868 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7870 def DeclareLocks(self, level):
7871 if level == locking.LEVEL_NODE:
7872 self._LockInstancesNodes()
7874 def BuildHooksEnv(self):
7877 This runs on the master, primary and secondaries.
7881 if constants.BE_MEMORY in self.be_new:
7882 args['memory'] = self.be_new[constants.BE_MEMORY]
7883 if constants.BE_VCPUS in self.be_new:
7884 args['vcpus'] = self.be_new[constants.BE_VCPUS]
7885 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
7886 # information at all.
7889 nic_override = dict(self.op.nics)
7890 c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
7891 for idx, nic in enumerate(self.instance.nics):
7892 if idx in nic_override:
7893 this_nic_override = nic_override[idx]
7894 else:
7895 this_nic_override = {}
7896 if 'ip' in this_nic_override:
7897 ip = this_nic_override['ip']
7900 if 'mac' in this_nic_override:
7901 mac = this_nic_override['mac']
7904 if idx in self.nic_pnew:
7905 nicparams = self.nic_pnew[idx]
7906 else:
7907 nicparams = objects.FillDict(c_nicparams, nic.nicparams)
7908 mode = nicparams[constants.NIC_MODE]
7909 link = nicparams[constants.NIC_LINK]
7910 args['nics'].append((ip, mac, mode, link))
7911 if constants.DDM_ADD in nic_override:
7912 ip = nic_override[constants.DDM_ADD].get('ip', None)
7913 mac = nic_override[constants.DDM_ADD]['mac']
7914 nicparams = self.nic_pnew[constants.DDM_ADD]
7915 mode = nicparams[constants.NIC_MODE]
7916 link = nicparams[constants.NIC_LINK]
7917 args['nics'].append((ip, mac, mode, link))
7918 elif constants.DDM_REMOVE in nic_override:
7919 del args['nics'][-1]
7921 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
7922 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7926 def _GetUpdatedParams(old_params, update_dict,
7927 default_values, parameter_types):
7928 """Return the new params dict for the given params.
7930 @type old_params: dict
7931 @param old_params: old parameters
7932 @type update_dict: dict
7933 @param update_dict: dict containing new parameter values,
7934 or constants.VALUE_DEFAULT to reset the
7935 parameter to its default value
7936 @type default_values: dict
7937 @param default_values: default values for the filled parameters
7938 @type parameter_types: dict
7939 @param parameter_types: dict mapping target dict keys to types
7940 in constants.ENFORCEABLE_TYPES
7941 @rtype: (dict, dict)
7942 @return: (new_parameters, filled_parameters)
7945 params_copy = copy.deepcopy(old_params)
7946 for key, val in update_dict.iteritems():
7947 if val == constants.VALUE_DEFAULT:
7949 del params_copy[key]
7953 params_copy[key] = val
7954 utils.ForceDictType(params_copy, parameter_types)
7955 params_filled = objects.FillDict(default_values, params_copy)
7956 return (params_copy, params_filled)
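# Worked example (made-up values; type enforcement via ForceDictType not
# shown): with
#
#   old_params     = {"memory": 512, "vcpus": 2}
#   update_dict    = {"memory": constants.VALUE_DEFAULT, "auto_balance": True}
#   default_values = {"memory": 128, "vcpus": 1, "auto_balance": False}
#
# the returned pair is
#
#   new_parameters    == {"vcpus": 2, "auto_balance": True}
#   filled_parameters == {"memory": 128, "vcpus": 2, "auto_balance": True}
#
# i.e. VALUE_DEFAULT removes the key so the default shows through, while any
# other value overrides both the old and the default setting.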
7958 def CheckPrereq(self):
7959 """Check prerequisites.
7961 This checks that the instance exists and validates the requested
7962 parameter changes against the current cluster configuration.
7964 self.force = self.op.force
7966 # checking the new params on the primary/secondary nodes
7968 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7969 cluster = self.cluster = self.cfg.GetClusterInfo()
7970 assert self.instance is not None, \
7971 "Cannot retrieve locked instance %s" % self.op.instance_name
7972 pnode = instance.primary_node
7973 nodelist = list(instance.all_nodes)
7975 # hvparams processing
7976 if self.op.hvparams:
7977 i_hvdict, hv_new = self._GetUpdatedParams(
7978 instance.hvparams, self.op.hvparams,
7979 cluster.hvparams[instance.hypervisor],
7980 constants.HVS_PARAMETER_TYPES)
7982 hypervisor.GetHypervisor(
7983 instance.hypervisor).CheckParameterSyntax(hv_new)
7984 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
7985 self.hv_new = hv_new # the new actual values
7986 self.hv_inst = i_hvdict # the new dict (without defaults)
7987 else:
7988 self.hv_new = self.hv_inst = {}
7990 # beparams processing
7991 if self.op.beparams:
7992 i_bedict, be_new = self._GetUpdatedParams(
7993 instance.beparams, self.op.beparams,
7994 cluster.beparams[constants.PP_DEFAULT],
7995 constants.BES_PARAMETER_TYPES)
7996 self.be_new = be_new # the new actual values
7997 self.be_inst = i_bedict # the new dict (without defaults)
7998 else:
7999 self.be_new = self.be_inst = {}
8003 if constants.BE_MEMORY in self.op.beparams and not self.force:
8004 mem_check_list = [pnode]
8005 if be_new[constants.BE_AUTO_BALANCE]:
8006 # either we changed auto_balance to yes or it was from before
8007 mem_check_list.extend(instance.secondary_nodes)
8008 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8009 instance.hypervisor)
8010 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8011 instance.hypervisor)
8012 pninfo = nodeinfo[pnode]
8013 msg = pninfo.fail_msg
8015 # Assume the primary node is unreachable and go ahead
8016 self.warn.append("Can't get info from primary node %s: %s" %
8018 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8019 self.warn.append("Node data from primary node %s doesn't contain"
8020 " free memory information" % pnode)
8021 elif instance_info.fail_msg:
8022 self.warn.append("Can't get instance runtime information: %s" %
8023 instance_info.fail_msg)
8025 if instance_info.payload:
8026 current_mem = int(instance_info.payload['memory'])
8028 # Assume instance not running
8029 # (there is a slight race condition here, but it's not very probable,
8030 # and we have no other way to check)
8032 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8033 pninfo.payload['memory_free'])
8035 raise errors.OpPrereqError("This change will prevent the instance"
8036 " from starting, due to %d MB of memory"
8037 " missing on its primary node" % miss_mem,
8040 if be_new[constants.BE_AUTO_BALANCE]:
8041 for node, nres in nodeinfo.items():
8042 if node not in instance.secondary_nodes:
8046 self.warn.append("Can't get info from secondary node %s: %s" %
8048 elif not isinstance(nres.payload.get('memory_free', None), int):
8049 self.warn.append("Secondary node %s didn't return free"
8050 " memory information" % node)
8051 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8052 self.warn.append("Not enough memory to failover instance to"
8053 " secondary node %s" % node)
8058 for nic_op, nic_dict in self.op.nics:
8059 if nic_op == constants.DDM_REMOVE:
8060 if not instance.nics:
8061 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8064 if nic_op != constants.DDM_ADD:
8066 if not instance.nics:
8067 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8068 " no NICs" % nic_op,
8070 if nic_op < 0 or nic_op >= len(instance.nics):
8071 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8073 (nic_op, len(instance.nics) - 1),
8075 old_nic_params = instance.nics[nic_op].nicparams
8076 old_nic_ip = instance.nics[nic_op].ip
8081 update_params_dict = dict([(key, nic_dict[key])
8082 for key in constants.NICS_PARAMETERS
8083 if key in nic_dict])
8085 if 'bridge' in nic_dict:
8086 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8088 new_nic_params, new_filled_nic_params = \
8089 self._GetUpdatedParams(old_nic_params, update_params_dict,
8090 cluster.nicparams[constants.PP_DEFAULT],
8091 constants.NICS_PARAMETER_TYPES)
8092 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8093 self.nic_pinst[nic_op] = new_nic_params
8094 self.nic_pnew[nic_op] = new_filled_nic_params
8095 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8097 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8098 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8099 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8101 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8103 self.warn.append(msg)
8105 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8106 if new_nic_mode == constants.NIC_MODE_ROUTED:
8107 if 'ip' in nic_dict:
8108 nic_ip = nic_dict['ip']
8112 raise errors.OpPrereqError('Cannot set the nic ip to None'
8113 ' on a routed nic', errors.ECODE_INVAL)
8114 if 'mac' in nic_dict:
8115 nic_mac = nic_dict['mac']
8117 raise errors.OpPrereqError('Cannot set the nic mac to None',
8119 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8120 # otherwise generate the mac
8121 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8123 # or validate/reserve the current one
8125 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8126 except errors.ReservationError:
8127 raise errors.OpPrereqError("MAC address %s already in use"
8128 " in cluster" % nic_mac,
8129 errors.ECODE_NOTUNIQUE)
8132 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8133 raise errors.OpPrereqError("Disk operations not supported for"
8134 " diskless instances",
8136 for disk_op, _ in self.op.disks:
8137 if disk_op == constants.DDM_REMOVE:
8138 if len(instance.disks) == 1:
8139 raise errors.OpPrereqError("Cannot remove the last disk of"
8142 ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
8143 ins_l = ins_l[pnode]
8144 msg = ins_l.fail_msg
8146 raise errors.OpPrereqError("Can't contact node %s: %s" %
8147 (pnode, msg), errors.ECODE_ENVIRON)
8148 if instance.name in ins_l.payload:
8149 raise errors.OpPrereqError("Instance is running, can't remove"
8150 " disks.", errors.ECODE_STATE)
8152 if (disk_op == constants.DDM_ADD and
8153 len(instance.disks) >= constants.MAX_DISKS):
8154 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8155 " add more" % constants.MAX_DISKS,
8157 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8159 if disk_op < 0 or disk_op >= len(instance.disks):
8160 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8162 (disk_op, len(instance.disks)),
8167 def Exec(self, feedback_fn):
8168 """Modifies an instance.
8170 All parameters take effect only at the next restart of the instance.
8173 # Process here the warnings from CheckPrereq, as we don't have a
8174 # feedback_fn there.
8175 for warn in self.warn:
8176 feedback_fn("WARNING: %s" % warn)
8179 instance = self.instance
8181 for disk_op, disk_dict in self.op.disks:
8182 if disk_op == constants.DDM_REMOVE:
8183 # remove the last disk
8184 device = instance.disks.pop()
8185 device_idx = len(instance.disks)
8186 for node, disk in device.ComputeNodeTree(instance.primary_node):
8187 self.cfg.SetDiskID(disk, node)
8188 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8190 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8191 " continuing anyway", device_idx, node, msg)
8192 result.append(("disk/%d" % device_idx, "remove"))
8193 elif disk_op == constants.DDM_ADD:
8195 if instance.disk_template == constants.DT_FILE:
8196 file_driver, file_path = instance.disks[0].logical_id
8197 file_path = os.path.dirname(file_path)
8198 else:
8199 file_driver = file_path = None
8200 disk_idx_base = len(instance.disks)
8201 new_disk = _GenerateDiskTemplate(self,
8202 instance.disk_template,
8203 instance.name, instance.primary_node,
8204 instance.secondary_nodes,
8209 instance.disks.append(new_disk)
8210 info = _GetInstanceInfoText(instance)
8212 logging.info("Creating volume %s for instance %s",
8213 new_disk.iv_name, instance.name)
8214 # Note: this needs to be kept in sync with _CreateDisks
8216 for node in instance.all_nodes:
8217 f_create = node == instance.primary_node
8219 _CreateBlockDev(self, node, instance, new_disk,
8220 f_create, info, f_create)
8221 except errors.OpExecError, err:
8222 self.LogWarning("Failed to create volume %s (%s) on"
8224 new_disk.iv_name, new_disk, node, err)
8225 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8226 (new_disk.size, new_disk.mode)))
8228 # change a given disk
8229 instance.disks[disk_op].mode = disk_dict['mode']
8230 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8232 for nic_op, nic_dict in self.op.nics:
8233 if nic_op == constants.DDM_REMOVE:
8234 # remove the last nic
8235 del instance.nics[-1]
8236 result.append(("nic.%d" % len(instance.nics), "remove"))
8237 elif nic_op == constants.DDM_ADD:
8238 # mac and bridge should be set by now
8239 mac = nic_dict['mac']
8240 ip = nic_dict.get('ip', None)
8241 nicparams = self.nic_pinst[constants.DDM_ADD]
8242 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8243 instance.nics.append(new_nic)
8244 result.append(("nic.%d" % (len(instance.nics) - 1),
8245 "add:mac=%s,ip=%s,mode=%s,link=%s" %
8246 (new_nic.mac, new_nic.ip,
8247 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8248 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8251 for key in 'mac', 'ip':
8253 setattr(instance.nics[nic_op], key, nic_dict[key])
8254 if nic_op in self.nic_pinst:
8255 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8256 for key, val in nic_dict.iteritems():
8257 result.append(("nic.%s/%d" % (key, nic_op), val))
8260 if self.op.hvparams:
8261 instance.hvparams = self.hv_inst
8262 for key, val in self.op.hvparams.iteritems():
8263 result.append(("hv/%s" % key, val))
8266 if self.op.beparams:
8267 instance.beparams = self.be_inst
8268 for key, val in self.op.beparams.iteritems():
8269 result.append(("be/%s" % key, val))
8271 self.cfg.Update(instance, feedback_fn)
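# Illustrative note (not from the original source): the accumulated result
# is a list of (parameter, new value) pairs handed back to the caller, e.g.
#
#   [("disk/1", "add:size=2048,mode=rw"),
#    ("nic.mac/0", "aa:00:00:35:6a:00"),
#    ("be/memory", 512)]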
8276 class LUQueryExports(NoHooksLU):
8277 """Query the exports list
8280 _OP_REQP = ['nodes']
8283 def ExpandNames(self):
8284 self.needed_locks = {}
8285 self.share_locks[locking.LEVEL_NODE] = 1
8286 if not self.op.nodes:
8287 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8288 else:
8289 self.needed_locks[locking.LEVEL_NODE] = \
8290 _GetWantedNodes(self, self.op.nodes)
8292 def CheckPrereq(self):
8293 """Check prerequisites.
8296 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8298 def Exec(self, feedback_fn):
8299 """Compute the list of all the exported system images.
8302 @return: a dictionary with the structure node->(export-list)
8303 where export-list is a list of the instances exported on
8307 rpcresult = self.rpc.call_export_list(self.nodes)
8309 for node in rpcresult:
8310 if rpcresult[node].fail_msg:
8311 result[node] = False
8312 else:
8313 result[node] = rpcresult[node].payload
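# Illustrative return value (hypothetical names): a mapping from node name
# to its export list, or False for nodes that could not be queried, e.g.
#
#   {"node1.example.com": ["inst1.example.com", "inst2.example.com"],
#    "node2.example.com": False}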
8318 class LUExportInstance(LogicalUnit):
8319 """Export an instance to an image in the cluster.
8322 HPATH = "instance-export"
8323 HTYPE = constants.HTYPE_INSTANCE
8324 _OP_REQP = ["instance_name", "target_node", "shutdown"]
8327 def CheckArguments(self):
8328 """Check the arguments.
8331 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
8332 constants.DEFAULT_SHUTDOWN_TIMEOUT)
8334 def ExpandNames(self):
8335 self._ExpandAndLockInstance()
8336 # FIXME: lock only instance primary and destination node
8338 # Sad but true: for now we have to lock all nodes, as we don't know where
8339 # the previous export might be, and in this LU we search for it and
8340 # remove it from its current node. In the future we could fix this by:
8341 # - making a tasklet to search (share-lock all), then create the new one,
8342 # then one to remove, after
8343 # - removing the removal operation altogether
8344 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8346 def DeclareLocks(self, level):
8347 """Last minute lock declaration."""
8348 # All nodes are locked anyway, so nothing to do here.
8350 def BuildHooksEnv(self):
8353 This will run on the master, primary node and target node.
8357 "EXPORT_NODE": self.op.target_node,
8358 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
8359 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
8361 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8362 nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
8363 self.op.target_node]
8366 def CheckPrereq(self):
8367 """Check prerequisites.
8369 This checks that the instance and node names are valid.
8372 instance_name = self.op.instance_name
8373 self.instance = self.cfg.GetInstanceInfo(instance_name)
8374 assert self.instance is not None, \
8375 "Cannot retrieve locked instance %s" % self.op.instance_name
8376 _CheckNodeOnline(self, self.instance.primary_node)
8378 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8379 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
8380 assert self.dst_node is not None
8382 _CheckNodeOnline(self, self.dst_node.name)
8383 _CheckNodeNotDrained(self, self.dst_node.name)
8385 # instance disk type verification
8386 for disk in self.instance.disks:
8387 if disk.dev_type == constants.LD_FILE:
8388 raise errors.OpPrereqError("Export not supported for instances with"
8389 " file-based disks", errors.ECODE_INVAL)
8391 def Exec(self, feedback_fn):
8392 """Export an instance to an image in the cluster.
8395 instance = self.instance
8396 dst_node = self.dst_node
8397 src_node = instance.primary_node
8399 if self.op.shutdown:
8400 # shutdown the instance, but not the disks
8401 feedback_fn("Shutting down instance %s" % instance.name)
8402 result = self.rpc.call_instance_shutdown(src_node, instance,
8403 self.shutdown_timeout)
8404 result.Raise("Could not shutdown instance %s on"
8405 " node %s" % (instance.name, src_node))
8407 vgname = self.cfg.GetVGName()
8411 # set the disk IDs correctly, since call_instance_start needs the
8412 # correct drbd minor to create the symlinks
8413 for disk in instance.disks:
8414 self.cfg.SetDiskID(disk, src_node)
8416 activate_disks = (not instance.admin_up)
8419 # Activate the instance disks if we're exporting a stopped instance
8420 feedback_fn("Activating disks for %s" % instance.name)
8421 _StartInstanceDisks(self, instance, None)
8427 for idx, disk in enumerate(instance.disks):
8428 feedback_fn("Creating a snapshot of disk/%s on node %s" %
8431 # result.payload will be a snapshot of an lvm leaf of the one we
8432 # passed
8433 result = self.rpc.call_blockdev_snapshot(src_node, disk)
8434 msg = result.fail_msg
8436 self.LogWarning("Could not snapshot disk/%s on node %s: %s",
8438 snap_disks.append(False)
8440 disk_id = (vgname, result.payload)
8441 new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
8442 logical_id=disk_id, physical_id=disk_id,
8443 iv_name=disk.iv_name)
8444 snap_disks.append(new_dev)
8447 if self.op.shutdown and instance.admin_up:
8448 feedback_fn("Starting instance %s" % instance.name)
8449 result = self.rpc.call_instance_start(src_node, instance, None, None)
8450 msg = result.fail_msg
8452 _ShutdownInstanceDisks(self, instance)
8453 raise errors.OpExecError("Could not start instance: %s" % msg)
8455 # TODO: check for size
8457 cluster_name = self.cfg.GetClusterName()
8458 for idx, dev in enumerate(snap_disks):
8459 feedback_fn("Exporting snapshot %s from %s to %s" %
8460 (idx, src_node, dst_node.name))
8462 # FIXME: pass debug from opcode to backend
8463 result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
8464 instance, cluster_name,
8465 idx, self.op.debug_level)
8466 msg = result.fail_msg
8468 self.LogWarning("Could not export disk/%s from node %s to"
8469 " node %s: %s", idx, src_node, dst_node.name, msg)
8470 dresults.append(False)
8471 else:
8472 dresults.append(True)
8473 msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
8475 self.LogWarning("Could not remove snapshot for disk/%d from node"
8476 " %s: %s", idx, src_node, msg)
8478 dresults.append(False)
8480 feedback_fn("Finalizing export on %s" % dst_node.name)
8481 result = self.rpc.call_finalize_export(dst_node.name, instance,
8484 msg = result.fail_msg
8486 self.LogWarning("Could not finalize export for instance %s"
8487 " on node %s: %s", instance.name, dst_node.name, msg)
8492 feedback_fn("Deactivating disks for %s" % instance.name)
8493 _ShutdownInstanceDisks(self, instance)
8495 nodelist = self.cfg.GetNodeList()
8496 nodelist.remove(dst_node.name)
8498 # on one-node clusters nodelist will be empty after the removal
8499 # if we proceed, the backup would be removed because OpQueryExports
8500 # substitutes an empty list with the full cluster node list.
8501 iname = instance.name
8503 feedback_fn("Removing old exports for instance %s" % iname)
8504 exportlist = self.rpc.call_export_list(nodelist)
8505 for node in exportlist:
8506 if exportlist[node].fail_msg:
8508 if iname in exportlist[node].payload:
8509 msg = self.rpc.call_export_remove(node, iname).fail_msg
8511 self.LogWarning("Could not remove older export for instance %s"
8512 " on node %s: %s", iname, node, msg)
8513 return fin_resu, dresults
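# Illustrative note (not from the original source): the returned pair is
# (finalization status, per-disk booleans), e.g. (True, [True, False]) for
# an export whose second disk could not be snapshotted or transferred.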
8516 class LURemoveExport(NoHooksLU):
8517 """Remove exports related to the named instance.
8520 _OP_REQP = ["instance_name"]
8523 def ExpandNames(self):
8524 self.needed_locks = {}
8525 # We need all nodes to be locked in order for RemoveExport to work, but we
8526 # don't need to lock the instance itself, as nothing will happen to it (and
8527 # we can also remove exports for an already-removed instance)
8528 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8530 def CheckPrereq(self):
8531 """Check prerequisites.
8535 def Exec(self, feedback_fn):
8536 """Remove any export.
8539 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
8540 # If the instance was not found, we'll try with the name that was passed in.
8541 # This will only work if it was an FQDN, though.
8542 fqdn_warn = False
8543 if not instance_name:
8544 fqdn_warn = True
8545 instance_name = self.op.instance_name
8547 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
8548 exportlist = self.rpc.call_export_list(locked_nodes)
8550 for node in exportlist:
8551 msg = exportlist[node].fail_msg
8553 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
8555 if instance_name in exportlist[node].payload:
8557 result = self.rpc.call_export_remove(node, instance_name)
8558 msg = result.fail_msg
8560 logging.error("Could not remove export for instance %s"
8561 " on node %s: %s", instance_name, node, msg)
8563 if fqdn_warn and not found:
8564 feedback_fn("Export not found. If trying to remove an export belonging"
8565 " to a deleted instance, please use its Fully Qualified"
8566 " Domain Name.")
8569 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
8572 This is an abstract class which is the parent of all the other tags LUs.
8576 def ExpandNames(self):
8577 self.needed_locks = {}
8578 if self.op.kind == constants.TAG_NODE:
8579 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
8580 self.needed_locks[locking.LEVEL_NODE] = self.op.name
8581 elif self.op.kind == constants.TAG_INSTANCE:
8582 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
8583 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
8585 def CheckPrereq(self):
8586 """Check prerequisites.
8589 if self.op.kind == constants.TAG_CLUSTER:
8590 self.target = self.cfg.GetClusterInfo()
8591 elif self.op.kind == constants.TAG_NODE:
8592 self.target = self.cfg.GetNodeInfo(self.op.name)
8593 elif self.op.kind == constants.TAG_INSTANCE:
8594 self.target = self.cfg.GetInstanceInfo(self.op.name)
8596 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
8597 str(self.op.kind), errors.ECODE_INVAL)
8600 class LUGetTags(TagsLU):
8601 """Returns the tags of a given object.
8604 _OP_REQP = ["kind", "name"]
8607 def Exec(self, feedback_fn):
8608 """Returns the tag list.
8611 return list(self.target.GetTags())
8614 class LUSearchTags(NoHooksLU):
8615 """Searches the tags for a given pattern.
8618 _OP_REQP = ["pattern"]
8621 def ExpandNames(self):
8622 self.needed_locks = {}
8624 def CheckPrereq(self):
8625 """Check prerequisites.
8627 This checks the pattern passed for validity by compiling it.
8631 self.re = re.compile(self.op.pattern)
8632 except re.error, err:
8633 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
8634 (self.op.pattern, err), errors.ECODE_INVAL)
8636 def Exec(self, feedback_fn):
8637 """Returns the tag list.
8641 tgts = [("/cluster", cfg.GetClusterInfo())]
8642 ilist = cfg.GetAllInstancesInfo().values()
8643 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
8644 nlist = cfg.GetAllNodesInfo().values()
8645 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
8647 for path, target in tgts:
8648 for tag in target.GetTags():
8649 if self.re.search(tag):
8650 results.append((path, tag))
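# Illustrative result (hypothetical names): a list of (path, tag) pairs for
# every tag matching the pattern, e.g.
#
#   [("/cluster", "production"),
#    ("/instances/inst1.example.com", "production")]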
8654 class LUAddTags(TagsLU):
8655 """Sets a tag on a given object.
8658 _OP_REQP = ["kind", "name", "tags"]
8661 def CheckPrereq(self):
8662 """Check prerequisites.
8664 This checks the type and length of the tag name and value.
8667 TagsLU.CheckPrereq(self)
8668 for tag in self.op.tags:
8669 objects.TaggableObject.ValidateTag(tag)
8671 def Exec(self, feedback_fn):
8676 for tag in self.op.tags:
8677 self.target.AddTag(tag)
8678 except errors.TagError, err:
8679 raise errors.OpExecError("Error while setting tag: %s" % str(err))
8680 self.cfg.Update(self.target, feedback_fn)
8683 class LUDelTags(TagsLU):
8684 """Delete a list of tags from a given object.
8687 _OP_REQP = ["kind", "name", "tags"]
8690 def CheckPrereq(self):
8691 """Check prerequisites.
8693 This checks that we have the given tag.
8696 TagsLU.CheckPrereq(self)
8697 for tag in self.op.tags:
8698 objects.TaggableObject.ValidateTag(tag)
8699 del_tags = frozenset(self.op.tags)
8700 cur_tags = self.target.GetTags()
8701 if not del_tags <= cur_tags:
8702 diff_tags = del_tags - cur_tags
8703 diff_names = ["'%s'" % tag for tag in diff_tags]
8705 raise errors.OpPrereqError("Tag(s) %s not found" %
8706 (",".join(diff_names)), errors.ECODE_NOENT)
8708 def Exec(self, feedback_fn):
8709 """Remove the tag from the object.
8712 for tag in self.op.tags:
8713 self.target.RemoveTag(tag)
8714 self.cfg.Update(self.target, feedback_fn)
8717 class LUTestDelay(NoHooksLU):
8718 """Sleep for a specified amount of time.
8720 This LU sleeps on the master and/or nodes for a specified amount of
8721 time.
8724 _OP_REQP = ["duration", "on_master", "on_nodes"]
8727 def ExpandNames(self):
8728 """Expand names and set required locks.
8730 This expands the node list, if any.
8733 self.needed_locks = {}
8734 if self.op.on_nodes:
8735 # _GetWantedNodes can be used here, but is not always appropriate to use
8736 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
8737 # more information.
8738 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
8739 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
8741 def CheckPrereq(self):
8742 """Check prerequisites.
8746 def Exec(self, feedback_fn):
8747 """Do the actual sleep.
8750 if self.op.on_master:
8751 if not utils.TestDelay(self.op.duration):
8752 raise errors.OpExecError("Error during master delay test")
8753 if self.op.on_nodes:
8754 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
8755 for node, node_result in result.items():
8756 node_result.Raise("Failure during rpc call to node %s" % node)
8759 class IAllocator(object):
8760 """IAllocator framework.
8762 An IAllocator instance has four sets of attributes:
8763 - cfg that is needed to query the cluster
8764 - input data (all members of the _KEYS class attribute are required)
8765 - four buffer attributes (in_data, in_text, out_data, out_text) that
8766 hold the input (to the external script) in data structure and text
8767 format, and the output from it, again in both formats
8768 - the result variables from the script (success, info, result) for
8772 # pylint: disable-msg=R0902
8773 # lots of instance attributes
8775 "name", "mem_size", "disks", "disk_template",
8776 "os", "tags", "nics", "vcpus", "hypervisor",
8779 "name", "relocate_from",
8785 def __init__(self, cfg, rpc, mode, **kwargs):
8788 # init buffer variables
8789 self.in_text = self.out_text = self.in_data = self.out_data = None
8790 # init all input fields so that pylint is happy
8792 self.mem_size = self.disks = self.disk_template = None
8793 self.os = self.tags = self.nics = self.vcpus = None
8794 self.hypervisor = None
8795 self.relocate_from = None
8797 self.evac_nodes = None
8799 self.required_nodes = None
8800 # init result fields
8801 self.success = self.info = self.result = None
8802 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8803 keyset = self._ALLO_KEYS
8804 fn = self._AddNewInstance
8805 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8806 keyset = self._RELO_KEYS
8807 fn = self._AddRelocateInstance
8808 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
8809 keyset = self._EVAC_KEYS
8810 fn = self._AddEvacuateNodes
8812 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
8813 " IAllocator" % self.mode)
8814 for key in kwargs:
8815 if key not in keyset:
8816 raise errors.ProgrammerError("Invalid input parameter '%s' to"
8817 " IAllocator" % key)
8818 setattr(self, key, kwargs[key])
8820 for key in keyset:
8821 if key not in kwargs:
8822 raise errors.ProgrammerError("Missing input parameter '%s' to"
8823 " IAllocator" % key)
8824 self._BuildInputData(fn)
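# Illustrative construction (a sketch; "hail" is just an example allocator
# name): the keyword arguments must exactly match the keyset of the chosen
# mode, e.g. for a relocation request (_RELO_KEYS):
#
#   ial = IAllocator(self.cfg, self.rpc,
#                    mode=constants.IALLOCATOR_MODE_RELOC,
#                    name="inst1.example.com",
#                    relocate_from=["node2.example.com"])
#   ial.Run("hail", validate=True)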
8826 def _ComputeClusterData(self):
8827 """Compute the generic allocator input data.
8829 This is the data that is independent of the actual operation.
8833 cluster_info = cfg.GetClusterInfo()
8836 "version": constants.IALLOCATOR_VERSION,
8837 "cluster_name": cfg.GetClusterName(),
8838 "cluster_tags": list(cluster_info.GetTags()),
8839 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
8840 # we don't have job IDs
8842 iinfo = cfg.GetAllInstancesInfo().values()
8843 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
8847 node_list = cfg.GetNodeList()
8849 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8850 hypervisor_name = self.hypervisor
8851 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8852 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
8853 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
8854 hypervisor_name = cluster_info.enabled_hypervisors[0]
8856 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
8859 self.rpc.call_all_instances_info(node_list,
8860 cluster_info.enabled_hypervisors)
8861 for nname, nresult in node_data.items():
8862 # first fill in static (config-based) values
8863 ninfo = cfg.GetNodeInfo(nname)
8865 "tags": list(ninfo.GetTags()),
8866 "primary_ip": ninfo.primary_ip,
8867 "secondary_ip": ninfo.secondary_ip,
8868 "offline": ninfo.offline,
8869 "drained": ninfo.drained,
8870 "master_candidate": ninfo.master_candidate,
8873 if not (ninfo.offline or ninfo.drained):
8874 nresult.Raise("Can't get data for node %s" % nname)
8875 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
8877 remote_info = nresult.payload
8879 for attr in ['memory_total', 'memory_free', 'memory_dom0',
8880 'vg_size', 'vg_free', 'cpu_total']:
8881 if attr not in remote_info:
8882 raise errors.OpExecError("Node '%s' didn't return attribute"
8883 " '%s'" % (nname, attr))
8884 if not isinstance(remote_info[attr], int):
8885 raise errors.OpExecError("Node '%s' returned invalid value"
8887 (nname, attr, remote_info[attr]))
8888 # compute memory used by primary instances
8889 i_p_mem = i_p_up_mem = 0
8890 for iinfo, beinfo in i_list:
8891 if iinfo.primary_node == nname:
8892 i_p_mem += beinfo[constants.BE_MEMORY]
8893 if iinfo.name not in node_iinfo[nname].payload:
8896 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
8897 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
8898 remote_info['memory_free'] -= max(0, i_mem_diff)
8901 i_p_up_mem += beinfo[constants.BE_MEMORY]
8903 # compute memory used by instances
8905 "total_memory": remote_info['memory_total'],
8906 "reserved_memory": remote_info['memory_dom0'],
8907 "free_memory": remote_info['memory_free'],
8908 "total_disk": remote_info['vg_size'],
8909 "free_disk": remote_info['vg_free'],
8910 "total_cpus": remote_info['cpu_total'],
8911 "i_pri_memory": i_p_mem,
8912 "i_pri_up_memory": i_p_up_mem,
8916 node_results[nname] = pnr
8917 data["nodes"] = node_results
8921 for iinfo, beinfo in i_list:
8923 for nic in iinfo.nics:
8924 filled_params = objects.FillDict(
8925 cluster_info.nicparams[constants.PP_DEFAULT],
8927 nic_dict = {"mac": nic.mac,
8929 "mode": filled_params[constants.NIC_MODE],
8930 "link": filled_params[constants.NIC_LINK],
8932 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
8933 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
8934 nic_data.append(nic_dict)
8936 "tags": list(iinfo.GetTags()),
8937 "admin_up": iinfo.admin_up,
8938 "vcpus": beinfo[constants.BE_VCPUS],
8939 "memory": beinfo[constants.BE_MEMORY],
8941 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
8943 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
8944 "disk_template": iinfo.disk_template,
8945 "hypervisor": iinfo.hypervisor,
8947 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
8949 instance_data[iinfo.name] = pir
8951 data["instances"] = instance_data
8955 def _AddNewInstance(self):
8956 """Add new instance data to allocator structure.
8958 This in combination with _ComputeClusterData will create the
8959 correct structure needed as input for the allocator.
8961 The checks for the completeness of the opcode must have already been
8962 done.
8965 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
8967 if self.disk_template in constants.DTS_NET_MIRROR:
8968 self.required_nodes = 2
8970 self.required_nodes = 1
8973 "disk_template": self.disk_template,
8976 "vcpus": self.vcpus,
8977 "memory": self.mem_size,
8978 "disks": self.disks,
8979 "disk_space_total": disk_space,
8981 "required_nodes": self.required_nodes,
8985 def _AddRelocateInstance(self):
8986 """Add relocate instance data to allocator structure.
8988 This in combination with _ComputeClusterData will create the
8989 correct structure needed as input for the allocator.
8991 The checks for the completeness of the opcode must have already been
8992 done.
8995 instance = self.cfg.GetInstanceInfo(self.name)
8996 if instance is None:
8997 raise errors.ProgrammerError("Unknown instance '%s' passed to"
8998 " IAllocator" % self.name)
9000 if instance.disk_template not in constants.DTS_NET_MIRROR:
9001 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9004 if len(instance.secondary_nodes) != 1:
9005 raise errors.OpPrereqError("Instance does not have exactly one secondary node",
9008 self.required_nodes = 1
9009 disk_sizes = [{'size': disk.size} for disk in instance.disks]
9010 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9014 "disk_space_total": disk_space,
9015 "required_nodes": self.required_nodes,
9016 "relocate_from": self.relocate_from,
9020 def _AddEvacuateNodes(self):
9021 """Add evacuate nodes data to allocator structure.
9025 "evac_nodes": self.evac_nodes
9029 def _BuildInputData(self, fn):
9030 """Build input data structures.
9033 self._ComputeClusterData()
9036 request["type"] = self.mode
9037 self.in_data["request"] = request
9039 self.in_text = serializer.Dump(self.in_data)
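# Illustrative sketch of the resulting self.in_data layout (abridged,
# hypothetical values):
#
#   {"version": constants.IALLOCATOR_VERSION,
#    "cluster_name": "cluster.example.com",
#    "cluster_tags": [],
#    "enabled_hypervisors": ["xen-pvm"],
#    "nodes": {"node1.example.com": {"total_memory": 4096, ...}},
#    "instances": {"inst1.example.com": {"memory": 512, ...}},
#    "request": {"type": self.mode, ...}}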
9041 def Run(self, name, validate=True, call_fn=None):
9042 """Run an instance allocator and return the results.
9046 call_fn = self.rpc.call_iallocator_runner
9048 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9049 result.Raise("Failure while running the iallocator script")
9051 self.out_text = result.payload
9053 self._ValidateResult()
9055 def _ValidateResult(self):
9056 """Process the allocator results.
9058 This will process the allocator output and, if successful, save the
9059 result in self.out_data and the other result attributes.
9063 rdict = serializer.Load(self.out_text)
9064 except Exception, err:
9065 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9067 if not isinstance(rdict, dict):
9068 raise errors.OpExecError("Can't parse iallocator results: not a dict")
9070 # TODO: remove backwards compatibility in later versions
9071 if "nodes" in rdict and "result" not in rdict:
9072 rdict["result"] = rdict["nodes"]
9075 for key in "success", "info", "result":
9076 if key not in rdict:
9077 raise errors.OpExecError("Can't parse iallocator results:"
9078 " missing key '%s'" % key)
9079 setattr(self, key, rdict[key])
9081 if not isinstance(rdict["result"], list):
9082 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9083 " is not a list")
9084 self.out_data = rdict
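# Illustrative reply (a sketch): the text returned by the allocator script
# is expected to deserialize into something like
#
#   {"success": true, "info": "allocation successful",
#    "result": ["node2.example.com"]}
#
# with a top-level "nodes" key still accepted in place of "result" for
# backwards compatibility, as handled above.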
9087 class LUTestAllocator(NoHooksLU):
9088 """Run allocator tests.
9090 This LU runs the allocator tests.
9093 _OP_REQP = ["direction", "mode", "name"]
9095 def CheckPrereq(self):
9096 """Check prerequisites.
9098 This checks the opcode parameters depending on the test direction and mode.
9101 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9102 for attr in ["name", "mem_size", "disks", "disk_template",
9103 "os", "tags", "nics", "vcpus"]:
9104 if not hasattr(self.op, attr):
9105 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
9106 attr, errors.ECODE_INVAL)
9107 iname = self.cfg.ExpandInstanceName(self.op.name)
9108 if iname is not None:
9109 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
9110 iname, errors.ECODE_EXISTS)
9111 if not isinstance(self.op.nics, list):
9112 raise errors.OpPrereqError("Invalid parameter 'nics'",
9114 for row in self.op.nics:
9115 if (not isinstance(row, dict) or
9118 "bridge" not in row):
9119 raise errors.OpPrereqError("Invalid contents of the 'nics'"
9120 " parameter", errors.ECODE_INVAL)
9121 if not isinstance(self.op.disks, list):
9122 raise errors.OpPrereqError("Invalid parameter 'disks'",
9124 for row in self.op.disks:
9125 if (not isinstance(row, dict) or
9126 "size" not in row or
9127 not isinstance(row["size"], int) or
9128 "mode" not in row or
9129 row["mode"] not in ['r', 'w']):
9130 raise errors.OpPrereqError("Invalid contents of the 'disks'"
9131 " parameter", errors.ECODE_INVAL)
9132 if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
9133 self.op.hypervisor = self.cfg.GetHypervisorType()
9134 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9135 if not hasattr(self.op, "name"):
9136 raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
9138 fname = _ExpandInstanceName(self.cfg, self.op.name)
9139 self.op.name = fname
9140 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
9141 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9142 if not hasattr(self.op, "evac_nodes"):
9143 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
9144 " opcode input", errors.ECODE_INVAL)
9146 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
9147 self.op.mode, errors.ECODE_INVAL)
9149 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
9150 if not hasattr(self.op, "allocator") or self.op.allocator is None:
9151 raise errors.OpPrereqError("Missing allocator name",
9153 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
9154 raise errors.OpPrereqError("Wrong allocator test '%s'" %
9155 self.op.direction, errors.ECODE_INVAL)
9157 def Exec(self, feedback_fn):
9158 """Run the allocator test.
9161 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9162 ial = IAllocator(self.cfg, self.rpc,
9165 mem_size=self.op.mem_size,
9166 disks=self.op.disks,
9167 disk_template=self.op.disk_template,
9171 vcpus=self.op.vcpus,
9172 hypervisor=self.op.hypervisor,
9174 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9175 ial = IAllocator(self.cfg, self.rpc,
9178 relocate_from=list(self.relocate_from),
9180 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9181 ial = IAllocator(self.cfg, self.rpc,
9183 evac_nodes=self.op.evac_nodes)
9185 raise errors.ProgrammerError("Unhandled mode %s in"
9186 " LUTestAllocator.Exec", self.op.mode)
9188 if self.op.direction == constants.IALLOCATOR_DIR_IN:
9189 result = ial.in_text
9190 else:
9191 ial.Run(self.op.allocator, validate=False)
9192 result = ial.out_text