4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
38 from ganeti import ssh
39 from ganeti import utils
40 from ganeti import errors
41 from ganeti import hypervisor
42 from ganeti import locking
43 from ganeti import constants
44 from ganeti import objects
45 from ganeti import serializer
46 from ganeti import ssconf
47 from ganeti import uidpool
48 from ganeti import compat
49 from ganeti import masterd
51 import ganeti.masterd.instance # pylint: disable-msg=W0611
54 class LogicalUnit(object):
55 """Logical Unit base class.
57 Subclasses must follow these rules:
58 - implement ExpandNames
59 - implement CheckPrereq (except when tasklets are used)
60 - implement Exec (except when tasklets are used)
61 - implement BuildHooksEnv
62 - redefine HPATH and HTYPE
63 - optionally redefine their run requirements:
64 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
66 Note that all commands require root permissions.
68 @ivar dry_run_result: the value (if any) that will be returned to the caller
69 in dry-run mode (signalled by opcode dry_run parameter)
77 def __init__(self, processor, op, context, rpc):
78 """Constructor for LogicalUnit.
80 This needs to be overridden in derived classes in order to check op validity.
86 self.cfg = context.cfg
87 self.context = context
89 # Dicts used to declare locking needs to mcpu
90 self.needed_locks = None
91 self.acquired_locks = {}
92 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
94 self.remove_locks = {}
95 # Used to force good behavior when calling helper functions
96 self.recalculate_locks = {}
99 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
100 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
101 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
102 # support for dry-run
103 self.dry_run_result = None
104 # support for generic debug attribute
105 if (not hasattr(self.op, "debug_level") or
106 not isinstance(self.op.debug_level, int)):
107 self.op.debug_level = 0
112 for attr_name in self._OP_REQP:
113 attr_val = getattr(op, attr_name, None)
115 raise errors.OpPrereqError("Required parameter '%s' missing" %
116 attr_name, errors.ECODE_INVAL)
118 self.CheckArguments()
121 """Returns the SshRunner object
125 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
128 ssh = property(fget=__GetSSH)
130 def CheckArguments(self):
131 """Check syntactic validity for the opcode arguments.
133 This method is for doing a simple syntactic check and ensuring the
134 validity of opcode parameters, without any cluster-related
135 checks. While the same can be accomplished in ExpandNames and/or
136 CheckPrereq, doing these separately is better because:
138 - ExpandNames is left as purely a lock-related function
139 - CheckPrereq is run after we have acquired locks (and possible
142 The function is allowed to change the self.op attribute so that
143 later methods no longer need to worry about missing parameters.
148 def ExpandNames(self):
149 """Expand names for this LU.
151 This method is called before starting to execute the opcode, and it should
152 update all the parameters of the opcode to their canonical form (e.g. a
153 short node name must be fully expanded after this method has successfully
154 completed). This way locking, hooks, logging, etc. can work correctly.
156 LUs which implement this method must also populate the self.needed_locks
157 member, as a dict with lock levels as keys, and a list of needed lock names
160 - use an empty dict if you don't need any lock
161 - if you don't need any lock at a particular level omit that level
162 - don't put anything for the BGL level
163 - if you want all locks at a level use locking.ALL_SET as a value
165 If you need to share locks (rather than acquire them exclusively) at one
166 level you can modify self.share_locks, setting a true value (usually 1) for
167 that level. By default locks are not shared.
169 This function can also define a list of tasklets, which then will be
170 executed in order instead of the usual LU-level CheckPrereq and Exec
171 functions, if those are not defined by the LU.
175 # Acquire all nodes and one instance
176 self.needed_locks = {
177 locking.LEVEL_NODE: locking.ALL_SET,
178 locking.LEVEL_INSTANCE: ['instance1.example.tld'],
180 # Acquire just two nodes
181 self.needed_locks = {
182 locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
185 self.needed_locks = {} # No, you can't leave it to the default value None
188 # The implementation of this method is mandatory only if the new LU is
189 # concurrent, so that old LUs don't need to be changed all at the same time.
192 self.needed_locks = {} # Exclusive LUs don't need locks.
194 raise NotImplementedError
196 def DeclareLocks(self, level):
197 """Declare LU locking needs for a level
199 While most LUs can just declare their locking needs at ExpandNames time,
200 sometimes there's the need to calculate some locks after having acquired
201 the ones before. This function is called just before acquiring locks at a
202 particular level, but after acquiring the ones at lower levels, and permits
203 such calculations. It can be used to modify self.needed_locks, and by
204 default it does nothing.
206 This function is only called if you have something already set in
207 self.needed_locks for the level.
209 @param level: Locking level which is going to be locked
210 @type level: member of ganeti.locking.LEVELS
214 def CheckPrereq(self):
215 """Check prerequisites for this LU.
217 This method should check that the prerequisites for the execution
218 of this LU are fulfilled. It can do internode communication, but
219 it should be idempotent - no cluster or system changes are
222 The method should raise errors.OpPrereqError in case something is
223 not fulfilled. Its return value is ignored.
225 This method should also update all the parameters of the opcode to
226 their canonical form if it hasn't been done by ExpandNames before.
229 if self.tasklets is not None:
230 for (idx, tl) in enumerate(self.tasklets):
231 logging.debug("Checking prerequisites for tasklet %s/%s",
232 idx + 1, len(self.tasklets))
235 raise NotImplementedError
237 def Exec(self, feedback_fn):
240 This method should implement the actual work. It should raise
241 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
245 if self.tasklets is not None:
246 for (idx, tl) in enumerate(self.tasklets):
247 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
250 raise NotImplementedError
252 def BuildHooksEnv(self):
253 """Build hooks environment for this LU.
255 This method should return a three-element tuple consisting of: a dict
256 containing the environment that will be used for running the
257 specific hook for this LU, a list of node names on which the hook
258 should run before the execution, and a list of node names on which
259 the hook should run after the execution.
261 The keys of the dict must not be prefixed with 'GANETI_', as this is
262 handled by the hooks runner. Also note additional keys will be
263 added by the hooks runner. If the LU doesn't define any
264 environment, an empty dict (and not None) should be returned.
266 No nodes should be returned as an empty list (and not None).
268 Note that if the HPATH for a LU class is None, this function will not be called.
272 raise NotImplementedError
274 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
275 """Notify the LU about the results of its hooks.
277 This method is called every time a hooks phase is executed, and notifies
278 the Logical Unit about the hooks' result. The LU can then use it to alter
279 its result based on the hooks. By default the method does nothing and the
280 previous result is passed back unchanged, but any LU can override it if it
281 wants to use the local cluster hook-scripts somehow.
283 @param phase: one of L{constants.HOOKS_PHASE_POST} or
284 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
285 @param hook_results: the results of the multi-node hooks rpc call
286 @param feedback_fn: function used to send feedback back to the caller
287 @param lu_result: the previous Exec result this LU had, or None
289 @return: the new Exec result, based on the previous result
293 # API must be kept, thus we ignore the "unused argument" and
294 # "could be a function" warnings
295 # pylint: disable-msg=W0613,R0201
298 def _ExpandAndLockInstance(self):
299 """Helper function to expand and lock an instance.
301 Many LUs that work on an instance take its name in self.op.instance_name
302 and need to expand it and then declare the expanded name for locking. This
303 function does it, and then updates self.op.instance_name to the expanded
304 name. It also initializes needed_locks as a dict, if this hasn't been done before.
308 if self.needed_locks is None:
309 self.needed_locks = {}
311 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
312 "_ExpandAndLockInstance called with instance-level locks set"
313 self.op.instance_name = _ExpandInstanceName(self.cfg,
314 self.op.instance_name)
315 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
317 def _LockInstancesNodes(self, primary_only=False):
318 """Helper function to declare instances' nodes for locking.
320 This function should be called after locking one or more instances to lock
321 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
322 with all primary or secondary nodes for instances already locked and
323 present in self.needed_locks[locking.LEVEL_INSTANCE].
325 It should be called from DeclareLocks, and for safety only works if
326 self.recalculate_locks[locking.LEVEL_NODE] is set.
328 In the future it may grow parameters to just lock some instances' nodes, or
329 to just lock primary or secondary nodes, if needed.
331 It should be called in DeclareLocks in a way similar to::
333 if level == locking.LEVEL_NODE:
334 self._LockInstancesNodes()
336 @type primary_only: boolean
337 @param primary_only: only lock primary nodes of locked instances
340 assert locking.LEVEL_NODE in self.recalculate_locks, \
341 "_LockInstancesNodes helper function called with no nodes to recalculate"
343 # TODO: check if we've really been called with the instance locks held
345 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
346 # future we might want to have different behaviors depending on the value
347 # of self.recalculate_locks[locking.LEVEL_NODE]
349 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
350 instance = self.context.cfg.GetInstanceInfo(instance_name)
351 wanted_nodes.append(instance.primary_node)
353 wanted_nodes.extend(instance.secondary_nodes)
355 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
356 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
357 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
358 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
360 del self.recalculate_locks[locking.LEVEL_NODE]
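# Illustrative sketch (not part of Ganeti proper): a minimal, hypothetical LU
# showing the lifecycle described in the docstrings above. ExpandNames expands
# the instance name and declares its lock, DeclareLocks adds the node locks
# once the instance lock is held (via _LockInstancesNodes), CheckPrereq
# validates cluster state and Exec does the actual work. The class name and
# opcode fields used here are assumptions made for the example only.
class LUExampleShowInstance(LogicalUnit):
  """Example-only LU: report the primary node of an instance."""
  HPATH = None
  HTYPE = None
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    # expands self.op.instance_name and takes the instance-level lock
    self._ExpandAndLockInstance()
    # node locks are computed later, once the instance lock is held
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    feedback_fn("Instance %s has primary node %s" %
                (self.instance.name, self.instance.primary_node))
    return self.instance.primary_node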
363 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
364 """Simple LU which runs no hooks.
366 This LU is intended as a parent for other LogicalUnits which will
367 run no hooks, in order to reduce duplicate code.
373 def BuildHooksEnv(self):
374 """Empty BuildHooksEnv for NoHooksLu.
376 This just raises an error.
379 assert False, "BuildHooksEnv called for NoHooksLUs"
383 """Tasklet base class.
385 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
386 they can mix legacy code with tasklets. Locking needs to be done in the LU,
387 tasklets know nothing about locks.
389 Subclasses must follow these rules:
390 - Implement CheckPrereq
394 def __init__(self, lu):
401 def CheckPrereq(self):
402 """Check prerequisites for this tasklets.
404 This method should check whether the prerequisites for the execution of
405 this tasklet are fulfilled. It can do internode communication, but it
406 should be idempotent - no cluster or system changes are allowed.
408 The method should raise errors.OpPrereqError in case something is not
409 fulfilled. Its return value is ignored.
411 This method should also update all parameters to their canonical form if it
412 hasn't been done before.
415 raise NotImplementedError
417 def Exec(self, feedback_fn):
418 """Execute the tasklet.
420 This method should implement the actual work. It should raise
421 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
425 raise NotImplementedError
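# Illustrative sketch (hypothetical names, not part of Ganeti proper): a
# minimal tasklet and the way an LU would register it. When an LU's
# ExpandNames sets self.tasklets, the LogicalUnit base class runs each
# tasklet's CheckPrereq and Exec instead of the LU-level CheckPrereq/Exec.
class _ExampleNoopTasklet(Tasklet):
  """Example-only tasklet that just reports on a node."""
  def __init__(self, lu, node_name):
    Tasklet.__init__(self, lu)
    self.node_name = node_name

  def CheckPrereq(self):
    # tasklets do no locking themselves; they only verify cluster state
    _CheckNodeOnline(self.lu, self.node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Nothing to do on node %s" % self.node_name)

# Registration would happen in the owning LU, roughly:
#   def ExpandNames(self):
#     self.needed_locks = {locking.LEVEL_NODE: [self.op.node_name]}
#     self.tasklets = [_ExampleNoopTasklet(self, self.op.node_name)]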
428 def _GetWantedNodes(lu, nodes):
429 """Returns list of checked and expanded node names.
431 @type lu: L{LogicalUnit}
432 @param lu: the logical unit on whose behalf we execute
434 @param nodes: list of node names or None for all nodes
436 @return: the list of nodes, sorted
437 @raise errors.ProgrammerError: if the nodes parameter is wrong type
440 if not isinstance(nodes, list):
441 raise errors.OpPrereqError("Invalid argument type 'nodes'",
445 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
446 " non-empty list of nodes whose name is to be expanded.")
448 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
449 return utils.NiceSort(wanted)
452 def _GetWantedInstances(lu, instances):
453 """Returns list of checked and expanded instance names.
455 @type lu: L{LogicalUnit}
456 @param lu: the logical unit on whose behalf we execute
457 @type instances: list
458 @param instances: list of instance names or None for all instances
460 @return: the list of instances, sorted
461 @raise errors.OpPrereqError: if the instances parameter is wrong type
462 @raise errors.OpPrereqError: if any of the passed instances is not found
465 if not isinstance(instances, list):
466 raise errors.OpPrereqError("Invalid argument type 'instances'",
470 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
472 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
476 def _CheckOutputFields(static, dynamic, selected):
477 """Checks whether all selected fields are valid.
479 @type static: L{utils.FieldSet}
480 @param static: static fields set
481 @type dynamic: L{utils.FieldSet}
482 @param dynamic: dynamic fields set
489 delta = f.NonMatching(selected)
491 raise errors.OpPrereqError("Unknown output fields selected: %s"
492 % ",".join(delta), errors.ECODE_INVAL)
495 def _CheckBooleanOpField(op, name):
496 """Validates boolean opcode parameters.
498 This will ensure that an opcode parameter is either a boolean value,
499 or None (but that it always exists).
502 val = getattr(op, name, None)
503 if not (val is None or isinstance(val, bool)):
504 raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
505 (name, str(val)), errors.ECODE_INVAL)
506 setattr(op, name, val)
509 def _CheckGlobalHvParams(params):
510 """Validates that given hypervisor params are not global ones.
512 This will ensure that instances don't get customised versions of global parameters.
516 used_globals = constants.HVC_GLOBALS.intersection(params)
518 msg = ("The following hypervisor parameters are global and cannot"
519 " be customized at instance level, please modify them at"
520 " cluster level: %s" % utils.CommaJoin(used_globals))
521 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
524 def _CheckNodeOnline(lu, node):
525 """Ensure that a given node is online.
527 @param lu: the LU on behalf of which we make the check
528 @param node: the node to check
529 @raise errors.OpPrereqError: if the node is offline
532 if lu.cfg.GetNodeInfo(node).offline:
533 raise errors.OpPrereqError("Can't use offline node %s" % node,
537 def _CheckNodeNotDrained(lu, node):
538 """Ensure that a given node is not drained.
540 @param lu: the LU on behalf of which we make the check
541 @param node: the node to check
542 @raise errors.OpPrereqError: if the node is drained
545 if lu.cfg.GetNodeInfo(node).drained:
546 raise errors.OpPrereqError("Can't use drained node %s" % node,
550 def _CheckNodeHasOS(lu, node, os_name, force_variant):
551 """Ensure that a node supports a given OS.
553 @param lu: the LU on behalf of which we make the check
554 @param node: the node to check
555 @param os_name: the OS to query about
556 @param force_variant: whether to ignore variant errors
557 @raise errors.OpPrereqError: if the node does not support the OS
560 result = lu.rpc.call_os_get(node, os_name)
561 result.Raise("OS '%s' not in supported OS list for node %s" %
563 prereq=True, ecode=errors.ECODE_INVAL)
564 if not force_variant:
565 _CheckOSVariant(result.payload, os_name)
568 def _RequireFileStorage():
569 """Checks that file storage is enabled.
571 @raise errors.OpPrereqError: when file storage is disabled
574 if not constants.ENABLE_FILE_STORAGE:
575 raise errors.OpPrereqError("File storage disabled at configure time",
579 def _CheckDiskTemplate(template):
580 """Ensure a given disk template is valid.
583 if template not in constants.DISK_TEMPLATES:
584 msg = ("Invalid disk template name '%s', valid templates are: %s" %
585 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
586 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
587 if template == constants.DT_FILE:
588 _RequireFileStorage()
591 def _CheckStorageType(storage_type):
592 """Ensure a given storage type is valid.
595 if storage_type not in constants.VALID_STORAGE_TYPES:
596 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
598 if storage_type == constants.ST_FILE:
599 _RequireFileStorage()
603 def _CheckInstanceDown(lu, instance, reason):
604 """Ensure that an instance is not running."""
605 if instance.admin_up:
606 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
607 (instance.name, reason), errors.ECODE_STATE)
609 pnode = instance.primary_node
610 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
611 ins_l.Raise("Can't contact node %s for instance information" % pnode,
612 prereq=True, ecode=errors.ECODE_ENVIRON)
614 if instance.name in ins_l.payload:
615 raise errors.OpPrereqError("Instance %s is running, %s" %
616 (instance.name, reason), errors.ECODE_STATE)
619 def _ExpandItemName(fn, name, kind):
620 """Expand an item name.
622 @param fn: the function to use for expansion
623 @param name: requested item name
624 @param kind: text description ('Node' or 'Instance')
625 @return: the resolved (full) name
626 @raise errors.OpPrereqError: if the item is not found
630 if full_name is None:
631 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
636 def _ExpandNodeName(cfg, name):
637 """Wrapper over L{_ExpandItemName} for nodes."""
638 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
641 def _ExpandInstanceName(cfg, name):
642 """Wrapper over L{_ExpandItemName} for instance."""
643 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
646 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
647 memory, vcpus, nics, disk_template, disks,
648 bep, hvp, hypervisor_name):
649 """Builds instance related env variables for hooks
651 This builds the hook environment from individual variables.
654 @param name: the name of the instance
655 @type primary_node: string
656 @param primary_node: the name of the instance's primary node
657 @type secondary_nodes: list
658 @param secondary_nodes: list of secondary nodes as strings
659 @type os_type: string
660 @param os_type: the name of the instance's OS
661 @type status: boolean
662 @param status: the should_run status of the instance
664 @param memory: the memory size of the instance
666 @param vcpus: the count of VCPUs the instance has
668 @param nics: list of tuples (ip, mac, mode, link) representing
669 the NICs the instance has
670 @type disk_template: string
671 @param disk_template: the disk template of the instance
673 @param disks: the list of (size, mode) pairs
675 @param bep: the backend parameters for the instance
677 @param hvp: the hypervisor parameters for the instance
678 @type hypervisor_name: string
679 @param hypervisor_name: the hypervisor for the instance
681 @return: the hook environment for this instance
690 "INSTANCE_NAME": name,
691 "INSTANCE_PRIMARY": primary_node,
692 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
693 "INSTANCE_OS_TYPE": os_type,
694 "INSTANCE_STATUS": str_status,
695 "INSTANCE_MEMORY": memory,
696 "INSTANCE_VCPUS": vcpus,
697 "INSTANCE_DISK_TEMPLATE": disk_template,
698 "INSTANCE_HYPERVISOR": hypervisor_name,
702 nic_count = len(nics)
703 for idx, (ip, mac, mode, link) in enumerate(nics):
706 env["INSTANCE_NIC%d_IP" % idx] = ip
707 env["INSTANCE_NIC%d_MAC" % idx] = mac
708 env["INSTANCE_NIC%d_MODE" % idx] = mode
709 env["INSTANCE_NIC%d_LINK" % idx] = link
710 if mode == constants.NIC_MODE_BRIDGED:
711 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
715 env["INSTANCE_NIC_COUNT"] = nic_count
718 disk_count = len(disks)
719 for idx, (size, mode) in enumerate(disks):
720 env["INSTANCE_DISK%d_SIZE" % idx] = size
721 env["INSTANCE_DISK%d_MODE" % idx] = mode
725 env["INSTANCE_DISK_COUNT"] = disk_count
727 for source, kind in [(bep, "BE"), (hvp, "HV")]:
728 for key, value in source.items():
729 env["INSTANCE_%s_%s" % (kind, key)] = value
734 def _NICListToTuple(lu, nics):
735 """Build a list of nic information tuples.
737 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
738 value in LUQueryInstanceData.
740 @type lu: L{LogicalUnit}
741 @param lu: the logical unit on whose behalf we execute
742 @type nics: list of L{objects.NIC}
743 @param nics: list of nics to convert to hooks tuples
747 c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
751 filled_params = objects.FillDict(c_nicparams, nic.nicparams)
752 mode = filled_params[constants.NIC_MODE]
753 link = filled_params[constants.NIC_LINK]
754 hooks_nics.append((ip, mac, mode, link))
758 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
759 """Builds instance related env variables for hooks from an object.
761 @type lu: L{LogicalUnit}
762 @param lu: the logical unit on whose behalf we execute
763 @type instance: L{objects.Instance}
764 @param instance: the instance for which we should build the
767 @param override: dictionary with key/values that will override
770 @return: the hook environment dictionary
773 cluster = lu.cfg.GetClusterInfo()
774 bep = cluster.FillBE(instance)
775 hvp = cluster.FillHV(instance)
777 'name': instance.name,
778 'primary_node': instance.primary_node,
779 'secondary_nodes': instance.secondary_nodes,
780 'os_type': instance.os,
781 'status': instance.admin_up,
782 'memory': bep[constants.BE_MEMORY],
783 'vcpus': bep[constants.BE_VCPUS],
784 'nics': _NICListToTuple(lu, instance.nics),
785 'disk_template': instance.disk_template,
786 'disks': [(disk.size, disk.mode) for disk in instance.disks],
789 'hypervisor_name': instance.hypervisor,
792 args.update(override)
793 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
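# Illustrative usage (hypothetical LU, not part of Ganeti proper): the helper
# above is normally consumed from an LU's BuildHooksEnv. The resulting dict
# carries keys such as INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_MEMORY,
# INSTANCE_NIC0_MAC or INSTANCE_DISK0_SIZE; the hooks runner adds the
# GANETI_ prefix before exporting them to the hook scripts.
#
#   def BuildHooksEnv(self):
#     # override replaces individual _BuildInstanceHookEnv arguments, e.g.
#     # to reflect a memory change that has not been committed yet
#     env = _BuildInstanceHookEnvByObject(self, self.instance,
#                                         override={"memory": 2048})
#     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
#     return env, nl, nl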
796 def _AdjustCandidatePool(lu, exceptions):
797 """Adjust the candidate pool after node operations.
800 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
802 lu.LogInfo("Promoted nodes to master candidate role: %s",
803 utils.CommaJoin(node.name for node in mod_list))
804 for name in mod_list:
805 lu.context.ReaddNode(name)
806 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
808 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
812 def _DecideSelfPromotion(lu, exceptions=None):
813 """Decide whether I should promote myself as a master candidate.
816 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
817 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
818 # the new node will increase mc_max with one, so:
819 mc_should = min(mc_should + 1, cp_size)
820 return mc_now < mc_should
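# Worked example (hypothetical numbers): with candidate_pool_size = 10 and
# GetMasterCandidateStats reporting mc_now = 3, mc_should = 3, the node being
# added bumps mc_should to min(3 + 1, 10) = 4; since 3 < 4 the function
# returns True and the new node promotes itself to master candidate.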
823 def _CheckNicsBridgesExist(lu, target_nics, target_node,
824 profile=constants.PP_DEFAULT):
825 """Check that the brigdes needed by a list of nics exist.
828 c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
829 paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
830 for nic in target_nics]
831 brlist = [params[constants.NIC_LINK] for params in paramslist
832 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
834 result = lu.rpc.call_bridges_exist(target_node, brlist)
835 result.Raise("Error checking bridges on destination node '%s'" %
836 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
839 def _CheckInstanceBridgesExist(lu, instance, node=None):
840 """Check that the brigdes needed by an instance exist.
844 node = instance.primary_node
845 _CheckNicsBridgesExist(lu, instance.nics, node)
848 def _CheckOSVariant(os_obj, name):
849 """Check whether an OS name conforms to the os variants specification.
851 @type os_obj: L{objects.OS}
852 @param os_obj: OS object to check
854 @param name: OS name passed by the user, to check for validity
857 if not os_obj.supported_variants:
860 variant = name.split("+", 1)[1]
862 raise errors.OpPrereqError("OS name must include a variant",
865 if variant not in os_obj.supported_variants:
866 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
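# Example (hypothetical values): for an OS whose supported_variants list is
# ["default", "minimal"], the name "debootstrap+default" is accepted,
# "debootstrap+foo" fails with "Unsupported OS variant", and a bare
# "debootstrap" fails with "OS name must include a variant".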
869 def _GetNodeInstancesInner(cfg, fn):
870 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
873 def _GetNodeInstances(cfg, node_name):
874 """Returns a list of all primary and secondary instances on a node.
878 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
881 def _GetNodePrimaryInstances(cfg, node_name):
882 """Returns primary instances on a node.
885 return _GetNodeInstancesInner(cfg,
886 lambda inst: node_name == inst.primary_node)
889 def _GetNodeSecondaryInstances(cfg, node_name):
890 """Returns secondary instances on a node.
893 return _GetNodeInstancesInner(cfg,
894 lambda inst: node_name in inst.secondary_nodes)
897 def _GetStorageTypeArgs(cfg, storage_type):
898 """Returns the arguments for a storage type.
901 # Special case for file storage
902 if storage_type == constants.ST_FILE:
903 # storage.FileStorage wants a list of storage directories
904 return [[cfg.GetFileStorageDir()]]
909 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
912 for dev in instance.disks:
913 cfg.SetDiskID(dev, node_name)
915 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
916 result.Raise("Failed to get disk status from node %s" % node_name,
917 prereq=prereq, ecode=errors.ECODE_ENVIRON)
919 for idx, bdev_status in enumerate(result.payload):
920 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
926 class LUPostInitCluster(LogicalUnit):
927 """Logical unit for running hooks after cluster initialization.
930 HPATH = "cluster-init"
931 HTYPE = constants.HTYPE_CLUSTER
934 def BuildHooksEnv(self):
938 env = {"OP_TARGET": self.cfg.GetClusterName()}
939 mn = self.cfg.GetMasterNode()
942 def CheckPrereq(self):
943 """No prerequisites to check.
948 def Exec(self, feedback_fn):
955 class LUDestroyCluster(LogicalUnit):
956 """Logical unit for destroying the cluster.
959 HPATH = "cluster-destroy"
960 HTYPE = constants.HTYPE_CLUSTER
963 def BuildHooksEnv(self):
967 env = {"OP_TARGET": self.cfg.GetClusterName()}
970 def CheckPrereq(self):
971 """Check prerequisites.
973 This checks whether the cluster is empty.
975 Any errors are signaled by raising errors.OpPrereqError.
978 master = self.cfg.GetMasterNode()
980 nodelist = self.cfg.GetNodeList()
981 if len(nodelist) != 1 or nodelist[0] != master:
982 raise errors.OpPrereqError("There are still %d node(s) in"
983 " this cluster." % (len(nodelist) - 1),
985 instancelist = self.cfg.GetInstanceList()
987 raise errors.OpPrereqError("There are still %d instance(s) in"
988 " this cluster." % len(instancelist),
991 def Exec(self, feedback_fn):
992 """Destroys the cluster.
995 master = self.cfg.GetMasterNode()
996 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
998 # Run post hooks on master node before it's removed
999 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1001 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1003 # pylint: disable-msg=W0702
1004 self.LogWarning("Errors occurred running hooks on %s" % master)
1006 result = self.rpc.call_node_stop_master(master, False)
1007 result.Raise("Could not disable the master role")
1009 if modify_ssh_setup:
1010 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1011 utils.CreateBackup(priv_key)
1012 utils.CreateBackup(pub_key)
1017 def _VerifyCertificate(filename):
1018 """Verifies a certificate for LUVerifyCluster.
1020 @type filename: string
1021 @param filename: Path to PEM file
1025 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1026 utils.ReadFile(filename))
1027 except Exception, err: # pylint: disable-msg=W0703
1028 return (LUVerifyCluster.ETYPE_ERROR,
1029 "Failed to load X509 certificate %s: %s" % (filename, err))
1032 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1033 constants.SSL_CERT_EXPIRATION_ERROR)
1036 fnamemsg = "While verifying %s: %s" % (filename, msg)
1041 return (None, fnamemsg)
1042 elif errcode == utils.CERT_WARNING:
1043 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1044 elif errcode == utils.CERT_ERROR:
1045 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1047 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1050 class LUVerifyCluster(LogicalUnit):
1051 """Verifies the cluster status.
1054 HPATH = "cluster-verify"
1055 HTYPE = constants.HTYPE_CLUSTER
1056 _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1059 TCLUSTER = "cluster"
1061 TINSTANCE = "instance"
1063 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1064 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1065 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1066 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1067 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1068 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1070 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1071 ENODEDRBD = (TNODE, "ENODEDRBD")
1072 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1073 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1074 ENODEHV = (TNODE, "ENODEHV")
1075 ENODELVM = (TNODE, "ENODELVM")
1076 ENODEN1 = (TNODE, "ENODEN1")
1077 ENODENET = (TNODE, "ENODENET")
1078 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1079 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1080 ENODERPC = (TNODE, "ENODERPC")
1081 ENODESSH = (TNODE, "ENODESSH")
1082 ENODEVERSION = (TNODE, "ENODEVERSION")
1083 ENODESETUP = (TNODE, "ENODESETUP")
1084 ENODETIME = (TNODE, "ENODETIME")
1086 ETYPE_FIELD = "code"
1087 ETYPE_ERROR = "ERROR"
1088 ETYPE_WARNING = "WARNING"
1090 class NodeImage(object):
1091 """A class representing the logical and physical status of a node.
1093 @ivar volumes: a structure as returned from
1094 L{ganeti.backend.GetVolumeList} (runtime)
1095 @ivar instances: a list of running instances (runtime)
1096 @ivar pinst: list of configured primary instances (config)
1097 @ivar sinst: list of configured secondary instances (config)
1098 @ivar sbp: dictionary of {primary-node: list of instances} for all
1099 instances for which this node is a secondary (config)
1100 @ivar mfree: free memory, as reported by hypervisor (runtime)
1101 @ivar dfree: free disk, as reported by the node (runtime)
1102 @ivar offline: the offline status (config)
1103 @type rpc_fail: boolean
1104 @ivar rpc_fail: whether the RPC verify call failed (overall,
1105 not whether the individual keys were correct) (runtime)
1106 @type lvm_fail: boolean
1107 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1108 @type hyp_fail: boolean
1109 @ivar hyp_fail: whether the RPC call didn't return the instance list
1110 @type ghost: boolean
1111 @ivar ghost: whether this is a ghost node, i.e. not known in the configuration (config)
1114 def __init__(self, offline=False):
1122 self.offline = offline
1123 self.rpc_fail = False
1124 self.lvm_fail = False
1125 self.hyp_fail = False
1128 def ExpandNames(self):
1129 self.needed_locks = {
1130 locking.LEVEL_NODE: locking.ALL_SET,
1131 locking.LEVEL_INSTANCE: locking.ALL_SET,
1133 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1135 def _Error(self, ecode, item, msg, *args, **kwargs):
1136 """Format an error message.
1138 Based on the opcode's error_codes parameter, either format a
1139 parseable error code, or a simpler error string.
1141 This must be called only from Exec and functions called from Exec.
1144 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1146 # first complete the msg
1149 # then format the whole message
1150 if self.op.error_codes:
1151 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1157 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1158 # and finally report it via the feedback_fn
1159 self._feedback_fn(" - %s" % msg)
1161 def _ErrorIf(self, cond, *args, **kwargs):
1162 """Log an error message if the passed condition is True.
1165 cond = bool(cond) or self.op.debug_simulate_errors
1167 self._Error(*args, **kwargs)
1168 # do not mark the operation as failed for WARN cases only
1169 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1170 self.bad = self.bad or cond
1172 def _VerifyNode(self, ninfo, nresult):
1173 """Run multiple tests against a node.
1177 - compares ganeti version
1178 - checks vg existence and size > 20G
1179 - checks config file checksum
1180 - checks ssh to other nodes
1182 @type ninfo: L{objects.Node}
1183 @param ninfo: the node to check
1184 @param nresult: the results from the node
1186 @return: whether overall this call was successful (and we can expect
1187 reasonable values in the response)
1191 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1193 # main result, nresult should be a non-empty dict
1194 test = not nresult or not isinstance(nresult, dict)
1195 _ErrorIf(test, self.ENODERPC, node,
1196 "unable to verify node: no data returned")
1200 # compares ganeti version
1201 local_version = constants.PROTOCOL_VERSION
1202 remote_version = nresult.get("version", None)
1203 test = not (remote_version and
1204 isinstance(remote_version, (list, tuple)) and
1205 len(remote_version) == 2)
1206 _ErrorIf(test, self.ENODERPC, node,
1207 "connection to node returned invalid data")
1211 test = local_version != remote_version[0]
1212 _ErrorIf(test, self.ENODEVERSION, node,
1213 "incompatible protocol versions: master %s,"
1214 " node %s", local_version, remote_version[0])
1218 # node seems compatible, we can actually try to look into its results
1220 # full package version
1221 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1222 self.ENODEVERSION, node,
1223 "software version mismatch: master %s, node %s",
1224 constants.RELEASE_VERSION, remote_version[1],
1225 code=self.ETYPE_WARNING)
1227 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1228 if isinstance(hyp_result, dict):
1229 for hv_name, hv_result in hyp_result.iteritems():
1230 test = hv_result is not None
1231 _ErrorIf(test, self.ENODEHV, node,
1232 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1235 test = nresult.get(constants.NV_NODESETUP,
1236 ["Missing NODESETUP results"])
1237 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1242 def _VerifyNodeTime(self, ninfo, nresult,
1243 nvinfo_starttime, nvinfo_endtime):
1244 """Check the node time.
1246 @type ninfo: L{objects.Node}
1247 @param ninfo: the node to check
1248 @param nresult: the remote results for the node
1249 @param nvinfo_starttime: the start time of the RPC call
1250 @param nvinfo_endtime: the end time of the RPC call
1254 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1256 ntime = nresult.get(constants.NV_TIME, None)
1258 ntime_merged = utils.MergeTime(ntime)
1259 except (ValueError, TypeError):
1260 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1263 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1264 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1265 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1266 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1270 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1271 "Node time diverges by at least %s from master node time",
1274 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1275 """Check the node time.
1277 @type ninfo: L{objects.Node}
1278 @param ninfo: the node to check
1279 @param nresult: the remote results for the node
1280 @param vg_name: the configured VG name
1287 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1289 # checks vg existence and size > 20G
1290 vglist = nresult.get(constants.NV_VGLIST, None)
1292 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1294 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1295 constants.MIN_VG_SIZE)
1296 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1299 pvlist = nresult.get(constants.NV_PVLIST, None)
1300 test = pvlist is None
1301 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1303 # check that ':' is not present in PV names, since it's a
1304 # special character for lvcreate (denotes the range of PEs to
1306 for _, pvname, owner_vg in pvlist:
1307 test = ":" in pvname
1308 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1309 " '%s' of VG '%s'", pvname, owner_vg)
1311 def _VerifyNodeNetwork(self, ninfo, nresult):
1312 """Check the node time.
1314 @type ninfo: L{objects.Node}
1315 @param ninfo: the node to check
1316 @param nresult: the remote results for the node
1320 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1322 test = constants.NV_NODELIST not in nresult
1323 _ErrorIf(test, self.ENODESSH, node,
1324 "node hasn't returned node ssh connectivity data")
1326 if nresult[constants.NV_NODELIST]:
1327 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1328 _ErrorIf(True, self.ENODESSH, node,
1329 "ssh communication with node '%s': %s", a_node, a_msg)
1331 test = constants.NV_NODENETTEST not in nresult
1332 _ErrorIf(test, self.ENODENET, node,
1333 "node hasn't returned node tcp connectivity data")
1335 if nresult[constants.NV_NODENETTEST]:
1336 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1338 _ErrorIf(True, self.ENODENET, node,
1339 "tcp communication with node '%s': %s",
1340 anode, nresult[constants.NV_NODENETTEST][anode])
1342 def _VerifyInstance(self, instance, instanceconfig, node_image):
1343 """Verify an instance.
1345 This function checks to see if the required block devices are
1346 available on the instance's node.
1349 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1350 node_current = instanceconfig.primary_node
1352 node_vol_should = {}
1353 instanceconfig.MapLVsByNode(node_vol_should)
1355 for node in node_vol_should:
1356 n_img = node_image[node]
1357 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1358 # ignore missing volumes on offline or broken nodes
1360 for volume in node_vol_should[node]:
1361 test = volume not in n_img.volumes
1362 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1363 "volume %s missing on node %s", volume, node)
1365 if instanceconfig.admin_up:
1366 pri_img = node_image[node_current]
1367 test = instance not in pri_img.instances and not pri_img.offline
1368 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1369 "instance not running on its primary node %s",
1372 for node, n_img in node_image.items():
1373 if (not node == node_current):
1374 test = instance in n_img.instances
1375 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1376 "instance should not run on node %s", node)
1378 def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1379 """Verify if there are any unknown volumes in the cluster.
1381 The .os, .swap and backup volumes are ignored. All other volumes are
1382 reported as unknown.
1385 for node, n_img in node_image.items():
1386 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1387 # skip non-healthy nodes
1389 for volume in n_img.volumes:
1390 test = (node not in node_vol_should or
1391 volume not in node_vol_should[node])
1392 self._ErrorIf(test, self.ENODEORPHANLV, node,
1393 "volume %s is unknown", volume)
1395 def _VerifyOrphanInstances(self, instancelist, node_image):
1396 """Verify the list of running instances.
1398 This checks what instances are running but unknown to the cluster.
1401 for node, n_img in node_image.items():
1402 for o_inst in n_img.instances:
1403 test = o_inst not in instancelist
1404 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1405 "instance %s on node %s should not exist", o_inst, node)
1407 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1408 """Verify N+1 Memory Resilience.
1410 Check that if one single node dies we can still start all the
1411 instances it was primary for.
1414 for node, n_img in node_image.items():
1415 # This code checks that every node which is now listed as a
1416 # secondary has enough memory to host all the instances it is
1417 # supposed to, should a single other node in the cluster fail.
1418 # FIXME: not ready for failover to an arbitrary node
1419 # FIXME: does not support file-backed instances
1420 # WARNING: we currently take into account down instances as well
1421 # as up ones, considering that even if they're down someone
1422 # might want to start them even in the event of a node failure.
1423 for prinode, instances in n_img.sbp.items():
1425 for instance in instances:
1426 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1427 if bep[constants.BE_AUTO_BALANCE]:
1428 needed_mem += bep[constants.BE_MEMORY]
1429 test = n_img.mfree < needed_mem
1430 self._ErrorIf(test, self.ENODEN1, node,
1431 "not enough memory on to accommodate"
1432 " failovers should peer node %s fail", prinode)
1434 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1436 """Verifies and computes the node required file checksums.
1438 @type ninfo: L{objects.Node}
1439 @param ninfo: the node to check
1440 @param nresult: the remote results for the node
1441 @param file_list: required list of files
1442 @param local_cksum: dictionary of local files and their checksums
1443 @param master_files: list of files that only masters should have
1447 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1449 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1450 test = not isinstance(remote_cksum, dict)
1451 _ErrorIf(test, self.ENODEFILECHECK, node,
1452 "node hasn't returned file checksum data")
1456 for file_name in file_list:
1457 node_is_mc = ninfo.master_candidate
1458 must_have = (file_name not in master_files) or node_is_mc
1460 test1 = file_name not in remote_cksum
1462 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1464 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1465 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1466 "file '%s' missing", file_name)
1467 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1468 "file '%s' has wrong checksum", file_name)
1469 # not candidate and this is not a must-have file
1470 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1471 "file '%s' should not exist on non master"
1472 " candidates (and the file is outdated)", file_name)
1473 # all good, except non-master/non-must have combination
1474 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1475 "file '%s' should not exist"
1476 " on non master candidates", file_name)
1478 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1479 """Verifies and the node DRBD status.
1481 @type ninfo: L{objects.Node}
1482 @param ninfo: the node to check
1483 @param nresult: the remote results for the node
1484 @param instanceinfo: the dict of instances
1485 @param drbd_map: the DRBD map as returned by
1486 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1490 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1492 # compute the DRBD minors
1494 for minor, instance in drbd_map[node].items():
1495 test = instance not in instanceinfo
1496 _ErrorIf(test, self.ECLUSTERCFG, None,
1497 "ghost instance '%s' in temporary DRBD map", instance)
1498 # ghost instance should not be running, but otherwise we
1499 # don't give double warnings (both ghost instance and
1500 # unallocated minor in use)
1502 node_drbd[minor] = (instance, False)
1504 instance = instanceinfo[instance]
1505 node_drbd[minor] = (instance.name, instance.admin_up)
1507 # and now check them
1508 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1509 test = not isinstance(used_minors, (tuple, list))
1510 _ErrorIf(test, self.ENODEDRBD, node,
1511 "cannot parse drbd status file: %s", str(used_minors))
1513 # we cannot check drbd status
1516 for minor, (iname, must_exist) in node_drbd.items():
1517 test = minor not in used_minors and must_exist
1518 _ErrorIf(test, self.ENODEDRBD, node,
1519 "drbd minor %d of instance %s is not active", minor, iname)
1520 for minor in used_minors:
1521 test = minor not in node_drbd
1522 _ErrorIf(test, self.ENODEDRBD, node,
1523 "unallocated drbd minor %d is in use", minor)
1525 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1526 """Verifies and updates the node volume data.
1528 This function will update a L{NodeImage}'s internal structures
1529 with data from the remote call.
1531 @type ninfo: L{objects.Node}
1532 @param ninfo: the node to check
1533 @param nresult: the remote results for the node
1534 @param nimg: the node image object
1535 @param vg_name: the configured VG name
1539 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1541 nimg.lvm_fail = True
1542 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1545 elif isinstance(lvdata, basestring):
1546 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1547 utils.SafeEncode(lvdata))
1548 elif not isinstance(lvdata, dict):
1549 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1551 nimg.volumes = lvdata
1552 nimg.lvm_fail = False
1554 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1555 """Verifies and updates the node instance list.
1557 If the listing was successful, then updates this node's instance
1558 list. Otherwise, it marks the RPC call as failed for the instance list.
1561 @type ninfo: L{objects.Node}
1562 @param ninfo: the node to check
1563 @param nresult: the remote results for the node
1564 @param nimg: the node image object
1567 idata = nresult.get(constants.NV_INSTANCELIST, None)
1568 test = not isinstance(idata, list)
1569 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1570 " (instancelist): %s", utils.SafeEncode(str(idata)))
1572 nimg.hyp_fail = True
1574 nimg.instances = idata
1576 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1577 """Verifies and computes a node information map
1579 @type ninfo: L{objects.Node}
1580 @param ninfo: the node to check
1581 @param nresult: the remote results for the node
1582 @param nimg: the node image object
1583 @param vg_name: the configured VG name
1587 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1589 # try to read free memory (from the hypervisor)
1590 hv_info = nresult.get(constants.NV_HVINFO, None)
1591 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1592 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1595 nimg.mfree = int(hv_info["memory_free"])
1596 except (ValueError, TypeError):
1597 _ErrorIf(True, self.ENODERPC, node,
1598 "node returned invalid nodeinfo, check hypervisor")
1600 # FIXME: devise a free space model for file based instances as well
1601 if vg_name is not None:
1602 test = (constants.NV_VGLIST not in nresult or
1603 vg_name not in nresult[constants.NV_VGLIST])
1604 _ErrorIf(test, self.ENODELVM, node,
1605 "node didn't return data for the volume group '%s'"
1606 " - it is either missing or broken", vg_name)
1609 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1610 except (ValueError, TypeError):
1611 _ErrorIf(True, self.ENODERPC, node,
1612 "node returned invalid LVM info, check LVM status")
1614 def CheckPrereq(self):
1615 """Check prerequisites.
1617 Transform the list of checks we're going to skip into a set and check that
1618 all its members are valid.
1621 self.skip_set = frozenset(self.op.skip_checks)
1622 if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1623 raise errors.OpPrereqError("Invalid checks to be skipped specified",
1626 def BuildHooksEnv(self):
1629 Cluster-Verify hooks are run only in the post phase; their failure is
1630 logged in the verify output and makes the verification fail.
1633 all_nodes = self.cfg.GetNodeList()
1635 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1637 for node in self.cfg.GetAllNodesInfo().values():
1638 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1640 return env, [], all_nodes
1642 def Exec(self, feedback_fn):
1643 """Verify integrity of cluster, performing various test on nodes.
1647 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1648 verbose = self.op.verbose
1649 self._feedback_fn = feedback_fn
1650 feedback_fn("* Verifying global settings")
1651 for msg in self.cfg.VerifyConfig():
1652 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1654 # Check the cluster certificates
1655 for cert_filename in constants.ALL_CERT_FILES:
1656 (errcode, msg) = _VerifyCertificate(cert_filename)
1657 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1659 vg_name = self.cfg.GetVGName()
1660 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1661 cluster = self.cfg.GetClusterInfo()
1662 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1663 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1664 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1665 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1666 for iname in instancelist)
1667 i_non_redundant = [] # Non redundant instances
1668 i_non_a_balanced = [] # Non auto-balanced instances
1669 n_offline = 0 # Count of offline nodes
1670 n_drained = 0 # Count of nodes being drained
1671 node_vol_should = {}
1673 # FIXME: verify OS list
1674 # do local checksums
1675 master_files = [constants.CLUSTER_CONF_FILE]
1677 file_names = ssconf.SimpleStore().GetFileList()
1678 file_names.extend(constants.ALL_CERT_FILES)
1679 file_names.extend(master_files)
1680 if cluster.modify_etc_hosts:
1681 file_names.append(constants.ETC_HOSTS)
1683 local_checksums = utils.FingerprintFiles(file_names)
1685 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1686 node_verify_param = {
1687 constants.NV_FILELIST: file_names,
1688 constants.NV_NODELIST: [node.name for node in nodeinfo
1689 if not node.offline],
1690 constants.NV_HYPERVISOR: hypervisors,
1691 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1692 node.secondary_ip) for node in nodeinfo
1693 if not node.offline],
1694 constants.NV_INSTANCELIST: hypervisors,
1695 constants.NV_VERSION: None,
1696 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1697 constants.NV_NODESETUP: None,
1698 constants.NV_TIME: None,
1701 if vg_name is not None:
1702 node_verify_param[constants.NV_VGLIST] = None
1703 node_verify_param[constants.NV_LVLIST] = vg_name
1704 node_verify_param[constants.NV_PVLIST] = [vg_name]
1705 node_verify_param[constants.NV_DRBDLIST] = None
1707 # Build our expected cluster state
1708 node_image = dict((node.name, self.NodeImage(offline=node.offline))
1709 for node in nodeinfo)
1711 for instance in instancelist:
1712 inst_config = instanceinfo[instance]
1714 for nname in inst_config.all_nodes:
1715 if nname not in node_image:
1717 gnode = self.NodeImage()
1719 node_image[nname] = gnode
1721 inst_config.MapLVsByNode(node_vol_should)
1723 pnode = inst_config.primary_node
1724 node_image[pnode].pinst.append(instance)
1726 for snode in inst_config.secondary_nodes:
1727 nimg = node_image[snode]
1728 nimg.sinst.append(instance)
1729 if pnode not in nimg.sbp:
1730 nimg.sbp[pnode] = []
1731 nimg.sbp[pnode].append(instance)
1733 # At this point, we have the in-memory data structures complete,
1734 # except for the runtime information, which we'll gather next
1736 # Due to the way our RPC system works, exact response times cannot be
1737 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1738 # time before and after executing the request, we can at least have a time window.
1740 nvinfo_starttime = time.time()
1741 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1742 self.cfg.GetClusterName())
1743 nvinfo_endtime = time.time()
1745 master_node = self.cfg.GetMasterNode()
1746 all_drbd_map = self.cfg.ComputeDRBDMap()
1748 feedback_fn("* Verifying node status")
1749 for node_i in nodeinfo:
1751 nimg = node_image[node]
1755 feedback_fn("* Skipping offline node %s" % (node,))
1759 if node == master_node:
1761 elif node_i.master_candidate:
1762 ntype = "master candidate"
1763 elif node_i.drained:
1769 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1771 msg = all_nvinfo[node].fail_msg
1772 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1774 nimg.rpc_fail = True
1777 nresult = all_nvinfo[node].payload
1779 nimg.call_ok = self._VerifyNode(node_i, nresult)
1780 self._VerifyNodeNetwork(node_i, nresult)
1781 self._VerifyNodeLVM(node_i, nresult, vg_name)
1782 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1784 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1785 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1787 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1788 self._UpdateNodeInstances(node_i, nresult, nimg)
1789 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1791 feedback_fn("* Verifying instance status")
1792 for instance in instancelist:
1794 feedback_fn("* Verifying instance %s" % instance)
1795 inst_config = instanceinfo[instance]
1796 self._VerifyInstance(instance, inst_config, node_image)
1797 inst_nodes_offline = []
1799 pnode = inst_config.primary_node
1800 pnode_img = node_image[pnode]
1801 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1802 self.ENODERPC, pnode, "instance %s, connection to"
1803 " primary node failed", instance)
1805 if pnode_img.offline:
1806 inst_nodes_offline.append(pnode)
1808 # If the instance is non-redundant we cannot survive losing its primary
1809 # node, so we are not N+1 compliant. On the other hand we have no disk
1810 # templates with more than one secondary so that situation is not well supported either.
1812 # FIXME: does not support file-backed instances
1813 if not inst_config.secondary_nodes:
1814 i_non_redundant.append(instance)
1815 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
1816 instance, "instance has multiple secondary nodes: %s",
1817 utils.CommaJoin(inst_config.secondary_nodes),
1818 code=self.ETYPE_WARNING)
1820 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1821 i_non_a_balanced.append(instance)
1823 for snode in inst_config.secondary_nodes:
1824 s_img = node_image[snode]
1825 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
1826 "instance %s, connection to secondary node failed", instance)
1829 inst_nodes_offline.append(snode)
1831 # warn that the instance lives on offline nodes
1832 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1833 "instance lives on offline node(s) %s",
1834 utils.CommaJoin(inst_nodes_offline))
1835 # ... or ghost nodes
1836 for node in inst_config.all_nodes:
1837 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
1838 "instance lives on ghost node %s", node)
1840 feedback_fn("* Verifying orphan volumes")
1841 self._VerifyOrphanVolumes(node_vol_should, node_image)
1843 feedback_fn("* Verifying oprhan instances")
1844 self._VerifyOrphanInstances(instancelist, node_image)
1846 if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1847 feedback_fn("* Verifying N+1 Memory redundancy")
1848 self._VerifyNPlusOneMemory(node_image, instanceinfo)
1850 feedback_fn("* Other Notes")
1852 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
1853 % len(i_non_redundant))
1855 if i_non_a_balanced:
1856 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
1857 % len(i_non_a_balanced))
1860 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
1863 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
1867 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1868 """Analyze the post-hooks' result
1870 This method analyses the hook result, handles it, and sends some
1871 nicely-formatted feedback back to the user.
1873 @param phase: one of L{constants.HOOKS_PHASE_POST} or
1874 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1875 @param hooks_results: the results of the multi-node hooks rpc call
1876 @param feedback_fn: function used to send feedback back to the caller
1877 @param lu_result: previous Exec result
1878 @return: the new Exec result, based on the previous result
1882 # We only really run POST phase hooks, and are only interested in
1884 if phase == constants.HOOKS_PHASE_POST:
1885 # Used to change hooks' output to proper indentation
1886 indent_re = re.compile('^', re.M)
1887 feedback_fn("* Hooks Results")
1888 assert hooks_results, "invalid result from hooks"
1890 for node_name in hooks_results:
1891 res = hooks_results[node_name]
1893 test = msg and not res.offline
1894 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1895 "Communication failure in hooks execution: %s", msg)
1896 if res.offline or msg:
1897 # No need to investigate payload if node is offline or gave an error.
1898 # override manually lu_result here as _ErrorIf only
1899 # overrides self.bad
1902 for script, hkr, output in res.payload:
1903 test = hkr == constants.HKR_FAIL
1904 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1905 "Script %s failed, output:", script)
1907 output = indent_re.sub(' ', output)
1908 feedback_fn("%s" % output)
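# Illustrative sketch (not part of the original code path): the '^' multi-line
# regexp compiled above simply prefixes every line of the hook output so that
# it nests under the "* Hooks Results" heading.  The sample text and the exact
# amount of indentation below are made up for demonstration only.
def _DemoIndentHookOutput():
  """Show how hook output is re-indented before being fed back."""
  import re
  indent_re = re.compile('^', re.M)
  sample = "stdout line one\nstdout line two"
  # every line start gets a leading prefix, matching the feedback style
  return indent_re.sub('  ', sample)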
1914 class LUVerifyDisks(NoHooksLU):
1915 """Verifies the cluster disks status.
1921 def ExpandNames(self):
1922 self.needed_locks = {
1923 locking.LEVEL_NODE: locking.ALL_SET,
1924 locking.LEVEL_INSTANCE: locking.ALL_SET,
1926 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1928 def CheckPrereq(self):
1929 """Check prerequisites.
1931 This has no prerequisites.
1936 def Exec(self, feedback_fn):
1937 """Verify integrity of cluster disks.
1939 @rtype: tuple of three items
1940 @return: a tuple of (dict of node-to-node_error, list of instances
1941 which need activate-disks, dict of instance: (node, volume) for missing volumes
1945 result = res_nodes, res_instances, res_missing = {}, [], {}
1947 vg_name = self.cfg.GetVGName()
1948 nodes = utils.NiceSort(self.cfg.GetNodeList())
1949 instances = [self.cfg.GetInstanceInfo(name)
1950 for name in self.cfg.GetInstanceList()]
1953 for inst in instances:
1955 if (not inst.admin_up or
1956 inst.disk_template not in constants.DTS_NET_MIRROR):
1958 inst.MapLVsByNode(inst_lvs)
1959 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1960 for node, vol_list in inst_lvs.iteritems():
1961 for vol in vol_list:
1962 nv_dict[(node, vol)] = inst
1967 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
1971 node_res = node_lvs[node]
1972 if node_res.offline:
1974 msg = node_res.fail_msg
1976 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
1977 res_nodes[node] = msg
1980 lvs = node_res.payload
1981 for lv_name, (_, _, lv_online) in lvs.items():
1982 inst = nv_dict.pop((node, lv_name), None)
1983 if (not lv_online and inst is not None
1984 and inst.name not in res_instances):
1985 res_instances.append(inst.name)
1987 # any leftover items in nv_dict are missing LVs, let's arrange the data per instance
1989 for key, inst in nv_dict.iteritems():
1990 if inst.name not in res_missing:
1991 res_missing[inst.name] = []
1992 res_missing[inst.name].append(key)
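# Illustrative sketch, not used by any LU: the Exec method above first inverts
# a per-instance {node: [volume, ...]} mapping into {(node, volume): instance}
# and then treats anything still left in that dict after matching the node LV
# lists as a missing volume.  The instance, node and volume names are made up.
def _DemoInvertLvMapping():
  """Return the {(node, volume): instance_name} view of a sample mapping."""
  inst_lvs = {
    "node1.example.com": ["xenvg/disk0_data", "xenvg/disk0_meta"],
    "node2.example.com": ["xenvg/disk0_data"],
    }
  nv_dict = {}
  for node, vol_list in inst_lvs.items():
    for vol in vol_list:
      nv_dict[(node, vol)] = "instance1.example.com"
  return nv_dict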
1997 class LURepairDiskSizes(NoHooksLU):
1998 """Verifies the cluster disks sizes.
2001 _OP_REQP = ["instances"]
2004 def ExpandNames(self):
2005 if not isinstance(self.op.instances, list):
2006 raise errors.OpPrereqError("Invalid argument type 'instances'",
2009 if self.op.instances:
2010 self.wanted_names = []
2011 for name in self.op.instances:
2012 full_name = _ExpandInstanceName(self.cfg, name)
2013 self.wanted_names.append(full_name)
2014 self.needed_locks = {
2015 locking.LEVEL_NODE: [],
2016 locking.LEVEL_INSTANCE: self.wanted_names,
2018 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2020 self.wanted_names = None
2021 self.needed_locks = {
2022 locking.LEVEL_NODE: locking.ALL_SET,
2023 locking.LEVEL_INSTANCE: locking.ALL_SET,
2025 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2027 def DeclareLocks(self, level):
2028 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2029 self._LockInstancesNodes(primary_only=True)
2031 def CheckPrereq(self):
2032 """Check prerequisites.
2034 This only checks the optional instance list against the existing names.
2037 if self.wanted_names is None:
2038 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2040 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2041 in self.wanted_names]
2043 def _EnsureChildSizes(self, disk):
2044 """Ensure children of the disk have the needed disk size.
2046 This is valid mainly for DRBD8 and fixes an issue where the
2047 children have smaller disk size.
2049 @param disk: an L{ganeti.objects.Disk} object
2052 if disk.dev_type == constants.LD_DRBD8:
2053 assert disk.children, "Empty children for DRBD8?"
2054 fchild = disk.children[0]
2055 mismatch = fchild.size < disk.size
2057 self.LogInfo("Child disk has size %d, parent %d, fixing",
2058 fchild.size, disk.size)
2059 fchild.size = disk.size
2061 # and we recurse on this child only, not on the metadev
2062 return self._EnsureChildSizes(fchild) or mismatch
2066 def Exec(self, feedback_fn):
2067 """Verify the size of cluster disks.
2070 # TODO: check child disks too
2071 # TODO: check differences in size between primary/secondary nodes
2073 for instance in self.wanted_instances:
2074 pnode = instance.primary_node
2075 if pnode not in per_node_disks:
2076 per_node_disks[pnode] = []
2077 for idx, disk in enumerate(instance.disks):
2078 per_node_disks[pnode].append((instance, idx, disk))
2081 for node, dskl in per_node_disks.items():
2082 newl = [v[2].Copy() for v in dskl]
2084 self.cfg.SetDiskID(dsk, node)
2085 result = self.rpc.call_blockdev_getsizes(node, newl)
2087 self.LogWarning("Failure in blockdev_getsizes call to node"
2088 " %s, ignoring", node)
2090 if len(result.data) != len(dskl):
2091 self.LogWarning("Invalid result from node %s, ignoring node results",
2094 for ((instance, idx, disk), size) in zip(dskl, result.data):
2096 self.LogWarning("Disk %d of instance %s did not return size"
2097 " information, ignoring", idx, instance.name)
2099 if not isinstance(size, (int, long)):
2100 self.LogWarning("Disk %d of instance %s did not return valid"
2101 " size information, ignoring", idx, instance.name)
2104 if size != disk.size:
2105 self.LogInfo("Disk %d of instance %s has mismatched size,"
2106 " correcting: recorded %d, actual %d", idx,
2107 instance.name, disk.size, size)
2109 self.cfg.Update(instance, feedback_fn)
2110 changed.append((instance.name, idx, size))
2111 if self._EnsureChildSizes(disk):
2112 self.cfg.Update(instance, feedback_fn)
2113 changed.append((instance.name, idx, disk.size))
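# Illustrative sketch of the child-size rule enforced by _EnsureChildSizes
# above: for a DRBD8 disk only the first child (the data device) is grown to
# the parent size; the metadata child is left alone.  _FakeDisk is a made-up
# stand-in for objects.Disk, used only for this demonstration.
def _DemoEnsureChildSizes():
  """Return (fixed_size, mismatch_found) for a sample parent/child pair."""
  class _FakeDisk(object):
    def __init__(self, size, children=None):
      self.size = size
      self.children = children or []
  data_child = _FakeDisk(1000)
  parent = _FakeDisk(1024, children=[data_child, _FakeDisk(128)])
  mismatch = data_child.size < parent.size
  if mismatch:
    data_child.size = parent.size
  return data_child.size, mismatch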
2117 class LURenameCluster(LogicalUnit):
2118 """Rename the cluster.
2121 HPATH = "cluster-rename"
2122 HTYPE = constants.HTYPE_CLUSTER
2125 def BuildHooksEnv(self):
2130 "OP_TARGET": self.cfg.GetClusterName(),
2131 "NEW_NAME": self.op.name,
2133 mn = self.cfg.GetMasterNode()
2134 all_nodes = self.cfg.GetNodeList()
2135 return env, [mn], all_nodes
2137 def CheckPrereq(self):
2138 """Verify that the passed name is a valid one.
2141 hostname = utils.GetHostInfo(self.op.name)
2143 new_name = hostname.name
2144 self.ip = new_ip = hostname.ip
2145 old_name = self.cfg.GetClusterName()
2146 old_ip = self.cfg.GetMasterIP()
2147 if new_name == old_name and new_ip == old_ip:
2148 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2149 " cluster has changed",
2151 if new_ip != old_ip:
2152 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2153 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2154 " reachable on the network. Aborting." %
2155 new_ip, errors.ECODE_NOTUNIQUE)
2157 self.op.name = new_name
2159 def Exec(self, feedback_fn):
2160 """Rename the cluster.
2163 clustername = self.op.name
2166 # shutdown the master IP
2167 master = self.cfg.GetMasterNode()
2168 result = self.rpc.call_node_stop_master(master, False)
2169 result.Raise("Could not disable the master role")
2172 cluster = self.cfg.GetClusterInfo()
2173 cluster.cluster_name = clustername
2174 cluster.master_ip = ip
2175 self.cfg.Update(cluster, feedback_fn)
2177 # update the known hosts file
2178 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2179 node_list = self.cfg.GetNodeList()
2181 node_list.remove(master)
2184 result = self.rpc.call_upload_file(node_list,
2185 constants.SSH_KNOWN_HOSTS_FILE)
2186 for to_node, to_result in result.iteritems():
2187 msg = to_result.fail_msg
2189 msg = ("Copy of file %s to node %s failed: %s" %
2190 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2191 self.proc.LogWarning(msg)
2194 result = self.rpc.call_node_start_master(master, False, False)
2195 msg = result.fail_msg
2197 self.LogWarning("Could not re-enable the master role on"
2198 " the master, please restart manually: %s", msg)
2201 def _RecursiveCheckIfLVMBased(disk):
2202 """Check if the given disk or its children are lvm-based.
2204 @type disk: L{objects.Disk}
2205 @param disk: the disk to check
2207 @return: boolean indicating whether an LD_LV dev_type was found or not
2211 for chdisk in disk.children:
2212 if _RecursiveCheckIfLVMBased(chdisk):
2214 return disk.dev_type == constants.LD_LV
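# Illustrative sketch of how _RecursiveCheckIfLVMBased walks a disk tree: any
# LD_LV leaf anywhere below the given disk makes the whole disk count as
# lvm-based.  _FakeDisk is a made-up stand-in for objects.Disk, and the
# strings "lvm"/"drbd8" stand in for the constants.LD_* values.
def _DemoRecursiveLvmCheck():
  """Return True, since the sample DRBD disk sits on top of LV children."""
  class _FakeDisk(object):
    def __init__(self, dev_type, children=None):
      self.dev_type = dev_type
      self.children = children or []
  def _is_lvm_based(disk):
    for chdisk in disk.children:
      if _is_lvm_based(chdisk):
        return True
    return disk.dev_type == "lvm"
  drbd_disk = _FakeDisk("drbd8", children=[_FakeDisk("lvm"), _FakeDisk("lvm")])
  return _is_lvm_based(drbd_disk)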
2217 class LUSetClusterParams(LogicalUnit):
2218 """Change the parameters of the cluster.
2221 HPATH = "cluster-modify"
2222 HTYPE = constants.HTYPE_CLUSTER
2226 def CheckArguments(self):
2230 for attr in ["candidate_pool_size",
2231 "uid_pool", "add_uids", "remove_uids"]:
2232 if not hasattr(self.op, attr):
2233 setattr(self.op, attr, None)
2235 if self.op.candidate_pool_size is not None:
2237 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2238 except (ValueError, TypeError), err:
2239 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2240 str(err), errors.ECODE_INVAL)
2241 if self.op.candidate_pool_size < 1:
2242 raise errors.OpPrereqError("At least one master candidate needed",
2245 _CheckBooleanOpField(self.op, "maintain_node_health")
2247 if self.op.uid_pool:
2248 uidpool.CheckUidPool(self.op.uid_pool)
2250 if self.op.add_uids:
2251 uidpool.CheckUidPool(self.op.add_uids)
2253 if self.op.remove_uids:
2254 uidpool.CheckUidPool(self.op.remove_uids)
2256 def ExpandNames(self):
2257 # FIXME: in the future maybe other cluster params won't require checking on
2258 # all nodes to be modified.
2259 self.needed_locks = {
2260 locking.LEVEL_NODE: locking.ALL_SET,
2262 self.share_locks[locking.LEVEL_NODE] = 1
2264 def BuildHooksEnv(self):
2269 "OP_TARGET": self.cfg.GetClusterName(),
2270 "NEW_VG_NAME": self.op.vg_name,
2272 mn = self.cfg.GetMasterNode()
2273 return env, [mn], [mn]
2275 def CheckPrereq(self):
2276 """Check prerequisites.
2278 This checks whether the given params don't conflict and
2279 if the given volume group is valid.
2282 if self.op.vg_name is not None and not self.op.vg_name:
2283 instances = self.cfg.GetAllInstancesInfo().values()
2284 for inst in instances:
2285 for disk in inst.disks:
2286 if _RecursiveCheckIfLVMBased(disk):
2287 raise errors.OpPrereqError("Cannot disable lvm storage while"
2288 " lvm-based instances exist",
2291 node_list = self.acquired_locks[locking.LEVEL_NODE]
2293 # if vg_name is not None, check the given volume group on all nodes
2295 vglist = self.rpc.call_vg_list(node_list)
2296 for node in node_list:
2297 msg = vglist[node].fail_msg
2299 # ignoring down node
2300 self.LogWarning("Error while gathering data on node %s"
2301 " (ignoring node): %s", node, msg)
2303 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2305 constants.MIN_VG_SIZE)
2307 raise errors.OpPrereqError("Error on node '%s': %s" %
2308 (node, vgstatus), errors.ECODE_ENVIRON)
2310 self.cluster = cluster = self.cfg.GetClusterInfo()
2311 # validate params changes
2312 if self.op.beparams:
2313 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2314 self.new_beparams = objects.FillDict(
2315 cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2317 if self.op.nicparams:
2318 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2319 self.new_nicparams = objects.FillDict(
2320 cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2321 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2324 # check all instances for consistency
2325 for instance in self.cfg.GetAllInstancesInfo().values():
2326 for nic_idx, nic in enumerate(instance.nics):
2327 params_copy = copy.deepcopy(nic.nicparams)
2328 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2330 # check parameter syntax
2332 objects.NIC.CheckParameterSyntax(params_filled)
2333 except errors.ConfigurationError, err:
2334 nic_errors.append("Instance %s, nic/%d: %s" %
2335 (instance.name, nic_idx, err))
2337 # if we're moving instances to routed, check that they have an ip
2338 target_mode = params_filled[constants.NIC_MODE]
2339 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2340 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2341 (instance.name, nic_idx))
2343 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2344 "\n".join(nic_errors))
2346 # hypervisor list/parameters
2347 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2348 if self.op.hvparams:
2349 if not isinstance(self.op.hvparams, dict):
2350 raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2352 for hv_name, hv_dict in self.op.hvparams.items():
2353 if hv_name not in self.new_hvparams:
2354 self.new_hvparams[hv_name] = hv_dict
2356 self.new_hvparams[hv_name].update(hv_dict)
2358 # os hypervisor parameters
2359 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2361 if not isinstance(self.op.os_hvp, dict):
2362 raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2364 for os_name, hvs in self.op.os_hvp.items():
2365 if not isinstance(hvs, dict):
2366 raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2367 " input"), errors.ECODE_INVAL)
2368 if os_name not in self.new_os_hvp:
2369 self.new_os_hvp[os_name] = hvs
2371 for hv_name, hv_dict in hvs.items():
2372 if hv_name not in self.new_os_hvp[os_name]:
2373 self.new_os_hvp[os_name][hv_name] = hv_dict
2375 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2377 # changes to the hypervisor list
2378 if self.op.enabled_hypervisors is not None:
2379 self.hv_list = self.op.enabled_hypervisors
2380 if not self.hv_list:
2381 raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2382 " least one member",
2384 invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2386 raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2388 utils.CommaJoin(invalid_hvs),
2390 for hv in self.hv_list:
2391 # if the hypervisor doesn't already exist in the cluster
2392 # hvparams, we initialize it to empty, and then (in both
2393 # cases) we make sure to fill the defaults, as we might not
2394 # have a complete defaults list if the hypervisor wasn't enabled before
2396 if hv not in new_hvp:
2398 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2399 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2401 self.hv_list = cluster.enabled_hypervisors
2403 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2404 # either the enabled list has changed, or the parameters have, validate
2405 for hv_name, hv_params in self.new_hvparams.items():
2406 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2407 (self.op.enabled_hypervisors and
2408 hv_name in self.op.enabled_hypervisors)):
2409 # either this is a new hypervisor, or its parameters have changed
2410 hv_class = hypervisor.GetHypervisor(hv_name)
2411 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2412 hv_class.CheckParameterSyntax(hv_params)
2413 _CheckHVParams(self, node_list, hv_name, hv_params)
2416 # no need to check any newly-enabled hypervisors, since the
2417 # defaults have already been checked in the above code-block
2418 for os_name, os_hvp in self.new_os_hvp.items():
2419 for hv_name, hv_params in os_hvp.items():
2420 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2421 # we need to fill in the new os_hvp on top of the actual hv_p
2422 cluster_defaults = self.new_hvparams.get(hv_name, {})
2423 new_osp = objects.FillDict(cluster_defaults, hv_params)
2424 hv_class = hypervisor.GetHypervisor(hv_name)
2425 hv_class.CheckParameterSyntax(new_osp)
2426 _CheckHVParams(self, node_list, hv_name, new_osp)
2429 def Exec(self, feedback_fn):
2430 """Change the parameters of the cluster.
2433 if self.op.vg_name is not None:
2434 new_volume = self.op.vg_name
2437 if new_volume != self.cfg.GetVGName():
2438 self.cfg.SetVGName(new_volume)
2440 feedback_fn("Cluster LVM configuration already in desired"
2441 " state, not changing")
2442 if self.op.hvparams:
2443 self.cluster.hvparams = self.new_hvparams
2445 self.cluster.os_hvp = self.new_os_hvp
2446 if self.op.enabled_hypervisors is not None:
2447 self.cluster.hvparams = self.new_hvparams
2448 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2449 if self.op.beparams:
2450 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2451 if self.op.nicparams:
2452 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2454 if self.op.candidate_pool_size is not None:
2455 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2456 # we need to update the pool size here, otherwise the save will fail
2457 _AdjustCandidatePool(self, [])
2459 if self.op.maintain_node_health is not None:
2460 self.cluster.maintain_node_health = self.op.maintain_node_health
2462 if self.op.add_uids is not None:
2463 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2465 if self.op.remove_uids is not None:
2466 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2468 if self.op.uid_pool is not None:
2469 self.cluster.uid_pool = self.op.uid_pool
2471 self.cfg.Update(self.cluster, feedback_fn)
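# Illustrative sketch of the parameter merging used throughout CheckPrereq
# above: the cluster-level defaults are taken as a base and the values given
# in the opcode are layered on top (this mirrors what objects.FillDict is
# assumed to do).  The parameter names and values below are made up.
def _DemoFillParamDefaults():
  """Return the effective beparams for a sample defaults/override pair."""
  cluster_defaults = {"memory": 128, "vcpus": 1, "auto_balance": True}
  opcode_overrides = {"memory": 512}
  effective = cluster_defaults.copy()
  effective.update(opcode_overrides)
  return effective  # {'memory': 512, 'vcpus': 1, 'auto_balance': True}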
2474 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2475 """Distribute additional files which are part of the cluster configuration.
2477 ConfigWriter takes care of distributing the config and ssconf files, but
2478 there are more files which should be distributed to all nodes. This function
2479 makes sure those are copied.
2481 @param lu: calling logical unit
2482 @param additional_nodes: list of nodes not in the config to distribute to
2485 # 1. Gather target nodes
2486 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2487 dist_nodes = lu.cfg.GetOnlineNodeList()
2488 if additional_nodes is not None:
2489 dist_nodes.extend(additional_nodes)
2490 if myself.name in dist_nodes:
2491 dist_nodes.remove(myself.name)
2493 # 2. Gather files to distribute
2494 dist_files = set([constants.ETC_HOSTS,
2495 constants.SSH_KNOWN_HOSTS_FILE,
2496 constants.RAPI_CERT_FILE,
2497 constants.RAPI_USERS_FILE,
2498 constants.CONFD_HMAC_KEY,
2501 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2502 for hv_name in enabled_hypervisors:
2503 hv_class = hypervisor.GetHypervisor(hv_name)
2504 dist_files.update(hv_class.GetAncillaryFiles())
2506 # 3. Perform the files upload
2507 for fname in dist_files:
2508 if os.path.exists(fname):
2509 result = lu.rpc.call_upload_file(dist_nodes, fname)
2510 for to_node, to_result in result.items():
2511 msg = to_result.fail_msg
2513 msg = ("Copy of file %s to node %s failed: %s" %
2514 (fname, to_node, msg))
2515 lu.proc.LogWarning(msg)
2518 class LURedistributeConfig(NoHooksLU):
2519 """Force the redistribution of cluster configuration.
2521 This is a very simple LU.
2527 def ExpandNames(self):
2528 self.needed_locks = {
2529 locking.LEVEL_NODE: locking.ALL_SET,
2531 self.share_locks[locking.LEVEL_NODE] = 1
2533 def CheckPrereq(self):
2534 """Check prerequisites.
2538 def Exec(self, feedback_fn):
2539 """Redistribute the configuration.
2542 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2543 _RedistributeAncillaryFiles(self)
2546 def _WaitForSync(lu, instance, oneshot=False):
2547 """Sleep and poll for an instance's disk to sync.
2550 if not instance.disks:
2554 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2556 node = instance.primary_node
2558 for dev in instance.disks:
2559 lu.cfg.SetDiskID(dev, node)
2561 # TODO: Convert to utils.Retry
2564 degr_retries = 10 # in seconds, as we sleep 1 second each time
2568 cumul_degraded = False
2569 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2570 msg = rstats.fail_msg
2572 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2575 raise errors.RemoteError("Can't contact node %s for mirror data,"
2576 " aborting." % node)
2579 rstats = rstats.payload
2581 for i, mstat in enumerate(rstats):
2583 lu.LogWarning("Can't compute data for node %s/%s",
2584 node, instance.disks[i].iv_name)
2587 cumul_degraded = (cumul_degraded or
2588 (mstat.is_degraded and mstat.sync_percent is None))
2589 if mstat.sync_percent is not None:
2591 if mstat.estimated_time is not None:
2592 rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2593 max_time = mstat.estimated_time
2595 rem_time = "no time estimate"
2596 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2597 (instance.disks[i].iv_name, mstat.sync_percent,
2600 # if we're done but degraded, let's do a few small retries, to
2601 # make sure we see a stable and not transient situation; therefore
2602 # we force restart of the loop
2603 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2604 logging.info("Degraded disks found, %d retries left", degr_retries)
2612 time.sleep(min(60, max_time))
2615 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2616 return not cumul_degraded
2619 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2620 """Check that mirrors are not degraded.
2622 The ldisk parameter, if True, will change the test from the
2623 is_degraded attribute (which represents overall non-ok status for
2624 the device(s)) to the ldisk (representing the local storage status).
2627 lu.cfg.SetDiskID(dev, node)
2631 if on_primary or dev.AssembleOnSecondary():
2632 rstats = lu.rpc.call_blockdev_find(node, dev)
2633 msg = rstats.fail_msg
2635 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2637 elif not rstats.payload:
2638 lu.LogWarning("Can't find disk on node %s", node)
2642 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2644 result = result and not rstats.payload.is_degraded
2647 for child in dev.children:
2648 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2653 class LUDiagnoseOS(NoHooksLU):
2654 """Logical unit for OS diagnose/query.
2657 _OP_REQP = ["output_fields", "names"]
2659 _FIELDS_STATIC = utils.FieldSet()
2660 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2661 # Fields that need calculation of global os validity
2662 _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2664 def ExpandNames(self):
2666 raise errors.OpPrereqError("Selective OS query not supported",
2669 _CheckOutputFields(static=self._FIELDS_STATIC,
2670 dynamic=self._FIELDS_DYNAMIC,
2671 selected=self.op.output_fields)
2673 # Lock all nodes, in shared mode
2674 # Temporary removal of locks, should be reverted later
2675 # TODO: reintroduce locks when they are lighter-weight
2676 self.needed_locks = {}
2677 #self.share_locks[locking.LEVEL_NODE] = 1
2678 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2680 def CheckPrereq(self):
2681 """Check prerequisites.
2686 def _DiagnoseByOS(rlist):
2687 """Remaps a per-node return list into an a per-os per-node dictionary
2689 @param rlist: a map with node names as keys and OS objects as values
2692 @return: a dictionary with osnames as keys and as value another map, with
2693 nodes as keys and tuples of (path, status, diagnose) as values, eg::
2695 {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2696 (/srv/..., False, "invalid api")],
2697 "node2": [(/srv/..., True, "")]}
2702 # we build here the list of nodes that didn't fail the RPC (at RPC
2703 # level), so that nodes with a non-responding node daemon don't
2704 # make all OSes invalid
2705 good_nodes = [node_name for node_name in rlist
2706 if not rlist[node_name].fail_msg]
2707 for node_name, nr in rlist.items():
2708 if nr.fail_msg or not nr.payload:
2710 for name, path, status, diagnose, variants in nr.payload:
2711 if name not in all_os:
2712 # build a list of nodes for this os containing empty lists
2713 # for each node in node_list
2715 for nname in good_nodes:
2716 all_os[name][nname] = []
2717 all_os[name][node_name].append((path, status, diagnose, variants))
2720 def Exec(self, feedback_fn):
2721 """Compute the list of OSes.
2724 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2725 node_data = self.rpc.call_os_diagnose(valid_nodes)
2726 pol = self._DiagnoseByOS(node_data)
2728 calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2729 calc_variants = "variants" in self.op.output_fields
2731 for os_name, os_data in pol.items():
2736 for osl in os_data.values():
2737 valid = valid and osl and osl[0][1]
2742 node_variants = osl[0][3]
2743 if variants is None:
2744 variants = node_variants
2746 variants = [v for v in variants if v in node_variants]
2748 for field in self.op.output_fields:
2751 elif field == "valid":
2753 elif field == "node_status":
2754 # this is just a copy of the dict
2756 for node_name, nos_list in os_data.items():
2757 val[node_name] = nos_list
2758 elif field == "variants":
2761 raise errors.ParameterError(field)
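# Illustrative sketch of the remapping performed by _DiagnoseByOS above: the
# per-node RPC results are turned into a per-OS dictionary whose values map
# node names to (path, status, diagnose, variants) tuples, pre-seeding every
# good node with an empty list.  The node and OS names below are made up.
def _DemoDiagnoseByOs():
  """Return the per-OS view of a sample per-node OS listing."""
  rlist = {
    "node1.example.com": [("debian-etch", "/srv/ganeti/os", True, "", [])],
    "node2.example.com": [],
    }
  all_os = {}
  for node_name, payload in rlist.items():
    for name, path, status, diagnose, variants in payload:
      if name not in all_os:
        all_os[name] = dict((nname, []) for nname in rlist)
      all_os[name][node_name].append((path, status, diagnose, variants))
  return all_os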
2768 class LURemoveNode(LogicalUnit):
2769 """Logical unit for removing a node.
2772 HPATH = "node-remove"
2773 HTYPE = constants.HTYPE_NODE
2774 _OP_REQP = ["node_name"]
2776 def BuildHooksEnv(self):
2779 This doesn't run on the target node in the pre phase as a failed
2780 node would then be impossible to remove.
2784 "OP_TARGET": self.op.node_name,
2785 "NODE_NAME": self.op.node_name,
2787 all_nodes = self.cfg.GetNodeList()
2789 all_nodes.remove(self.op.node_name)
2791 logging.warning("Node %s which is about to be removed not found"
2792 " in the all nodes list", self.op.node_name)
2793 return env, all_nodes, all_nodes
2795 def CheckPrereq(self):
2796 """Check prerequisites.
2799 - the node exists in the configuration
2800 - it does not have primary or secondary instances
2801 - it's not the master
2803 Any errors are signaled by raising errors.OpPrereqError.
2806 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2807 node = self.cfg.GetNodeInfo(self.op.node_name)
2808 assert node is not None
2810 instance_list = self.cfg.GetInstanceList()
2812 masternode = self.cfg.GetMasterNode()
2813 if node.name == masternode:
2814 raise errors.OpPrereqError("Node is the master node,"
2815 " you need to failover first.",
2818 for instance_name in instance_list:
2819 instance = self.cfg.GetInstanceInfo(instance_name)
2820 if node.name in instance.all_nodes:
2821 raise errors.OpPrereqError("Instance %s is still running on the node,"
2822 " please remove first." % instance_name,
2824 self.op.node_name = node.name
2827 def Exec(self, feedback_fn):
2828 """Removes the node from the cluster.
2832 logging.info("Stopping the node daemon and removing configs from node %s",
2835 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2837 # Promote nodes to master candidate as needed
2838 _AdjustCandidatePool(self, exceptions=[node.name])
2839 self.context.RemoveNode(node.name)
2841 # Run post hooks on the node before it's removed
2842 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2844 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2846 # pylint: disable-msg=W0702
2847 self.LogWarning("Errors occurred running hooks on %s" % node.name)
2849 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2850 msg = result.fail_msg
2852 self.LogWarning("Errors encountered on the remote node while leaving"
2853 " the cluster: %s", msg)
2855 # Remove node from our /etc/hosts
2856 if self.cfg.GetClusterInfo().modify_etc_hosts:
2857 # FIXME: this should be done via an rpc call to node daemon
2858 utils.RemoveHostFromEtcHosts(node.name)
2859 _RedistributeAncillaryFiles(self)
2862 class LUQueryNodes(NoHooksLU):
2863 """Logical unit for querying nodes.
2866 # pylint: disable-msg=W0142
2867 _OP_REQP = ["output_fields", "names", "use_locking"]
2870 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2871 "master_candidate", "offline", "drained"]
2873 _FIELDS_DYNAMIC = utils.FieldSet(
2875 "mtotal", "mnode", "mfree",
2877 "ctotal", "cnodes", "csockets",
2880 _FIELDS_STATIC = utils.FieldSet(*[
2881 "pinst_cnt", "sinst_cnt",
2882 "pinst_list", "sinst_list",
2883 "pip", "sip", "tags",
2885 "role"] + _SIMPLE_FIELDS
2888 def ExpandNames(self):
2889 _CheckOutputFields(static=self._FIELDS_STATIC,
2890 dynamic=self._FIELDS_DYNAMIC,
2891 selected=self.op.output_fields)
2893 self.needed_locks = {}
2894 self.share_locks[locking.LEVEL_NODE] = 1
2897 self.wanted = _GetWantedNodes(self, self.op.names)
2899 self.wanted = locking.ALL_SET
2901 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2902 self.do_locking = self.do_node_query and self.op.use_locking
2904 # if we don't request only static fields, we need to lock the nodes
2905 self.needed_locks[locking.LEVEL_NODE] = self.wanted
2907 def CheckPrereq(self):
2908 """Check prerequisites.
2911 # The validation of the node list is done in _GetWantedNodes,
2912 # if non-empty; if empty, there's no validation to do
2915 def Exec(self, feedback_fn):
2916 """Computes the list of nodes and their attributes.
2919 all_info = self.cfg.GetAllNodesInfo()
2921 nodenames = self.acquired_locks[locking.LEVEL_NODE]
2922 elif self.wanted != locking.ALL_SET:
2923 nodenames = self.wanted
2924 missing = set(nodenames).difference(all_info.keys())
2926 raise errors.OpExecError(
2927 "Some nodes were removed before retrieving their data: %s" % missing)
2929 nodenames = all_info.keys()
2931 nodenames = utils.NiceSort(nodenames)
2932 nodelist = [all_info[name] for name in nodenames]
2934 # begin data gathering
2936 if self.do_node_query:
2938 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2939 self.cfg.GetHypervisorType())
2940 for name in nodenames:
2941 nodeinfo = node_data[name]
2942 if not nodeinfo.fail_msg and nodeinfo.payload:
2943 nodeinfo = nodeinfo.payload
2944 fn = utils.TryConvert
2946 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2947 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2948 "mfree": fn(int, nodeinfo.get('memory_free', None)),
2949 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2950 "dfree": fn(int, nodeinfo.get('vg_free', None)),
2951 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2952 "bootid": nodeinfo.get('bootid', None),
2953 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2954 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2957 live_data[name] = {}
2959 live_data = dict.fromkeys(nodenames, {})
2961 node_to_primary = dict([(name, set()) for name in nodenames])
2962 node_to_secondary = dict([(name, set()) for name in nodenames])
2964 inst_fields = frozenset(("pinst_cnt", "pinst_list",
2965 "sinst_cnt", "sinst_list"))
2966 if inst_fields & frozenset(self.op.output_fields):
2967 inst_data = self.cfg.GetAllInstancesInfo()
2969 for inst in inst_data.values():
2970 if inst.primary_node in node_to_primary:
2971 node_to_primary[inst.primary_node].add(inst.name)
2972 for secnode in inst.secondary_nodes:
2973 if secnode in node_to_secondary:
2974 node_to_secondary[secnode].add(inst.name)
2976 master_node = self.cfg.GetMasterNode()
2978 # end data gathering
2981 for node in nodelist:
2983 for field in self.op.output_fields:
2984 if field in self._SIMPLE_FIELDS:
2985 val = getattr(node, field)
2986 elif field == "pinst_list":
2987 val = list(node_to_primary[node.name])
2988 elif field == "sinst_list":
2989 val = list(node_to_secondary[node.name])
2990 elif field == "pinst_cnt":
2991 val = len(node_to_primary[node.name])
2992 elif field == "sinst_cnt":
2993 val = len(node_to_secondary[node.name])
2994 elif field == "pip":
2995 val = node.primary_ip
2996 elif field == "sip":
2997 val = node.secondary_ip
2998 elif field == "tags":
2999 val = list(node.GetTags())
3000 elif field == "master":
3001 val = node.name == master_node
3002 elif self._FIELDS_DYNAMIC.Matches(field):
3003 val = live_data[node.name].get(field, None)
3004 elif field == "role":
3005 if node.name == master_node:
3007 elif node.master_candidate:
3016 raise errors.ParameterError(field)
3017 node_output.append(val)
3018 output.append(node_output)
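# Illustrative sketch of the reverse mapping built in Exec above for the
# pinst_*/sinst_* fields: instances are walked once and indexed by their
# primary and secondary nodes.  _FakeInstance is a made-up stand-in for
# objects.Instance and the node/instance names are invented.
def _DemoNodeToInstanceMaps():
  """Return (node_to_primary, node_to_secondary) for a sample instance."""
  class _FakeInstance(object):
    def __init__(self, name, primary_node, secondary_nodes):
      self.name = name
      self.primary_node = primary_node
      self.secondary_nodes = secondary_nodes
  nodes = ["node1.example.com", "node2.example.com"]
  instances = [_FakeInstance("inst1.example.com", nodes[0], [nodes[1]])]
  node_to_primary = dict((name, set()) for name in nodes)
  node_to_secondary = dict((name, set()) for name in nodes)
  for inst in instances:
    if inst.primary_node in node_to_primary:
      node_to_primary[inst.primary_node].add(inst.name)
    for secnode in inst.secondary_nodes:
      if secnode in node_to_secondary:
        node_to_secondary[secnode].add(inst.name)
  return node_to_primary, node_to_secondary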
3023 class LUQueryNodeVolumes(NoHooksLU):
3024 """Logical unit for getting volumes on node(s).
3027 _OP_REQP = ["nodes", "output_fields"]
3029 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3030 _FIELDS_STATIC = utils.FieldSet("node")
3032 def ExpandNames(self):
3033 _CheckOutputFields(static=self._FIELDS_STATIC,
3034 dynamic=self._FIELDS_DYNAMIC,
3035 selected=self.op.output_fields)
3037 self.needed_locks = {}
3038 self.share_locks[locking.LEVEL_NODE] = 1
3039 if not self.op.nodes:
3040 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3042 self.needed_locks[locking.LEVEL_NODE] = \
3043 _GetWantedNodes(self, self.op.nodes)
3045 def CheckPrereq(self):
3046 """Check prerequisites.
3048 This checks that the fields required are valid output fields.
3051 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3053 def Exec(self, feedback_fn):
3054 """Computes the list of nodes and their attributes.
3057 nodenames = self.nodes
3058 volumes = self.rpc.call_node_volumes(nodenames)
3060 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3061 in self.cfg.GetInstanceList()]
3063 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3066 for node in nodenames:
3067 nresult = volumes[node]
3070 msg = nresult.fail_msg
3072 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3075 node_vols = nresult.payload[:]
3076 node_vols.sort(key=lambda vol: vol['dev'])
3078 for vol in node_vols:
3080 for field in self.op.output_fields:
3083 elif field == "phys":
3087 elif field == "name":
3089 elif field == "size":
3090 val = int(float(vol['size']))
3091 elif field == "instance":
3093 if node not in lv_by_node[inst]:
3095 if vol['name'] in lv_by_node[inst][node]:
3101 raise errors.ParameterError(field)
3102 node_output.append(str(val))
3104 output.append(node_output)
3109 class LUQueryNodeStorage(NoHooksLU):
3110 """Logical unit for getting information on storage units on node(s).
3113 _OP_REQP = ["nodes", "storage_type", "output_fields"]
3115 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3117 def CheckArguments(self):
3118 _CheckStorageType(self.op.storage_type)
3120 _CheckOutputFields(static=self._FIELDS_STATIC,
3121 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3122 selected=self.op.output_fields)
3124 def ExpandNames(self):
3125 self.needed_locks = {}
3126 self.share_locks[locking.LEVEL_NODE] = 1
3129 self.needed_locks[locking.LEVEL_NODE] = \
3130 _GetWantedNodes(self, self.op.nodes)
3132 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3134 def CheckPrereq(self):
3135 """Check prerequisites.
3137 This checks that the fields required are valid output fields.
3140 self.op.name = getattr(self.op, "name", None)
3142 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3144 def Exec(self, feedback_fn):
3145 """Computes the list of nodes and their attributes.
3148 # Always get name to sort by
3149 if constants.SF_NAME in self.op.output_fields:
3150 fields = self.op.output_fields[:]
3152 fields = [constants.SF_NAME] + self.op.output_fields
3154 # Never ask for node or type as it's only known to the LU
3155 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3156 while extra in fields:
3157 fields.remove(extra)
3159 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3160 name_idx = field_idx[constants.SF_NAME]
3162 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3163 data = self.rpc.call_storage_list(self.nodes,
3164 self.op.storage_type, st_args,
3165 self.op.name, fields)
3169 for node in utils.NiceSort(self.nodes):
3170 nresult = data[node]
3174 msg = nresult.fail_msg
3176 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3179 rows = dict([(row[name_idx], row) for row in nresult.payload])
3181 for name in utils.NiceSort(rows.keys()):
3186 for field in self.op.output_fields:
3187 if field == constants.SF_NODE:
3189 elif field == constants.SF_TYPE:
3190 val = self.op.storage_type
3191 elif field in field_idx:
3192 val = row[field_idx[field]]
3194 raise errors.ParameterError(field)
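# Illustrative sketch of the field handling in Exec above: the backend is
# asked for a fixed field list, a name-to-index map is built for it, and each
# output row is then assembled by looking the requested fields up in that map
# (with "node" and "type" filled in locally).  The field names and values
# below are made up.
def _DemoStorageFieldLookup():
  """Return one output row for a sample storage listing."""
  fields = ["name", "size", "used"]
  field_idx = dict((name, idx) for (idx, name) in enumerate(fields))
  backend_row = ["xenvg", 102400, 40960]
  requested = ["node", "name", "size"]
  out = []
  for field in requested:
    if field == "node":
      out.append("node1.example.com")
    else:
      out.append(backend_row[field_idx[field]])
  return out  # ['node1.example.com', 'xenvg', 102400]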
3203 class LUModifyNodeStorage(NoHooksLU):
3204 """Logical unit for modifying a storage volume on a node.
3207 _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3210 def CheckArguments(self):
3211 self.opnode_name = _ExpandNodeName(self.cfg, self.op.node_name)
3213 _CheckStorageType(self.op.storage_type)
3215 def ExpandNames(self):
3216 self.needed_locks = {
3217 locking.LEVEL_NODE: self.op.node_name,
3220 def CheckPrereq(self):
3221 """Check prerequisites.
3224 storage_type = self.op.storage_type
3227 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3229 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3230 " modified" % storage_type,
3233 diff = set(self.op.changes.keys()) - modifiable
3235 raise errors.OpPrereqError("The following fields can not be modified for"
3236 " storage units of type '%s': %r" %
3237 (storage_type, list(diff)),
3240 def Exec(self, feedback_fn):
3241 """Computes the list of nodes and their attributes.
3244 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3245 result = self.rpc.call_storage_modify(self.op.node_name,
3246 self.op.storage_type, st_args,
3247 self.op.name, self.op.changes)
3248 result.Raise("Failed to modify storage unit '%s' on %s" %
3249 (self.op.name, self.op.node_name))
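# Illustrative sketch of the CheckPrereq test above: the fields the opcode
# wants to change are compared against the set of fields that are modifiable
# for this storage type, and any leftover names are rejected.  The field
# names below are made up.
def _DemoModifiableFieldsCheck():
  """Return the set of requested changes that would be rejected."""
  modifiable = frozenset(["allocatable"])
  requested_changes = {"allocatable": False, "size": 2048}
  return set(requested_changes.keys()) - modifiable  # set(['size'])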
3252 class LUAddNode(LogicalUnit):
3253 """Logical unit for adding node to the cluster.
3257 HTYPE = constants.HTYPE_NODE
3258 _OP_REQP = ["node_name"]
3260 def CheckArguments(self):
3261 # validate/normalize the node name
3262 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3264 def BuildHooksEnv(self):
3267 This will run on all nodes before, and on all nodes + the new node after.
3271 "OP_TARGET": self.op.node_name,
3272 "NODE_NAME": self.op.node_name,
3273 "NODE_PIP": self.op.primary_ip,
3274 "NODE_SIP": self.op.secondary_ip,
3276 nodes_0 = self.cfg.GetNodeList()
3277 nodes_1 = nodes_0 + [self.op.node_name, ]
3278 return env, nodes_0, nodes_1
3280 def CheckPrereq(self):
3281 """Check prerequisites.
3284 - the new node is not already in the config
3286 - its parameters (single/dual homed) match the cluster
3288 Any errors are signaled by raising errors.OpPrereqError.
3291 node_name = self.op.node_name
3294 dns_data = utils.GetHostInfo(node_name)
3296 node = dns_data.name
3297 primary_ip = self.op.primary_ip = dns_data.ip
3298 secondary_ip = getattr(self.op, "secondary_ip", None)
3299 if secondary_ip is None:
3300 secondary_ip = primary_ip
3301 if not utils.IsValidIP(secondary_ip):
3302 raise errors.OpPrereqError("Invalid secondary IP given",
3304 self.op.secondary_ip = secondary_ip
3306 node_list = cfg.GetNodeList()
3307 if not self.op.readd and node in node_list:
3308 raise errors.OpPrereqError("Node %s is already in the configuration" %
3309 node, errors.ECODE_EXISTS)
3310 elif self.op.readd and node not in node_list:
3311 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3314 self.changed_primary_ip = False
3316 for existing_node_name in node_list:
3317 existing_node = cfg.GetNodeInfo(existing_node_name)
3319 if self.op.readd and node == existing_node_name:
3320 if existing_node.secondary_ip != secondary_ip:
3321 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3322 " address configuration as before",
3324 if existing_node.primary_ip != primary_ip:
3325 self.changed_primary_ip = True
3329 if (existing_node.primary_ip == primary_ip or
3330 existing_node.secondary_ip == primary_ip or
3331 existing_node.primary_ip == secondary_ip or
3332 existing_node.secondary_ip == secondary_ip):
3333 raise errors.OpPrereqError("New node ip address(es) conflict with"
3334 " existing node %s" % existing_node.name,
3335 errors.ECODE_NOTUNIQUE)
3337 # check that the type of the node (single versus dual homed) is the
3338 # same as for the master
3339 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3340 master_singlehomed = myself.secondary_ip == myself.primary_ip
3341 newbie_singlehomed = secondary_ip == primary_ip
3342 if master_singlehomed != newbie_singlehomed:
3343 if master_singlehomed:
3344 raise errors.OpPrereqError("The master has no private ip but the"
3345 " new node has one",
3348 raise errors.OpPrereqError("The master has a private ip but the"
3349 " new node doesn't have one",
3352 # checks reachability
3353 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3354 raise errors.OpPrereqError("Node not reachable by ping",
3355 errors.ECODE_ENVIRON)
3357 if not newbie_singlehomed:
3358 # check reachability from my secondary ip to newbie's secondary ip
3359 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3360 source=myself.secondary_ip):
3361 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3362 " based ping to noded port",
3363 errors.ECODE_ENVIRON)
3370 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3373 self.new_node = self.cfg.GetNodeInfo(node)
3374 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3376 self.new_node = objects.Node(name=node,
3377 primary_ip=primary_ip,
3378 secondary_ip=secondary_ip,
3379 master_candidate=self.master_candidate,
3380 offline=False, drained=False)
3382 def Exec(self, feedback_fn):
3383 """Adds the new node to the cluster.
3386 new_node = self.new_node
3387 node = new_node.name
3389 # for re-adds, reset the offline/drained/master-candidate flags;
3390 # we need to reset here, otherwise offline would prevent RPC calls
3391 # later in the procedure; this also means that if the re-add
3392 # fails, we are left with a non-offlined, broken node
3394 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3395 self.LogInfo("Readding a node, the offline/drained flags were reset")
3396 # if we demote the node, we do cleanup later in the procedure
3397 new_node.master_candidate = self.master_candidate
3398 if self.changed_primary_ip:
3399 new_node.primary_ip = self.op.primary_ip
3401 # notify the user about any possible mc promotion
3402 if new_node.master_candidate:
3403 self.LogInfo("Node will be a master candidate")
3405 # check connectivity
3406 result = self.rpc.call_version([node])[node]
3407 result.Raise("Can't get version information from node %s" % node)
3408 if constants.PROTOCOL_VERSION == result.payload:
3409 logging.info("Communication to node %s fine, sw version %s match",
3410 node, result.payload)
3412 raise errors.OpExecError("Version mismatch master version %s,"
3413 " node version %s" %
3414 (constants.PROTOCOL_VERSION, result.payload))
3417 if self.cfg.GetClusterInfo().modify_ssh_setup:
3418 logging.info("Copy ssh key to node %s", node)
3419 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3421 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3422 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3426 keyarray.append(utils.ReadFile(i))
3428 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3429 keyarray[2], keyarray[3], keyarray[4],
3431 result.Raise("Cannot transfer ssh keys to the new node")
3433 # Add node to our /etc/hosts, and add key to known_hosts
3434 if self.cfg.GetClusterInfo().modify_etc_hosts:
3435 # FIXME: this should be done via an rpc call to node daemon
3436 utils.AddHostToEtcHosts(new_node.name)
3438 if new_node.secondary_ip != new_node.primary_ip:
3439 result = self.rpc.call_node_has_ip_address(new_node.name,
3440 new_node.secondary_ip)
3441 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3442 prereq=True, ecode=errors.ECODE_ENVIRON)
3443 if not result.payload:
3444 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3445 " you gave (%s). Please fix and re-run this"
3446 " command." % new_node.secondary_ip)
3448 node_verify_list = [self.cfg.GetMasterNode()]
3449 node_verify_param = {
3450 constants.NV_NODELIST: [node],
3451 # TODO: do a node-net-test as well?
3454 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3455 self.cfg.GetClusterName())
3456 for verifier in node_verify_list:
3457 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3458 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3460 for failed in nl_payload:
3461 feedback_fn("ssh/hostname verification failed"
3462 " (checking from %s): %s" %
3463 (verifier, nl_payload[failed]))
3464 raise errors.OpExecError("ssh/hostname verification failed.")
3467 _RedistributeAncillaryFiles(self)
3468 self.context.ReaddNode(new_node)
3469 # make sure we redistribute the config
3470 self.cfg.Update(new_node, feedback_fn)
3471 # and make sure the new node will not have old files around
3472 if not new_node.master_candidate:
3473 result = self.rpc.call_node_demote_from_mc(new_node.name)
3474 msg = result.fail_msg
3476 self.LogWarning("Node failed to demote itself from master"
3477 " candidate status: %s" % msg)
3479 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3480 self.context.AddNode(new_node, self.proc.GetECId())
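# Illustrative sketch of the homing check done in CheckPrereq above: a node
# is "single homed" when its primary and secondary IPs coincide, and the new
# node must match the master in that respect.  The addresses are made up.
def _DemoHomingCheck():
  """Return True when the candidate node matches the master's homing."""
  master_primary_ip, master_secondary_ip = "192.0.2.1", "198.51.100.1"
  new_primary_ip, new_secondary_ip = "192.0.2.2", "198.51.100.2"
  master_singlehomed = master_secondary_ip == master_primary_ip
  newbie_singlehomed = new_secondary_ip == new_primary_ip
  return master_singlehomed == newbie_singlehomed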
3483 class LUSetNodeParams(LogicalUnit):
3484 """Modifies the parameters of a node.
3487 HPATH = "node-modify"
3488 HTYPE = constants.HTYPE_NODE
3489 _OP_REQP = ["node_name"]
3492 def CheckArguments(self):
3493 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3494 _CheckBooleanOpField(self.op, 'master_candidate')
3495 _CheckBooleanOpField(self.op, 'offline')
3496 _CheckBooleanOpField(self.op, 'drained')
3497 _CheckBooleanOpField(self.op, 'auto_promote')
3498 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3499 if all_mods.count(None) == 3:
3500 raise errors.OpPrereqError("Please pass at least one modification",
3502 if all_mods.count(True) > 1:
3503 raise errors.OpPrereqError("Can't set the node into more than one"
3504 " state at the same time",
3507 # Boolean value that tells us whether we're offlining or draining the node
3508 self.offline_or_drain = (self.op.offline == True or
3509 self.op.drained == True)
3510 self.deoffline_or_drain = (self.op.offline == False or
3511 self.op.drained == False)
3512 self.might_demote = (self.op.master_candidate == False or
3513 self.offline_or_drain)
3515 self.lock_all = self.op.auto_promote and self.might_demote
3518 def ExpandNames(self):
3520 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3522 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3524 def BuildHooksEnv(self):
3527 This runs on the master node.
3531 "OP_TARGET": self.op.node_name,
3532 "MASTER_CANDIDATE": str(self.op.master_candidate),
3533 "OFFLINE": str(self.op.offline),
3534 "DRAINED": str(self.op.drained),
3536 nl = [self.cfg.GetMasterNode(),
3540 def CheckPrereq(self):
3541 """Check prerequisites.
3543 This only checks the instance list against the existing names.
3546 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3548 if (self.op.master_candidate is not None or
3549 self.op.drained is not None or
3550 self.op.offline is not None):
3551 # we can't change the master's node flags
3552 if self.op.node_name == self.cfg.GetMasterNode():
3553 raise errors.OpPrereqError("The master role can be changed"
3554 " only via masterfailover",
3558 if node.master_candidate and self.might_demote and not self.lock_all:
3559 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3560 # check if after removing the current node, we're missing master candidates
3562 (mc_remaining, mc_should, _) = \
3563 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3564 if mc_remaining < mc_should:
3565 raise errors.OpPrereqError("Not enough master candidates, please"
3566 " pass auto_promote to allow promotion",
3569 if (self.op.master_candidate == True and
3570 ((node.offline and not self.op.offline == False) or
3571 (node.drained and not self.op.drained == False))):
3572 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3573 " to master_candidate" % node.name,
3576 # If we're being deofflined/drained, we'll MC ourself if needed
3577 if (self.deoffline_or_drain and not self.offline_or_drain and not
3578 self.op.master_candidate == True and not node.master_candidate):
3579 self.op.master_candidate = _DecideSelfPromotion(self)
3580 if self.op.master_candidate:
3581 self.LogInfo("Autopromoting node to master candidate")
3585 def Exec(self, feedback_fn):
3594 if self.op.offline is not None:
3595 node.offline = self.op.offline
3596 result.append(("offline", str(self.op.offline)))
3597 if self.op.offline == True:
3598 if node.master_candidate:
3599 node.master_candidate = False
3601 result.append(("master_candidate", "auto-demotion due to offline"))
3603 node.drained = False
3604 result.append(("drained", "clear drained status due to offline"))
3606 if self.op.master_candidate is not None:
3607 node.master_candidate = self.op.master_candidate
3609 result.append(("master_candidate", str(self.op.master_candidate)))
3610 if self.op.master_candidate == False:
3611 rrc = self.rpc.call_node_demote_from_mc(node.name)
3614 self.LogWarning("Node failed to demote itself: %s" % msg)
3616 if self.op.drained is not None:
3617 node.drained = self.op.drained
3618 result.append(("drained", str(self.op.drained)))
3619 if self.op.drained == True:
3620 if node.master_candidate:
3621 node.master_candidate = False
3623 result.append(("master_candidate", "auto-demotion due to drain"))
3624 rrc = self.rpc.call_node_demote_from_mc(node.name)
3627 self.LogWarning("Node failed to demote itself: %s" % msg)
3629 node.offline = False
3630 result.append(("offline", "clear offline status due to drain"))
3632 # we locked all nodes, so we adjust the CP before updating this node
3634 _AdjustCandidatePool(self, [node.name])
3636 # this will trigger configuration file update, if needed
3637 self.cfg.Update(node, feedback_fn)
3639 # this will trigger job queue propagation or cleanup
3641 self.context.ReaddNode(node)
3646 class LUPowercycleNode(NoHooksLU):
3647 """Powercycles a node.
3650 _OP_REQP = ["node_name", "force"]
3653 def CheckArguments(self):
3654 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3655 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3656 raise errors.OpPrereqError("The node is the master and the force"
3657 " parameter was not set",
3660 def ExpandNames(self):
3661 """Locking for PowercycleNode.
3663 This is a last-resort option and shouldn't block on other
3664 jobs. Therefore, we grab no locks.
3667 self.needed_locks = {}
3669 def CheckPrereq(self):
3670 """Check prerequisites.
3672 This LU has no prereqs.
3677 def Exec(self, feedback_fn):
3681 result = self.rpc.call_node_powercycle(self.op.node_name,
3682 self.cfg.GetHypervisorType())
3683 result.Raise("Failed to schedule the reboot")
3684 return result.payload
3687 class LUQueryClusterInfo(NoHooksLU):
3688 """Query cluster configuration.
3694 def ExpandNames(self):
3695 self.needed_locks = {}
3697 def CheckPrereq(self):
3698 """No prerequsites needed for this LU.
3703 def Exec(self, feedback_fn):
3704 """Return cluster config.
3707 cluster = self.cfg.GetClusterInfo()
3710 # Filter just for enabled hypervisors
3711 for os_name, hv_dict in cluster.os_hvp.items():
3712 os_hvp[os_name] = {}
3713 for hv_name, hv_params in hv_dict.items():
3714 if hv_name in cluster.enabled_hypervisors:
3715 os_hvp[os_name][hv_name] = hv_params
3718 "software_version": constants.RELEASE_VERSION,
3719 "protocol_version": constants.PROTOCOL_VERSION,
3720 "config_version": constants.CONFIG_VERSION,
3721 "os_api_version": max(constants.OS_API_VERSIONS),
3722 "export_version": constants.EXPORT_VERSION,
3723 "architecture": (platform.architecture()[0], platform.machine()),
3724 "name": cluster.cluster_name,
3725 "master": cluster.master_node,
3726 "default_hypervisor": cluster.enabled_hypervisors[0],
3727 "enabled_hypervisors": cluster.enabled_hypervisors,
3728 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3729 for hypervisor_name in cluster.enabled_hypervisors]),
3731 "beparams": cluster.beparams,
3732 "nicparams": cluster.nicparams,
3733 "candidate_pool_size": cluster.candidate_pool_size,
3734 "master_netdev": cluster.master_netdev,
3735 "volume_group_name": cluster.volume_group_name,
3736 "file_storage_dir": cluster.file_storage_dir,
3737 "maintain_node_health": cluster.maintain_node_health,
3738 "ctime": cluster.ctime,
3739 "mtime": cluster.mtime,
3740 "uuid": cluster.uuid,
3741 "tags": list(cluster.GetTags()),
3742 "uid_pool": cluster.uid_pool,
3748 class LUQueryConfigValues(NoHooksLU):
3749 """Return configuration values.
3754 _FIELDS_DYNAMIC = utils.FieldSet()
3755 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3758 def ExpandNames(self):
3759 self.needed_locks = {}
3761 _CheckOutputFields(static=self._FIELDS_STATIC,
3762 dynamic=self._FIELDS_DYNAMIC,
3763 selected=self.op.output_fields)
3765 def CheckPrereq(self):
3766 """No prerequisites.
3771 def Exec(self, feedback_fn):
3772 """Dump a representation of the cluster config to the standard output.
3776 for field in self.op.output_fields:
3777 if field == "cluster_name":
3778 entry = self.cfg.GetClusterName()
3779 elif field == "master_node":
3780 entry = self.cfg.GetMasterNode()
3781 elif field == "drain_flag":
3782 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3783 elif field == "watcher_pause":
3784 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3786 raise errors.ParameterError(field)
3787 values.append(entry)
3791 class LUActivateInstanceDisks(NoHooksLU):
3792 """Bring up an instance's disks.
3795 _OP_REQP = ["instance_name"]
3798 def ExpandNames(self):
3799 self._ExpandAndLockInstance()
3800 self.needed_locks[locking.LEVEL_NODE] = []
3801 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3803 def DeclareLocks(self, level):
3804 if level == locking.LEVEL_NODE:
3805 self._LockInstancesNodes()
3807 def CheckPrereq(self):
3808 """Check prerequisites.
3810 This checks that the instance is in the cluster.
3813 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3814 assert self.instance is not None, \
3815 "Cannot retrieve locked instance %s" % self.op.instance_name
3816 _CheckNodeOnline(self, self.instance.primary_node)
3817 if not hasattr(self.op, "ignore_size"):
3818 self.op.ignore_size = False
3820 def Exec(self, feedback_fn):
3821 """Activate the disks.
3824 disks_ok, disks_info = \
3825 _AssembleInstanceDisks(self, self.instance,
3826 ignore_size=self.op.ignore_size)
3828 raise errors.OpExecError("Cannot activate block devices")
3833 def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3835 """Prepare the block devices for an instance.
3837 This sets up the block devices on all nodes.
3839 @type lu: L{LogicalUnit}
3840 @param lu: the logical unit on whose behalf we execute
3841 @type instance: L{objects.Instance}
3842 @param instance: the instance for whose disks we assemble
3843 @type ignore_secondaries: boolean
3844 @param ignore_secondaries: if true, errors on secondary nodes
3845 won't result in an error return from the function
3846 @type ignore_size: boolean
3847 @param ignore_size: if true, the current known size of the disk
3848 will not be used during the disk activation, useful for cases
3849 when the size is wrong
3850 @return: False if the operation failed, otherwise a list of
3851 (host, instance_visible_name, node_visible_name)
3852 with the mapping from node devices to instance devices
3857 iname = instance.name
3858 # With the two passes mechanism we try to reduce the window of
3859 # opportunity for the race condition of switching DRBD to primary
3860 # before handshaking occurred, but we do not eliminate it
3862 # The proper fix would be to wait (with some limits) until the
3863 # connection has been made and drbd transitions from WFConnection
3864 # into any other network-connected state (Connected, SyncTarget,
3867 # 1st pass, assemble on all nodes in secondary mode
3868 for inst_disk in instance.disks:
3869 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3871 node_disk = node_disk.Copy()
3872 node_disk.UnsetSize()
3873 lu.cfg.SetDiskID(node_disk, node)
3874 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3875 msg = result.fail_msg
3877 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3878 " (is_primary=False, pass=1): %s",
3879 inst_disk.iv_name, node, msg)
3880 if not ignore_secondaries:
3883 # FIXME: race condition on drbd migration to primary
3885 # 2nd pass, do only the primary node
3886 for inst_disk in instance.disks:
3889 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3890 if node != instance.primary_node:
3893 node_disk = node_disk.Copy()
3894 node_disk.UnsetSize()
3895 lu.cfg.SetDiskID(node_disk, node)
3896 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3897 msg = result.fail_msg
3899 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3900 " (is_primary=True, pass=2): %s",
3901 inst_disk.iv_name, node, msg)
3904 dev_path = result.payload
3906 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3908 # leave the disks configured for the primary node
3909 # this is a workaround that would be fixed better by
3910 # improving the logical/physical id handling
3911 for disk in instance.disks:
3912 lu.cfg.SetDiskID(disk, instance.primary_node)
3914 return disks_ok, device_info
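# A minimal usage sketch for the helper above, assuming an LU `lu` and a
# locked `instance` object (as in LUActivateInstanceDisks.Exec); it shows
# how the (disks_ok, device_info) pair is meant to be consumed:
#
#   disks_ok, disks_info = _AssembleInstanceDisks(lu, instance,
#                                                 ignore_size=False)
#   if not disks_ok:
#     _ShutdownInstanceDisks(lu, instance)
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in disks_info:
#     lu.LogInfo("Disk %s visible on node %s as %s", iv_name, node, dev_path)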
3917 def _StartInstanceDisks(lu, instance, force):
3918 """Start the disks of an instance.
3921 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3922 ignore_secondaries=force)
3924 _ShutdownInstanceDisks(lu, instance)
3925 if force is not None and not force:
3926 lu.proc.LogWarning("", hint="If the message above refers to a"
3928 " you can retry the operation using '--force'.")
3929 raise errors.OpExecError("Disk consistency error")
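# A minimal sketch of the pattern callers are expected to follow with
# _StartInstanceDisks: pair it with _ShutdownInstanceDisks in a finally
# clause so the disks never stay activated if the per-instance work fails
# (LUReinstallInstance and LURenameInstance below use this shape; `lu` and
# `inst` are assumed names):
#
#   _StartInstanceDisks(lu, inst, None)
#   try:
#     result = lu.rpc.call_instance_os_add(inst.primary_node, inst, True,
#                                          lu.op.debug_level)
#     result.Raise("Could not install OS for instance %s" % inst.name)
#   finally:
#     _ShutdownInstanceDisks(lu, inst)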
3932 class LUDeactivateInstanceDisks(NoHooksLU):
3933 """Shutdown an instance's disks.
3936 _OP_REQP = ["instance_name"]
3939 def ExpandNames(self):
3940 self._ExpandAndLockInstance()
3941 self.needed_locks[locking.LEVEL_NODE] = []
3942 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3944 def DeclareLocks(self, level):
3945 if level == locking.LEVEL_NODE:
3946 self._LockInstancesNodes()
3948 def CheckPrereq(self):
3949 """Check prerequisites.
3951 This checks that the instance is in the cluster.
3954 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3955 assert self.instance is not None, \
3956 "Cannot retrieve locked instance %s" % self.op.instance_name
3958 def Exec(self, feedback_fn):
3959 """Deactivate the disks
3962 instance = self.instance
3963 _SafeShutdownInstanceDisks(self, instance)
3966 def _SafeShutdownInstanceDisks(lu, instance):
3967 """Shutdown block devices of an instance.
3969 This function checks if an instance is running, before calling
3970 _ShutdownInstanceDisks.
3973 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
3974 _ShutdownInstanceDisks(lu, instance)
3977 def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3978 """Shutdown block devices of an instance.
3980 This does the shutdown on all nodes of the instance.
3982 Errors on the primary node affect the return value only when ignore_primary is false; errors on secondary nodes always do.
3987 for disk in instance.disks:
3988 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3989 lu.cfg.SetDiskID(top_disk, node)
3990 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3991 msg = result.fail_msg
3993 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3994 disk.iv_name, node, msg)
3995 if not ignore_primary or node != instance.primary_node:
4000 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4001 """Checks if a node has enough free memory.
4003 This function checks if a given node has the needed amount of free
4004 memory. In case the node has less memory or we cannot get the
4005 information from the node, this function raises an OpPrereqError
4008 @type lu: C{LogicalUnit}
4009 @param lu: a logical unit from which we get configuration data
4011 @param node: the node to check
4012 @type reason: C{str}
4013 @param reason: string to use in the error message
4014 @type requested: C{int}
4015 @param requested: the amount of memory in MiB to check for
4016 @type hypervisor_name: C{str}
4017 @param hypervisor_name: the hypervisor to ask for memory stats
4018 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4019 we cannot check the node
4022 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4023 nodeinfo[node].Raise("Can't get data from node %s" % node,
4024 prereq=True, ecode=errors.ECODE_ENVIRON)
4025 free_mem = nodeinfo[node].payload.get('memory_free', None)
4026 if not isinstance(free_mem, int):
4027 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4028 " was '%s'" % (node, free_mem),
4029 errors.ECODE_ENVIRON)
4030 if requested > free_mem:
4031 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4032 " needed %s MiB, available %s MiB" %
4033 (node, reason, requested, free_mem),
4037 def _CheckNodesFreeDisk(lu, nodenames, requested):
4038 """Checks if nodes have enough free disk space in the default VG.
4040 This function checks if all given nodes have the needed amount of
4041 free disk. In case any node has less disk or we cannot get the
4042 information from the node, this function raises an OpPrereqError
4045 @type lu: C{LogicalUnit}
4046 @param lu: a logical unit from which we get configuration data
4047 @type nodenames: C{list}
4048 @param nodenames: the list of node names to check
4049 @type requested: C{int}
4050 @param requested: the amount of disk in MiB to check for
4051 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4052 we cannot check the node
4055 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4056 lu.cfg.GetHypervisorType())
4057 for node in nodenames:
4058 info = nodeinfo[node]
4059 info.Raise("Cannot get current information from node %s" % node,
4060 prereq=True, ecode=errors.ECODE_ENVIRON)
4061 vg_free = info.payload.get("vg_free", None)
4062 if not isinstance(vg_free, int):
4063 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4064 " result was '%s'" % (node, vg_free),
4065 errors.ECODE_ENVIRON)
4066 if requested > vg_free:
4067 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4068 " required %d MiB, available %d MiB" %
4069 (node, requested, vg_free),
4073 class LUStartupInstance(LogicalUnit):
4074 """Starts an instance.
4077 HPATH = "instance-start"
4078 HTYPE = constants.HTYPE_INSTANCE
4079 _OP_REQP = ["instance_name", "force"]
4082 def ExpandNames(self):
4083 self._ExpandAndLockInstance()
4085 def BuildHooksEnv(self):
4088 This runs on master, primary and secondary nodes of the instance.
4092 "FORCE": self.op.force,
4094 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4095 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4098 def CheckPrereq(self):
4099 """Check prerequisites.
4101 This checks that the instance is in the cluster.
4104 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4105 assert self.instance is not None, \
4106 "Cannot retrieve locked instance %s" % self.op.instance_name
4109 self.beparams = getattr(self.op, "beparams", {})
4111 if not isinstance(self.beparams, dict):
4112 raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4113 " dict" % (type(self.beparams), ),
4115 # fill the beparams dict
4116 utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
4117 self.op.beparams = self.beparams
4120 self.hvparams = getattr(self.op, "hvparams", {})
4122 if not isinstance(self.hvparams, dict):
4123 raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4124 " dict" % (type(self.hvparams), ),
4127 # check hypervisor parameter syntax (locally)
4128 cluster = self.cfg.GetClusterInfo()
4129 utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
4130 filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
4132 filled_hvp.update(self.hvparams)
4133 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4134 hv_type.CheckParameterSyntax(filled_hvp)
4135 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4136 self.op.hvparams = self.hvparams
4138 _CheckNodeOnline(self, instance.primary_node)
4140 bep = self.cfg.GetClusterInfo().FillBE(instance)
4141 # check bridges existence
4142 _CheckInstanceBridgesExist(self, instance)
4144 remote_info = self.rpc.call_instance_info(instance.primary_node,
4146 instance.hypervisor)
4147 remote_info.Raise("Error checking node %s" % instance.primary_node,
4148 prereq=True, ecode=errors.ECODE_ENVIRON)
4149 if not remote_info.payload: # not running already
4150 _CheckNodeFreeMemory(self, instance.primary_node,
4151 "starting instance %s" % instance.name,
4152 bep[constants.BE_MEMORY], instance.hypervisor)
4154 def Exec(self, feedback_fn):
4155 """Start the instance.
4158 instance = self.instance
4159 force = self.op.force
4161 self.cfg.MarkInstanceUp(instance.name)
4163 node_current = instance.primary_node
4165 _StartInstanceDisks(self, instance, force)
4167 result = self.rpc.call_instance_start(node_current, instance,
4168 self.hvparams, self.beparams)
4169 msg = result.fail_msg
4171 _ShutdownInstanceDisks(self, instance)
4172 raise errors.OpExecError("Could not start instance: %s" % msg)
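# A sketch of how the temporary parameter overrides checked above typically
# reach this LU; the opcode fields and example values are assumptions for
# illustration only (the opcodes module is not imported here):
#
#   op = opcodes.OpStartupInstance(instance_name="web1.example.com",
#                                  force=False,
#                                  hvparams={"boot_order": "cdrom"},
#                                  beparams={constants.BE_MEMORY: 512})
#
# The overrides are only syntax-checked locally (ForceDictType plus the
# hypervisor's CheckParameterSyntax on the filled dict) and passed to
# call_instance_start; they are not written back to the configuration.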
4175 class LURebootInstance(LogicalUnit):
4176 """Reboot an instance.
4179 HPATH = "instance-reboot"
4180 HTYPE = constants.HTYPE_INSTANCE
4181 _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
4184 def CheckArguments(self):
4185 """Check the arguments.
4188 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4189 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4191 def ExpandNames(self):
4192 if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
4193 constants.INSTANCE_REBOOT_HARD,
4194 constants.INSTANCE_REBOOT_FULL]:
4195 raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
4196 (constants.INSTANCE_REBOOT_SOFT,
4197 constants.INSTANCE_REBOOT_HARD,
4198 constants.INSTANCE_REBOOT_FULL))
4199 self._ExpandAndLockInstance()
4201 def BuildHooksEnv(self):
4204 This runs on master, primary and secondary nodes of the instance.
4208 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4209 "REBOOT_TYPE": self.op.reboot_type,
4210 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4212 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4213 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4216 def CheckPrereq(self):
4217 """Check prerequisites.
4219 This checks that the instance is in the cluster.
4222 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4223 assert self.instance is not None, \
4224 "Cannot retrieve locked instance %s" % self.op.instance_name
4226 _CheckNodeOnline(self, instance.primary_node)
4228 # check bridges existence
4229 _CheckInstanceBridgesExist(self, instance)
4231 def Exec(self, feedback_fn):
4232 """Reboot the instance.
4235 instance = self.instance
4236 ignore_secondaries = self.op.ignore_secondaries
4237 reboot_type = self.op.reboot_type
4239 node_current = instance.primary_node
4241 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4242 constants.INSTANCE_REBOOT_HARD]:
4243 for disk in instance.disks:
4244 self.cfg.SetDiskID(disk, node_current)
4245 result = self.rpc.call_instance_reboot(node_current, instance,
4247 self.shutdown_timeout)
4248 result.Raise("Could not reboot instance")
4250 result = self.rpc.call_instance_shutdown(node_current, instance,
4251 self.shutdown_timeout)
4252 result.Raise("Could not shutdown instance for full reboot")
4253 _ShutdownInstanceDisks(self, instance)
4254 _StartInstanceDisks(self, instance, ignore_secondaries)
4255 result = self.rpc.call_instance_start(node_current, instance, None, None)
4256 msg = result.fail_msg
4258 _ShutdownInstanceDisks(self, instance)
4259 raise errors.OpExecError("Could not start instance for"
4260 " full reboot: %s" % msg)
4262 self.cfg.MarkInstanceUp(instance.name)
4265 class LUShutdownInstance(LogicalUnit):
4266 """Shutdown an instance.
4269 HPATH = "instance-stop"
4270 HTYPE = constants.HTYPE_INSTANCE
4271 _OP_REQP = ["instance_name"]
4274 def CheckArguments(self):
4275 """Check the arguments.
4278 self.timeout = getattr(self.op, "timeout",
4279 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4281 def ExpandNames(self):
4282 self._ExpandAndLockInstance()
4284 def BuildHooksEnv(self):
4287 This runs on master, primary and secondary nodes of the instance.
4290 env = _BuildInstanceHookEnvByObject(self, self.instance)
4291 env["TIMEOUT"] = self.timeout
4292 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4295 def CheckPrereq(self):
4296 """Check prerequisites.
4298 This checks that the instance is in the cluster.
4301 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4302 assert self.instance is not None, \
4303 "Cannot retrieve locked instance %s" % self.op.instance_name
4304 _CheckNodeOnline(self, self.instance.primary_node)
4306 def Exec(self, feedback_fn):
4307 """Shutdown the instance.
4310 instance = self.instance
4311 node_current = instance.primary_node
4312 timeout = self.timeout
4313 self.cfg.MarkInstanceDown(instance.name)
4314 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4315 msg = result.fail_msg
4317 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4319 _ShutdownInstanceDisks(self, instance)
4322 class LUReinstallInstance(LogicalUnit):
4323 """Reinstall an instance.
4326 HPATH = "instance-reinstall"
4327 HTYPE = constants.HTYPE_INSTANCE
4328 _OP_REQP = ["instance_name"]
4331 def ExpandNames(self):
4332 self._ExpandAndLockInstance()
4334 def BuildHooksEnv(self):
4337 This runs on master, primary and secondary nodes of the instance.
4340 env = _BuildInstanceHookEnvByObject(self, self.instance)
4341 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4344 def CheckPrereq(self):
4345 """Check prerequisites.
4347 This checks that the instance is in the cluster and is not running.
4350 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4351 assert instance is not None, \
4352 "Cannot retrieve locked instance %s" % self.op.instance_name
4353 _CheckNodeOnline(self, instance.primary_node)
4355 if instance.disk_template == constants.DT_DISKLESS:
4356 raise errors.OpPrereqError("Instance '%s' has no disks" %
4357 self.op.instance_name,
4359 _CheckInstanceDown(self, instance, "cannot reinstall")
4361 self.op.os_type = getattr(self.op, "os_type", None)
4362 self.op.force_variant = getattr(self.op, "force_variant", False)
4363 if self.op.os_type is not None:
4365 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4366 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4368 self.instance = instance
4370 def Exec(self, feedback_fn):
4371 """Reinstall the instance.
4374 inst = self.instance
4376 if self.op.os_type is not None:
4377 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4378 inst.os = self.op.os_type
4379 self.cfg.Update(inst, feedback_fn)
4381 _StartInstanceDisks(self, inst, None)
4383 feedback_fn("Running the instance OS create scripts...")
4384 # FIXME: pass debug option from opcode to backend
4385 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4386 self.op.debug_level)
4387 result.Raise("Could not install OS for instance %s on node %s" %
4388 (inst.name, inst.primary_node))
4390 _ShutdownInstanceDisks(self, inst)
4393 class LURecreateInstanceDisks(LogicalUnit):
4394 """Recreate an instance's missing disks.
4397 HPATH = "instance-recreate-disks"
4398 HTYPE = constants.HTYPE_INSTANCE
4399 _OP_REQP = ["instance_name", "disks"]
4402 def CheckArguments(self):
4403 """Check the arguments.
4406 if not isinstance(self.op.disks, list):
4407 raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4408 for item in self.op.disks:
4409 if (not isinstance(item, int) or
4411 raise errors.OpPrereqError("Invalid disk specification '%s'" %
4412 str(item), errors.ECODE_INVAL)
4414 def ExpandNames(self):
4415 self._ExpandAndLockInstance()
4417 def BuildHooksEnv(self):
4420 This runs on master, primary and secondary nodes of the instance.
4423 env = _BuildInstanceHookEnvByObject(self, self.instance)
4424 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4427 def CheckPrereq(self):
4428 """Check prerequisites.
4430 This checks that the instance is in the cluster and is not running.
4433 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4434 assert instance is not None, \
4435 "Cannot retrieve locked instance %s" % self.op.instance_name
4436 _CheckNodeOnline(self, instance.primary_node)
4438 if instance.disk_template == constants.DT_DISKLESS:
4439 raise errors.OpPrereqError("Instance '%s' has no disks" %
4440 self.op.instance_name, errors.ECODE_INVAL)
4441 _CheckInstanceDown(self, instance, "cannot recreate disks")
4443 if not self.op.disks:
4444 self.op.disks = range(len(instance.disks))
4446 for idx in self.op.disks:
4447 if idx >= len(instance.disks):
4448 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4451 self.instance = instance
4453 def Exec(self, feedback_fn):
4454 """Recreate the disks.
4458 for idx, _ in enumerate(self.instance.disks):
4459 if idx not in self.op.disks: # disk idx has not been passed in
4463 _CreateDisks(self, self.instance, to_skip=to_skip)
4466 class LURenameInstance(LogicalUnit):
4467 """Rename an instance.
4470 HPATH = "instance-rename"
4471 HTYPE = constants.HTYPE_INSTANCE
4472 _OP_REQP = ["instance_name", "new_name"]
4474 def BuildHooksEnv(self):
4477 This runs on master, primary and secondary nodes of the instance.
4480 env = _BuildInstanceHookEnvByObject(self, self.instance)
4481 env["INSTANCE_NEW_NAME"] = self.op.new_name
4482 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4485 def CheckPrereq(self):
4486 """Check prerequisites.
4488 This checks that the instance is in the cluster and is not running.
4491 self.op.instance_name = _ExpandInstanceName(self.cfg,
4492 self.op.instance_name)
4493 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4494 assert instance is not None
4495 _CheckNodeOnline(self, instance.primary_node)
4496 _CheckInstanceDown(self, instance, "cannot rename")
4497 self.instance = instance
4499 # new name verification
4500 name_info = utils.GetHostInfo(self.op.new_name)
4502 self.op.new_name = new_name = name_info.name
4503 instance_list = self.cfg.GetInstanceList()
4504 if new_name in instance_list:
4505 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4506 new_name, errors.ECODE_EXISTS)
4508 if not getattr(self.op, "ignore_ip", False):
4509 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4510 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4511 (name_info.ip, new_name),
4512 errors.ECODE_NOTUNIQUE)
4515 def Exec(self, feedback_fn):
4516 """Reinstall the instance.
4519 inst = self.instance
4520 old_name = inst.name
4522 if inst.disk_template == constants.DT_FILE:
4523 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4525 self.cfg.RenameInstance(inst.name, self.op.new_name)
4526 # Change the instance lock. This is definitely safe while we hold the BGL
4527 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4528 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4530 # re-read the instance from the configuration after rename
4531 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4533 if inst.disk_template == constants.DT_FILE:
4534 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4535 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4536 old_file_storage_dir,
4537 new_file_storage_dir)
4538 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4539 " (but the instance has been renamed in Ganeti)" %
4540 (inst.primary_node, old_file_storage_dir,
4541 new_file_storage_dir))
4543 _StartInstanceDisks(self, inst, None)
4545 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4546 old_name, self.op.debug_level)
4547 msg = result.fail_msg
4549 msg = ("Could not run OS rename script for instance %s on node %s"
4550 " (but the instance has been renamed in Ganeti): %s" %
4551 (inst.name, inst.primary_node, msg))
4552 self.proc.LogWarning(msg)
4554 _ShutdownInstanceDisks(self, inst)
4557 class LURemoveInstance(LogicalUnit):
4558 """Remove an instance.
4561 HPATH = "instance-remove"
4562 HTYPE = constants.HTYPE_INSTANCE
4563 _OP_REQP = ["instance_name", "ignore_failures"]
4566 def CheckArguments(self):
4567 """Check the arguments.
4570 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4571 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4573 def ExpandNames(self):
4574 self._ExpandAndLockInstance()
4575 self.needed_locks[locking.LEVEL_NODE] = []
4576 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4578 def DeclareLocks(self, level):
4579 if level == locking.LEVEL_NODE:
4580 self._LockInstancesNodes()
4582 def BuildHooksEnv(self):
4585 This runs on master, primary and secondary nodes of the instance.
4588 env = _BuildInstanceHookEnvByObject(self, self.instance)
4589 env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4590 nl = [self.cfg.GetMasterNode()]
4591 nl_post = list(self.instance.all_nodes) + nl
4592 return env, nl, nl_post
4594 def CheckPrereq(self):
4595 """Check prerequisites.
4597 This checks that the instance is in the cluster.
4600 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4601 assert self.instance is not None, \
4602 "Cannot retrieve locked instance %s" % self.op.instance_name
4604 def Exec(self, feedback_fn):
4605 """Remove the instance.
4608 instance = self.instance
4609 logging.info("Shutting down instance %s on node %s",
4610 instance.name, instance.primary_node)
4612 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4613 self.shutdown_timeout)
4614 msg = result.fail_msg
4616 if self.op.ignore_failures:
4617 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4619 raise errors.OpExecError("Could not shutdown instance %s on"
4621 (instance.name, instance.primary_node, msg))
4623 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4626 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4627 """Utility function to remove an instance.
4630 logging.info("Removing block devices for instance %s", instance.name)
4632 if not _RemoveDisks(lu, instance):
4633 if not ignore_failures:
4634 raise errors.OpExecError("Can't remove instance's disks")
4635 feedback_fn("Warning: can't remove instance's disks")
4637 logging.info("Removing instance %s out of cluster config", instance.name)
4639 lu.cfg.RemoveInstance(instance.name)
4641 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4642 "Instance lock removal conflict"
4644 # Remove lock for the instance
4645 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4648 class LUQueryInstances(NoHooksLU):
4649 """Logical unit for querying instances.
4652 # pylint: disable-msg=W0142
4653 _OP_REQP = ["output_fields", "names", "use_locking"]
4655 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4656 "serial_no", "ctime", "mtime", "uuid"]
4657 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4659 "disk_template", "ip", "mac", "bridge",
4660 "nic_mode", "nic_link",
4661 "sda_size", "sdb_size", "vcpus", "tags",
4662 "network_port", "beparams",
4663 r"(disk)\.(size)/([0-9]+)",
4664 r"(disk)\.(sizes)", "disk_usage",
4665 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4666 r"(nic)\.(bridge)/([0-9]+)",
4667 r"(nic)\.(macs|ips|modes|links|bridges)",
4668 r"(disk|nic)\.(count)",
4670 ] + _SIMPLE_FIELDS +
4672 for name in constants.HVS_PARAMETERS
4673 if name not in constants.HVC_GLOBALS] +
4675 for name in constants.BES_PARAMETERS])
4676 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
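# A sketch of how the regex entries in _FIELDS_STATIC are consumed by Exec
# below; the field name "disk.size/0" is an assumed example. It matches
# r"(disk)\.(size)/([0-9]+)" and the captured groups drive the dispatch:
#
#   st_match = self._FIELDS_STATIC.Matches("disk.size/0")
#   st_groups = st_match.groups()        # ("disk", "size", "0")
#   val = instance.FindDisk(st_groups[2]).size
#
# Analogously, "nic.mac/1" selects the MAC address of the second NIC.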
4679 def ExpandNames(self):
4680 _CheckOutputFields(static=self._FIELDS_STATIC,
4681 dynamic=self._FIELDS_DYNAMIC,
4682 selected=self.op.output_fields)
4684 self.needed_locks = {}
4685 self.share_locks[locking.LEVEL_INSTANCE] = 1
4686 self.share_locks[locking.LEVEL_NODE] = 1
4689 self.wanted = _GetWantedInstances(self, self.op.names)
4691 self.wanted = locking.ALL_SET
4693 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4694 self.do_locking = self.do_node_query and self.op.use_locking
4696 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4697 self.needed_locks[locking.LEVEL_NODE] = []
4698 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4700 def DeclareLocks(self, level):
4701 if level == locking.LEVEL_NODE and self.do_locking:
4702 self._LockInstancesNodes()
4704 def CheckPrereq(self):
4705 """Check prerequisites.
4710 def Exec(self, feedback_fn):
4711 """Computes the list of nodes and their attributes.
4714 # pylint: disable-msg=R0912
4715 # way too many branches here
4716 all_info = self.cfg.GetAllInstancesInfo()
4717 if self.wanted == locking.ALL_SET:
4718 # caller didn't specify instance names, so ordering is not important
4720 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4722 instance_names = all_info.keys()
4723 instance_names = utils.NiceSort(instance_names)
4725 # caller did specify names, so we must keep the ordering
4727 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4729 tgt_set = all_info.keys()
4730 missing = set(self.wanted).difference(tgt_set)
4732 raise errors.OpExecError("Some instances were removed before"
4733 " retrieving their data: %s" % missing)
4734 instance_names = self.wanted
4736 instance_list = [all_info[iname] for iname in instance_names]
4738 # begin data gathering
4740 nodes = frozenset([inst.primary_node for inst in instance_list])
4741 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4745 if self.do_node_query:
4747 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4749 result = node_data[name]
4751 # offline nodes will be in both lists
4752 off_nodes.append(name)
4754 bad_nodes.append(name)
4757 live_data.update(result.payload)
4758 # else no instance is alive
4760 live_data = dict([(name, {}) for name in instance_names])
4762 # end data gathering
4767 cluster = self.cfg.GetClusterInfo()
4768 for instance in instance_list:
4770 i_hv = cluster.FillHV(instance, skip_globals=True)
4771 i_be = cluster.FillBE(instance)
4772 i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4773 nic.nicparams) for nic in instance.nics]
4774 for field in self.op.output_fields:
4775 st_match = self._FIELDS_STATIC.Matches(field)
4776 if field in self._SIMPLE_FIELDS:
4777 val = getattr(instance, field)
4778 elif field == "pnode":
4779 val = instance.primary_node
4780 elif field == "snodes":
4781 val = list(instance.secondary_nodes)
4782 elif field == "admin_state":
4783 val = instance.admin_up
4784 elif field == "oper_state":
4785 if instance.primary_node in bad_nodes:
4788 val = bool(live_data.get(instance.name))
4789 elif field == "status":
4790 if instance.primary_node in off_nodes:
4791 val = "ERROR_nodeoffline"
4792 elif instance.primary_node in bad_nodes:
4793 val = "ERROR_nodedown"
4795 running = bool(live_data.get(instance.name))
4797 if instance.admin_up:
4802 if instance.admin_up:
4806 elif field == "oper_ram":
4807 if instance.primary_node in bad_nodes:
4809 elif instance.name in live_data:
4810 val = live_data[instance.name].get("memory", "?")
4813 elif field == "vcpus":
4814 val = i_be[constants.BE_VCPUS]
4815 elif field == "disk_template":
4816 val = instance.disk_template
4819 val = instance.nics[0].ip
4822 elif field == "nic_mode":
4824 val = i_nicp[0][constants.NIC_MODE]
4827 elif field == "nic_link":
4829 val = i_nicp[0][constants.NIC_LINK]
4832 elif field == "bridge":
4833 if (instance.nics and
4834 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4835 val = i_nicp[0][constants.NIC_LINK]
4838 elif field == "mac":
4840 val = instance.nics[0].mac
4843 elif field == "sda_size" or field == "sdb_size":
4844 idx = ord(field[2]) - ord('a')
4846 val = instance.FindDisk(idx).size
4847 except errors.OpPrereqError:
4849 elif field == "disk_usage": # total disk usage per node
4850 disk_sizes = [{'size': disk.size} for disk in instance.disks]
4851 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4852 elif field == "tags":
4853 val = list(instance.GetTags())
4854 elif field == "hvparams":
4856 elif (field.startswith(HVPREFIX) and
4857 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4858 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4859 val = i_hv.get(field[len(HVPREFIX):], None)
4860 elif field == "beparams":
4862 elif (field.startswith(BEPREFIX) and
4863 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4864 val = i_be.get(field[len(BEPREFIX):], None)
4865 elif st_match and st_match.groups():
4866 # matches a variable list
4867 st_groups = st_match.groups()
4868 if st_groups and st_groups[0] == "disk":
4869 if st_groups[1] == "count":
4870 val = len(instance.disks)
4871 elif st_groups[1] == "sizes":
4872 val = [disk.size for disk in instance.disks]
4873 elif st_groups[1] == "size":
4875 val = instance.FindDisk(st_groups[2]).size
4876 except errors.OpPrereqError:
4879 assert False, "Unhandled disk parameter"
4880 elif st_groups[0] == "nic":
4881 if st_groups[1] == "count":
4882 val = len(instance.nics)
4883 elif st_groups[1] == "macs":
4884 val = [nic.mac for nic in instance.nics]
4885 elif st_groups[1] == "ips":
4886 val = [nic.ip for nic in instance.nics]
4887 elif st_groups[1] == "modes":
4888 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4889 elif st_groups[1] == "links":
4890 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4891 elif st_groups[1] == "bridges":
4894 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4895 val.append(nicp[constants.NIC_LINK])
4900 nic_idx = int(st_groups[2])
4901 if nic_idx >= len(instance.nics):
4904 if st_groups[1] == "mac":
4905 val = instance.nics[nic_idx].mac
4906 elif st_groups[1] == "ip":
4907 val = instance.nics[nic_idx].ip
4908 elif st_groups[1] == "mode":
4909 val = i_nicp[nic_idx][constants.NIC_MODE]
4910 elif st_groups[1] == "link":
4911 val = i_nicp[nic_idx][constants.NIC_LINK]
4912 elif st_groups[1] == "bridge":
4913 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4914 if nic_mode == constants.NIC_MODE_BRIDGED:
4915 val = i_nicp[nic_idx][constants.NIC_LINK]
4919 assert False, "Unhandled NIC parameter"
4921 assert False, ("Declared but unhandled variable parameter '%s'" %
4924 assert False, "Declared but unhandled parameter '%s'" % field
4931 class LUFailoverInstance(LogicalUnit):
4932 """Failover an instance.
4935 HPATH = "instance-failover"
4936 HTYPE = constants.HTYPE_INSTANCE
4937 _OP_REQP = ["instance_name", "ignore_consistency"]
4940 def CheckArguments(self):
4941 """Check the arguments.
4944 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4945 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4947 def ExpandNames(self):
4948 self._ExpandAndLockInstance()
4949 self.needed_locks[locking.LEVEL_NODE] = []
4950 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4952 def DeclareLocks(self, level):
4953 if level == locking.LEVEL_NODE:
4954 self._LockInstancesNodes()
4956 def BuildHooksEnv(self):
4959 This runs on master, primary and secondary nodes of the instance.
4962 instance = self.instance
4963 source_node = instance.primary_node
4964 target_node = instance.secondary_nodes[0]
4966 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4967 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4968 "OLD_PRIMARY": source_node,
4969 "OLD_SECONDARY": target_node,
4970 "NEW_PRIMARY": target_node,
4971 "NEW_SECONDARY": source_node,
4973 env.update(_BuildInstanceHookEnvByObject(self, instance))
4974 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4976 nl_post.append(source_node)
4977 return env, nl, nl_post
4979 def CheckPrereq(self):
4980 """Check prerequisites.
4982 This checks that the instance is in the cluster.
4985 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4986 assert self.instance is not None, \
4987 "Cannot retrieve locked instance %s" % self.op.instance_name
4989 bep = self.cfg.GetClusterInfo().FillBE(instance)
4990 if instance.disk_template not in constants.DTS_NET_MIRROR:
4991 raise errors.OpPrereqError("Instance's disk layout is not"
4992 " network mirrored, cannot failover.",
4995 secondary_nodes = instance.secondary_nodes
4996 if not secondary_nodes:
4997 raise errors.ProgrammerError("no secondary node but using "
4998 "a mirrored disk template")
5000 target_node = secondary_nodes[0]
5001 _CheckNodeOnline(self, target_node)
5002 _CheckNodeNotDrained(self, target_node)
5003 if instance.admin_up:
5004 # check memory requirements on the secondary node
5005 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5006 instance.name, bep[constants.BE_MEMORY],
5007 instance.hypervisor)
5009 self.LogInfo("Not checking memory on the secondary node as"
5010 " instance will not be started")
5012 # check bridge existence
5013 _CheckInstanceBridgesExist(self, instance, node=target_node)
5015 def Exec(self, feedback_fn):
5016 """Failover an instance.
5018 The failover is done by shutting it down on its present node and
5019 starting it on the secondary.
5022 instance = self.instance
5024 source_node = instance.primary_node
5025 target_node = instance.secondary_nodes[0]
5027 if instance.admin_up:
5028 feedback_fn("* checking disk consistency between source and target")
5029 for dev in instance.disks:
5030 # for drbd, these are drbd over lvm
5031 if not _CheckDiskConsistency(self, dev, target_node, False):
5032 if not self.op.ignore_consistency:
5033 raise errors.OpExecError("Disk %s is degraded on target node,"
5034 " aborting failover." % dev.iv_name)
5036 feedback_fn("* not checking disk consistency as instance is not running")
5038 feedback_fn("* shutting down instance on source node")
5039 logging.info("Shutting down instance %s on node %s",
5040 instance.name, source_node)
5042 result = self.rpc.call_instance_shutdown(source_node, instance,
5043 self.shutdown_timeout)
5044 msg = result.fail_msg
5046 if self.op.ignore_consistency:
5047 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5048 " Proceeding anyway. Please make sure node"
5049 " %s is down. Error details: %s",
5050 instance.name, source_node, source_node, msg)
5052 raise errors.OpExecError("Could not shutdown instance %s on"
5054 (instance.name, source_node, msg))
5056 feedback_fn("* deactivating the instance's disks on source node")
5057 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5058 raise errors.OpExecError("Can't shut down the instance's disks.")
5060 instance.primary_node = target_node
5061 # distribute new instance config to the other nodes
5062 self.cfg.Update(instance, feedback_fn)
5064 # Only start the instance if it's marked as up
5065 if instance.admin_up:
5066 feedback_fn("* activating the instance's disks on target node")
5067 logging.info("Starting instance %s on node %s",
5068 instance.name, target_node)
5070 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5071 ignore_secondaries=True)
5073 _ShutdownInstanceDisks(self, instance)
5074 raise errors.OpExecError("Can't activate the instance's disks")
5076 feedback_fn("* starting the instance on the target node")
5077 result = self.rpc.call_instance_start(target_node, instance, None, None)
5078 msg = result.fail_msg
5080 _ShutdownInstanceDisks(self, instance)
5081 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5082 (instance.name, target_node, msg))
5085 class LUMigrateInstance(LogicalUnit):
5086 """Migrate an instance.
5088 This is migration without shutting the instance down, unlike failover,
5089 which is done with a shutdown.
5092 HPATH = "instance-migrate"
5093 HTYPE = constants.HTYPE_INSTANCE
5094 _OP_REQP = ["instance_name", "live", "cleanup"]
5098 def ExpandNames(self):
5099 self._ExpandAndLockInstance()
5101 self.needed_locks[locking.LEVEL_NODE] = []
5102 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5104 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5105 self.op.live, self.op.cleanup)
5106 self.tasklets = [self._migrater]
5108 def DeclareLocks(self, level):
5109 if level == locking.LEVEL_NODE:
5110 self._LockInstancesNodes()
5112 def BuildHooksEnv(self):
5115 This runs on master, primary and secondary nodes of the instance.
5118 instance = self._migrater.instance
5119 source_node = instance.primary_node
5120 target_node = instance.secondary_nodes[0]
5121 env = _BuildInstanceHookEnvByObject(self, instance)
5122 env["MIGRATE_LIVE"] = self.op.live
5123 env["MIGRATE_CLEANUP"] = self.op.cleanup
5125 "OLD_PRIMARY": source_node,
5126 "OLD_SECONDARY": target_node,
5127 "NEW_PRIMARY": target_node,
5128 "NEW_SECONDARY": source_node,
5130 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5132 nl_post.append(source_node)
5133 return env, nl, nl_post
5136 class LUMoveInstance(LogicalUnit):
5137 """Move an instance by data-copying.
5140 HPATH = "instance-move"
5141 HTYPE = constants.HTYPE_INSTANCE
5142 _OP_REQP = ["instance_name", "target_node"]
5145 def CheckArguments(self):
5146 """Check the arguments.
5149 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5150 constants.DEFAULT_SHUTDOWN_TIMEOUT)
5152 def ExpandNames(self):
5153 self._ExpandAndLockInstance()
5154 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5155 self.op.target_node = target_node
5156 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5157 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5159 def DeclareLocks(self, level):
5160 if level == locking.LEVEL_NODE:
5161 self._LockInstancesNodes(primary_only=True)
5163 def BuildHooksEnv(self):
5166 This runs on master, primary and secondary nodes of the instance.
5170 "TARGET_NODE": self.op.target_node,
5171 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5173 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5174 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5175 self.op.target_node]
5178 def CheckPrereq(self):
5179 """Check prerequisites.
5181 This checks that the instance is in the cluster.
5184 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5185 assert self.instance is not None, \
5186 "Cannot retrieve locked instance %s" % self.op.instance_name
5188 node = self.cfg.GetNodeInfo(self.op.target_node)
5189 assert node is not None, \
5190 "Cannot retrieve locked node %s" % self.op.target_node
5192 self.target_node = target_node = node.name
5194 if target_node == instance.primary_node:
5195 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5196 (instance.name, target_node),
5199 bep = self.cfg.GetClusterInfo().FillBE(instance)
5201 for idx, dsk in enumerate(instance.disks):
5202 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5203 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5204 " cannot copy" % idx, errors.ECODE_STATE)
5206 _CheckNodeOnline(self, target_node)
5207 _CheckNodeNotDrained(self, target_node)
5209 if instance.admin_up:
5210 # check memory requirements on the target node
5211 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5212 instance.name, bep[constants.BE_MEMORY],
5213 instance.hypervisor)
5215 self.LogInfo("Not checking memory on the secondary node as"
5216 " instance will not be started")
5218 # check bridge existence
5219 _CheckInstanceBridgesExist(self, instance, node=target_node)
5221 def Exec(self, feedback_fn):
5222 """Move an instance.
5224 The move is done by shutting it down on its present node, copying
5225 the data over (slow) and starting it on the new node.
5228 instance = self.instance
5230 source_node = instance.primary_node
5231 target_node = self.target_node
5233 self.LogInfo("Shutting down instance %s on source node %s",
5234 instance.name, source_node)
5236 result = self.rpc.call_instance_shutdown(source_node, instance,
5237 self.shutdown_timeout)
5238 msg = result.fail_msg
5240 if self.op.ignore_consistency:
5241 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5242 " Proceeding anyway. Please make sure node"
5243 " %s is down. Error details: %s",
5244 instance.name, source_node, source_node, msg)
5246 raise errors.OpExecError("Could not shutdown instance %s on"
5248 (instance.name, source_node, msg))
5250 # create the target disks
5252 _CreateDisks(self, instance, target_node=target_node)
5253 except errors.OpExecError:
5254 self.LogWarning("Device creation failed, reverting...")
5256 _RemoveDisks(self, instance, target_node=target_node)
5258 self.cfg.ReleaseDRBDMinors(instance.name)
5261 cluster_name = self.cfg.GetClusterInfo().cluster_name
5264 # activate, get path, copy the data over
5265 for idx, disk in enumerate(instance.disks):
5266 self.LogInfo("Copying data for disk %d", idx)
5267 result = self.rpc.call_blockdev_assemble(target_node, disk,
5268 instance.name, True)
5270 self.LogWarning("Can't assemble newly created disk %d: %s",
5271 idx, result.fail_msg)
5272 errs.append(result.fail_msg)
5274 dev_path = result.payload
5275 result = self.rpc.call_blockdev_export(source_node, disk,
5276 target_node, dev_path,
5279 self.LogWarning("Can't copy data over for disk %d: %s",
5280 idx, result.fail_msg)
5281 errs.append(result.fail_msg)
5285 self.LogWarning("Some disks failed to copy, aborting")
5287 _RemoveDisks(self, instance, target_node=target_node)
5289 self.cfg.ReleaseDRBDMinors(instance.name)
5290 raise errors.OpExecError("Errors during disk copy: %s" %
5293 instance.primary_node = target_node
5294 self.cfg.Update(instance, feedback_fn)
5296 self.LogInfo("Removing the disks on the original node")
5297 _RemoveDisks(self, instance, target_node=source_node)
5299 # Only start the instance if it's marked as up
5300 if instance.admin_up:
5301 self.LogInfo("Starting instance %s on node %s",
5302 instance.name, target_node)
5304 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5305 ignore_secondaries=True)
5307 _ShutdownInstanceDisks(self, instance)
5308 raise errors.OpExecError("Can't activate the instance's disks")
5310 result = self.rpc.call_instance_start(target_node, instance, None, None)
5311 msg = result.fail_msg
5313 _ShutdownInstanceDisks(self, instance)
5314 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5315 (instance.name, target_node, msg))
5318 class LUMigrateNode(LogicalUnit):
5319 """Migrate all instances from a node.
5322 HPATH = "node-migrate"
5323 HTYPE = constants.HTYPE_NODE
5324 _OP_REQP = ["node_name", "live"]
5327 def ExpandNames(self):
5328 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5330 self.needed_locks = {
5331 locking.LEVEL_NODE: [self.op.node_name],
5334 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5336 # Create tasklets for migrating instances for all instances on this node
5340 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5341 logging.debug("Migrating instance %s", inst.name)
5342 names.append(inst.name)
5344 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5346 self.tasklets = tasklets
5348 # Declare instance locks
5349 self.needed_locks[locking.LEVEL_INSTANCE] = names
5351 def DeclareLocks(self, level):
5352 if level == locking.LEVEL_NODE:
5353 self._LockInstancesNodes()
5355 def BuildHooksEnv(self):
5358 This runs on the master, the primary and all the secondaries.
5362 "NODE_NAME": self.op.node_name,
5365 nl = [self.cfg.GetMasterNode()]
5367 return (env, nl, nl)
5370 class TLMigrateInstance(Tasklet):
5371 def __init__(self, lu, instance_name, live, cleanup):
5372 """Initializes this class.
5375 Tasklet.__init__(self, lu)
5378 self.instance_name = instance_name
5380 self.cleanup = cleanup
5382 def CheckPrereq(self):
5383 """Check prerequisites.
5385 This checks that the instance is in the cluster.
5388 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5389 instance = self.cfg.GetInstanceInfo(instance_name)
5390 assert instance is not None
5392 if instance.disk_template != constants.DT_DRBD8:
5393 raise errors.OpPrereqError("Instance's disk layout is not"
5394 " drbd8, cannot migrate.", errors.ECODE_STATE)
5396 secondary_nodes = instance.secondary_nodes
5397 if not secondary_nodes:
5398 raise errors.ConfigurationError("No secondary node but using"
5399 " drbd8 disk template")
5401 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5403 target_node = secondary_nodes[0]
5404 # check memory requirements on the secondary node
5405 _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5406 instance.name, i_be[constants.BE_MEMORY],
5407 instance.hypervisor)
5409 # check bridge existence
5410 _CheckInstanceBridgesExist(self, instance, node=target_node)
5412 if not self.cleanup:
5413 _CheckNodeNotDrained(self, target_node)
5414 result = self.rpc.call_instance_migratable(instance.primary_node,
5416 result.Raise("Can't migrate, please use failover",
5417 prereq=True, ecode=errors.ECODE_STATE)
5419 self.instance = instance
5421 def _WaitUntilSync(self):
5422 """Poll with custom rpc for disk sync.
5424 This uses our own step-based rpc call.
5427 self.feedback_fn("* wait until resync is done")
5431 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5433 self.instance.disks)
5435 for node, nres in result.items():
5436 nres.Raise("Cannot resync disks on node %s" % node)
5437 node_done, node_percent = nres.payload
5438 all_done = all_done and node_done
5439 if node_percent is not None:
5440 min_percent = min(min_percent, node_percent)
5442 if min_percent < 100:
5443 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5446 def _EnsureSecondary(self, node):
5447 """Demote a node to secondary.
5450 self.feedback_fn("* switching node %s to secondary mode" % node)
5452 for dev in self.instance.disks:
5453 self.cfg.SetDiskID(dev, node)
5455 result = self.rpc.call_blockdev_close(node, self.instance.name,
5456 self.instance.disks)
5457 result.Raise("Cannot change disk to secondary on node %s" % node)
5459 def _GoStandalone(self):
5460 """Disconnect from the network.
5463 self.feedback_fn("* changing into standalone mode")
5464 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5465 self.instance.disks)
5466 for node, nres in result.items():
5467 nres.Raise("Cannot disconnect disks node %s" % node)
5469 def _GoReconnect(self, multimaster):
5470 """Reconnect to the network.
5476 msg = "single-master"
5477 self.feedback_fn("* changing disks into %s mode" % msg)
5478 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5479 self.instance.disks,
5480 self.instance.name, multimaster)
5481 for node, nres in result.items():
5482 nres.Raise("Cannot change disks config on node %s" % node)
5484 def _ExecCleanup(self):
5485 """Try to cleanup after a failed migration.
5487 The cleanup is done by:
5488 - check that the instance is running only on one node
5489 (and update the config if needed)
5490 - change disks on its secondary node to secondary
5491 - wait until disks are fully synchronized
5492 - disconnect from the network
5493 - change disks into single-master mode
5494 - wait again until disks are fully synchronized
5497 instance = self.instance
5498 target_node = self.target_node
5499 source_node = self.source_node
5501 # check running on only one node
5502 self.feedback_fn("* checking where the instance actually runs"
5503 " (if this hangs, the hypervisor might be in"
5505 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5506 for node, result in ins_l.items():
5507 result.Raise("Can't contact node %s" % node)
5509 runningon_source = instance.name in ins_l[source_node].payload
5510 runningon_target = instance.name in ins_l[target_node].payload
5512 if runningon_source and runningon_target:
5513 raise errors.OpExecError("Instance seems to be running on two nodes,"
5514 " or the hypervisor is confused. You will have"
5515 " to ensure manually that it runs only on one"
5516 " and restart this operation.")
5518 if not (runningon_source or runningon_target):
5519 raise errors.OpExecError("Instance does not seem to be running at all."
5520 " In this case, it's safer to repair by"
5521 " running 'gnt-instance stop' to ensure disk"
5522 " shutdown, and then restarting it.")
5524 if runningon_target:
5525 # the migration has actually succeeded, we need to update the config
5526 self.feedback_fn("* instance running on secondary node (%s),"
5527 " updating config" % target_node)
5528 instance.primary_node = target_node
5529 self.cfg.Update(instance, self.feedback_fn)
5530 demoted_node = source_node
5532 self.feedback_fn("* instance confirmed to be running on its"
5533 " primary node (%s)" % source_node)
5534 demoted_node = target_node
5536 self._EnsureSecondary(demoted_node)
5538 self._WaitUntilSync()
5539 except errors.OpExecError:
5540 # we ignore errors here, since if the device is standalone, it
5541 # won't be able to sync
5543 self._GoStandalone()
5544 self._GoReconnect(False)
5545 self._WaitUntilSync()
5547 self.feedback_fn("* done")
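# A sketch of how the cleanup path above is reached: the tasklet has to be
# built with cleanup=True, which is what a "gnt-instance migrate --cleanup"
# request is expected to translate into (the instance name is assumed):
#
#   tasklet = TLMigrateInstance(lu, "web1.example.com",
#                               live=False, cleanup=True)
#   # tasklet.Exec(feedback_fn) then dispatches to _ExecCleanup()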
5549 def _RevertDiskStatus(self):
5550 """Try to revert the disk status after a failed migration.
5553 target_node = self.target_node
5555 self._EnsureSecondary(target_node)
5556 self._GoStandalone()
5557 self._GoReconnect(False)
5558 self._WaitUntilSync()
5559 except errors.OpExecError, err:
5560 self.lu.LogWarning("Migration failed and I can't reconnect the"
5561 " drives: error '%s'\n"
5562 "Please look and recover the instance status" %
5565 def _AbortMigration(self):
5566 """Call the hypervisor code to abort a started migration.
5569 instance = self.instance
5570 target_node = self.target_node
5571 migration_info = self.migration_info
5573 abort_result = self.rpc.call_finalize_migration(target_node,
5577 abort_msg = abort_result.fail_msg
5579 logging.error("Aborting migration failed on target node %s: %s",
5580 target_node, abort_msg)
5581 # Don't raise an exception here, as we still have to try to revert the
5582 # disk status, even if this step failed.
5584 def _ExecMigration(self):
5585 """Migrate an instance.
5587 The migration is done by:
5588 - change the disks into dual-master mode
5589 - wait until disks are fully synchronized again
5590 - migrate the instance
5591 - change disks on the new secondary node (the old primary) to secondary
5592 - wait until disks are fully synchronized
5593 - change disks into single-master mode
5596 instance = self.instance
5597 target_node = self.target_node
5598 source_node = self.source_node
5600 self.feedback_fn("* checking disk consistency between source and target")
5601 for dev in instance.disks:
5602 if not _CheckDiskConsistency(self, dev, target_node, False):
5603 raise errors.OpExecError("Disk %s is degraded or not fully"
5604 " synchronized on target node,"
5605 " aborting migrate." % dev.iv_name)
5607 # First get the migration information from the remote node
5608 result = self.rpc.call_migration_info(source_node, instance)
5609 msg = result.fail_msg
5611 log_err = ("Failed fetching source migration information from %s: %s" %
5613 logging.error(log_err)
5614 raise errors.OpExecError(log_err)
5616 self.migration_info = migration_info = result.payload
5618 # Then switch the disks to master/master mode
5619 self._EnsureSecondary(target_node)
5620 self._GoStandalone()
5621 self._GoReconnect(True)
5622 self._WaitUntilSync()
5624 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5625 result = self.rpc.call_accept_instance(target_node,
5628 self.nodes_ip[target_node])
5630 msg = result.fail_msg
5632 logging.error("Instance pre-migration failed, trying to revert"
5633 " disk status: %s", msg)
5634 self.feedback_fn("Pre-migration failed, aborting")
5635 self._AbortMigration()
5636 self._RevertDiskStatus()
5637 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5638 (instance.name, msg))
5640 self.feedback_fn("* migrating instance to %s" % target_node)
5642 result = self.rpc.call_instance_migrate(source_node, instance,
5643 self.nodes_ip[target_node],
5645 msg = result.fail_msg
5647 logging.error("Instance migration failed, trying to revert"
5648 " disk status: %s", msg)
5649 self.feedback_fn("Migration failed, aborting")
5650 self._AbortMigration()
5651 self._RevertDiskStatus()
5652 raise errors.OpExecError("Could not migrate instance %s: %s" %
5653 (instance.name, msg))
5656 instance.primary_node = target_node
5657 # distribute new instance config to the other nodes
5658 self.cfg.Update(instance, self.feedback_fn)
5660 result = self.rpc.call_finalize_migration(target_node,
5664 msg = result.fail_msg
5666 logging.error("Instance migration succeeded, but finalization failed:"
5668 raise errors.OpExecError("Could not finalize instance migration: %s" %
5671 self._EnsureSecondary(source_node)
5672 self._WaitUntilSync()
5673 self._GoStandalone()
5674 self._GoReconnect(False)
5675 self._WaitUntilSync()
5677 self.feedback_fn("* done")
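# Summary sketch of the disk-state sequence driven above during a live
# migration (it mirrors the calls made in _ExecMigration, listed here for
# readability):
#
#   _EnsureSecondary(target) -> _GoStandalone() -> _GoReconnect(True)
#   -> _WaitUntilSync() -> accept + migrate + finalize on the target
#   -> _EnsureSecondary(source) -> _WaitUntilSync() -> _GoStandalone()
#   -> _GoReconnect(False) -> _WaitUntilSync()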
5679 def Exec(self, feedback_fn):
5680 """Perform the migration.
5683 feedback_fn("Migrating instance %s" % self.instance.name)
5685 self.feedback_fn = feedback_fn
5687 self.source_node = self.instance.primary_node
5688 self.target_node = self.instance.secondary_nodes[0]
5689 self.all_nodes = [self.source_node, self.target_node]
5691 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5692 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5696 return self._ExecCleanup()
5698 return self._ExecMigration()
5701 def _CreateBlockDev(lu, node, instance, device, force_create,
5703 """Create a tree of block devices on a given node.
5705 If this device type has to be created on secondaries, create it and all its children.
5708 If not, just recurse to its children keeping the same 'force' value.
5710 @param lu: the lu on whose behalf we execute
5711 @param node: the node on which to create the device
5712 @type instance: L{objects.Instance}
5713 @param instance: the instance which owns the device
5714 @type device: L{objects.Disk}
5715 @param device: the device to create
5716 @type force_create: boolean
5717 @param force_create: whether to force creation of this device; this
5718 will be changed to True whenever we find a device which has the
5719 CreateOnSecondary() attribute
5720 @param info: the extra 'metadata' we should attach to the device
5721 (this will be represented as an LVM tag)
5722 @type force_open: boolean
5723 @param force_open: this parameter will be passed to the
5724 L{backend.BlockdevCreate} function where it specifies
5725 whether we run on primary or not, and it affects both
5726 the child assembly and the device's own Open() execution
5729 if device.CreateOnSecondary():
5733 for child in device.children:
5734 _CreateBlockDev(lu, node, instance, child, force_create,
5737 if not force_create:
5740 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
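# A sketch of the recursion above for a DRBD8 disk: device.children holds
# the data and meta LVs, so those are created first and the DRBD device
# last; because CreateOnSecondary() is true for DRBD, force_create becomes
# True for the whole subtree even when the caller passed False for a
# secondary node. A hedged call shape (the node selection is an assumption):
#
#   _CreateBlockDev(lu, node, instance, drbd_disk,
#                   force_create=(node == instance.primary_node),
#                   info=_GetInstanceInfoText(instance),
#                   force_open=(node == instance.primary_node))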
5743 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5744 """Create a single block device on a given node.
5746 This will not recurse over the children of the device, so they must be created in advance.
5749 @param lu: the lu on whose behalf we execute
5750 @param node: the node on which to create the device
5751 @type instance: L{objects.Instance}
5752 @param instance: the instance which owns the device
5753 @type device: L{objects.Disk}
5754 @param device: the device to create
5755 @param info: the extra 'metadata' we should attach to the device
5756 (this will be represented as an LVM tag)
5757 @type force_open: boolean
5758 @param force_open: this parameter will be passed to the
5759 L{backend.BlockdevCreate} function where it specifies
5760 whether we run on primary or not, and it affects both
5761 the child assembly and the device's own Open() execution
5764 lu.cfg.SetDiskID(device, node)
5765 result = lu.rpc.call_blockdev_create(node, device, device.size,
5766 instance.name, force_open, info)
5767 result.Raise("Can't create block device %s on"
5768 " node %s for instance %s" % (device, node, instance.name))
5769 if device.physical_id is None:
5770 device.physical_id = result.payload
5773 def _GenerateUniqueNames(lu, exts):
5774 """Generate a suitable LV name.
5776 This will generate a logical volume name for the given instance.
5781 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5782 results.append("%s%s" % (new_id, val))
5786 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5788 """Generate a drbd8 device complete with its children.
5791 port = lu.cfg.AllocatePort()
5792 vgname = lu.cfg.GetVGName()
5793 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5794 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5795 logical_id=(vgname, names[0]))
5796 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5797 logical_id=(vgname, names[1]))
5798 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5799 logical_id=(primary, secondary, port,
5802 children=[dev_data, dev_meta],
5807 def _GenerateDiskTemplate(lu, template_name,
5808 instance_name, primary_node,
5809 secondary_nodes, disk_info,
5810 file_storage_dir, file_driver,
5812 """Generate the entire disk layout for a given template type.
5815 # TODO: compute space requirements
5817 vgname = lu.cfg.GetVGName()
5818 disk_count = len(disk_info)
5820 if template_name == constants.DT_DISKLESS:
5822 elif template_name == constants.DT_PLAIN:
5823 if len(secondary_nodes) != 0:
5824 raise errors.ProgrammerError("Wrong template configuration")
5826 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5827 for i in range(disk_count)])
5828 for idx, disk in enumerate(disk_info):
5829 disk_index = idx + base_index
5830 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5831 logical_id=(vgname, names[idx]),
5832 iv_name="disk/%d" % disk_index,
5834 disks.append(disk_dev)
5835 elif template_name == constants.DT_DRBD8:
5836 if len(secondary_nodes) != 1:
5837 raise errors.ProgrammerError("Wrong template configuration")
5838 remote_node = secondary_nodes[0]
5839 minors = lu.cfg.AllocateDRBDMinor(
5840 [primary_node, remote_node] * len(disk_info), instance_name)
5843 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5844 for i in range(disk_count)]):
5845 names.append(lv_prefix + "_data")
5846 names.append(lv_prefix + "_meta")
5847 for idx, disk in enumerate(disk_info):
5848 disk_index = idx + base_index
5849 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5850 disk["size"], names[idx*2:idx*2+2],
5851 "disk/%d" % disk_index,
5852 minors[idx*2], minors[idx*2+1])
5853 disk_dev.mode = disk["mode"]
5854 disks.append(disk_dev)
5855 elif template_name == constants.DT_FILE:
5856 if len(secondary_nodes) != 0:
5857 raise errors.ProgrammerError("Wrong template configuration")
5859 _RequireFileStorage()
5861 for idx, disk in enumerate(disk_info):
5862 disk_index = idx + base_index
5863 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5864 iv_name="disk/%d" % disk_index,
5865 logical_id=(file_driver,
5866 "%s/disk%d" % (file_storage_dir,
5869 disks.append(disk_dev)
5871 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
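
# A self-contained sketch of the naming scheme used by the DT_DRBD8 branch
# above: every disk gets a unique ".diskN" prefix which is then expanded into
# a _data and a _meta logical volume, so names[idx*2]/names[idx*2+1] become
# the data/meta LVs of disk idx.  uuid.uuid4() stands in for
# lu.cfg.GenerateUniqueID; that substitution is an assumption made only to
# keep the snippet standalone.
import uuid

def _SketchDrbdLvNames(disk_count, base_index=0):
  names = []
  for i in range(disk_count):
    prefix = "%s.disk%d" % (uuid.uuid4(), base_index + i)
    names.append(prefix + "_data")
    names.append(prefix + "_meta")
  return names

# _SketchDrbdLvNames(2) returns four names, e.g.
#   ['<id>.disk0_data', '<id>.disk0_meta', '<id>.disk1_data', '<id>.disk1_meta']
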
5875 def _GetInstanceInfoText(instance):
5876 Compute the text that should be added to the disk's metadata.
5879 return "originstname+%s" % instance.name
5882 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5883 """Create all disks for an instance.
5885 This abstracts away some work from AddInstance.
5887 @type lu: L{LogicalUnit}
5888 @param lu: the logical unit on whose behalf we execute
5889 @type instance: L{objects.Instance}
5890 @param instance: the instance whose disks we should create
5892 @param to_skip: list of indices to skip
5893 @type target_node: string
5894 @param target_node: if passed, overrides the target node for creation
5896 @return: the success of the creation
5899 info = _GetInstanceInfoText(instance)
5900 if target_node is None:
5901 pnode = instance.primary_node
5902 all_nodes = instance.all_nodes
5907 if instance.disk_template == constants.DT_FILE:
5908 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5909 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5911 result.Raise("Failed to create directory '%s' on"
5912 " node %s" % (file_storage_dir, pnode))
5914 # Note: this needs to be kept in sync with adding of disks in
5915 # LUSetInstanceParams
5916 for idx, device in enumerate(instance.disks):
5917 if to_skip and idx in to_skip:
5919 logging.info("Creating volume %s for instance %s",
5920 device.iv_name, instance.name)
5922 for node in all_nodes:
5923 f_create = node == pnode
5924 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
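
# _CreateDisks above passes f_create (used both as force_create and
# force_open) as True only for the primary node, so secondaries only get the
# devices whose type requires creation there and never force-open them.
# A tiny standalone sketch of that pairing; the node names are made up.
def _SketchCreateFlags(primary_node, all_nodes):
  return [(node, node == primary_node) for node in all_nodes]

# _SketchCreateFlags("node1.example.com",
#                    ["node1.example.com", "node2.example.com"])
#   -> [("node1.example.com", True), ("node2.example.com", False)]
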
5927 def _RemoveDisks(lu, instance, target_node=None):
5928 """Remove all disks for an instance.
5930 This abstracts away some work from `AddInstance()` and
5931 `RemoveInstance()`. Note that in case some of the devices couldn't
5932 be removed, the removal will continue with the other ones (compare
5933 with `_CreateDisks()`).
5935 @type lu: L{LogicalUnit}
5936 @param lu: the logical unit on whose behalf we execute
5937 @type instance: L{objects.Instance}
5938 @param instance: the instance whose disks we should remove
5939 @type target_node: string
5940 @param target_node: used to override the node on which to remove the disks
5942 @return: the success of the removal
5945 logging.info("Removing block devices for instance %s", instance.name)
5948 for device in instance.disks:
5950 edata = [(target_node, device)]
5952 edata = device.ComputeNodeTree(instance.primary_node)
5953 for node, disk in edata:
5954 lu.cfg.SetDiskID(disk, node)
5955 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5957 lu.LogWarning("Could not remove block device %s on node %s,"
5958 " continuing anyway: %s", device.iv_name, node, msg)
5961 if instance.disk_template == constants.DT_FILE:
5962 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5966 tgt = instance.primary_node
5967 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5969 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5970 file_storage_dir, instance.primary_node, result.fail_msg)
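
# Unlike creation, the removal above is best-effort: a failure on one device
# is logged as a warning and the loop continues, so a single broken node does
# not leave every other device behind.  A standalone sketch of that pattern,
# with plain callables standing in for the RPC call and the LU logger (an
# assumption made only for illustration).
def _SketchRemoveAll(devices, remove_fn, warn_fn):
  all_ok = True
  for dev in devices:
    err = remove_fn(dev)  # None on success, an error string on failure
    if err:
      warn_fn("Could not remove %s, continuing anyway: %s" % (dev, err))
      all_ok = False
  return all_ok
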
5976 def _ComputeDiskSize(disk_template, disks):
5977 """Compute disk size requirements in the volume group
5980 # Required free disk space as a function of disk and swap space
5982 constants.DT_DISKLESS: None,
5983 constants.DT_PLAIN: sum(d["size"] for d in disks),
5984 # 128 MB is added per disk for DRBD metadata
5985 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
5986 constants.DT_FILE: None,
5989 if disk_template not in req_size_dict:
5990 raise errors.ProgrammerError("Disk template '%s' size requirement"
5991 " is unknown" % disk_template)
5993 return req_size_dict[disk_template]
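
# A worked, self-contained example of the size computation above, using
# literal template names instead of the constants module (an assumption made
# only to keep the snippet standalone).
def _SketchDiskSize(disk_template, disks):
  req_size_dict = {
    "diskless": None,
    "plain": sum(d["size"] for d in disks),
    "drbd": sum(d["size"] + 128 for d in disks),  # 128 MiB of DRBD metadata per disk
    "file": None,
  }
  return req_size_dict[disk_template]

# With two disks of 1024 MiB and 512 MiB:
#   _SketchDiskSize("plain", [{"size": 1024}, {"size": 512}])  -> 1536
#   _SketchDiskSize("drbd",  [{"size": 1024}, {"size": 512}])  -> 1792
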
5996 def _CheckHVParams(lu, nodenames, hvname, hvparams):
5997 """Hypervisor parameter validation.
5999 This function abstracts the hypervisor parameter validation so that it
6000 can be used in both instance create and instance modify.
6002 @type lu: L{LogicalUnit}
6003 @param lu: the logical unit for which we check
6004 @type nodenames: list
6005 @param nodenames: the list of nodes on which we should check
6006 @type hvname: string
6007 @param hvname: the name of the hypervisor we should use
6008 @type hvparams: dict
6009 @param hvparams: the parameters which we need to check
6010 @raise errors.OpPrereqError: if the parameters are not valid
6013 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6016 for node in nodenames:
6020 info.Raise("Hypervisor parameter validation failed on node %s" % node)
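
# The validation above fans the same parameter dict out to every node via a
# single RPC and then raises on any node that reports a problem.  A minimal
# standalone sketch of that fan-out/raise pattern, with a plain callable
# standing in for the RPC layer (an assumption made only for illustration).
def _SketchValidateOnNodes(nodenames, validate_fn):
  problems = []
  for node in nodenames:
    err = validate_fn(node)  # None means the node accepted the parameters
    if err:
      problems.append("%s: %s" % (node, err))
  if problems:
    raise ValueError("Hypervisor parameter validation failed: %s" %
                     "; ".join(problems))

# _SketchValidateOnNodes(["node1", "node2"], lambda node: None) passes quietly;
# any node for which the callable returns an error string raises ValueError.
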
6023 class LUCreateInstance(LogicalUnit):
6024 """Create an instance.
6027 HPATH = "instance-add"
6028 HTYPE = constants.HTYPE_INSTANCE
6029 _OP_REQP = ["instance_name", "disks",
6031 "wait_for_sync", "ip_check", "nics",
6032 "hvparams", "beparams"]
6035 def CheckArguments(self):
6039 # set optional parameters to None if they don't exist
6040 for attr in ["pnode", "snode", "iallocator", "hypervisor",
6041 "disk_template", "identify_defaults"]:
6042 if not hasattr(self.op, attr):
6043 setattr(self.op, attr, None)
6045 # do not require name_check to ease forward/backward compatibility
6047 if not hasattr(self.op, "name_check"):
6048 self.op.name_check = True
6049 if not hasattr(self.op, "no_install"):
6050 self.op.no_install = False
6051 if self.op.no_install and self.op.start:
6052 self.LogInfo("No-installation mode selected, disabling startup")
6053 self.op.start = False
6054 # validate/normalize the instance name
6055 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6056 if self.op.ip_check and not self.op.name_check:
6057 # TODO: make the ip check more flexible and not depend on the name check
6058 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6060 # check disk information: either all adopt, or no adopt
6061 has_adopt = has_no_adopt = False
6062 for disk in self.op.disks:
6067 if has_adopt and has_no_adopt:
6068 raise errors.OpPrereqError("Either all disks are adopted or none is",
6071 if self.op.disk_template != constants.DT_PLAIN:
6072 raise errors.OpPrereqError("Disk adoption is only supported for the"
6073 " 'plain' disk template",
6075 if self.op.iallocator is not None:
6076 raise errors.OpPrereqError("Disk adoption not allowed with an"
6077 " iallocator script", errors.ECODE_INVAL)
6078 if self.op.mode == constants.INSTANCE_IMPORT:
6079 raise errors.OpPrereqError("Disk adoption not allowed for"
6080 " instance import", errors.ECODE_INVAL)
6082 self.adopt_disks = has_adopt
6084 # verify creation mode
6085 if self.op.mode not in (constants.INSTANCE_CREATE,
6086 constants.INSTANCE_IMPORT):
6087 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6088 self.op.mode, errors.ECODE_INVAL)
6090 # instance name verification
6091 if self.op.name_check:
6092 self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6093 self.op.instance_name = self.hostname1.name
6094 # used in CheckPrereq for ip ping check
6095 self.check_ip = self.hostname1.ip
6097 self.check_ip = None
6099 # file storage checks
6100 if (self.op.file_driver and
6101 not self.op.file_driver in constants.FILE_DRIVER):
6102 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6103 self.op.file_driver, errors.ECODE_INVAL)
6105 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6106 raise errors.OpPrereqError("File storage directory path not absolute",
6109 ### Node/iallocator related checks
6110 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6111 raise errors.OpPrereqError("One and only one of iallocator and primary"
6112 " node must be given",
6115 if self.op.mode == constants.INSTANCE_IMPORT:
6116 # On import force_variant must be True, because if we forced it at
6117 # initial install, our only chance when importing it back is that it works as before
6119 self.op.force_variant = True
6121 if self.op.no_install:
6122 self.LogInfo("No-installation mode has no effect during import")
6124 else: # INSTANCE_CREATE
6125 if getattr(self.op, "os_type", None) is None:
6126 raise errors.OpPrereqError("No guest OS specified",
6128 self.op.force_variant = getattr(self.op, "force_variant", False)
6129 if self.op.disk_template is None:
6130 raise errors.OpPrereqError("No disk template specified",
6133 def ExpandNames(self):
6134 """ExpandNames for CreateInstance.
6136 Figure out the right locks for instance creation.
6139 self.needed_locks = {}
6141 instance_name = self.op.instance_name
6142 # this is just a preventive check, but someone might still add this
6143 # instance in the meantime, and creation will fail at lock-add time
6144 if instance_name in self.cfg.GetInstanceList():
6145 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6146 instance_name, errors.ECODE_EXISTS)
6148 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6150 if self.op.iallocator:
6151 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6153 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6154 nodelist = [self.op.pnode]
6155 if self.op.snode is not None:
6156 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6157 nodelist.append(self.op.snode)
6158 self.needed_locks[locking.LEVEL_NODE] = nodelist
6160 # in case of import lock the source node too
6161 if self.op.mode == constants.INSTANCE_IMPORT:
6162 src_node = getattr(self.op, "src_node", None)
6163 src_path = getattr(self.op, "src_path", None)
6165 if src_path is None:
6166 self.op.src_path = src_path = self.op.instance_name
6168 if src_node is None:
6169 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6170 self.op.src_node = None
6171 if os.path.isabs(src_path):
6172 raise errors.OpPrereqError("Importing an instance from an absolute"
6173 " path requires a source node option.",
6176 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6177 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6178 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6179 if not os.path.isabs(src_path):
6180 self.op.src_path = src_path = \
6181 utils.PathJoin(constants.EXPORT_DIR, src_path)
6183 def _RunAllocator(self):
6184 """Run the allocator based on input opcode.
6187 nics = [n.ToDict() for n in self.nics]
6188 ial = IAllocator(self.cfg, self.rpc,
6189 mode=constants.IALLOCATOR_MODE_ALLOC,
6190 name=self.op.instance_name,
6191 disk_template=self.op.disk_template,
6194 vcpus=self.be_full[constants.BE_VCPUS],
6195 mem_size=self.be_full[constants.BE_MEMORY],
6198 hypervisor=self.op.hypervisor,
6201 ial.Run(self.op.iallocator)
6204 raise errors.OpPrereqError("Can't compute nodes using"
6205 " iallocator '%s': %s" %
6206 (self.op.iallocator, ial.info),
6208 if len(ial.result) != ial.required_nodes:
6209 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6210 " of nodes (%s), required %s" %
6211 (self.op.iallocator, len(ial.result),
6212 ial.required_nodes), errors.ECODE_FAULT)
6213 self.op.pnode = ial.result[0]
6214 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6215 self.op.instance_name, self.op.iallocator,
6216 utils.CommaJoin(ial.result))
6217 if ial.required_nodes == 2:
6218 self.op.snode = ial.result[1]
6220 def BuildHooksEnv(self):
6223 This runs on master, primary and secondary nodes of the instance.
6227 "ADD_MODE": self.op.mode,
6229 if self.op.mode == constants.INSTANCE_IMPORT:
6230 env["SRC_NODE"] = self.op.src_node
6231 env["SRC_PATH"] = self.op.src_path
6232 env["SRC_IMAGES"] = self.src_images
6234 env.update(_BuildInstanceHookEnv(
6235 name=self.op.instance_name,
6236 primary_node=self.op.pnode,
6237 secondary_nodes=self.secondaries,
6238 status=self.op.start,
6239 os_type=self.op.os_type,
6240 memory=self.be_full[constants.BE_MEMORY],
6241 vcpus=self.be_full[constants.BE_VCPUS],
6242 nics=_NICListToTuple(self, self.nics),
6243 disk_template=self.op.disk_template,
6244 disks=[(d["size"], d["mode"]) for d in self.disks],
6247 hypervisor_name=self.op.hypervisor,
6250 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6254 def _ReadExportInfo(self):
6255 """Reads the export information from disk.
6257 It will override the opcode source node and path with the actual
6258 information, if these two were not specified before.
6260 @return: the export information
6263 assert self.op.mode == constants.INSTANCE_IMPORT
6265 src_node = self.op.src_node
6266 src_path = self.op.src_path
6268 if src_node is None:
6269 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6270 exp_list = self.rpc.call_export_list(locked_nodes)
6272 for node in exp_list:
6273 if exp_list[node].fail_msg:
6275 if src_path in exp_list[node].payload:
6277 self.op.src_node = src_node = node
6278 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6282 raise errors.OpPrereqError("No export found for relative path %s" %
6283 src_path, errors.ECODE_INVAL)
6285 _CheckNodeOnline(self, src_node)
6286 result = self.rpc.call_export_info(src_node, src_path)
6287 result.Raise("No export or invalid export found in dir %s" % src_path)
6289 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6290 if not export_info.has_section(constants.INISECT_EXP):
6291 raise errors.ProgrammerError("Corrupted export config",
6292 errors.ECODE_ENVIRON)
6294 ei_version = export_info.get(constants.INISECT_EXP, "version")
6295 if (int(ei_version) != constants.EXPORT_VERSION):
6296 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6297 (ei_version, constants.EXPORT_VERSION),
6298 errors.ECODE_ENVIRON)
6301 def _ReadExportParams(self, einfo):
6302 """Use export parameters as defaults.
6304 In case the opcode doesn't specify (as in override) some instance
6305 parameters, then try to use them from the export information, if that declares them.
6309 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6311 if self.op.disk_template is None:
6312 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6313 self.op.disk_template = einfo.get(constants.INISECT_INS,
6316 raise errors.OpPrereqError("No disk template specified and the export"
6317 " is missing the disk_template information",
6320 if not self.op.disks:
6321 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6323 # TODO: import the disk iv_name too
6324 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6325 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6326 disks.append({"size": disk_sz})
6327 self.op.disks = disks
6329 raise errors.OpPrereqError("No disk info specified and the export"
6330 " is missing the disk information",
6333 if (not self.op.nics and
6334 einfo.has_option(constants.INISECT_INS, "nic_count")):
6336 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6338 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6339 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6344 if (self.op.hypervisor is None and
6345 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6346 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6347 if einfo.has_section(constants.INISECT_HYP):
6348 # use the export parameters but do not override the ones
6349 # specified by the user
6350 for name, value in einfo.items(constants.INISECT_HYP):
6351 if name not in self.op.hvparams:
6352 self.op.hvparams[name] = value
6354 if einfo.has_section(constants.INISECT_BEP):
6355 # use the parameters, without overriding
6356 for name, value in einfo.items(constants.INISECT_BEP):
6357 if name not in self.op.beparams:
6358 self.op.beparams[name] = value
6360 # try to read the parameters old style, from the main section
6361 for name in constants.BES_PARAMETERS:
6362 if (name not in self.op.beparams and
6363 einfo.has_option(constants.INISECT_INS, name)):
6364 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6366 def _RevertToDefaults(self, cluster):
6367 """Revert the instance parameters to the default values.
6371 hv_defs = cluster.GetHVDefaults(self.op.hypervisor, self.op.os_type)
6372 for name in self.op.hvparams.keys():
6373 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6374 del self.op.hvparams[name]
6376 be_defs = cluster.beparams.get(constants.PP_DEFAULT, {})
6377 for name in self.op.beparams.keys():
6378 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6379 del self.op.beparams[name]
6381 nic_defs = cluster.nicparams.get(constants.PP_DEFAULT, {})
6382 for nic in self.op.nics:
6383 for name in constants.NICS_PARAMETERS:
6384 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6387 def CheckPrereq(self):
6388 """Check prerequisites.
6391 if self.op.mode == constants.INSTANCE_IMPORT:
6392 export_info = self._ReadExportInfo()
6393 self._ReadExportParams(export_info)
6395 _CheckDiskTemplate(self.op.disk_template)
6397 if (not self.cfg.GetVGName() and
6398 self.op.disk_template not in constants.DTS_NOT_LVM):
6399 raise errors.OpPrereqError("Cluster does not support lvm-based"
6400 " instances", errors.ECODE_STATE)
6402 if self.op.hypervisor is None:
6403 self.op.hypervisor = self.cfg.GetHypervisorType()
6405 cluster = self.cfg.GetClusterInfo()
6406 enabled_hvs = cluster.enabled_hypervisors
6407 if self.op.hypervisor not in enabled_hvs:
6408 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6409 " cluster (%s)" % (self.op.hypervisor,
6410 ",".join(enabled_hvs)),
6413 # check hypervisor parameter syntax (locally)
6414 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6415 filled_hvp = objects.FillDict(cluster.GetHVDefaults(self.op.hypervisor,
6418 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6419 hv_type.CheckParameterSyntax(filled_hvp)
6420 self.hv_full = filled_hvp
6421 # check that we don't specify global parameters on an instance
6422 _CheckGlobalHvParams(self.op.hvparams)
6424 # fill and remember the beparams dict
6425 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6426 self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6429 # now that hvp/bep are in final format, let's reset to defaults, if told to do so
6431 if self.op.identify_defaults:
6432 self._RevertToDefaults(cluster)
6436 for idx, nic in enumerate(self.op.nics):
6437 nic_mode_req = nic.get("mode", None)
6438 nic_mode = nic_mode_req
6439 if nic_mode is None:
6440 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6442 # in routed mode, for the first nic, the default ip is 'auto'
6443 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6444 default_ip_mode = constants.VALUE_AUTO
6446 default_ip_mode = constants.VALUE_NONE
6448 # ip validity checks
6449 ip = nic.get("ip", default_ip_mode)
6450 if ip is None or ip.lower() == constants.VALUE_NONE:
6452 elif ip.lower() == constants.VALUE_AUTO:
6453 if not self.op.name_check:
6454 raise errors.OpPrereqError("IP address set to auto but name checks"
6455 " have been skipped. Aborting.",
6457 nic_ip = self.hostname1.ip
6459 if not utils.IsValidIP(ip):
6460 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6461 " like a valid IP" % ip,
6465 # TODO: check the ip address for uniqueness
6466 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6467 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6470 # MAC address verification
6471 mac = nic.get("mac", constants.VALUE_AUTO)
6472 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6473 mac = utils.NormalizeAndValidateMac(mac)
6476 self.cfg.ReserveMAC(mac, self.proc.GetECId())
6477 except errors.ReservationError:
6478 raise errors.OpPrereqError("MAC address %s already in use"
6479 " in cluster" % mac,
6480 errors.ECODE_NOTUNIQUE)
6482 # bridge verification
6483 bridge = nic.get("bridge", None)
6484 link = nic.get("link", None)
6486 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6487 " at the same time", errors.ECODE_INVAL)
6488 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6489 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6496 nicparams[constants.NIC_MODE] = nic_mode_req
6498 nicparams[constants.NIC_LINK] = link
6500 check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6502 objects.NIC.CheckParameterSyntax(check_params)
6503 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6505 # disk checks/pre-build
6507 for disk in self.op.disks:
6508 mode = disk.get("mode", constants.DISK_RDWR)
6509 if mode not in constants.DISK_ACCESS_SET:
6510 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6511 mode, errors.ECODE_INVAL)
6512 size = disk.get("size", None)
6514 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6517 except (TypeError, ValueError):
6518 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6520 new_disk = {"size": size, "mode": mode}
6522 new_disk["adopt"] = disk["adopt"]
6523 self.disks.append(new_disk)
6525 if self.op.mode == constants.INSTANCE_IMPORT:
6527 # Check that the new instance doesn't have fewer disks than the export
6528 instance_disks = len(self.disks)
6529 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6530 if instance_disks < export_disks:
6531 raise errors.OpPrereqError("Not enough disks to import."
6532 " (instance: %d, export: %d)" %
6533 (instance_disks, export_disks),
6537 for idx in range(export_disks):
6538 option = 'disk%d_dump' % idx
6539 if export_info.has_option(constants.INISECT_INS, option):
6540 # FIXME: are the old os-es, disk sizes, etc. useful?
6541 export_name = export_info.get(constants.INISECT_INS, option)
6542 image = utils.PathJoin(self.op.src_path, export_name)
6543 disk_images.append(image)
6545 disk_images.append(False)
6547 self.src_images = disk_images
6549 old_name = export_info.get(constants.INISECT_INS, 'name')
6551 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6552 except (TypeError, ValueError), err:
6553 raise errors.OpPrereqError("Invalid export file, nic_count is not"
6554 " an integer: %s" % str(err),
6556 if self.op.instance_name == old_name:
6557 for idx, nic in enumerate(self.nics):
6558 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6559 nic_mac_ini = 'nic%d_mac' % idx
6560 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6562 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6564 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6565 if self.op.ip_check:
6566 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6567 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6568 (self.check_ip, self.op.instance_name),
6569 errors.ECODE_NOTUNIQUE)
6571 #### mac address generation
6572 # By generating the mac address here, both the allocator and the hooks get
6573 # the real final mac address rather than the 'auto' or 'generate' value.
6574 # There is a race condition between the generation and the instance object
6575 # creation, which means that we know the mac is valid now, but we're not
6576 # sure it will be when we actually add the instance. If things go bad
6577 # adding the instance will abort because of a duplicate mac, and the
6578 # creation job will fail.
6579 for nic in self.nics:
6580 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6581 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6585 if self.op.iallocator is not None:
6586 self._RunAllocator()
6588 #### node related checks
6590 # check primary node
6591 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6592 assert self.pnode is not None, \
6593 "Cannot retrieve locked node %s" % self.op.pnode
6595 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6596 pnode.name, errors.ECODE_STATE)
6598 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6599 pnode.name, errors.ECODE_STATE)
6601 self.secondaries = []
6603 # mirror node verification
6604 if self.op.disk_template in constants.DTS_NET_MIRROR:
6605 if self.op.snode is None:
6606 raise errors.OpPrereqError("The networked disk templates need"
6607 " a mirror node", errors.ECODE_INVAL)
6608 if self.op.snode == pnode.name:
6609 raise errors.OpPrereqError("The secondary node cannot be the"
6610 " primary node.", errors.ECODE_INVAL)
6611 _CheckNodeOnline(self, self.op.snode)
6612 _CheckNodeNotDrained(self, self.op.snode)
6613 self.secondaries.append(self.op.snode)
6615 nodenames = [pnode.name] + self.secondaries
6617 req_size = _ComputeDiskSize(self.op.disk_template,
6620 # Check lv size requirements, if not adopting
6621 if req_size is not None and not self.adopt_disks:
6622 _CheckNodesFreeDisk(self, nodenames, req_size)
6624 if self.adopt_disks: # instead, we must check the adoption data
6625 all_lvs = set([i["adopt"] for i in self.disks])
6626 if len(all_lvs) != len(self.disks):
6627 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6629 for lv_name in all_lvs:
6631 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6632 except errors.ReservationError:
6633 raise errors.OpPrereqError("LV named %s used by another instance" %
6634 lv_name, errors.ECODE_NOTUNIQUE)
6636 node_lvs = self.rpc.call_lv_list([pnode.name],
6637 self.cfg.GetVGName())[pnode.name]
6638 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6639 node_lvs = node_lvs.payload
6640 delta = all_lvs.difference(node_lvs.keys())
6642 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6643 utils.CommaJoin(delta),
6645 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6647 raise errors.OpPrereqError("Online logical volumes found, cannot"
6648 " adopt: %s" % utils.CommaJoin(online_lvs),
6650 # update the size of disk based on what is found
6651 for dsk in self.disks:
6652 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6654 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6656 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6658 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6660 # memory check on primary node
6662 _CheckNodeFreeMemory(self, self.pnode.name,
6663 "creating instance %s" % self.op.instance_name,
6664 self.be_full[constants.BE_MEMORY],
6667 self.dry_run_result = list(nodenames)
6669 def Exec(self, feedback_fn):
6670 """Create and add the instance to the cluster.
6673 instance = self.op.instance_name
6674 pnode_name = self.pnode.name
6676 ht_kind = self.op.hypervisor
6677 if ht_kind in constants.HTS_REQ_PORT:
6678 network_port = self.cfg.AllocatePort()
6682 if constants.ENABLE_FILE_STORAGE:
6683 # this is needed because os.path.join does not accept None arguments
6684 if self.op.file_storage_dir is None:
6685 string_file_storage_dir = ""
6687 string_file_storage_dir = self.op.file_storage_dir
6689 # build the full file storage dir path
6690 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6691 string_file_storage_dir, instance)
6693 file_storage_dir = ""
6695 disks = _GenerateDiskTemplate(self,
6696 self.op.disk_template,
6697 instance, pnode_name,
6701 self.op.file_driver,
6704 iobj = objects.Instance(name=instance, os=self.op.os_type,
6705 primary_node=pnode_name,
6706 nics=self.nics, disks=disks,
6707 disk_template=self.op.disk_template,
6709 network_port=network_port,
6710 beparams=self.op.beparams,
6711 hvparams=self.op.hvparams,
6712 hypervisor=self.op.hypervisor,
6715 if self.adopt_disks:
6716 # rename LVs to the newly-generated names; we need to construct
6717 # 'fake' LV disks with the old data, plus the new unique_id
6718 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6720 for t_dsk, a_dsk in zip (tmp_disks, self.disks):
6721 rename_to.append(t_dsk.logical_id)
6722 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6723 self.cfg.SetDiskID(t_dsk, pnode_name)
6724 result = self.rpc.call_blockdev_rename(pnode_name,
6725 zip(tmp_disks, rename_to))
6726 result.Raise("Failed to rename adoped LVs")
6728 feedback_fn("* creating instance disks...")
6730 _CreateDisks(self, iobj)
6731 except errors.OpExecError:
6732 self.LogWarning("Device creation failed, reverting...")
6734 _RemoveDisks(self, iobj)
6736 self.cfg.ReleaseDRBDMinors(instance)
6739 feedback_fn("adding instance %s to cluster config" % instance)
6741 self.cfg.AddInstance(iobj, self.proc.GetECId())
6743 # Declare that we don't want to remove the instance lock anymore, as we've
6744 # added the instance to the config
6745 del self.remove_locks[locking.LEVEL_INSTANCE]
6746 # Unlock all the nodes
6747 if self.op.mode == constants.INSTANCE_IMPORT:
6748 nodes_keep = [self.op.src_node]
6749 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6750 if node != self.op.src_node]
6751 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6752 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6754 self.context.glm.release(locking.LEVEL_NODE)
6755 del self.acquired_locks[locking.LEVEL_NODE]
6757 if self.op.wait_for_sync:
6758 disk_abort = not _WaitForSync(self, iobj)
6759 elif iobj.disk_template in constants.DTS_NET_MIRROR:
6760 # make sure the disks are not degraded (still sync-ing is ok)
6762 feedback_fn("* checking mirrors status")
6763 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6768 _RemoveDisks(self, iobj)
6769 self.cfg.RemoveInstance(iobj.name)
6770 # Make sure the instance lock gets removed
6771 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6772 raise errors.OpExecError("There are some degraded disks for"
6775 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6776 if self.op.mode == constants.INSTANCE_CREATE:
6777 if not self.op.no_install:
6778 feedback_fn("* running the instance OS create scripts...")
6779 # FIXME: pass debug option from opcode to backend
6780 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6781 self.op.debug_level)
6782 result.Raise("Could not add os for instance %s"
6783 " on node %s" % (instance, pnode_name))
6785 elif self.op.mode == constants.INSTANCE_IMPORT:
6786 feedback_fn("* running the instance OS import scripts...")
6790 for idx, image in enumerate(self.src_images):
6794 # FIXME: pass debug option from opcode to backend
6795 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
6796 constants.IEIO_FILE, (image, ),
6797 constants.IEIO_SCRIPT,
6798 (iobj.disks[idx], idx),
6800 transfers.append(dt)
6803 masterd.instance.TransferInstanceData(self, feedback_fn,
6804 self.op.src_node, pnode_name,
6805 self.pnode.secondary_ip,
6807 if not compat.all(import_result):
6808 self.LogWarning("Some disks for instance %s on node %s were not"
6809 " imported successfully" % (instance, pnode_name))
6812 # also checked in the prereq part
6813 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6817 iobj.admin_up = True
6818 self.cfg.Update(iobj, feedback_fn)
6819 logging.info("Starting instance %s on node %s", instance, pnode_name)
6820 feedback_fn("* starting instance...")
6821 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6822 result.Raise("Could not start instance")
6824 return list(iobj.all_nodes)
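
# When identify_defaults is set, _RevertToDefaults above strips from the
# opcode any hypervisor/backend/NIC parameter whose value merely repeats the
# cluster default, so the new instance keeps tracking future changes of those
# defaults.  A standalone sketch of that filtering; the parameter names and
# values are made up for illustration.
def _SketchRevertToDefaults(params, defaults):
  for name in list(params.keys()):
    if name in defaults and defaults[name] == params[name]:
      del params[name]
  return params

# _SketchRevertToDefaults({"memory": 128, "vcpus": 4}, {"memory": 128})
#   -> {"vcpus": 4}    (memory equalled the cluster default and is dropped)
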
6827 class LUConnectConsole(NoHooksLU):
6828 """Connect to an instance's console.
6830 This is somewhat special in that it returns the command line that
6831 you need to run on the master node in order to connect to the console.
6835 _OP_REQP = ["instance_name"]
6838 def ExpandNames(self):
6839 self._ExpandAndLockInstance()
6841 def CheckPrereq(self):
6842 """Check prerequisites.
6844 This checks that the instance is in the cluster.
6847 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6848 assert self.instance is not None, \
6849 "Cannot retrieve locked instance %s" % self.op.instance_name
6850 _CheckNodeOnline(self, self.instance.primary_node)
6852 def Exec(self, feedback_fn):
6853 """Connect to the console of an instance
6856 instance = self.instance
6857 node = instance.primary_node
6859 node_insts = self.rpc.call_instance_list([node],
6860 [instance.hypervisor])[node]
6861 node_insts.Raise("Can't get node information from %s" % node)
6863 if instance.name not in node_insts.payload:
6864 raise errors.OpExecError("Instance %s is not running." % instance.name)
6866 logging.debug("Connecting to console of %s on %s", instance.name, node)
6868 hyper = hypervisor.GetHypervisor(instance.hypervisor)
6869 cluster = self.cfg.GetClusterInfo()
6870 # beparams and hvparams are passed separately, to avoid editing the
6871 # instance and then saving the defaults in the instance itself.
6872 hvparams = cluster.FillHV(instance)
6873 beparams = cluster.FillBE(instance)
6874 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6877 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
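
# FillHV/FillBE above merge the cluster defaults with the instance's own
# overrides into fresh dicts, precisely so the console command sees the
# effective values without those defaults ever being written back into the
# instance.  A standalone sketch of that merge; the parameter names are made
# up for illustration.
def _SketchFillParams(cluster_defaults, instance_overrides):
  filled = dict(cluster_defaults)
  filled.update(instance_overrides)
  return filled

# _SketchFillParams({"serial_console": True}, {"kernel_path": "/boot/vmlinuz"})
#   -> {"serial_console": True, "kernel_path": "/boot/vmlinuz"}
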
6880 class LUReplaceDisks(LogicalUnit):
6881 """Replace the disks of an instance.
6884 HPATH = "mirrors-replace"
6885 HTYPE = constants.HTYPE_INSTANCE
6886 _OP_REQP = ["instance_name", "mode", "disks"]
6889 def CheckArguments(self):
6890 if not hasattr(self.op, "remote_node"):
6891 self.op.remote_node = None
6892 if not hasattr(self.op, "iallocator"):
6893 self.op.iallocator = None
6894 if not hasattr(self.op, "early_release"):
6895 self.op.early_release = False
6897 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6900 def ExpandNames(self):
6901 self._ExpandAndLockInstance()
6903 if self.op.iallocator is not None:
6904 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6906 elif self.op.remote_node is not None:
6907 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6908 self.op.remote_node = remote_node
6910 # Warning: do not remove the locking of the new secondary here
6911 # unless DRBD8.AddChildren is changed to work in parallel;
6912 # currently it doesn't since parallel invocations of
6913 # FindUnusedMinor will conflict
6914 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6915 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6918 self.needed_locks[locking.LEVEL_NODE] = []
6919 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6921 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6922 self.op.iallocator, self.op.remote_node,
6923 self.op.disks, False, self.op.early_release)
6925 self.tasklets = [self.replacer]
6927 def DeclareLocks(self, level):
6928 # If we're not already locking all nodes in the set we have to declare the
6929 # instance's primary/secondary nodes.
6930 if (level == locking.LEVEL_NODE and
6931 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6932 self._LockInstancesNodes()
6934 def BuildHooksEnv(self):
6937 This runs on the master, the primary and all the secondaries.
6940 instance = self.replacer.instance
6942 "MODE": self.op.mode,
6943 "NEW_SECONDARY": self.op.remote_node,
6944 "OLD_SECONDARY": instance.secondary_nodes[0],
6946 env.update(_BuildInstanceHookEnvByObject(self, instance))
6948 self.cfg.GetMasterNode(),
6949 instance.primary_node,
6951 if self.op.remote_node is not None:
6952 nl.append(self.op.remote_node)
6956 class LUEvacuateNode(LogicalUnit):
6957 """Relocate the secondary instances from a node.
6960 HPATH = "node-evacuate"
6961 HTYPE = constants.HTYPE_NODE
6962 _OP_REQP = ["node_name"]
6965 def CheckArguments(self):
6966 if not hasattr(self.op, "remote_node"):
6967 self.op.remote_node = None
6968 if not hasattr(self.op, "iallocator"):
6969 self.op.iallocator = None
6970 if not hasattr(self.op, "early_release"):
6971 self.op.early_release = False
6973 TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6974 self.op.remote_node,
6977 def ExpandNames(self):
6978 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6980 self.needed_locks = {}
6982 # Declare node locks
6983 if self.op.iallocator is not None:
6984 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6986 elif self.op.remote_node is not None:
6987 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6989 # Warning: do not remove the locking of the new secondary here
6990 # unless DRBD8.AddChildren is changed to work in parallel;
6991 # currently it doesn't since parallel invocations of
6992 # FindUnusedMinor will conflict
6993 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
6994 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6997 raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
6999 # Create tasklets for replacing disks for all secondary instances on this
7004 for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
7005 logging.debug("Replacing disks for instance %s", inst.name)
7006 names.append(inst.name)
7008 replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
7009 self.op.iallocator, self.op.remote_node, [],
7010 True, self.op.early_release)
7011 tasklets.append(replacer)
7013 self.tasklets = tasklets
7014 self.instance_names = names
7016 # Declare instance locks
7017 self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
7019 def DeclareLocks(self, level):
7020 # If we're not already locking all nodes in the set we have to declare the
7021 # instance's primary/secondary nodes.
7022 if (level == locking.LEVEL_NODE and
7023 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7024 self._LockInstancesNodes()
7026 def BuildHooksEnv(self):
7029 This runs on the master, the primary and all the secondaries.
7033 "NODE_NAME": self.op.node_name,
7036 nl = [self.cfg.GetMasterNode()]
7038 if self.op.remote_node is not None:
7039 env["NEW_SECONDARY"] = self.op.remote_node
7040 nl.append(self.op.remote_node)
7042 return (env, nl, nl)
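
# TLReplaceDisks.CheckArguments (below) only accepts certain combinations of
# replacement mode, remote node and iallocator: when changing the secondary,
# exactly one way of picking the new node must be given, and for the other
# modes neither option may be used.  A standalone sketch of that decision,
# using literal mode names instead of the constants module (an assumption
# made only for illustration).
def _SketchReplaceArgsOk(mode, remote_node, iallocator):
  if mode == "replace_new_secondary":
    return (remote_node is None) != (iallocator is None)
  return remote_node is None and iallocator is None

# _SketchReplaceArgsOk("replace_new_secondary", "node3", None)   -> True
# _SketchReplaceArgsOk("replace_new_secondary", None, None)      -> False
# _SketchReplaceArgsOk("replace_on_primary", None, "hail")       -> False
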
7045 class TLReplaceDisks(Tasklet):
7046 """Replaces disks for an instance.
7048 Note: Locking is not within the scope of this class.
7051 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7052 disks, delay_iallocator, early_release):
7053 """Initializes this class.
7056 Tasklet.__init__(self, lu)
7059 self.instance_name = instance_name
7061 self.iallocator_name = iallocator_name
7062 self.remote_node = remote_node
7064 self.delay_iallocator = delay_iallocator
7065 self.early_release = early_release
7068 self.instance = None
7069 self.new_node = None
7070 self.target_node = None
7071 self.other_node = None
7072 self.remote_node_info = None
7073 self.node_secondary_ip = None
7076 def CheckArguments(mode, remote_node, iallocator):
7077 """Helper function for users of this class.
7080 # check for valid parameter combination
7081 if mode == constants.REPLACE_DISK_CHG:
7082 if remote_node is None and iallocator is None:
7083 raise errors.OpPrereqError("When changing the secondary either an"
7084 " iallocator script must be used or the"
7085 " new node given", errors.ECODE_INVAL)
7087 if remote_node is not None and iallocator is not None:
7088 raise errors.OpPrereqError("Give either the iallocator or the new"
7089 " secondary, not both", errors.ECODE_INVAL)
7091 elif remote_node is not None or iallocator is not None:
7092 # Not replacing the secondary
7093 raise errors.OpPrereqError("The iallocator and new node options can"
7094 " only be used when changing the"
7095 " secondary node", errors.ECODE_INVAL)
7098 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7099 """Compute a new secondary node using an IAllocator.
7102 ial = IAllocator(lu.cfg, lu.rpc,
7103 mode=constants.IALLOCATOR_MODE_RELOC,
7105 relocate_from=relocate_from)
7107 ial.Run(iallocator_name)
7110 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7111 " %s" % (iallocator_name, ial.info),
7114 if len(ial.result) != ial.required_nodes:
7115 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7116 " of nodes (%s), required %s" %
7118 len(ial.result), ial.required_nodes),
7121 remote_node_name = ial.result[0]
7123 lu.LogInfo("Selected new secondary for instance '%s': %s",
7124 instance_name, remote_node_name)
7126 return remote_node_name
7128 def _FindFaultyDisks(self, node_name):
7129 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7132 def CheckPrereq(self):
7133 """Check prerequisites.
7135 This checks that the instance is in the cluster.
7138 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7139 assert instance is not None, \
7140 "Cannot retrieve locked instance %s" % self.instance_name
7142 if instance.disk_template != constants.DT_DRBD8:
7143 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7144 " instances", errors.ECODE_INVAL)
7146 if len(instance.secondary_nodes) != 1:
7147 raise errors.OpPrereqError("The instance has a strange layout,"
7148 " expected one secondary but found %d" %
7149 len(instance.secondary_nodes),
7152 if not self.delay_iallocator:
7153 self._CheckPrereq2()
7155 def _CheckPrereq2(self):
7156 """Check prerequisites, second part.
7158 This function should always be part of CheckPrereq. It was separated and is
7159 now called from Exec because during node evacuation iallocator was only
7160 called with an unmodified cluster model, not taking planned changes into account.
7164 instance = self.instance
7165 secondary_node = instance.secondary_nodes[0]
7167 if self.iallocator_name is None:
7168 remote_node = self.remote_node
7170 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7171 instance.name, instance.secondary_nodes)
7173 if remote_node is not None:
7174 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7175 assert self.remote_node_info is not None, \
7176 "Cannot retrieve locked node %s" % remote_node
7178 self.remote_node_info = None
7180 if remote_node == self.instance.primary_node:
7181 raise errors.OpPrereqError("The specified node is the primary node of"
7182 " the instance.", errors.ECODE_INVAL)
7184 if remote_node == secondary_node:
7185 raise errors.OpPrereqError("The specified node is already the"
7186 " secondary node of the instance.",
7189 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7190 constants.REPLACE_DISK_CHG):
7191 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7194 if self.mode == constants.REPLACE_DISK_AUTO:
7195 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7196 faulty_secondary = self._FindFaultyDisks(secondary_node)
7198 if faulty_primary and faulty_secondary:
7199 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7200 " one node and can not be repaired"
7201 " automatically" % self.instance_name,
7205 self.disks = faulty_primary
7206 self.target_node = instance.primary_node
7207 self.other_node = secondary_node
7208 check_nodes = [self.target_node, self.other_node]
7209 elif faulty_secondary:
7210 self.disks = faulty_secondary
7211 self.target_node = secondary_node
7212 self.other_node = instance.primary_node
7213 check_nodes = [self.target_node, self.other_node]
7219 # Non-automatic modes
7220 if self.mode == constants.REPLACE_DISK_PRI:
7221 self.target_node = instance.primary_node
7222 self.other_node = secondary_node
7223 check_nodes = [self.target_node, self.other_node]
7225 elif self.mode == constants.REPLACE_DISK_SEC:
7226 self.target_node = secondary_node
7227 self.other_node = instance.primary_node
7228 check_nodes = [self.target_node, self.other_node]
7230 elif self.mode == constants.REPLACE_DISK_CHG:
7231 self.new_node = remote_node
7232 self.other_node = instance.primary_node
7233 self.target_node = secondary_node
7234 check_nodes = [self.new_node, self.other_node]
7236 _CheckNodeNotDrained(self.lu, remote_node)
7238 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7239 assert old_node_info is not None
7240 if old_node_info.offline and not self.early_release:
7241 # doesn't make sense to delay the release
7242 self.early_release = True
7243 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7244 " early-release mode", secondary_node)
7247 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7250 # If not specified all disks should be replaced
7252 self.disks = range(len(self.instance.disks))
7254 for node in check_nodes:
7255 _CheckNodeOnline(self.lu, node)
7257 # Check whether disks are valid
7258 for disk_idx in self.disks:
7259 instance.FindDisk(disk_idx)
7261 # Get secondary node IP addresses
7264 for node_name in [self.target_node, self.other_node, self.new_node]:
7265 if node_name is not None:
7266 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7268 self.node_secondary_ip = node_2nd_ip
7270 def Exec(self, feedback_fn):
7271 """Execute disk replacement.
7273 This dispatches the disk replacement to the appropriate handler.
7276 if self.delay_iallocator:
7277 self._CheckPrereq2()
7280 feedback_fn("No disks need replacement")
7283 feedback_fn("Replacing disk(s) %s for %s" %
7284 (utils.CommaJoin(self.disks), self.instance.name))
7286 activate_disks = (not self.instance.admin_up)
7288 # Activate the instance disks if we're replacing them on a down instance
7290 _StartInstanceDisks(self.lu, self.instance, True)
7293 # Should we replace the secondary node?
7294 if self.new_node is not None:
7295 fn = self._ExecDrbd8Secondary
7297 fn = self._ExecDrbd8DiskOnly
7299 return fn(feedback_fn)
7302 # Deactivate the instance disks if we're replacing them on a
7305 _SafeShutdownInstanceDisks(self.lu, self.instance)
7307 def _CheckVolumeGroup(self, nodes):
7308 self.lu.LogInfo("Checking volume groups")
7310 vgname = self.cfg.GetVGName()
7312 # Make sure volume group exists on all involved nodes
7313 results = self.rpc.call_vg_list(nodes)
7315 raise errors.OpExecError("Can't list volume groups on the nodes")
7319 res.Raise("Error checking node %s" % node)
7320 if vgname not in res.payload:
7321 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7324 def _CheckDisksExistence(self, nodes):
7325 # Check disk existence
7326 for idx, dev in enumerate(self.instance.disks):
7327 if idx not in self.disks:
7331 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7332 self.cfg.SetDiskID(dev, node)
7334 result = self.rpc.call_blockdev_find(node, dev)
7336 msg = result.fail_msg
7337 if msg or not result.payload:
7339 msg = "disk not found"
7340 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7343 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7344 for idx, dev in enumerate(self.instance.disks):
7345 if idx not in self.disks:
7348 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7351 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7353 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7354 " replace disks for instance %s" %
7355 (node_name, self.instance.name))
7357 def _CreateNewStorage(self, node_name):
7358 vgname = self.cfg.GetVGName()
7361 for idx, dev in enumerate(self.instance.disks):
7362 if idx not in self.disks:
7365 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7367 self.cfg.SetDiskID(dev, node_name)
7369 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7370 names = _GenerateUniqueNames(self.lu, lv_names)
7372 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7373 logical_id=(vgname, names[0]))
7374 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7375 logical_id=(vgname, names[1]))
7377 new_lvs = [lv_data, lv_meta]
7378 old_lvs = dev.children
7379 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7381 # we pass force_create=True to force the LVM creation
7382 for new_lv in new_lvs:
7383 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7384 _GetInstanceInfoText(self.instance), False)
7388 def _CheckDevices(self, node_name, iv_names):
7389 for name, (dev, _, _) in iv_names.iteritems():
7390 self.cfg.SetDiskID(dev, node_name)
7392 result = self.rpc.call_blockdev_find(node_name, dev)
7394 msg = result.fail_msg
7395 if msg or not result.payload:
7397 msg = "disk not found"
7398 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7401 if result.payload.is_degraded:
7402 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7404 def _RemoveOldStorage(self, node_name, iv_names):
7405 for name, (_, old_lvs, _) in iv_names.iteritems():
7406 self.lu.LogInfo("Remove logical volumes for %s" % name)
7409 self.cfg.SetDiskID(lv, node_name)
7411 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7413 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7414 hint="remove unused LVs manually")
7416 def _ReleaseNodeLock(self, node_name):
7417 """Releases the lock for a given node."""
7418 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7420 def _ExecDrbd8DiskOnly(self, feedback_fn):
7421 """Replace a disk on the primary or secondary for DRBD 8.
7423 The algorithm for replace is quite complicated:
7425 1. for each disk to be replaced:
7427 1. create new LVs on the target node with unique names
7428 1. detach old LVs from the drbd device
7429 1. rename old LVs to name_replaced.<time_t>
7430 1. rename new LVs to old LVs
7431 1. attach the new LVs (with the old names now) to the drbd device
7433 1. wait for sync across all devices
7435 1. for each modified disk:
7437 1. remove old LVs (which have the name name_replaced.<time_t>)
7439 Failures are not very well handled.
7444 # Step: check device activation
7445 self.lu.LogStep(1, steps_total, "Check device existence")
7446 self._CheckDisksExistence([self.other_node, self.target_node])
7447 self._CheckVolumeGroup([self.target_node, self.other_node])
7449 # Step: check other node consistency
7450 self.lu.LogStep(2, steps_total, "Check peer consistency")
7451 self._CheckDisksConsistency(self.other_node,
7452 self.other_node == self.instance.primary_node,
7455 # Step: create new storage
7456 self.lu.LogStep(3, steps_total, "Allocate new storage")
7457 iv_names = self._CreateNewStorage(self.target_node)
7459 # Step: for each lv, detach+rename*2+attach
7460 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7461 for dev, old_lvs, new_lvs in iv_names.itervalues():
7462 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7464 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7466 result.Raise("Can't detach drbd from local storage on node"
7467 " %s for device %s" % (self.target_node, dev.iv_name))
7469 #cfg.Update(instance)
7471 # ok, we created the new LVs, so now we know we have the needed
7472 # storage; as such, we proceed on the target node to rename
7473 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7474 # using the assumption that logical_id == physical_id (which in
7475 # turn is the unique_id on that node)
7477 # FIXME(iustin): use a better name for the replaced LVs
7478 temp_suffix = int(time.time())
7479 ren_fn = lambda d, suff: (d.physical_id[0],
7480 d.physical_id[1] + "_replaced-%s" % suff)
7482 # Build the rename list based on what LVs exist on the node
7483 rename_old_to_new = []
7484 for to_ren in old_lvs:
7485 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7486 if not result.fail_msg and result.payload:
7488 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7490 self.lu.LogInfo("Renaming the old LVs on the target node")
7491 result = self.rpc.call_blockdev_rename(self.target_node,
7493 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7495 # Now we rename the new LVs to the old LVs
7496 self.lu.LogInfo("Renaming the new LVs on the target node")
7497 rename_new_to_old = [(new, old.physical_id)
7498 for old, new in zip(old_lvs, new_lvs)]
7499 result = self.rpc.call_blockdev_rename(self.target_node,
7501 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7503 for old, new in zip(old_lvs, new_lvs):
7504 new.logical_id = old.logical_id
7505 self.cfg.SetDiskID(new, self.target_node)
7507 for disk in old_lvs:
7508 disk.logical_id = ren_fn(disk, temp_suffix)
7509 self.cfg.SetDiskID(disk, self.target_node)
7511 # Now that the new lvs have the old name, we can add them to the device
7512 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7513 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7515 msg = result.fail_msg
7517 for new_lv in new_lvs:
7518 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7521 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7522 hint=("cleanup manually the unused logical"
7524 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7526 dev.children = new_lvs
7528 self.cfg.Update(self.instance, feedback_fn)
7531 if self.early_release:
7532 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7534 self._RemoveOldStorage(self.target_node, iv_names)
7535 # WARNING: we release both node locks here, do not do other RPCs
7536 # than WaitForSync to the primary node
7537 self._ReleaseNodeLock([self.target_node, self.other_node])
7540 # This can fail as the old devices are degraded and _WaitForSync
7541 # does a combined result over all disks, so we don't check its return value
7542 self.lu.LogStep(cstep, steps_total, "Sync devices")
7544 _WaitForSync(self.lu, self.instance)
7546 # Check all devices manually
7547 self._CheckDevices(self.instance.primary_node, iv_names)
7549 # Step: remove old storage
7550 if not self.early_release:
7551 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7553 self._RemoveOldStorage(self.target_node, iv_names)
7555 def _ExecDrbd8Secondary(self, feedback_fn):
7556 """Replace the secondary node for DRBD 8.
7558 The algorithm for replace is quite complicated:
7559 - for all disks of the instance:
7560 - create new LVs on the new node with same names
7561 - shutdown the drbd device on the old secondary
7562 - disconnect the drbd network on the primary
7563 - create the drbd device on the new secondary
7564 - network attach the drbd on the primary, using an artifice:
7565 the drbd code for Attach() will connect to the network if it
7566 finds a device which is connected to the good local disks but
7568 - wait for sync across all devices
7569 - remove all disks from the old secondary
7571 Failures are not very well handled.
7576 # Step: check device activation
7577 self.lu.LogStep(1, steps_total, "Check device existence")
7578 self._CheckDisksExistence([self.instance.primary_node])
7579 self._CheckVolumeGroup([self.instance.primary_node])
7581 # Step: check other node consistency
7582 self.lu.LogStep(2, steps_total, "Check peer consistency")
7583 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7585 # Step: create new storage
7586 self.lu.LogStep(3, steps_total, "Allocate new storage")
7587 for idx, dev in enumerate(self.instance.disks):
7588 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7589 (self.new_node, idx))
7590 # we pass force_create=True to force LVM creation
7591 for new_lv in dev.children:
7592 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7593 _GetInstanceInfoText(self.instance), False)
7595 # Step 4: drbd minors and drbd setup changes
7596 # after this, we must manually remove the drbd minors on both the
7597 # error and the success paths
7598 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7599 minors = self.cfg.AllocateDRBDMinor([self.new_node
7600 for dev in self.instance.disks],
7602 logging.debug("Allocated minors %r", minors)
7605 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7606 self.lu.LogInfo("Activating a new drbd on %s for disk/%d" %
7607 (self.new_node, idx))
7608 # create new devices on new_node; note that we create two IDs:
7609 # one without port, so the drbd will be activated without
7610 # networking information on the new node at this stage, and one
7611 # with network, for the later activation in step 4
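# For illustration only (node names, port, minors and secret are made up),
# the two IDs built below would look like:
#   new_alone_id = ("node1.example.com", "node3.example.com", None, 0, 5, "secret")
#   new_net_id   = ("node1.example.com", "node3.example.com", 11000, 0, 5, "secret")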
7612 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7613 if self.instance.primary_node == o_node1:
7616 assert self.instance.primary_node == o_node2, "Three-node instance?"
7619 new_alone_id = (self.instance.primary_node, self.new_node, None,
7620 p_minor, new_minor, o_secret)
7621 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7622 p_minor, new_minor, o_secret)
7624 iv_names[idx] = (dev, dev.children, new_net_id)
7625 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7627 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7628 logical_id=new_alone_id,
7629 children=dev.children,
7632 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7633 _GetInstanceInfoText(self.instance), False)
7634 except errors.GenericError:
7635 self.cfg.ReleaseDRBDMinors(self.instance.name)
7638 # We have new devices, shutdown the drbd on the old secondary
7639 for idx, dev in enumerate(self.instance.disks):
7640 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7641 self.cfg.SetDiskID(dev, self.target_node)
7642 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7644 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7645 " node: %s" % (idx, msg),
7646 hint=("Please cleanup this device manually as"
7647 " soon as possible"))
7649 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7650 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7651 self.node_secondary_ip,
7652 self.instance.disks)\
7653 [self.instance.primary_node]
7655 msg = result.fail_msg
7657 # detaches didn't succeed (unlikely)
7658 self.cfg.ReleaseDRBDMinors(self.instance.name)
7659 raise errors.OpExecError("Can't detach the disks from the network on"
7660 " old node: %s" % (msg,))
7662 # if we managed to detach at least one, we update all the disks of
7663 # the instance to point to the new secondary
7664 self.lu.LogInfo("Updating instance configuration")
7665 for dev, _, new_logical_id in iv_names.itervalues():
7666 dev.logical_id = new_logical_id
7667 self.cfg.SetDiskID(dev, self.instance.primary_node)
7669 self.cfg.Update(self.instance, feedback_fn)
7671 # and now perform the drbd attach
7672 self.lu.LogInfo("Attaching primary drbds to new secondary"
7673 " (standalone => connected)")
7674 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7676 self.node_secondary_ip,
7677 self.instance.disks,
7680 for to_node, to_result in result.items():
7681 msg = to_result.fail_msg
7683 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7685 hint=("please do a gnt-instance info to see the"
7686 " status of disks"))
7688 if self.early_release:
7689 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7691 self._RemoveOldStorage(self.target_node, iv_names)
7692 # WARNING: we release all node locks here, do not do other RPCs
7693 # than WaitForSync to the primary node
7694 self._ReleaseNodeLock([self.instance.primary_node,
7699 # This can fail as the old devices are degraded and _WaitForSync
7700 # does a combined result over all disks, so we don't check its return value
7701 self.lu.LogStep(cstep, steps_total, "Sync devices")
7703 _WaitForSync(self.lu, self.instance)
7705 # Check all devices manually
7706 self._CheckDevices(self.instance.primary_node, iv_names)
7708 # Step: remove old storage
7709 if not self.early_release:
7710 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7711 self._RemoveOldStorage(self.target_node, iv_names)
7714 class LURepairNodeStorage(NoHooksLU):
7715 """Repairs the volume group on a node.
7718 _OP_REQP = ["node_name"]
7721 def CheckArguments(self):
7722 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7724 _CheckStorageType(self.op.storage_type)
7726 def ExpandNames(self):
7727 self.needed_locks = {
7728 locking.LEVEL_NODE: [self.op.node_name],
7731 def _CheckFaultyDisks(self, instance, node_name):
7732 """Ensure faulty disks abort the opcode or at least warn."""
7734 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7736 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7737 " node '%s'" % (instance.name, node_name),
7739 except errors.OpPrereqError, err:
7740 if self.op.ignore_consistency:
7741 self.proc.LogWarning(str(err.args[0]))
7745 def CheckPrereq(self):
7746 """Check prerequisites.
7749 storage_type = self.op.storage_type
7751 if (constants.SO_FIX_CONSISTENCY not in
7752 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7753 raise errors.OpPrereqError("Storage units of type '%s' can not be"
7754 " repaired" % storage_type,
7757 # Check whether any instance on this node has faulty disks
7758 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7759 if not inst.admin_up:
7761 check_nodes = set(inst.all_nodes)
7762 check_nodes.discard(self.op.node_name)
7763 for inst_node_name in check_nodes:
7764 self._CheckFaultyDisks(inst, inst_node_name)
7766 def Exec(self, feedback_fn):
7767 feedback_fn("Repairing storage unit '%s' on %s ..." %
7768 (self.op.name, self.op.node_name))
7770 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7771 result = self.rpc.call_storage_execute(self.op.node_name,
7772 self.op.storage_type, st_args,
7774 constants.SO_FIX_CONSISTENCY)
7775 result.Raise("Failed to repair storage unit '%s' on %s" %
7776 (self.op.name, self.op.node_name))
7779 class LUNodeEvacuationStrategy(NoHooksLU):
7780 """Computes the node evacuation strategy.
7783 _OP_REQP = ["nodes"]
7786 def CheckArguments(self):
7787 if not hasattr(self.op, "remote_node"):
7788 self.op.remote_node = None
7789 if not hasattr(self.op, "iallocator"):
7790 self.op.iallocator = None
7791 if self.op.remote_node is not None and self.op.iallocator is not None:
7792 raise errors.OpPrereqError("Give either the iallocator or the new"
7793 " secondary, not both", errors.ECODE_INVAL)
7795 def ExpandNames(self):
7796 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7797 self.needed_locks = locks = {}
7798 if self.op.remote_node is None:
7799 locks[locking.LEVEL_NODE] = locking.ALL_SET
7801 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7802 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7804 def CheckPrereq(self):
7807 def Exec(self, feedback_fn):
7808 if self.op.remote_node is not None:
7810 for node in self.op.nodes:
7811 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7814 if i.primary_node == self.op.remote_node:
7815 raise errors.OpPrereqError("Node %s is the primary node of"
7816 " instance %s, cannot use it as"
7818 (self.op.remote_node, i.name),
7820 result.append([i.name, self.op.remote_node])
7822 ial = IAllocator(self.cfg, self.rpc,
7823 mode=constants.IALLOCATOR_MODE_MEVAC,
7824 evac_nodes=self.op.nodes)
7825 ial.Run(self.op.iallocator, validate=True)
7827 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
7833 class LUGrowDisk(LogicalUnit):
7834 """Grow a disk of an instance.
7838 HTYPE = constants.HTYPE_INSTANCE
7839 _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7842 def ExpandNames(self):
7843 self._ExpandAndLockInstance()
7844 self.needed_locks[locking.LEVEL_NODE] = []
7845 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7847 def DeclareLocks(self, level):
7848 if level == locking.LEVEL_NODE:
7849 self._LockInstancesNodes()
7851 def BuildHooksEnv(self):
7854 This runs on the master, the primary and all the secondaries.
7858 "DISK": self.op.disk,
7859 "AMOUNT": self.op.amount,
7861 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7862 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7865 def CheckPrereq(self):
7866 """Check prerequisites.
7868 This checks that the instance is in the cluster.
7871 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7872 assert instance is not None, \
7873 "Cannot retrieve locked instance %s" % self.op.instance_name
7874 nodenames = list(instance.all_nodes)
7875 for node in nodenames:
7876 _CheckNodeOnline(self, node)
7879 self.instance = instance
7881 if instance.disk_template not in constants.DTS_GROWABLE:
7882 raise errors.OpPrereqError("Instance's disk layout does not support"
7883 " growing.", errors.ECODE_INVAL)
7885 self.disk = instance.FindDisk(self.op.disk)
7887 if instance.disk_template != constants.DT_FILE:
7888 # TODO: check the free disk space for file, when that feature will be implemented
7890 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
7892 def Exec(self, feedback_fn):
7893 """Execute disk grow.
7896 instance = self.instance
7898 for node in instance.all_nodes:
7899 self.cfg.SetDiskID(disk, node)
7900 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7901 result.Raise("Grow request failed to node %s" % node)
7903 # TODO: Rewrite code to work properly
7904 # DRBD goes into sync mode for a short amount of time after executing the
7905 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7906 # calling "resize" in sync mode fails. Sleeping for a short amount of
7907 # time is a work-around.
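# A minimal sketch of the work-around described above; the pause length is an
# assumption and the time module is assumed to be available:
#   time.sleep(5)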
7910 disk.RecordGrow(self.op.amount)
7911 self.cfg.Update(instance, feedback_fn)
7912 if self.op.wait_for_sync:
7913 disk_abort = not _WaitForSync(self, instance)
7915 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7916 " status.\nPlease check the instance.")
7919 class LUQueryInstanceData(NoHooksLU):
7920 """Query runtime instance data.
7923 _OP_REQP = ["instances", "static"]
7926 def ExpandNames(self):
7927 self.needed_locks = {}
7928 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7930 if not isinstance(self.op.instances, list):
7931 raise errors.OpPrereqError("Invalid argument type 'instances'",
7934 if self.op.instances:
7935 self.wanted_names = []
7936 for name in self.op.instances:
7937 full_name = _ExpandInstanceName(self.cfg, name)
7938 self.wanted_names.append(full_name)
7939 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7941 self.wanted_names = None
7942 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7944 self.needed_locks[locking.LEVEL_NODE] = []
7945 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7947 def DeclareLocks(self, level):
7948 if level == locking.LEVEL_NODE:
7949 self._LockInstancesNodes()
7951 def CheckPrereq(self):
7952 """Check prerequisites.
7954 This only checks the optional instance list against the existing names.
7957 if self.wanted_names is None:
7958 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7960 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7961 in self.wanted_names]
7964 def _ComputeBlockdevStatus(self, node, instance_name, dev):
7965 """Returns the status of a block device
7968 if self.op.static or not node:
7971 self.cfg.SetDiskID(dev, node)
7973 result = self.rpc.call_blockdev_find(node, dev)
7977 result.Raise("Can't compute disk status for %s" % instance_name)
7979 status = result.payload
7983 return (status.dev_path, status.major, status.minor,
7984 status.sync_percent, status.estimated_time,
7985 status.is_degraded, status.ldisk_status)
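# For illustration only (values are made up), the tuple returned above is
#   (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
#    ldisk_status)
# e.g. ("/dev/drbd0", 147, 0, 92.5, 38, False, <ldisk status constant>)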
7987 def _ComputeDiskStatus(self, instance, snode, dev):
7988 """Compute block device status.
7991 if dev.dev_type in constants.LDS_DRBD:
7992 # we change the snode then (otherwise we use the one passed in)
7993 if dev.logical_id[0] == instance.primary_node:
7994 snode = dev.logical_id[1]
7996 snode = dev.logical_id[0]
7998 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8000 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8003 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8004 for child in dev.children]
8009 "iv_name": dev.iv_name,
8010 "dev_type": dev.dev_type,
8011 "logical_id": dev.logical_id,
8012 "physical_id": dev.physical_id,
8013 "pstatus": dev_pstatus,
8014 "sstatus": dev_sstatus,
8015 "children": dev_children,
8022 def Exec(self, feedback_fn):
8023 """Gather and return data"""
8026 cluster = self.cfg.GetClusterInfo()
8028 for instance in self.wanted_instances:
8029 if not self.op.static:
8030 remote_info = self.rpc.call_instance_info(instance.primary_node,
8032 instance.hypervisor)
8033 remote_info.Raise("Error checking node %s" % instance.primary_node)
8034 remote_info = remote_info.payload
8035 if remote_info and "state" in remote_info:
8038 remote_state = "down"
8041 if instance.admin_up:
8044 config_state = "down"
8046 disks = [self._ComputeDiskStatus(instance, None, device)
8047 for device in instance.disks]
8050 "name": instance.name,
8051 "config_state": config_state,
8052 "run_state": remote_state,
8053 "pnode": instance.primary_node,
8054 "snodes": instance.secondary_nodes,
8056 # this happens to be the same format used for hooks
8057 "nics": _NICListToTuple(self, instance.nics),
8059 "hypervisor": instance.hypervisor,
8060 "network_port": instance.network_port,
8061 "hv_instance": instance.hvparams,
8062 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8063 "be_instance": instance.beparams,
8064 "be_actual": cluster.FillBE(instance),
8065 "serial_no": instance.serial_no,
8066 "mtime": instance.mtime,
8067 "ctime": instance.ctime,
8068 "uuid": instance.uuid,
8071 result[instance.name] = idict
8076 class LUSetInstanceParams(LogicalUnit):
8077 """Modifies an instance's parameters.
8080 HPATH = "instance-modify"
8081 HTYPE = constants.HTYPE_INSTANCE
8082 _OP_REQP = ["instance_name"]
8085 def CheckArguments(self):
8086 if not hasattr(self.op, 'nics'):
8087 self.op.nics = []
8088 if not hasattr(self.op, 'disks'):
8089 self.op.disks = []
8090 if not hasattr(self.op, 'beparams'):
8091 self.op.beparams = {}
8092 if not hasattr(self.op, 'hvparams'):
8093 self.op.hvparams = {}
8094 if not hasattr(self.op, "disk_template"):
8095 self.op.disk_template = None
8096 if not hasattr(self.op, "remote_node"):
8097 self.op.remote_node = None
8098 if not hasattr(self.op, "os_name"):
8099 self.op.os_name = None
8100 if not hasattr(self.op, "force_variant"):
8101 self.op.force_variant = False
8102 self.op.force = getattr(self.op, "force", False)
8103 if not (self.op.nics or self.op.disks or self.op.disk_template or
8104 self.op.hvparams or self.op.beparams or self.op.os_name):
8105 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8107 if self.op.hvparams:
8108 _CheckGlobalHvParams(self.op.hvparams)
8112 for disk_op, disk_dict in self.op.disks:
8113 if disk_op == constants.DDM_REMOVE:
8116 elif disk_op == constants.DDM_ADD:
8119 if not isinstance(disk_op, int):
8120 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8121 if not isinstance(disk_dict, dict):
8122 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8123 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8125 if disk_op == constants.DDM_ADD:
8126 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8127 if mode not in constants.DISK_ACCESS_SET:
8128 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8130 size = disk_dict.get('size', None)
8132 raise errors.OpPrereqError("Required disk parameter size missing",
8136 except (TypeError, ValueError), err:
8137 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8138 str(err), errors.ECODE_INVAL)
8139 disk_dict['size'] = size
8141 # modification of disk
8142 if 'size' in disk_dict:
8143 raise errors.OpPrereqError("Disk size change not possible, use"
8144 " grow-disk", errors.ECODE_INVAL)
8146 if disk_addremove > 1:
8147 raise errors.OpPrereqError("Only one disk add or remove operation"
8148 " supported at a time", errors.ECODE_INVAL)
8150 if self.op.disks and self.op.disk_template is not None:
8151 raise errors.OpPrereqError("Disk template conversion and other disk"
8152 " changes not supported at the same time",
8155 if self.op.disk_template:
8156 _CheckDiskTemplate(self.op.disk_template)
8157 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8158 self.op.remote_node is None):
8159 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8160 " one requires specifying a secondary node",
8165 for nic_op, nic_dict in self.op.nics:
8166 if nic_op == constants.DDM_REMOVE:
8169 elif nic_op == constants.DDM_ADD:
8172 if not isinstance(nic_op, int):
8173 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8174 if not isinstance(nic_dict, dict):
8175 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8176 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8178 # nic_dict should be a dict
8179 nic_ip = nic_dict.get('ip', None)
8180 if nic_ip is not None:
8181 if nic_ip.lower() == constants.VALUE_NONE:
8182 nic_dict['ip'] = None
8184 if not utils.IsValidIP(nic_ip):
8185 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8188 nic_bridge = nic_dict.get('bridge', None)
8189 nic_link = nic_dict.get('link', None)
8190 if nic_bridge and nic_link:
8191 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8192 " at the same time", errors.ECODE_INVAL)
8193 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8194 nic_dict['bridge'] = None
8195 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8196 nic_dict['link'] = None
8198 if nic_op == constants.DDM_ADD:
8199 nic_mac = nic_dict.get('mac', None)
8200 if nic_mac is None:
8201 nic_dict['mac'] = constants.VALUE_AUTO
8203 if 'mac' in nic_dict:
8204 nic_mac = nic_dict['mac']
8205 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8206 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8208 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8209 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8210 " modifying an existing nic",
8213 if nic_addremove > 1:
8214 raise errors.OpPrereqError("Only one NIC add or remove operation"
8215 " supported at a time", errors.ECODE_INVAL)
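# A rough sketch of the modification lists validated above (values are
# illustrative, not taken from a real opcode): each entry is either an
# add/remove marker or the index of the item to modify, plus a dict of
# parameters:
#   op.disks = [(constants.DDM_ADD, {"size": 1024, "mode": constants.DISK_RDWR})]
#   op.nics  = [(0, {"mac": "aa:00:00:12:34:56"}),
#               (constants.DDM_REMOVE, {})]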
8217 def ExpandNames(self):
8218 self._ExpandAndLockInstance()
8219 self.needed_locks[locking.LEVEL_NODE] = []
8220 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8222 def DeclareLocks(self, level):
8223 if level == locking.LEVEL_NODE:
8224 self._LockInstancesNodes()
8225 if self.op.disk_template and self.op.remote_node:
8226 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8227 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8229 def BuildHooksEnv(self):
8232 This runs on the master, primary and secondaries.
8236 if constants.BE_MEMORY in self.be_new:
8237 args['memory'] = self.be_new[constants.BE_MEMORY]
8238 if constants.BE_VCPUS in self.be_new:
8239 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8240 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8241 # information at all.
8244 nic_override = dict(self.op.nics)
8245 c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
8246 for idx, nic in enumerate(self.instance.nics):
8247 if idx in nic_override:
8248 this_nic_override = nic_override[idx]
8250 this_nic_override = {}
8251 if 'ip' in this_nic_override:
8252 ip = this_nic_override['ip']
8255 if 'mac' in this_nic_override:
8256 mac = this_nic_override['mac']
8259 if idx in self.nic_pnew:
8260 nicparams = self.nic_pnew[idx]
8262 nicparams = objects.FillDict(c_nicparams, nic.nicparams)
8263 mode = nicparams[constants.NIC_MODE]
8264 link = nicparams[constants.NIC_LINK]
8265 args['nics'].append((ip, mac, mode, link))
8266 if constants.DDM_ADD in nic_override:
8267 ip = nic_override[constants.DDM_ADD].get('ip', None)
8268 mac = nic_override[constants.DDM_ADD]['mac']
8269 nicparams = self.nic_pnew[constants.DDM_ADD]
8270 mode = nicparams[constants.NIC_MODE]
8271 link = nicparams[constants.NIC_LINK]
8272 args['nics'].append((ip, mac, mode, link))
8273 elif constants.DDM_REMOVE in nic_override:
8274 del args['nics'][-1]
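# For illustration (values invented): after the loop above, args['nics'] is a
# list of (ip, mac, mode, link) tuples, e.g.
#   [("198.51.100.10", "aa:00:00:12:34:56", constants.NIC_MODE_BRIDGED, "xen-br0")]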
8276 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8277 if self.op.disk_template:
8278 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8279 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8283 def _GetUpdatedParams(old_params, update_dict,
8284 default_values, parameter_types):
8285 """Return the new params dict for the given params.
8287 @type old_params: dict
8288 @param old_params: old parameters
8289 @type update_dict: dict
8290 @param update_dict: dict containing new parameter values,
8291 or constants.VALUE_DEFAULT to reset the
8292 parameter to its default value
8293 @type default_values: dict
8294 @param default_values: default values for the filled parameters
8295 @type parameter_types: dict
8296 @param parameter_types: dict mapping target dict keys to types
8297 in constants.ENFORCEABLE_TYPES
8298 @rtype: (dict, dict)
8299 @return: (new_parameters, filled_parameters)
8302 params_copy = copy.deepcopy(old_params)
8303 for key, val in update_dict.iteritems():
8304 if val == constants.VALUE_DEFAULT:
8306 del params_copy[key]
8310 params_copy[key] = val
8311 utils.ForceDictType(params_copy, parameter_types)
8312 params_filled = objects.FillDict(default_values, params_copy)
8313 return (params_copy, params_filled)
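# A minimal usage sketch (parameter names and values are hypothetical),
# mirroring the beparams call in CheckPrereq below:
#   old    = {"memory": 512, "vcpus": 1}
#   update = {"vcpus": 4, "memory": constants.VALUE_DEFAULT}
#   new, filled = self._GetUpdatedParams(old, update,
#                                        cluster.beparams[constants.PP_DEFAULT],
#                                        constants.BES_PARAMETER_TYPES)
#   # new contains {"vcpus": 4} (memory was reset to its default, so dropped)
#   # filled is the default dict overlaid with new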
8315 def CheckPrereq(self):
8316 """Check prerequisites.
8318 This only checks the instance list against the existing names.
8321 self.force = self.op.force
8323 # checking the new params on the primary/secondary nodes
8325 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8326 cluster = self.cluster = self.cfg.GetClusterInfo()
8327 assert self.instance is not None, \
8328 "Cannot retrieve locked instance %s" % self.op.instance_name
8329 pnode = instance.primary_node
8330 nodelist = list(instance.all_nodes)
8332 if self.op.disk_template:
8333 if instance.disk_template == self.op.disk_template:
8334 raise errors.OpPrereqError("Instance already has disk template %s" %
8335 instance.disk_template, errors.ECODE_INVAL)
8337 if (instance.disk_template,
8338 self.op.disk_template) not in self._DISK_CONVERSIONS:
8339 raise errors.OpPrereqError("Unsupported disk template conversion from"
8340 " %s to %s" % (instance.disk_template,
8341 self.op.disk_template),
8343 if self.op.disk_template in constants.DTS_NET_MIRROR:
8344 _CheckNodeOnline(self, self.op.remote_node)
8345 _CheckNodeNotDrained(self, self.op.remote_node)
8346 disks = [{"size": d.size} for d in instance.disks]
8347 required = _ComputeDiskSize(self.op.disk_template, disks)
8348 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8349 _CheckInstanceDown(self, instance, "cannot change disk template")
8351 # hvparams processing
8352 if self.op.hvparams:
8353 i_hvdict, hv_new = self._GetUpdatedParams(
8354 instance.hvparams, self.op.hvparams,
8355 cluster.hvparams[instance.hypervisor],
8356 constants.HVS_PARAMETER_TYPES)
8358 hypervisor.GetHypervisor(
8359 instance.hypervisor).CheckParameterSyntax(hv_new)
8360 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8361 self.hv_new = hv_new # the new actual values
8362 self.hv_inst = i_hvdict # the new dict (without defaults)
8364 self.hv_new = self.hv_inst = {}
8366 # beparams processing
8367 if self.op.beparams:
8368 i_bedict, be_new = self._GetUpdatedParams(
8369 instance.beparams, self.op.beparams,
8370 cluster.beparams[constants.PP_DEFAULT],
8371 constants.BES_PARAMETER_TYPES)
8372 self.be_new = be_new # the new actual values
8373 self.be_inst = i_bedict # the new dict (without defaults)
8375 self.be_new = self.be_inst = {}
8379 if constants.BE_MEMORY in self.op.beparams and not self.force:
8380 mem_check_list = [pnode]
8381 if be_new[constants.BE_AUTO_BALANCE]:
8382 # either we changed auto_balance to yes or it was from before
8383 mem_check_list.extend(instance.secondary_nodes)
8384 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8385 instance.hypervisor)
8386 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8387 instance.hypervisor)
8388 pninfo = nodeinfo[pnode]
8389 msg = pninfo.fail_msg
8391 # Assume the primary node is unreachable and go ahead
8392 self.warn.append("Can't get info from primary node %s: %s" %
8394 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8395 self.warn.append("Node data from primary node %s doesn't contain"
8396 " free memory information" % pnode)
8397 elif instance_info.fail_msg:
8398 self.warn.append("Can't get instance runtime information: %s" %
8399 instance_info.fail_msg)
8401 if instance_info.payload:
8402 current_mem = int(instance_info.payload['memory'])
8404 # Assume instance not running
8405 # (there is a slight race condition here, but it's not very probable,
8406 # and we have no other way to check)
8408 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8409 pninfo.payload['memory_free'])
8411 raise errors.OpPrereqError("This change will prevent the instance"
8412 " from starting, due to %d MB of memory"
8413 " missing on its primary node" % miss_mem,
8416 if be_new[constants.BE_AUTO_BALANCE]:
8417 for node, nres in nodeinfo.items():
8418 if node not in instance.secondary_nodes:
8422 self.warn.append("Can't get info from secondary node %s: %s" %
8424 elif not isinstance(nres.payload.get('memory_free', None), int):
8425 self.warn.append("Secondary node %s didn't return free"
8426 " memory information" % node)
8427 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8428 self.warn.append("Not enough memory to failover instance to"
8429 " secondary node %s" % node)
8434 for nic_op, nic_dict in self.op.nics:
8435 if nic_op == constants.DDM_REMOVE:
8436 if not instance.nics:
8437 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8440 if nic_op != constants.DDM_ADD:
8442 if not instance.nics:
8443 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8444 " no NICs" % nic_op,
8446 if nic_op < 0 or nic_op >= len(instance.nics):
8447 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8449 (nic_op, len(instance.nics) - 1),
8451 old_nic_params = instance.nics[nic_op].nicparams
8452 old_nic_ip = instance.nics[nic_op].ip
8457 update_params_dict = dict([(key, nic_dict[key])
8458 for key in constants.NICS_PARAMETERS
8459 if key in nic_dict])
8461 if 'bridge' in nic_dict:
8462 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8464 new_nic_params, new_filled_nic_params = \
8465 self._GetUpdatedParams(old_nic_params, update_params_dict,
8466 cluster.nicparams[constants.PP_DEFAULT],
8467 constants.NICS_PARAMETER_TYPES)
8468 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8469 self.nic_pinst[nic_op] = new_nic_params
8470 self.nic_pnew[nic_op] = new_filled_nic_params
8471 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8473 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8474 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8475 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8477 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8479 self.warn.append(msg)
8481 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8482 if new_nic_mode == constants.NIC_MODE_ROUTED:
8483 if 'ip' in nic_dict:
8484 nic_ip = nic_dict['ip']
8488 raise errors.OpPrereqError('Cannot set the nic ip to None'
8489 ' on a routed nic', errors.ECODE_INVAL)
8490 if 'mac' in nic_dict:
8491 nic_mac = nic_dict['mac']
8493 raise errors.OpPrereqError('Cannot set the nic mac to None',
8495 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8496 # otherwise generate the mac
8497 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8499 # or validate/reserve the current one
8501 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8502 except errors.ReservationError:
8503 raise errors.OpPrereqError("MAC address %s already in use"
8504 " in cluster" % nic_mac,
8505 errors.ECODE_NOTUNIQUE)
8508 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8509 raise errors.OpPrereqError("Disk operations not supported for"
8510 " diskless instances",
8512 for disk_op, _ in self.op.disks:
8513 if disk_op == constants.DDM_REMOVE:
8514 if len(instance.disks) == 1:
8515 raise errors.OpPrereqError("Cannot remove the last disk of"
8516 " an instance", errors.ECODE_INVAL)
8517 _CheckInstanceDown(self, instance, "cannot remove disks")
8519 if (disk_op == constants.DDM_ADD and
8520 len(instance.disks) >= constants.MAX_DISKS):
8521 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8522 " add more" % constants.MAX_DISKS,
8524 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8526 if disk_op < 0 or disk_op >= len(instance.disks):
8527 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8529 (disk_op, len(instance.disks)),
8533 if self.op.os_name and not self.op.force:
8534 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8535 self.op.force_variant)
8539 def _ConvertPlainToDrbd(self, feedback_fn):
8540 """Converts an instance from plain to drbd.
8543 feedback_fn("Converting template to drbd")
8544 instance = self.instance
8545 pnode = instance.primary_node
8546 snode = self.op.remote_node
8548 # create a fake disk info for _GenerateDiskTemplate
8549 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8550 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8551 instance.name, pnode, [snode],
8552 disk_info, None, None, 0)
8553 info = _GetInstanceInfoText(instance)
8554 feedback_fn("Creating additional volumes...")
8555 # first, create the missing data and meta devices
8556 for disk in new_disks:
8557 # unfortunately this is... not too nice
8558 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8560 for child in disk.children:
8561 _CreateSingleBlockDev(self, snode, instance, child, info, True)
8562 # at this stage, all new LVs have been created, we can rename the old ones
8564 feedback_fn("Renaming original volumes...")
8565 rename_list = [(o, n.children[0].logical_id)
8566 for (o, n) in zip(instance.disks, new_disks)]
8567 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8568 result.Raise("Failed to rename original LVs")
8570 feedback_fn("Initializing DRBD devices...")
8571 # all child devices are in place, we can now create the DRBD devices
8572 for disk in new_disks:
8573 for node in [pnode, snode]:
8574 f_create = node == pnode
8575 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8577 # at this point, the instance has been modified
8578 instance.disk_template = constants.DT_DRBD8
8579 instance.disks = new_disks
8580 self.cfg.Update(instance, feedback_fn)
8582 # disks are created, waiting for sync
8583 disk_abort = not _WaitForSync(self, instance)
8585 raise errors.OpExecError("There are some degraded disks for"
8586 " this instance, please cleanup manually")
8588 def _ConvertDrbdToPlain(self, feedback_fn):
8589 """Converts an instance from drbd to plain.
8592 instance = self.instance
8593 assert len(instance.secondary_nodes) == 1
8594 pnode = instance.primary_node
8595 snode = instance.secondary_nodes[0]
8596 feedback_fn("Converting template to plain")
8598 old_disks = instance.disks
8599 new_disks = [d.children[0] for d in old_disks]
8601 # copy over size and mode
8602 for parent, child in zip(old_disks, new_disks):
8603 child.size = parent.size
8604 child.mode = parent.mode
8606 # update instance structure
8607 instance.disks = new_disks
8608 instance.disk_template = constants.DT_PLAIN
8609 self.cfg.Update(instance, feedback_fn)
8611 feedback_fn("Removing volumes on the secondary node...")
8612 for disk in old_disks:
8613 self.cfg.SetDiskID(disk, snode)
8614 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8616 self.LogWarning("Could not remove block device %s on node %s,"
8617 " continuing anyway: %s", disk.iv_name, snode, msg)
8619 feedback_fn("Removing unneeded volumes on the primary node...")
8620 for idx, disk in enumerate(old_disks):
8621 meta = disk.children[1]
8622 self.cfg.SetDiskID(meta, pnode)
8623 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8625 self.LogWarning("Could not remove metadata for disk %d on node %s,"
8626 " continuing anyway: %s", idx, pnode, msg)
8629 def Exec(self, feedback_fn):
8630 """Modifies an instance.
8632 All parameters take effect only at the next restart of the instance.
8635 # Process here the warnings from CheckPrereq, as we don't have a
8636 # feedback_fn there.
8637 for warn in self.warn:
8638 feedback_fn("WARNING: %s" % warn)
8641 instance = self.instance
8643 for disk_op, disk_dict in self.op.disks:
8644 if disk_op == constants.DDM_REMOVE:
8645 # remove the last disk
8646 device = instance.disks.pop()
8647 device_idx = len(instance.disks)
8648 for node, disk in device.ComputeNodeTree(instance.primary_node):
8649 self.cfg.SetDiskID(disk, node)
8650 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8652 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8653 " continuing anyway", device_idx, node, msg)
8654 result.append(("disk/%d" % device_idx, "remove"))
8655 elif disk_op == constants.DDM_ADD:
8657 if instance.disk_template == constants.DT_FILE:
8658 file_driver, file_path = instance.disks[0].logical_id
8659 file_path = os.path.dirname(file_path)
8661 file_driver = file_path = None
8662 disk_idx_base = len(instance.disks)
8663 new_disk = _GenerateDiskTemplate(self,
8664 instance.disk_template,
8665 instance.name, instance.primary_node,
8666 instance.secondary_nodes,
8671 instance.disks.append(new_disk)
8672 info = _GetInstanceInfoText(instance)
8674 logging.info("Creating volume %s for instance %s",
8675 new_disk.iv_name, instance.name)
8676 # Note: this needs to be kept in sync with _CreateDisks
8678 for node in instance.all_nodes:
8679 f_create = node == instance.primary_node
8681 _CreateBlockDev(self, node, instance, new_disk,
8682 f_create, info, f_create)
8683 except errors.OpExecError, err:
8684 self.LogWarning("Failed to create volume %s (%s) on"
8686 new_disk.iv_name, new_disk, node, err)
8687 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8688 (new_disk.size, new_disk.mode)))
8690 # change a given disk
8691 instance.disks[disk_op].mode = disk_dict['mode']
8692 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8694 if self.op.disk_template:
8695 r_shut = _ShutdownInstanceDisks(self, instance)
8697 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
8698 " proceed with disk template conversion")
8699 mode = (instance.disk_template, self.op.disk_template)
8701 self._DISK_CONVERSIONS[mode](self, feedback_fn)
8703 self.cfg.ReleaseDRBDMinors(instance.name)
8705 result.append(("disk_template", self.op.disk_template))
8708 for nic_op, nic_dict in self.op.nics:
8709 if nic_op == constants.DDM_REMOVE:
8710 # remove the last nic
8711 del instance.nics[-1]
8712 result.append(("nic.%d" % len(instance.nics), "remove"))
8713 elif nic_op == constants.DDM_ADD:
8714 # mac and bridge should be set by now
8715 mac = nic_dict['mac']
8716 ip = nic_dict.get('ip', None)
8717 nicparams = self.nic_pinst[constants.DDM_ADD]
8718 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8719 instance.nics.append(new_nic)
8720 result.append(("nic.%d" % (len(instance.nics) - 1),
8721 "add:mac=%s,ip=%s,mode=%s,link=%s" %
8722 (new_nic.mac, new_nic.ip,
8723 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8724 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8727 for key in 'mac', 'ip':
8728 if key in nic_dict:
8729 setattr(instance.nics[nic_op], key, nic_dict[key])
8730 if nic_op in self.nic_pinst:
8731 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8732 for key, val in nic_dict.iteritems():
8733 result.append(("nic.%s/%d" % (key, nic_op), val))
8736 if self.op.hvparams:
8737 instance.hvparams = self.hv_inst
8738 for key, val in self.op.hvparams.iteritems():
8739 result.append(("hv/%s" % key, val))
8742 if self.op.beparams:
8743 instance.beparams = self.be_inst
8744 for key, val in self.op.beparams.iteritems():
8745 result.append(("be/%s" % key, val))
8749 instance.os = self.op.os_name
8751 self.cfg.Update(instance, feedback_fn)
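# The Exec result is a list of (parameter, new value) pairs, for example
# (entries invented for illustration):
#   [("disk/1", "add:size=1024,mode=rw"),
#    ("nic.mac/0", "aa:00:00:12:34:56"),
#    ("be/memory", 512)]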
8755 _DISK_CONVERSIONS = {
8756 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8757 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8760 class LUQueryExports(NoHooksLU):
8761 """Query the exports list
8764 _OP_REQP = ['nodes']
8767 def ExpandNames(self):
8768 self.needed_locks = {}
8769 self.share_locks[locking.LEVEL_NODE] = 1
8770 if not self.op.nodes:
8771 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8773 self.needed_locks[locking.LEVEL_NODE] = \
8774 _GetWantedNodes(self, self.op.nodes)
8776 def CheckPrereq(self):
8777 """Check prerequisites.
8780 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8782 def Exec(self, feedback_fn):
8783 """Compute the list of all the exported system images.
8786 @return: a dictionary with the structure node->(export-list)
8787 where export-list is a list of the instances exported on
8791 rpcresult = self.rpc.call_export_list(self.nodes)
8793 for node in rpcresult:
8794 if rpcresult[node].fail_msg:
8795 result[node] = False
8797 result[node] = rpcresult[node].payload
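# For illustration, the returned mapping might look like (names invented):
#   {"node1.example.com": ["instance1.example.com"],
#    "node2.example.com": False}    # False marks a node that failed the RPC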
8802 class LUExportInstance(LogicalUnit):
8803 """Export an instance to an image in the cluster.
8806 HPATH = "instance-export"
8807 HTYPE = constants.HTYPE_INSTANCE
8808 _OP_REQP = ["instance_name", "target_node", "shutdown"]
8811 def CheckArguments(self):
8812 """Check the arguments.
8815 _CheckBooleanOpField(self.op, "remove_instance")
8816 _CheckBooleanOpField(self.op, "ignore_remove_failures")
8818 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
8819 constants.DEFAULT_SHUTDOWN_TIMEOUT)
8820 self.remove_instance = getattr(self.op, "remove_instance", False)
8821 self.ignore_remove_failures = getattr(self.op, "ignore_remove_failures",
8824 if self.remove_instance and not self.op.shutdown:
8825 raise errors.OpPrereqError("Can not remove instance without shutting it"
8828 def ExpandNames(self):
8829 self._ExpandAndLockInstance()
8831 # FIXME: lock only instance primary and destination node
8833 # Sad but true, for now we have to lock all nodes, as we don't know where
8834 # the previous export might be, and in this LU we search for it and
8835 # remove it from its current node. In the future we could fix this by:
8836 # - making a tasklet to search (share-lock all), then create the new one,
8837 # then one to remove, after
8838 # - removing the removal operation altogether
8839 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8841 def DeclareLocks(self, level):
8842 """Last minute lock declaration."""
8843 # All nodes are locked anyway, so nothing to do here.
8845 def BuildHooksEnv(self):
8848 This will run on the master, primary node and target node.
8852 "EXPORT_NODE": self.op.target_node,
8853 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
8854 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
8855 # TODO: Generic function for boolean env variables
8856 "REMOVE_INSTANCE": str(bool(self.remove_instance)),
8858 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8859 nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
8860 self.op.target_node]
8863 def CheckPrereq(self):
8864 """Check prerequisites.
8866 This checks that the instance and node names are valid.
8869 instance_name = self.op.instance_name
8870 self.instance = self.cfg.GetInstanceInfo(instance_name)
8871 assert self.instance is not None, \
8872 "Cannot retrieve locked instance %s" % self.op.instance_name
8873 _CheckNodeOnline(self, self.instance.primary_node)
8875 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8876 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
8877 assert self.dst_node is not None
8879 _CheckNodeOnline(self, self.dst_node.name)
8880 _CheckNodeNotDrained(self, self.dst_node.name)
8882 # instance disk type verification
8883 # TODO: Implement export support for file-based disks
8884 for disk in self.instance.disks:
8885 if disk.dev_type == constants.LD_FILE:
8886 raise errors.OpPrereqError("Export not supported for instances with"
8887 " file-based disks", errors.ECODE_INVAL)
8889 def _CreateSnapshots(self, feedback_fn):
8890 """Creates an LVM snapshot for every disk of the instance.
8892 @return: List of snapshots as L{objects.Disk} instances
8895 instance = self.instance
8896 src_node = instance.primary_node
8898 vgname = self.cfg.GetVGName()
8902 for idx, disk in enumerate(instance.disks):
8903 feedback_fn("Creating a snapshot of disk/%s on node %s" %
8906 # result.payload will be a snapshot of an lvm leaf of the one we
8908 result = self.rpc.call_blockdev_snapshot(src_node, disk)
8909 msg = result.fail_msg
8911 self.LogWarning("Could not snapshot disk/%s on node %s: %s",
8913 snap_disks.append(False)
8915 disk_id = (vgname, result.payload)
8916 new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
8917 logical_id=disk_id, physical_id=disk_id,
8918 iv_name=disk.iv_name)
8919 snap_disks.append(new_dev)
8923 def _RemoveSnapshot(self, feedback_fn, snap_disks, disk_index):
8924 """Removes an LVM snapshot.
8926 @type snap_disks: list
8927 @param snap_disks: The list of all snapshots as returned by
8929 @type disk_index: number
8930 @param disk_index: Index of the snapshot to be removed
8932 @return: Whether removal was successful or not
8935 disk = snap_disks[disk_index]
8937 src_node = self.instance.primary_node
8939 feedback_fn("Removing snapshot of disk/%s on node %s" %
8940 (disk_index, src_node))
8942 result = self.rpc.call_blockdev_remove(src_node, disk)
8943 if not result.fail_msg:
8946 self.LogWarning("Could not remove snapshot for disk/%d from node"
8947 " %s: %s", disk_index, src_node, result.fail_msg)
8951 def _CleanupExports(self, feedback_fn):
8952 """Removes exports of current instance from all other nodes.
8954 If an instance in a cluster with nodes A..D was exported to node C, its
8955 exports will be removed from the nodes A, B and D.
8958 nodelist = self.cfg.GetNodeList()
8959 nodelist.remove(self.dst_node.name)
8961 # on one-node clusters nodelist will be empty after the removal
8962 # if we proceed, the backup would be removed because OpQueryExports
8963 # substitutes an empty list with the full cluster node list.
8964 iname = self.instance.name
8966 feedback_fn("Removing old exports for instance %s" % iname)
8967 exportlist = self.rpc.call_export_list(nodelist)
8968 for node in exportlist:
8969 if exportlist[node].fail_msg:
8971 if iname in exportlist[node].payload:
8972 msg = self.rpc.call_export_remove(node, iname).fail_msg
8974 self.LogWarning("Could not remove older export for instance %s"
8975 " on node %s: %s", iname, node, msg)
8977 def Exec(self, feedback_fn):
8978 """Export an instance to an image in the cluster.
8981 instance = self.instance
8982 dst_node = self.dst_node
8983 src_node = instance.primary_node
8985 if self.op.shutdown:
8986 # shutdown the instance, but not the disks
8987 feedback_fn("Shutting down instance %s" % instance.name)
8988 result = self.rpc.call_instance_shutdown(src_node, instance,
8989 self.shutdown_timeout)
8990 # TODO: Maybe ignore failures if ignore_remove_failures is set
8991 result.Raise("Could not shutdown instance %s on"
8992 " node %s" % (instance.name, src_node))
8994 # set the disks ID correctly since call_instance_start needs the
8995 # correct drbd minor to create the symlinks
8996 for disk in instance.disks:
8997 self.cfg.SetDiskID(disk, src_node)
8999 activate_disks = (not instance.admin_up)
9002 # Activate the instance disks if we're exporting a stopped instance
9003 feedback_fn("Activating disks for %s" % instance.name)
9004 _StartInstanceDisks(self, instance, None)
9008 removed_snaps = [False] * len(instance.disks)
9013 snap_disks = self._CreateSnapshots(feedback_fn)
9015 if (self.op.shutdown and instance.admin_up and
9016 not self.remove_instance):
9017 feedback_fn("Starting instance %s" % instance.name)
9018 result = self.rpc.call_instance_start(src_node, instance,
9020 msg = result.fail_msg
9022 _ShutdownInstanceDisks(self, instance)
9023 raise errors.OpExecError("Could not start instance: %s" % msg)
9025 assert len(snap_disks) == len(instance.disks)
9026 assert len(removed_snaps) == len(instance.disks)
9028 # TODO: check for size
9030 def _TransferFinished(idx):
9031 logging.debug("Transfer %s finished", idx)
9032 if self._RemoveSnapshot(feedback_fn, snap_disks, idx):
9033 removed_snaps[idx] = True
9037 for idx, dev in enumerate(snap_disks):
9039 transfers.append(None)
9042 path = utils.PathJoin(constants.EXPORT_DIR, "%s.new" % instance.name,
9045 finished_fn = compat.partial(_TransferFinished, idx)
9047 # FIXME: pass debug option from opcode to backend
9048 dt = masterd.instance.DiskTransfer("snapshot/%s" % idx,
9049 constants.IEIO_SCRIPT, (dev, idx),
9050 constants.IEIO_FILE, (path, ),
9052 transfers.append(dt)
9054 # Actually export data
9056 masterd.instance.TransferInstanceData(self, feedback_fn,
9057 src_node, dst_node.name,
9058 dst_node.secondary_ip,
9059 instance, transfers)
9061 assert len(dresults) == len(instance.disks)
9063 # Check for backwards compatibility
9064 assert compat.all(isinstance(i, bool) for i in dresults), \
9065 "Not all results are boolean: %r" % dresults
9067 feedback_fn("Finalizing export on %s" % dst_node.name)
9068 result = self.rpc.call_finalize_export(dst_node.name, instance,
9070 msg = result.fail_msg
9073 self.LogWarning("Could not finalize export for instance %s"
9074 " on node %s: %s", instance.name, dst_node.name, msg)
9077 # Remove all snapshots
9078 assert len(removed_snaps) == len(instance.disks)
9079 for idx, removed in enumerate(removed_snaps):
9081 self._RemoveSnapshot(feedback_fn, snap_disks, idx)
9085 feedback_fn("Deactivating disks for %s" % instance.name)
9086 _ShutdownInstanceDisks(self, instance)
9088 # Remove instance if requested
9089 if self.remove_instance:
9090 feedback_fn("Removing instance %s" % instance.name)
9091 _RemoveInstance(self, feedback_fn, instance, self.ignore_remove_failures)
9093 self._CleanupExports(feedback_fn)
9095 return fin_resu, dresults
9098 class LURemoveExport(NoHooksLU):
9099 """Remove exports related to the named instance.
9102 _OP_REQP = ["instance_name"]
9105 def ExpandNames(self):
9106 self.needed_locks = {}
9107 # We need all nodes to be locked in order for RemoveExport to work, but we
9108 # don't need to lock the instance itself, as nothing will happen to it (and
9109 # we can remove exports also for a removed instance)
9110 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9112 def CheckPrereq(self):
9113 """Check prerequisites.
9117 def Exec(self, feedback_fn):
9118 """Remove any export.
9121 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9122 # If the instance was not found we'll try with the name that was passed in.
9123 # This will only work if it was an FQDN, though.
9125 if not instance_name:
9127 instance_name = self.op.instance_name
9129 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9130 exportlist = self.rpc.call_export_list(locked_nodes)
9132 for node in exportlist:
9133 msg = exportlist[node].fail_msg
9135 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9137 if instance_name in exportlist[node].payload:
9139 result = self.rpc.call_export_remove(node, instance_name)
9140 msg = result.fail_msg
9142 logging.error("Could not remove export for instance %s"
9143 " on node %s: %s", instance_name, node, msg)
9145 if fqdn_warn and not found:
9146 feedback_fn("Export not found. If trying to remove an export belonging"
9147 " to a deleted instance, please use its Fully Qualified Domain Name.")
9151 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9154 This is an abstract class which is the parent of all the other tags LUs.
9158 def ExpandNames(self):
9159 self.needed_locks = {}
9160 if self.op.kind == constants.TAG_NODE:
9161 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9162 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9163 elif self.op.kind == constants.TAG_INSTANCE:
9164 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9165 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9167 def CheckPrereq(self):
9168 """Check prerequisites.
9171 if self.op.kind == constants.TAG_CLUSTER:
9172 self.target = self.cfg.GetClusterInfo()
9173 elif self.op.kind == constants.TAG_NODE:
9174 self.target = self.cfg.GetNodeInfo(self.op.name)
9175 elif self.op.kind == constants.TAG_INSTANCE:
9176 self.target = self.cfg.GetInstanceInfo(self.op.name)
9178 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9179 str(self.op.kind), errors.ECODE_INVAL)
9182 class LUGetTags(TagsLU):
9183 """Returns the tags of a given object.
9186 _OP_REQP = ["kind", "name"]
9189 def Exec(self, feedback_fn):
9190 """Returns the tag list.
9193 return list(self.target.GetTags())
9196 class LUSearchTags(NoHooksLU):
9197 """Searches the tags for a given pattern.
9200 _OP_REQP = ["pattern"]
9203 def ExpandNames(self):
9204 self.needed_locks = {}
9206 def CheckPrereq(self):
9207 """Check prerequisites.
9209 This checks the pattern passed for validity by compiling it.
9213 self.re = re.compile(self.op.pattern)
9214 except re.error, err:
9215 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9216 (self.op.pattern, err), errors.ECODE_INVAL)
9218 def Exec(self, feedback_fn):
9219 """Returns the tag list.
9223 tgts = [("/cluster", cfg.GetClusterInfo())]
9224 ilist = cfg.GetAllInstancesInfo().values()
9225 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9226 nlist = cfg.GetAllNodesInfo().values()
9227 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9229 for path, target in tgts:
9230 for tag in target.GetTags():
9231 if self.re.search(tag):
9232 results.append((path, tag))
9236 class LUAddTags(TagsLU):
9237 """Sets a tag on a given object.
9240 _OP_REQP = ["kind", "name", "tags"]
9243 def CheckPrereq(self):
9244 """Check prerequisites.
9246 This checks the type and length of the tag name and value.
9249 TagsLU.CheckPrereq(self)
9250 for tag in self.op.tags:
9251 objects.TaggableObject.ValidateTag(tag)
9253 def Exec(self, feedback_fn):
9258 for tag in self.op.tags:
9259 self.target.AddTag(tag)
9260 except errors.TagError, err:
9261 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9262 self.cfg.Update(self.target, feedback_fn)
9265 class LUDelTags(TagsLU):
9266 """Delete a list of tags from a given object.
9269 _OP_REQP = ["kind", "name", "tags"]
9272 def CheckPrereq(self):
9273 """Check prerequisites.
9275 This checks that we have the given tag.
9278 TagsLU.CheckPrereq(self)
9279 for tag in self.op.tags:
9280 objects.TaggableObject.ValidateTag(tag)
9281 del_tags = frozenset(self.op.tags)
9282 cur_tags = self.target.GetTags()
9283 if not del_tags <= cur_tags:
9284 diff_tags = del_tags - cur_tags
9285 diff_names = ["'%s'" % tag for tag in diff_tags]
9287 raise errors.OpPrereqError("Tag(s) %s not found" %
9288 (",".join(diff_names)), errors.ECODE_NOENT)
9290 def Exec(self, feedback_fn):
9291 """Remove the tag from the object.
9294 for tag in self.op.tags:
9295 self.target.RemoveTag(tag)
9296 self.cfg.Update(self.target, feedback_fn)
9299 class LUTestDelay(NoHooksLU):
9300 """Sleep for a specified amount of time.
9302 This LU sleeps on the master and/or nodes for a specified amount of time.
9306 _OP_REQP = ["duration", "on_master", "on_nodes"]
9309 def ExpandNames(self):
9310 """Expand names and set required locks.
9312 This expands the node list, if any.
9315 self.needed_locks = {}
9316 if self.op.on_nodes:
9317 # _GetWantedNodes can be used here, but is not always appropriate to use
9318 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9320 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9321 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9323 def CheckPrereq(self):
9324 """Check prerequisites.
9328 def Exec(self, feedback_fn):
9329 """Do the actual sleep.
9332 if self.op.on_master:
9333 if not utils.TestDelay(self.op.duration):
9334 raise errors.OpExecError("Error during master delay test")
9335 if self.op.on_nodes:
9336 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9337 for node, node_result in result.items():
9338 node_result.Raise("Failure during rpc call to node %s" % node)
9341 class IAllocator(object):
9342 """IAllocator framework.
9344 An IAllocator instance has three sets of attributes:
9345 - cfg that is needed to query the cluster
9346 - input data (all members of the _KEYS class attribute are required)
9347 - four buffer attributes (in|out_data|text), that represent the
9348 input (to the external script) in text and data structure format,
9349 and the output from it, again in two formats
9350 - the result variables from the script (success, info, nodes) for
9354 # pylint: disable-msg=R0902
9355 # lots of instance attributes
9357 "name", "mem_size", "disks", "disk_template",
9358 "os", "tags", "nics", "vcpus", "hypervisor",
9361 "name", "relocate_from",
9367 def __init__(self, cfg, rpc, mode, **kwargs):
9370 # init buffer variables
9371 self.in_text = self.out_text = self.in_data = self.out_data = None
9372 # init all input fields so that pylint is happy
9374 self.mem_size = self.disks = self.disk_template = None
9375 self.os = self.tags = self.nics = self.vcpus = None
9376 self.hypervisor = None
9377 self.relocate_from = None
9379 self.evac_nodes = None
9381 self.required_nodes = None
9382 # init result fields
9383 self.success = self.info = self.result = None
9384 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9385 keyset = self._ALLO_KEYS
9386 fn = self._AddNewInstance
9387 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9388 keyset = self._RELO_KEYS
9389 fn = self._AddRelocateInstance
9390 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9391 keyset = self._EVAC_KEYS
9392 fn = self._AddEvacuateNodes
9394 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9395 " IAllocator" % self.mode)
9397 if key not in keyset:
9398 raise errors.ProgrammerError("Invalid input parameter '%s' to"
9399 " IAllocator" % key)
9400 setattr(self, key, kwargs[key])
9403 if key not in kwargs:
9404 raise errors.ProgrammerError("Missing input parameter '%s' to"
9405 " IAllocator" % key)
9406 self._BuildInputData(fn)
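# A rough usage sketch, mirroring LUNodeEvacuationStrategy above (the node
# name and allocator script name are hypothetical):
#   ial = IAllocator(self.cfg, self.rpc,
#                    mode=constants.IALLOCATOR_MODE_MEVAC,
#                    evac_nodes=["node2.example.com"])
#   ial.Run("hail", validate=True)
#   if ial.success:
#     jobs = ial.result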
9408 def _ComputeClusterData(self):
9409 """Compute the generic allocator input data.
9411 This is the data that is independent of the actual operation.
9415 cluster_info = cfg.GetClusterInfo()
9418 "version": constants.IALLOCATOR_VERSION,
9419 "cluster_name": cfg.GetClusterName(),
9420 "cluster_tags": list(cluster_info.GetTags()),
9421 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9422 # we don't have job IDs
9424 iinfo = cfg.GetAllInstancesInfo().values()
9425 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9429 node_list = cfg.GetNodeList()
9431 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9432 hypervisor_name = self.hypervisor
9433 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9434 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9435 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9436 hypervisor_name = cluster_info.enabled_hypervisors[0]
9438 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9441 self.rpc.call_all_instances_info(node_list,
9442 cluster_info.enabled_hypervisors)

    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
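        # Memory accounting note: for every primary instance on this node
        # the code below subtracts from the reported free memory the part
        # of the instance's configured memory (BE_MEMORY) that it is not
        # currently using, so the allocator plans as if each instance
        # consumed its full configured amount.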
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
9518 "tags": list(iinfo.GetTags()),
9519 "admin_up": iinfo.admin_up,
9520 "vcpus": beinfo[constants.BE_VCPUS],
9521 "memory": beinfo[constants.BE_MEMORY],
9523 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9525 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9526 "disk_template": iinfo.disk_template,
9527 "hypervisor": iinfo.hypervisor,
9529 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9531 instance_data[iinfo.name] = pir
9533 data["instances"] = instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {"evac_nodes": self.evac_nodes}
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
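    # serializer.Dump produces the JSON text handed to the allocator
    # script; schematically (values are illustrative, not literal):
    #   {"version": ..., "cluster_name": ..., "nodes": {...},
    #    "instances": {...}, "request": {"type": <mode>, ...}}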

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
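    # The reply is expected to be a JSON object along the lines of this
    # schematic example:
    #   {"success": true, "info": "...", "result": [...]}
    # Legacy scripts may return "nodes" instead of "result"; that key is
    # remapped below.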
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]
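  # Typically exercised via "gnt-debug allocator" (illustrative note);
  # the opcode must carry the attributes listed in _OP_REQP plus the
  # per-mode extras validated in CheckPrereq below.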

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the test direction
    and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result