4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
38 from ganeti import ssh
39 from ganeti import utils
40 from ganeti import errors
41 from ganeti import hypervisor
42 from ganeti import locking
43 from ganeti import constants
44 from ganeti import objects
45 from ganeti import serializer
46 from ganeti import ssconf
47 from ganeti import uidpool
48 from ganeti import compat
49 from ganeti import masterd
51 import ganeti.masterd.instance # pylint: disable-msg=W0611
54 class LogicalUnit(object):
55 """Logical Unit base class.
57 Subclasses must follow these rules:
58 - implement ExpandNames
59 - implement CheckPrereq (except when tasklets are used)
60 - implement Exec (except when tasklets are used)
61 - implement BuildHooksEnv
62 - redefine HPATH and HTYPE
63 - optionally redefine their run requirements:
64 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
66 Note that all commands require root permissions.
68 @ivar dry_run_result: the value (if any) that will be returned to the caller
69 in dry-run mode (signalled by opcode dry_run parameter)
77 def __init__(self, processor, op, context, rpc):
78 """Constructor for LogicalUnit.
80 This needs to be overridden in derived classes in order to check op
86 self.cfg = context.cfg
87 self.context = context
89 # Dicts used to declare locking needs to mcpu
90 self.needed_locks = None
91 self.acquired_locks = {}
92 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
94 self.remove_locks = {}
95 # Used to force good behavior when calling helper functions
96 self.recalculate_locks = {}
99 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
100 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
101 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
102 # support for dry-run
103 self.dry_run_result = None
104 # support for generic debug attribute
105 if (not hasattr(self.op, "debug_level") or
106 not isinstance(self.op.debug_level, int)):
107 self.op.debug_level = 0
112 for attr_name in self._OP_REQP:
113 attr_val = getattr(op, attr_name, None)
115 raise errors.OpPrereqError("Required parameter '%s' missing" %
116 attr_name, errors.ECODE_INVAL)
118 self.CheckArguments()
121 """Returns the SshRunner object
125 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
128 ssh = property(fget=__GetSSH)
130 def CheckArguments(self):
131 """Check syntactic validity for the opcode arguments.
133 This method is for doing a simple syntactic check and ensure
134 validity of opcode parameters, without any cluster-related
135 checks. While the same can be accomplished in ExpandNames and/or
136 CheckPrereq, doing these separate is better because:
138 - ExpandNames is left as as purely a lock-related function
139 - CheckPrereq is run after we have acquired locks (and possible
142 The function is allowed to change the self.op attribute so that
143 later methods can no longer worry about missing parameters.
148 def ExpandNames(self):
149 """Expand names for this LU.
151 This method is called before starting to execute the opcode, and it should
152 update all the parameters of the opcode to their canonical form (e.g. a
153 short node name must be fully expanded after this method has successfully
154 completed). This way locking, hooks, logging, ecc. can work correctly.
156 LUs which implement this method must also populate the self.needed_locks
157 member, as a dict with lock levels as keys, and a list of needed lock names
160 - use an empty dict if you don't need any lock
161 - if you don't need any lock at a particular level omit that level
162 - don't put anything for the BGL level
163 - if you want all locks at a level use locking.ALL_SET as a value
165 If you need to share locks (rather than acquire them exclusively) at one
166 level you can modify self.share_locks, setting a true value (usually 1) for
167 that level. By default locks are not shared.
169 This function can also define a list of tasklets, which then will be
170 executed in order instead of the usual LU-level CheckPrereq and Exec
171 functions, if those are not defined by the LU.
175 # Acquire all nodes and one instance
176 self.needed_locks = {
177 locking.LEVEL_NODE: locking.ALL_SET,
178 locking.LEVEL_INSTANCE: ['instance1.example.tld'],
180 # Acquire just two nodes
181 self.needed_locks = {
182 locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
185 self.needed_locks = {} # No, you can't leave it to the default value None
188 # The implementation of this method is mandatory only if the new LU is
189 # concurrent, so that old LUs don't need to be changed all at the same
192 self.needed_locks = {} # Exclusive LUs don't need locks.
194 raise NotImplementedError
196 def DeclareLocks(self, level):
197 """Declare LU locking needs for a level
199 While most LUs can just declare their locking needs at ExpandNames time,
200 sometimes there's the need to calculate some locks after having acquired
201 the ones before. This function is called just before acquiring locks at a
202 particular level, but after acquiring the ones at lower levels, and permits
203 such calculations. It can be used to modify self.needed_locks, and by
204 default it does nothing.
206 This function is only called if you have something already set in
207 self.needed_locks for the level.
209 @param level: Locking level which is going to be locked
210 @type level: member of ganeti.locking.LEVELS
214 def CheckPrereq(self):
215 """Check prerequisites for this LU.
217 This method should check that the prerequisites for the execution
218 of this LU are fulfilled. It can do internode communication, but
219 it should be idempotent - no cluster or system changes are
222 The method should raise errors.OpPrereqError in case something is
223 not fulfilled. Its return value is ignored.
225 This method should also update all the parameters of the opcode to
226 their canonical form if it hasn't been done by ExpandNames before.
229 if self.tasklets is not None:
230 for (idx, tl) in enumerate(self.tasklets):
231 logging.debug("Checking prerequisites for tasklet %s/%s",
232 idx + 1, len(self.tasklets))
235 raise NotImplementedError
237 def Exec(self, feedback_fn):
240 This method should implement the actual work. It should raise
241 errors.OpExecError for failures that are somewhat dealt with in
245 if self.tasklets is not None:
246 for (idx, tl) in enumerate(self.tasklets):
247 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
250 raise NotImplementedError
252 def BuildHooksEnv(self):
253 """Build hooks environment for this LU.
255 This method should return a three-node tuple consisting of: a dict
256 containing the environment that will be used for running the
257 specific hook for this LU, a list of node names on which the hook
258 should run before the execution, and a list of node names on which
259 the hook should run after the execution.
261 The keys of the dict must not have 'GANETI_' prefixed as this will
262 be handled in the hooks runner. Also note additional keys will be
263 added by the hooks runner. If the LU doesn't define any
264 environment, an empty dict (and not None) should be returned.
266 No nodes should be returned as an empty list (and not None).
268 Note that if the HPATH for a LU class is None, this function will
272 raise NotImplementedError
274 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
275 """Notify the LU about the results of its hooks.
277 This method is called every time a hooks phase is executed, and notifies
278 the Logical Unit about the hooks' result. The LU can then use it to alter
279 its result based on the hooks. By default the method does nothing and the
280 previous result is passed back unchanged but any LU can define it if it
281 wants to use the local cluster hook-scripts somehow.
283 @param phase: one of L{constants.HOOKS_PHASE_POST} or
284 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
285 @param hook_results: the results of the multi-node hooks rpc call
286 @param feedback_fn: function used send feedback back to the caller
287 @param lu_result: the previous Exec result this LU had, or None
289 @return: the new Exec result, based on the previous result
293 # API must be kept, thus we ignore the unused argument and could
294 # be a function warnings
295 # pylint: disable-msg=W0613,R0201
298 def _ExpandAndLockInstance(self):
299 """Helper function to expand and lock an instance.
301 Many LUs that work on an instance take its name in self.op.instance_name
302 and need to expand it and then declare the expanded name for locking. This
303 function does it, and then updates self.op.instance_name to the expanded
304 name. It also initializes needed_locks as a dict, if this hasn't been done
308 if self.needed_locks is None:
309 self.needed_locks = {}
311 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
312 "_ExpandAndLockInstance called with instance-level locks set"
313 self.op.instance_name = _ExpandInstanceName(self.cfg,
314 self.op.instance_name)
315 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
317 def _LockInstancesNodes(self, primary_only=False):
318 """Helper function to declare instances' nodes for locking.
320 This function should be called after locking one or more instances to lock
321 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
322 with all primary or secondary nodes for instances already locked and
323 present in self.needed_locks[locking.LEVEL_INSTANCE].
325 It should be called from DeclareLocks, and for safety only works if
326 self.recalculate_locks[locking.LEVEL_NODE] is set.
328 In the future it may grow parameters to just lock some instance's nodes, or
329 to just lock primaries or secondary nodes, if needed.
331 If should be called in DeclareLocks in a way similar to::
333 if level == locking.LEVEL_NODE:
334 self._LockInstancesNodes()
336 @type primary_only: boolean
337 @param primary_only: only lock primary nodes of locked instances
340 assert locking.LEVEL_NODE in self.recalculate_locks, \
341 "_LockInstancesNodes helper function called with no nodes to recalculate"
343 # TODO: check if we're really been called with the instance locks held
345 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
346 # future we might want to have different behaviors depending on the value
347 # of self.recalculate_locks[locking.LEVEL_NODE]
349 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
350 instance = self.context.cfg.GetInstanceInfo(instance_name)
351 wanted_nodes.append(instance.primary_node)
353 wanted_nodes.extend(instance.secondary_nodes)
355 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
356 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
357 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
358 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
360 del self.recalculate_locks[locking.LEVEL_NODE]
363 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
364 """Simple LU which runs no hooks.
366 This LU is intended as a parent for other LogicalUnits which will
367 run no hooks, in order to reduce duplicate code.
373 def BuildHooksEnv(self):
374 """Empty BuildHooksEnv for NoHooksLu.
376 This just raises an error.
379 assert False, "BuildHooksEnv called for NoHooksLUs"
383 """Tasklet base class.
385 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
386 they can mix legacy code with tasklets. Locking needs to be done in the LU,
387 tasklets know nothing about locks.
389 Subclasses must follow these rules:
390 - Implement CheckPrereq
394 def __init__(self, lu):
401 def CheckPrereq(self):
402 """Check prerequisites for this tasklets.
404 This method should check whether the prerequisites for the execution of
405 this tasklet are fulfilled. It can do internode communication, but it
406 should be idempotent - no cluster or system changes are allowed.
408 The method should raise errors.OpPrereqError in case something is not
409 fulfilled. Its return value is ignored.
411 This method should also update all parameters to their canonical form if it
412 hasn't been done before.
415 raise NotImplementedError
417 def Exec(self, feedback_fn):
418 """Execute the tasklet.
420 This method should implement the actual work. It should raise
421 errors.OpExecError for failures that are somewhat dealt with in code, or
425 raise NotImplementedError
428 def _GetWantedNodes(lu, nodes):
429 """Returns list of checked and expanded node names.
431 @type lu: L{LogicalUnit}
432 @param lu: the logical unit on whose behalf we execute
434 @param nodes: list of node names or None for all nodes
436 @return: the list of nodes, sorted
437 @raise errors.ProgrammerError: if the nodes parameter is wrong type
440 if not isinstance(nodes, list):
441 raise errors.OpPrereqError("Invalid argument type 'nodes'",
445 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
446 " non-empty list of nodes whose name is to be expanded.")
448 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
449 return utils.NiceSort(wanted)
452 def _GetWantedInstances(lu, instances):
453 """Returns list of checked and expanded instance names.
455 @type lu: L{LogicalUnit}
456 @param lu: the logical unit on whose behalf we execute
457 @type instances: list
458 @param instances: list of instance names or None for all instances
460 @return: the list of instances, sorted
461 @raise errors.OpPrereqError: if the instances parameter is wrong type
462 @raise errors.OpPrereqError: if any of the passed instances is not found
465 if not isinstance(instances, list):
466 raise errors.OpPrereqError("Invalid argument type 'instances'",
470 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
472 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
476 def _GetUpdatedParams(old_params, update_dict):
477 """Return the new version of a parameter dictionary.
479 @type old_params: dict
480 @param old_params: old parameters
481 @type update_dict: dict
482 @param update_dict: dict containing new parameter values, or
483 constants.VALUE_DEFAULT to reset the parameter to its default
486 @return: the new parameter dictionary
489 params_copy = copy.deepcopy(old_params)
490 for key, val in update_dict.iteritems():
491 if val == constants.VALUE_DEFAULT:
497 params_copy[key] = val
501 def _CheckOutputFields(static, dynamic, selected):
502 """Checks whether all selected fields are valid.
504 @type static: L{utils.FieldSet}
505 @param static: static fields set
506 @type dynamic: L{utils.FieldSet}
507 @param dynamic: dynamic fields set
514 delta = f.NonMatching(selected)
516 raise errors.OpPrereqError("Unknown output fields selected: %s"
517 % ",".join(delta), errors.ECODE_INVAL)
520 def _CheckBooleanOpField(op, name):
521 """Validates boolean opcode parameters.
523 This will ensure that an opcode parameter is either a boolean value,
524 or None (but that it always exists).
527 val = getattr(op, name, None)
528 if not (val is None or isinstance(val, bool)):
529 raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
530 (name, str(val)), errors.ECODE_INVAL)
531 setattr(op, name, val)
534 def _CheckGlobalHvParams(params):
535 """Validates that given hypervisor params are not global ones.
537 This will ensure that instances don't get customised versions of
541 used_globals = constants.HVC_GLOBALS.intersection(params)
543 msg = ("The following hypervisor parameters are global and cannot"
544 " be customized at instance level, please modify them at"
545 " cluster level: %s" % utils.CommaJoin(used_globals))
546 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
549 def _CheckNodeOnline(lu, node):
550 """Ensure that a given node is online.
552 @param lu: the LU on behalf of which we make the check
553 @param node: the node to check
554 @raise errors.OpPrereqError: if the node is offline
557 if lu.cfg.GetNodeInfo(node).offline:
558 raise errors.OpPrereqError("Can't use offline node %s" % node,
562 def _CheckNodeNotDrained(lu, node):
563 """Ensure that a given node is not drained.
565 @param lu: the LU on behalf of which we make the check
566 @param node: the node to check
567 @raise errors.OpPrereqError: if the node is drained
570 if lu.cfg.GetNodeInfo(node).drained:
571 raise errors.OpPrereqError("Can't use drained node %s" % node,
575 def _CheckNodeHasOS(lu, node, os_name, force_variant):
576 """Ensure that a node supports a given OS.
578 @param lu: the LU on behalf of which we make the check
579 @param node: the node to check
580 @param os_name: the OS to query about
581 @param force_variant: whether to ignore variant errors
582 @raise errors.OpPrereqError: if the node is not supporting the OS
585 result = lu.rpc.call_os_get(node, os_name)
586 result.Raise("OS '%s' not in supported OS list for node %s" %
588 prereq=True, ecode=errors.ECODE_INVAL)
589 if not force_variant:
590 _CheckOSVariant(result.payload, os_name)
593 def _RequireFileStorage():
594 """Checks that file storage is enabled.
596 @raise errors.OpPrereqError: when file storage is disabled
599 if not constants.ENABLE_FILE_STORAGE:
600 raise errors.OpPrereqError("File storage disabled at configure time",
604 def _CheckDiskTemplate(template):
605 """Ensure a given disk template is valid.
608 if template not in constants.DISK_TEMPLATES:
609 msg = ("Invalid disk template name '%s', valid templates are: %s" %
610 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
611 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
612 if template == constants.DT_FILE:
613 _RequireFileStorage()
616 def _CheckStorageType(storage_type):
617 """Ensure a given storage type is valid.
620 if storage_type not in constants.VALID_STORAGE_TYPES:
621 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
623 if storage_type == constants.ST_FILE:
624 _RequireFileStorage()
627 def _GetClusterDomainSecret():
628 """Reads the cluster domain secret.
631 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
635 def _CheckInstanceDown(lu, instance, reason):
636 """Ensure that an instance is not running."""
637 if instance.admin_up:
638 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
639 (instance.name, reason), errors.ECODE_STATE)
641 pnode = instance.primary_node
642 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
643 ins_l.Raise("Can't contact node %s for instance information" % pnode,
644 prereq=True, ecode=errors.ECODE_ENVIRON)
646 if instance.name in ins_l.payload:
647 raise errors.OpPrereqError("Instance %s is running, %s" %
648 (instance.name, reason), errors.ECODE_STATE)
651 def _ExpandItemName(fn, name, kind):
652 """Expand an item name.
654 @param fn: the function to use for expansion
655 @param name: requested item name
656 @param kind: text description ('Node' or 'Instance')
657 @return: the resolved (full) name
658 @raise errors.OpPrereqError: if the item is not found
662 if full_name is None:
663 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
668 def _ExpandNodeName(cfg, name):
669 """Wrapper over L{_ExpandItemName} for nodes."""
670 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
673 def _ExpandInstanceName(cfg, name):
674 """Wrapper over L{_ExpandItemName} for instance."""
675 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
678 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
679 memory, vcpus, nics, disk_template, disks,
680 bep, hvp, hypervisor_name):
681 """Builds instance related env variables for hooks
683 This builds the hook environment from individual variables.
686 @param name: the name of the instance
687 @type primary_node: string
688 @param primary_node: the name of the instance's primary node
689 @type secondary_nodes: list
690 @param secondary_nodes: list of secondary nodes as strings
691 @type os_type: string
692 @param os_type: the name of the instance's OS
693 @type status: boolean
694 @param status: the should_run status of the instance
696 @param memory: the memory size of the instance
698 @param vcpus: the count of VCPUs the instance has
700 @param nics: list of tuples (ip, mac, mode, link) representing
701 the NICs the instance has
702 @type disk_template: string
703 @param disk_template: the disk template of the instance
705 @param disks: the list of (size, mode) pairs
707 @param bep: the backend parameters for the instance
709 @param hvp: the hypervisor parameters for the instance
710 @type hypervisor_name: string
711 @param hypervisor_name: the hypervisor for the instance
713 @return: the hook environment for this instance
722 "INSTANCE_NAME": name,
723 "INSTANCE_PRIMARY": primary_node,
724 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
725 "INSTANCE_OS_TYPE": os_type,
726 "INSTANCE_STATUS": str_status,
727 "INSTANCE_MEMORY": memory,
728 "INSTANCE_VCPUS": vcpus,
729 "INSTANCE_DISK_TEMPLATE": disk_template,
730 "INSTANCE_HYPERVISOR": hypervisor_name,
734 nic_count = len(nics)
735 for idx, (ip, mac, mode, link) in enumerate(nics):
738 env["INSTANCE_NIC%d_IP" % idx] = ip
739 env["INSTANCE_NIC%d_MAC" % idx] = mac
740 env["INSTANCE_NIC%d_MODE" % idx] = mode
741 env["INSTANCE_NIC%d_LINK" % idx] = link
742 if mode == constants.NIC_MODE_BRIDGED:
743 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
747 env["INSTANCE_NIC_COUNT"] = nic_count
750 disk_count = len(disks)
751 for idx, (size, mode) in enumerate(disks):
752 env["INSTANCE_DISK%d_SIZE" % idx] = size
753 env["INSTANCE_DISK%d_MODE" % idx] = mode
757 env["INSTANCE_DISK_COUNT"] = disk_count
759 for source, kind in [(bep, "BE"), (hvp, "HV")]:
760 for key, value in source.items():
761 env["INSTANCE_%s_%s" % (kind, key)] = value
766 def _NICListToTuple(lu, nics):
767 """Build a list of nic information tuples.
769 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
770 value in LUQueryInstanceData.
772 @type lu: L{LogicalUnit}
773 @param lu: the logical unit on whose behalf we execute
774 @type nics: list of L{objects.NIC}
775 @param nics: list of nics to convert to hooks tuples
779 cluster = lu.cfg.GetClusterInfo()
783 filled_params = cluster.SimpleFillNIC(nic.nicparams)
784 mode = filled_params[constants.NIC_MODE]
785 link = filled_params[constants.NIC_LINK]
786 hooks_nics.append((ip, mac, mode, link))
790 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
791 """Builds instance related env variables for hooks from an object.
793 @type lu: L{LogicalUnit}
794 @param lu: the logical unit on whose behalf we execute
795 @type instance: L{objects.Instance}
796 @param instance: the instance for which we should build the
799 @param override: dictionary with key/values that will override
802 @return: the hook environment dictionary
805 cluster = lu.cfg.GetClusterInfo()
806 bep = cluster.FillBE(instance)
807 hvp = cluster.FillHV(instance)
809 'name': instance.name,
810 'primary_node': instance.primary_node,
811 'secondary_nodes': instance.secondary_nodes,
812 'os_type': instance.os,
813 'status': instance.admin_up,
814 'memory': bep[constants.BE_MEMORY],
815 'vcpus': bep[constants.BE_VCPUS],
816 'nics': _NICListToTuple(lu, instance.nics),
817 'disk_template': instance.disk_template,
818 'disks': [(disk.size, disk.mode) for disk in instance.disks],
821 'hypervisor_name': instance.hypervisor,
824 args.update(override)
825 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
828 def _AdjustCandidatePool(lu, exceptions):
829 """Adjust the candidate pool after node operations.
832 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
834 lu.LogInfo("Promoted nodes to master candidate role: %s",
835 utils.CommaJoin(node.name for node in mod_list))
836 for name in mod_list:
837 lu.context.ReaddNode(name)
838 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
840 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
844 def _DecideSelfPromotion(lu, exceptions=None):
845 """Decide whether I should promote myself as a master candidate.
848 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
849 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
850 # the new node will increase mc_max with one, so:
851 mc_should = min(mc_should + 1, cp_size)
852 return mc_now < mc_should
855 def _CheckNicsBridgesExist(lu, target_nics, target_node):
856 """Check that the brigdes needed by a list of nics exist.
859 cluster = lu.cfg.GetClusterInfo()
860 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
861 brlist = [params[constants.NIC_LINK] for params in paramslist
862 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
864 result = lu.rpc.call_bridges_exist(target_node, brlist)
865 result.Raise("Error checking bridges on destination node '%s'" %
866 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
869 def _CheckInstanceBridgesExist(lu, instance, node=None):
870 """Check that the brigdes needed by an instance exist.
874 node = instance.primary_node
875 _CheckNicsBridgesExist(lu, instance.nics, node)
878 def _CheckOSVariant(os_obj, name):
879 """Check whether an OS name conforms to the os variants specification.
881 @type os_obj: L{objects.OS}
882 @param os_obj: OS object to check
884 @param name: OS name passed by the user, to check for validity
887 if not os_obj.supported_variants:
890 variant = name.split("+", 1)[1]
892 raise errors.OpPrereqError("OS name must include a variant",
895 if variant not in os_obj.supported_variants:
896 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
899 def _GetNodeInstancesInner(cfg, fn):
900 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
903 def _GetNodeInstances(cfg, node_name):
904 """Returns a list of all primary and secondary instances on a node.
908 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
911 def _GetNodePrimaryInstances(cfg, node_name):
912 """Returns primary instances on a node.
915 return _GetNodeInstancesInner(cfg,
916 lambda inst: node_name == inst.primary_node)
919 def _GetNodeSecondaryInstances(cfg, node_name):
920 """Returns secondary instances on a node.
923 return _GetNodeInstancesInner(cfg,
924 lambda inst: node_name in inst.secondary_nodes)
927 def _GetStorageTypeArgs(cfg, storage_type):
928 """Returns the arguments for a storage type.
931 # Special case for file storage
932 if storage_type == constants.ST_FILE:
933 # storage.FileStorage wants a list of storage directories
934 return [[cfg.GetFileStorageDir()]]
939 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
942 for dev in instance.disks:
943 cfg.SetDiskID(dev, node_name)
945 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
946 result.Raise("Failed to get disk status from node %s" % node_name,
947 prereq=prereq, ecode=errors.ECODE_ENVIRON)
949 for idx, bdev_status in enumerate(result.payload):
950 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
956 class LUPostInitCluster(LogicalUnit):
957 """Logical unit for running hooks after cluster initialization.
960 HPATH = "cluster-init"
961 HTYPE = constants.HTYPE_CLUSTER
964 def BuildHooksEnv(self):
968 env = {"OP_TARGET": self.cfg.GetClusterName()}
969 mn = self.cfg.GetMasterNode()
972 def CheckPrereq(self):
973 """No prerequisites to check.
978 def Exec(self, feedback_fn):
985 class LUDestroyCluster(LogicalUnit):
986 """Logical unit for destroying the cluster.
989 HPATH = "cluster-destroy"
990 HTYPE = constants.HTYPE_CLUSTER
993 def BuildHooksEnv(self):
997 env = {"OP_TARGET": self.cfg.GetClusterName()}
1000 def CheckPrereq(self):
1001 """Check prerequisites.
1003 This checks whether the cluster is empty.
1005 Any errors are signaled by raising errors.OpPrereqError.
1008 master = self.cfg.GetMasterNode()
1010 nodelist = self.cfg.GetNodeList()
1011 if len(nodelist) != 1 or nodelist[0] != master:
1012 raise errors.OpPrereqError("There are still %d node(s) in"
1013 " this cluster." % (len(nodelist) - 1),
1015 instancelist = self.cfg.GetInstanceList()
1017 raise errors.OpPrereqError("There are still %d instance(s) in"
1018 " this cluster." % len(instancelist),
1021 def Exec(self, feedback_fn):
1022 """Destroys the cluster.
1025 master = self.cfg.GetMasterNode()
1026 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1028 # Run post hooks on master node before it's removed
1029 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1031 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1033 # pylint: disable-msg=W0702
1034 self.LogWarning("Errors occurred running hooks on %s" % master)
1036 result = self.rpc.call_node_stop_master(master, False)
1037 result.Raise("Could not disable the master role")
1039 if modify_ssh_setup:
1040 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1041 utils.CreateBackup(priv_key)
1042 utils.CreateBackup(pub_key)
1047 def _VerifyCertificate(filename):
1048 """Verifies a certificate for LUVerifyCluster.
1050 @type filename: string
1051 @param filename: Path to PEM file
1055 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1056 utils.ReadFile(filename))
1057 except Exception, err: # pylint: disable-msg=W0703
1058 return (LUVerifyCluster.ETYPE_ERROR,
1059 "Failed to load X509 certificate %s: %s" % (filename, err))
1062 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1063 constants.SSL_CERT_EXPIRATION_ERROR)
1066 fnamemsg = "While verifying %s: %s" % (filename, msg)
1071 return (None, fnamemsg)
1072 elif errcode == utils.CERT_WARNING:
1073 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1074 elif errcode == utils.CERT_ERROR:
1075 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1077 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1080 class LUVerifyCluster(LogicalUnit):
1081 """Verifies the cluster status.
1084 HPATH = "cluster-verify"
1085 HTYPE = constants.HTYPE_CLUSTER
1086 _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1089 TCLUSTER = "cluster"
1091 TINSTANCE = "instance"
1093 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1094 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1095 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1096 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1097 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1098 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1099 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1100 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1101 ENODEDRBD = (TNODE, "ENODEDRBD")
1102 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1103 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1104 ENODEHV = (TNODE, "ENODEHV")
1105 ENODELVM = (TNODE, "ENODELVM")
1106 ENODEN1 = (TNODE, "ENODEN1")
1107 ENODENET = (TNODE, "ENODENET")
1108 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1109 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1110 ENODERPC = (TNODE, "ENODERPC")
1111 ENODESSH = (TNODE, "ENODESSH")
1112 ENODEVERSION = (TNODE, "ENODEVERSION")
1113 ENODESETUP = (TNODE, "ENODESETUP")
1114 ENODETIME = (TNODE, "ENODETIME")
1116 ETYPE_FIELD = "code"
1117 ETYPE_ERROR = "ERROR"
1118 ETYPE_WARNING = "WARNING"
1120 class NodeImage(object):
1121 """A class representing the logical and physical status of a node.
1123 @ivar volumes: a structure as returned from
1124 L{ganeti.backend.GetVolumeList} (runtime)
1125 @ivar instances: a list of running instances (runtime)
1126 @ivar pinst: list of configured primary instances (config)
1127 @ivar sinst: list of configured secondary instances (config)
1128 @ivar sbp: diction of {secondary-node: list of instances} of all peers
1129 of this node (config)
1130 @ivar mfree: free memory, as reported by hypervisor (runtime)
1131 @ivar dfree: free disk, as reported by the node (runtime)
1132 @ivar offline: the offline status (config)
1133 @type rpc_fail: boolean
1134 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1135 not whether the individual keys were correct) (runtime)
1136 @type lvm_fail: boolean
1137 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1138 @type hyp_fail: boolean
1139 @ivar hyp_fail: whether the RPC call didn't return the instance list
1140 @type ghost: boolean
1141 @ivar ghost: whether this is a known node or not (config)
1144 def __init__(self, offline=False):
1152 self.offline = offline
1153 self.rpc_fail = False
1154 self.lvm_fail = False
1155 self.hyp_fail = False
1158 def ExpandNames(self):
1159 self.needed_locks = {
1160 locking.LEVEL_NODE: locking.ALL_SET,
1161 locking.LEVEL_INSTANCE: locking.ALL_SET,
1163 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1165 def _Error(self, ecode, item, msg, *args, **kwargs):
1166 """Format an error message.
1168 Based on the opcode's error_codes parameter, either format a
1169 parseable error code, or a simpler error string.
1171 This must be called only from Exec and functions called from Exec.
1174 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1176 # first complete the msg
1179 # then format the whole message
1180 if self.op.error_codes:
1181 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1187 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1188 # and finally report it via the feedback_fn
1189 self._feedback_fn(" - %s" % msg)
1191 def _ErrorIf(self, cond, *args, **kwargs):
1192 """Log an error message if the passed condition is True.
1195 cond = bool(cond) or self.op.debug_simulate_errors
1197 self._Error(*args, **kwargs)
1198 # do not mark the operation as failed for WARN cases only
1199 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1200 self.bad = self.bad or cond
1202 def _VerifyNode(self, ninfo, nresult):
1203 """Run multiple tests against a node.
1207 - compares ganeti version
1208 - checks vg existence and size > 20G
1209 - checks config file checksum
1210 - checks ssh to other nodes
1212 @type ninfo: L{objects.Node}
1213 @param ninfo: the node to check
1214 @param nresult: the results from the node
1216 @return: whether overall this call was successful (and we can expect
1217 reasonable values in the respose)
1221 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1223 # main result, nresult should be a non-empty dict
1224 test = not nresult or not isinstance(nresult, dict)
1225 _ErrorIf(test, self.ENODERPC, node,
1226 "unable to verify node: no data returned")
1230 # compares ganeti version
1231 local_version = constants.PROTOCOL_VERSION
1232 remote_version = nresult.get("version", None)
1233 test = not (remote_version and
1234 isinstance(remote_version, (list, tuple)) and
1235 len(remote_version) == 2)
1236 _ErrorIf(test, self.ENODERPC, node,
1237 "connection to node returned invalid data")
1241 test = local_version != remote_version[0]
1242 _ErrorIf(test, self.ENODEVERSION, node,
1243 "incompatible protocol versions: master %s,"
1244 " node %s", local_version, remote_version[0])
1248 # node seems compatible, we can actually try to look into its results
1250 # full package version
1251 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1252 self.ENODEVERSION, node,
1253 "software version mismatch: master %s, node %s",
1254 constants.RELEASE_VERSION, remote_version[1],
1255 code=self.ETYPE_WARNING)
1257 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1258 if isinstance(hyp_result, dict):
1259 for hv_name, hv_result in hyp_result.iteritems():
1260 test = hv_result is not None
1261 _ErrorIf(test, self.ENODEHV, node,
1262 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1265 test = nresult.get(constants.NV_NODESETUP,
1266 ["Missing NODESETUP results"])
1267 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1272 def _VerifyNodeTime(self, ninfo, nresult,
1273 nvinfo_starttime, nvinfo_endtime):
1274 """Check the node time.
1276 @type ninfo: L{objects.Node}
1277 @param ninfo: the node to check
1278 @param nresult: the remote results for the node
1279 @param nvinfo_starttime: the start time of the RPC call
1280 @param nvinfo_endtime: the end time of the RPC call
1284 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1286 ntime = nresult.get(constants.NV_TIME, None)
1288 ntime_merged = utils.MergeTime(ntime)
1289 except (ValueError, TypeError):
1290 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1293 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1294 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1295 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1296 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1300 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1301 "Node time diverges by at least %s from master node time",
1304 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1305 """Check the node time.
1307 @type ninfo: L{objects.Node}
1308 @param ninfo: the node to check
1309 @param nresult: the remote results for the node
1310 @param vg_name: the configured VG name
1317 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1319 # checks vg existence and size > 20G
1320 vglist = nresult.get(constants.NV_VGLIST, None)
1322 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1324 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1325 constants.MIN_VG_SIZE)
1326 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1329 pvlist = nresult.get(constants.NV_PVLIST, None)
1330 test = pvlist is None
1331 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1333 # check that ':' is not present in PV names, since it's a
1334 # special character for lvcreate (denotes the range of PEs to
1336 for _, pvname, owner_vg in pvlist:
1337 test = ":" in pvname
1338 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1339 " '%s' of VG '%s'", pvname, owner_vg)
1341 def _VerifyNodeNetwork(self, ninfo, nresult):
1342 """Check the node time.
1344 @type ninfo: L{objects.Node}
1345 @param ninfo: the node to check
1346 @param nresult: the remote results for the node
1350 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1352 test = constants.NV_NODELIST not in nresult
1353 _ErrorIf(test, self.ENODESSH, node,
1354 "node hasn't returned node ssh connectivity data")
1356 if nresult[constants.NV_NODELIST]:
1357 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1358 _ErrorIf(True, self.ENODESSH, node,
1359 "ssh communication with node '%s': %s", a_node, a_msg)
1361 test = constants.NV_NODENETTEST not in nresult
1362 _ErrorIf(test, self.ENODENET, node,
1363 "node hasn't returned node tcp connectivity data")
1365 if nresult[constants.NV_NODENETTEST]:
1366 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1368 _ErrorIf(True, self.ENODENET, node,
1369 "tcp communication with node '%s': %s",
1370 anode, nresult[constants.NV_NODENETTEST][anode])
1372 test = constants.NV_MASTERIP not in nresult
1373 _ErrorIf(test, self.ENODENET, node,
1374 "node hasn't returned node master IP reachability data")
1376 if not nresult[constants.NV_MASTERIP]:
1377 if node == self.master_node:
1378 msg = "the master node cannot reach the master IP (not configured?)"
1380 msg = "cannot reach the master IP"
1381 _ErrorIf(True, self.ENODENET, node, msg)
1384 def _VerifyInstance(self, instance, instanceconfig, node_image):
1385 """Verify an instance.
1387 This function checks to see if the required block devices are
1388 available on the instance's node.
1391 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1392 node_current = instanceconfig.primary_node
1394 node_vol_should = {}
1395 instanceconfig.MapLVsByNode(node_vol_should)
1397 for node in node_vol_should:
1398 n_img = node_image[node]
1399 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1400 # ignore missing volumes on offline or broken nodes
1402 for volume in node_vol_should[node]:
1403 test = volume not in n_img.volumes
1404 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1405 "volume %s missing on node %s", volume, node)
1407 if instanceconfig.admin_up:
1408 pri_img = node_image[node_current]
1409 test = instance not in pri_img.instances and not pri_img.offline
1410 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1411 "instance not running on its primary node %s",
1414 for node, n_img in node_image.items():
1415 if (not node == node_current):
1416 test = instance in n_img.instances
1417 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1418 "instance should not run on node %s", node)
1420 def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1421 """Verify if there are any unknown volumes in the cluster.
1423 The .os, .swap and backup volumes are ignored. All other volumes are
1424 reported as unknown.
1427 for node, n_img in node_image.items():
1428 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1429 # skip non-healthy nodes
1431 for volume in n_img.volumes:
1432 test = (node not in node_vol_should or
1433 volume not in node_vol_should[node])
1434 self._ErrorIf(test, self.ENODEORPHANLV, node,
1435 "volume %s is unknown", volume)
1437 def _VerifyOrphanInstances(self, instancelist, node_image):
1438 """Verify the list of running instances.
1440 This checks what instances are running but unknown to the cluster.
1443 for node, n_img in node_image.items():
1444 for o_inst in n_img.instances:
1445 test = o_inst not in instancelist
1446 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1447 "instance %s on node %s should not exist", o_inst, node)
1449 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1450 """Verify N+1 Memory Resilience.
1452 Check that if one single node dies we can still start all the
1453 instances it was primary for.
1456 for node, n_img in node_image.items():
1457 # This code checks that every node which is now listed as
1458 # secondary has enough memory to host all instances it is
1459 # supposed to should a single other node in the cluster fail.
1460 # FIXME: not ready for failover to an arbitrary node
1461 # FIXME: does not support file-backed instances
1462 # WARNING: we currently take into account down instances as well
1463 # as up ones, considering that even if they're down someone
1464 # might want to start them even in the event of a node failure.
1465 for prinode, instances in n_img.sbp.items():
1467 for instance in instances:
1468 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1469 if bep[constants.BE_AUTO_BALANCE]:
1470 needed_mem += bep[constants.BE_MEMORY]
1471 test = n_img.mfree < needed_mem
1472 self._ErrorIf(test, self.ENODEN1, node,
1473 "not enough memory on to accommodate"
1474 " failovers should peer node %s fail", prinode)
1476 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1478 """Verifies and computes the node required file checksums.
1480 @type ninfo: L{objects.Node}
1481 @param ninfo: the node to check
1482 @param nresult: the remote results for the node
1483 @param file_list: required list of files
1484 @param local_cksum: dictionary of local files and their checksums
1485 @param master_files: list of files that only masters should have
1489 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1491 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1492 test = not isinstance(remote_cksum, dict)
1493 _ErrorIf(test, self.ENODEFILECHECK, node,
1494 "node hasn't returned file checksum data")
1498 for file_name in file_list:
1499 node_is_mc = ninfo.master_candidate
1500 must_have = (file_name not in master_files) or node_is_mc
1502 test1 = file_name not in remote_cksum
1504 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1506 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1507 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1508 "file '%s' missing", file_name)
1509 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1510 "file '%s' has wrong checksum", file_name)
1511 # not candidate and this is not a must-have file
1512 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1513 "file '%s' should not exist on non master"
1514 " candidates (and the file is outdated)", file_name)
1515 # all good, except non-master/non-must have combination
1516 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1517 "file '%s' should not exist"
1518 " on non master candidates", file_name)
1520 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1521 """Verifies and the node DRBD status.
1523 @type ninfo: L{objects.Node}
1524 @param ninfo: the node to check
1525 @param nresult: the remote results for the node
1526 @param instanceinfo: the dict of instances
1527 @param drbd_map: the DRBD map as returned by
1528 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1532 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1534 # compute the DRBD minors
1536 for minor, instance in drbd_map[node].items():
1537 test = instance not in instanceinfo
1538 _ErrorIf(test, self.ECLUSTERCFG, None,
1539 "ghost instance '%s' in temporary DRBD map", instance)
1540 # ghost instance should not be running, but otherwise we
1541 # don't give double warnings (both ghost instance and
1542 # unallocated minor in use)
1544 node_drbd[minor] = (instance, False)
1546 instance = instanceinfo[instance]
1547 node_drbd[minor] = (instance.name, instance.admin_up)
1549 # and now check them
1550 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1551 test = not isinstance(used_minors, (tuple, list))
1552 _ErrorIf(test, self.ENODEDRBD, node,
1553 "cannot parse drbd status file: %s", str(used_minors))
1555 # we cannot check drbd status
1558 for minor, (iname, must_exist) in node_drbd.items():
1559 test = minor not in used_minors and must_exist
1560 _ErrorIf(test, self.ENODEDRBD, node,
1561 "drbd minor %d of instance %s is not active", minor, iname)
1562 for minor in used_minors:
1563 test = minor not in node_drbd
1564 _ErrorIf(test, self.ENODEDRBD, node,
1565 "unallocated drbd minor %d is in use", minor)
1567 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1568 """Verifies and updates the node volume data.
1570 This function will update a L{NodeImage}'s internal structures
1571 with data from the remote call.
1573 @type ninfo: L{objects.Node}
1574 @param ninfo: the node to check
1575 @param nresult: the remote results for the node
1576 @param nimg: the node image object
1577 @param vg_name: the configured VG name
1581 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1583 nimg.lvm_fail = True
1584 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1587 elif isinstance(lvdata, basestring):
1588 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1589 utils.SafeEncode(lvdata))
1590 elif not isinstance(lvdata, dict):
1591 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1593 nimg.volumes = lvdata
1594 nimg.lvm_fail = False
1596 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1597 """Verifies and updates the node instance list.
1599 If the listing was successful, then updates this node's instance
1600 list. Otherwise, it marks the RPC call as failed for the instance
1603 @type ninfo: L{objects.Node}
1604 @param ninfo: the node to check
1605 @param nresult: the remote results for the node
1606 @param nimg: the node image object
1609 idata = nresult.get(constants.NV_INSTANCELIST, None)
1610 test = not isinstance(idata, list)
1611 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1612 " (instancelist): %s", utils.SafeEncode(str(idata)))
1614 nimg.hyp_fail = True
1616 nimg.instances = idata
1618 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1619 """Verifies and computes a node information map
1621 @type ninfo: L{objects.Node}
1622 @param ninfo: the node to check
1623 @param nresult: the remote results for the node
1624 @param nimg: the node image object
1625 @param vg_name: the configured VG name
1629 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1631 # try to read free memory (from the hypervisor)
1632 hv_info = nresult.get(constants.NV_HVINFO, None)
1633 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1634 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1637 nimg.mfree = int(hv_info["memory_free"])
1638 except (ValueError, TypeError):
1639 _ErrorIf(True, self.ENODERPC, node,
1640 "node returned invalid nodeinfo, check hypervisor")
1642 # FIXME: devise a free space model for file based instances as well
1643 if vg_name is not None:
1644 test = (constants.NV_VGLIST not in nresult or
1645 vg_name not in nresult[constants.NV_VGLIST])
1646 _ErrorIf(test, self.ENODELVM, node,
1647 "node didn't return data for the volume group '%s'"
1648 " - it is either missing or broken", vg_name)
1651 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1652 except (ValueError, TypeError):
1653 _ErrorIf(True, self.ENODERPC, node,
1654 "node returned invalid LVM info, check LVM status")
1656 def CheckPrereq(self):
1657 """Check prerequisites.
1659 Transform the list of checks we're going to skip into a set and check that
1660 all its members are valid.
1663 self.skip_set = frozenset(self.op.skip_checks)
1664 if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1665 raise errors.OpPrereqError("Invalid checks to be skipped specified",
1668 def BuildHooksEnv(self):
1671 Cluster-Verify hooks just ran in the post phase and their failure makes
1672 the output be logged in the verify output and the verification to fail.
1675 all_nodes = self.cfg.GetNodeList()
1677 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1679 for node in self.cfg.GetAllNodesInfo().values():
1680 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1682 return env, [], all_nodes
1684 def Exec(self, feedback_fn):
1685 """Verify integrity of cluster, performing various test on nodes.
1689 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1690 verbose = self.op.verbose
1691 self._feedback_fn = feedback_fn
1692 feedback_fn("* Verifying global settings")
1693 for msg in self.cfg.VerifyConfig():
1694 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1696 # Check the cluster certificates
1697 for cert_filename in constants.ALL_CERT_FILES:
1698 (errcode, msg) = _VerifyCertificate(cert_filename)
1699 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1701 vg_name = self.cfg.GetVGName()
1702 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1703 cluster = self.cfg.GetClusterInfo()
1704 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1705 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1706 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1707 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1708 for iname in instancelist)
1709 i_non_redundant = [] # Non redundant instances
1710 i_non_a_balanced = [] # Non auto-balanced instances
1711 n_offline = 0 # Count of offline nodes
1712 n_drained = 0 # Count of nodes being drained
1713 node_vol_should = {}
1715 # FIXME: verify OS list
1716 # do local checksums
1717 master_files = [constants.CLUSTER_CONF_FILE]
1718 master_node = self.master_node = self.cfg.GetMasterNode()
1719 master_ip = self.cfg.GetMasterIP()
1721 file_names = ssconf.SimpleStore().GetFileList()
1722 file_names.extend(constants.ALL_CERT_FILES)
1723 file_names.extend(master_files)
1724 if cluster.modify_etc_hosts:
1725 file_names.append(constants.ETC_HOSTS)
1727 local_checksums = utils.FingerprintFiles(file_names)
1729 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1730 node_verify_param = {
1731 constants.NV_FILELIST: file_names,
1732 constants.NV_NODELIST: [node.name for node in nodeinfo
1733 if not node.offline],
1734 constants.NV_HYPERVISOR: hypervisors,
1735 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1736 node.secondary_ip) for node in nodeinfo
1737 if not node.offline],
1738 constants.NV_INSTANCELIST: hypervisors,
1739 constants.NV_VERSION: None,
1740 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1741 constants.NV_NODESETUP: None,
1742 constants.NV_TIME: None,
1743 constants.NV_MASTERIP: (master_node, master_ip),
1746 if vg_name is not None:
1747 node_verify_param[constants.NV_VGLIST] = None
1748 node_verify_param[constants.NV_LVLIST] = vg_name
1749 node_verify_param[constants.NV_PVLIST] = [vg_name]
1750 node_verify_param[constants.NV_DRBDLIST] = None
1752 # Build our expected cluster state
1753 node_image = dict((node.name, self.NodeImage(offline=node.offline))
1754 for node in nodeinfo)
1756 for instance in instancelist:
1757 inst_config = instanceinfo[instance]
1759 for nname in inst_config.all_nodes:
1760 if nname not in node_image:
1762 gnode = self.NodeImage()
1764 node_image[nname] = gnode
1766 inst_config.MapLVsByNode(node_vol_should)
1768 pnode = inst_config.primary_node
1769 node_image[pnode].pinst.append(instance)
1771 for snode in inst_config.secondary_nodes:
1772 nimg = node_image[snode]
1773 nimg.sinst.append(instance)
1774 if pnode not in nimg.sbp:
1775 nimg.sbp[pnode] = []
1776 nimg.sbp[pnode].append(instance)
1778 # At this point, we have the in-memory data structures complete,
1779 # except for the runtime information, which we'll gather next
1781 # Due to the way our RPC system works, exact response times cannot be
1782 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1783 # time before and after executing the request, we can at least have a time
1785 nvinfo_starttime = time.time()
1786 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1787 self.cfg.GetClusterName())
1788 nvinfo_endtime = time.time()
1790 all_drbd_map = self.cfg.ComputeDRBDMap()
1792 feedback_fn("* Verifying node status")
1793 for node_i in nodeinfo:
1795 nimg = node_image[node]
1799 feedback_fn("* Skipping offline node %s" % (node,))
1803 if node == master_node:
1805 elif node_i.master_candidate:
1806 ntype = "master candidate"
1807 elif node_i.drained:
1813 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1815 msg = all_nvinfo[node].fail_msg
1816 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1818 nimg.rpc_fail = True
1821 nresult = all_nvinfo[node].payload
1823 nimg.call_ok = self._VerifyNode(node_i, nresult)
1824 self._VerifyNodeNetwork(node_i, nresult)
1825 self._VerifyNodeLVM(node_i, nresult, vg_name)
1826 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1828 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1829 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1831 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1832 self._UpdateNodeInstances(node_i, nresult, nimg)
1833 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1835 feedback_fn("* Verifying instance status")
1836 for instance in instancelist:
1838 feedback_fn("* Verifying instance %s" % instance)
1839 inst_config = instanceinfo[instance]
1840 self._VerifyInstance(instance, inst_config, node_image)
1841 inst_nodes_offline = []
1843 pnode = inst_config.primary_node
1844 pnode_img = node_image[pnode]
1845 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1846 self.ENODERPC, pnode, "instance %s, connection to"
1847 " primary node failed", instance)
1849 if pnode_img.offline:
1850 inst_nodes_offline.append(pnode)
1852 # If the instance is non-redundant we cannot survive losing its primary
1853 # node, so we are not N+1 compliant. On the other hand we have no disk
1854 # templates with more than one secondary so that situation is not well
1856 # FIXME: does not support file-backed instances
1857 if not inst_config.secondary_nodes:
1858 i_non_redundant.append(instance)
1859 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
1860 instance, "instance has multiple secondary nodes: %s",
1861 utils.CommaJoin(inst_config.secondary_nodes),
1862 code=self.ETYPE_WARNING)
1864 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1865 i_non_a_balanced.append(instance)
1867 for snode in inst_config.secondary_nodes:
1868 s_img = node_image[snode]
1869 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
1870 "instance %s, connection to secondary node failed", instance)
1873 inst_nodes_offline.append(snode)
1875 # warn that the instance lives on offline nodes
1876 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1877 "instance lives on offline node(s) %s",
1878 utils.CommaJoin(inst_nodes_offline))
1879 # ... or ghost nodes
1880 for node in inst_config.all_nodes:
1881 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
1882 "instance lives on ghost node %s", node)
1884 feedback_fn("* Verifying orphan volumes")
1885 self._VerifyOrphanVolumes(node_vol_should, node_image)
1887 feedback_fn("* Verifying orphan instances")
1888 self._VerifyOrphanInstances(instancelist, node_image)
1890 if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1891 feedback_fn("* Verifying N+1 Memory redundancy")
1892 self._VerifyNPlusOneMemory(node_image, instanceinfo)
1894 feedback_fn("* Other Notes")
1896 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
1897 % len(i_non_redundant))
1899 if i_non_a_balanced:
1900 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
1901 % len(i_non_a_balanced))
1904 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
1907 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
1911 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1912 """Analyze the post-hooks' result
1914 This method analyses the hook result, handles it, and sends some
1915 nicely-formatted feedback back to the user.
1917 @param phase: one of L{constants.HOOKS_PHASE_POST} or
1918 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1919 @param hooks_results: the results of the multi-node hooks rpc call
1920 @param feedback_fn: function used send feedback back to the caller
1921 @param lu_result: previous Exec result
1922 @return: the new Exec result, based on the previous result
1926 # We only really run POST phase hooks, and are only interested in
1928 if phase == constants.HOOKS_PHASE_POST:
1929 # Used to change hooks' output to proper indentation
1930 indent_re = re.compile('^', re.M)
1931 feedback_fn("* Hooks Results")
1932 assert hooks_results, "invalid result from hooks"
1934 for node_name in hooks_results:
1935 res = hooks_results[node_name]
1937 test = msg and not res.offline
1938 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1939 "Communication failure in hooks execution: %s", msg)
1940 if res.offline or msg:
1941 # No need to investigate payload if node is offline or gave an error.
1942 # override manually lu_result here as _ErrorIf only
1943 # overrides self.bad
1946 for script, hkr, output in res.payload:
1947 test = hkr == constants.HKR_FAIL
1948 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1949 "Script %s failed, output:", script)
1951 output = indent_re.sub(' ', output)
1952 feedback_fn("%s" % output)
1958 class LUVerifyDisks(NoHooksLU):
1959 """Verifies the cluster disks status.
1965 def ExpandNames(self):
1966 self.needed_locks = {
1967 locking.LEVEL_NODE: locking.ALL_SET,
1968 locking.LEVEL_INSTANCE: locking.ALL_SET,
1970 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1972 def CheckPrereq(self):
1973 """Check prerequisites.
1975 This has no prerequisites.
1980 def Exec(self, feedback_fn):
1981 """Verify integrity of cluster disks.
1983 @rtype: tuple of three items
1984 @return: a tuple of (dict of node-to-node_error, list of instances
1985 which need activate-disks, dict of instance: (node, volume) for
1989 result = res_nodes, res_instances, res_missing = {}, [], {}
1991 vg_name = self.cfg.GetVGName()
1992 nodes = utils.NiceSort(self.cfg.GetNodeList())
1993 instances = [self.cfg.GetInstanceInfo(name)
1994 for name in self.cfg.GetInstanceList()]
1997 for inst in instances:
1999 if (not inst.admin_up or
2000 inst.disk_template not in constants.DTS_NET_MIRROR):
2002 inst.MapLVsByNode(inst_lvs)
2003 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2004 for node, vol_list in inst_lvs.iteritems():
2005 for vol in vol_list:
2006 nv_dict[(node, vol)] = inst
2011 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2015 node_res = node_lvs[node]
2016 if node_res.offline:
2018 msg = node_res.fail_msg
2020 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2021 res_nodes[node] = msg
2024 lvs = node_res.payload
2025 for lv_name, (_, _, lv_online) in lvs.items():
2026 inst = nv_dict.pop((node, lv_name), None)
2027 if (not lv_online and inst is not None
2028 and inst.name not in res_instances):
2029 res_instances.append(inst.name)
2031 # any leftover items in nv_dict are missing LVs, let's arrange the
2033 for key, inst in nv_dict.iteritems():
2034 if inst.name not in res_missing:
2035 res_missing[inst.name] = []
2036 res_missing[inst.name].append(key)
2041 class LURepairDiskSizes(NoHooksLU):
2042 """Verifies the cluster disks sizes.
2045 _OP_REQP = ["instances"]
2048 def ExpandNames(self):
2049 if not isinstance(self.op.instances, list):
2050 raise errors.OpPrereqError("Invalid argument type 'instances'",
2053 if self.op.instances:
2054 self.wanted_names = []
2055 for name in self.op.instances:
2056 full_name = _ExpandInstanceName(self.cfg, name)
2057 self.wanted_names.append(full_name)
2058 self.needed_locks = {
2059 locking.LEVEL_NODE: [],
2060 locking.LEVEL_INSTANCE: self.wanted_names,
2062 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2064 self.wanted_names = None
2065 self.needed_locks = {
2066 locking.LEVEL_NODE: locking.ALL_SET,
2067 locking.LEVEL_INSTANCE: locking.ALL_SET,
2069 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2071 def DeclareLocks(self, level):
2072 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2073 self._LockInstancesNodes(primary_only=True)
2075 def CheckPrereq(self):
2076 """Check prerequisites.
2078 This only checks the optional instance list against the existing names.
2081 if self.wanted_names is None:
2082 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2084 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2085 in self.wanted_names]
2087 def _EnsureChildSizes(self, disk):
2088 """Ensure children of the disk have the needed disk size.
2090 This is valid mainly for DRBD8 and fixes an issue where the
2091 children have smaller disk size.
2093 @param disk: an L{ganeti.objects.Disk} object
2096 if disk.dev_type == constants.LD_DRBD8:
2097 assert disk.children, "Empty children for DRBD8?"
2098 fchild = disk.children[0]
2099 mismatch = fchild.size < disk.size
2101 self.LogInfo("Child disk has size %d, parent %d, fixing",
2102 fchild.size, disk.size)
2103 fchild.size = disk.size
2105 # and we recurse on this child only, not on the metadev
2106 return self._EnsureChildSizes(fchild) or mismatch
2110 def Exec(self, feedback_fn):
2111 """Verify the size of cluster disks.
2114 # TODO: check child disks too
2115 # TODO: check differences in size between primary/secondary nodes
2117 for instance in self.wanted_instances:
2118 pnode = instance.primary_node
2119 if pnode not in per_node_disks:
2120 per_node_disks[pnode] = []
2121 for idx, disk in enumerate(instance.disks):
2122 per_node_disks[pnode].append((instance, idx, disk))
2125 for node, dskl in per_node_disks.items():
2126 newl = [v[2].Copy() for v in dskl]
2128 self.cfg.SetDiskID(dsk, node)
2129 result = self.rpc.call_blockdev_getsizes(node, newl)
2131 self.LogWarning("Failure in blockdev_getsizes call to node"
2132 " %s, ignoring", node)
2134 if len(result.data) != len(dskl):
2135 self.LogWarning("Invalid result from node %s, ignoring node results",
2138 for ((instance, idx, disk), size) in zip(dskl, result.data):
2140 self.LogWarning("Disk %d of instance %s did not return size"
2141 " information, ignoring", idx, instance.name)
2143 if not isinstance(size, (int, long)):
2144 self.LogWarning("Disk %d of instance %s did not return valid"
2145 " size information, ignoring", idx, instance.name)
2148 if size != disk.size:
2149 self.LogInfo("Disk %d of instance %s has mismatched size,"
2150 " correcting: recorded %d, actual %d", idx,
2151 instance.name, disk.size, size)
2153 self.cfg.Update(instance, feedback_fn)
2154 changed.append((instance.name, idx, size))
2155 if self._EnsureChildSizes(disk):
2156 self.cfg.Update(instance, feedback_fn)
2157 changed.append((instance.name, idx, disk.size))
2161 class LURenameCluster(LogicalUnit):
2162 """Rename the cluster.
2165 HPATH = "cluster-rename"
2166 HTYPE = constants.HTYPE_CLUSTER
2169 def BuildHooksEnv(self):
2174 "OP_TARGET": self.cfg.GetClusterName(),
2175 "NEW_NAME": self.op.name,
2177 mn = self.cfg.GetMasterNode()
2178 all_nodes = self.cfg.GetNodeList()
2179 return env, [mn], all_nodes
2181 def CheckPrereq(self):
2182 """Verify that the passed name is a valid one.
2185 hostname = utils.GetHostInfo(self.op.name)
2187 new_name = hostname.name
2188 self.ip = new_ip = hostname.ip
2189 old_name = self.cfg.GetClusterName()
2190 old_ip = self.cfg.GetMasterIP()
2191 if new_name == old_name and new_ip == old_ip:
2192 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2193 " cluster has changed",
2195 if new_ip != old_ip:
2196 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2197 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2198 " reachable on the network. Aborting." %
2199 new_ip, errors.ECODE_NOTUNIQUE)
2201 self.op.name = new_name
2203 def Exec(self, feedback_fn):
2204 """Rename the cluster.
2207 clustername = self.op.name
2210 # shutdown the master IP
2211 master = self.cfg.GetMasterNode()
2212 result = self.rpc.call_node_stop_master(master, False)
2213 result.Raise("Could not disable the master role")
2216 cluster = self.cfg.GetClusterInfo()
2217 cluster.cluster_name = clustername
2218 cluster.master_ip = ip
2219 self.cfg.Update(cluster, feedback_fn)
2221 # update the known hosts file
2222 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2223 node_list = self.cfg.GetNodeList()
2225 node_list.remove(master)
2228 result = self.rpc.call_upload_file(node_list,
2229 constants.SSH_KNOWN_HOSTS_FILE)
2230 for to_node, to_result in result.iteritems():
2231 msg = to_result.fail_msg
2233 msg = ("Copy of file %s to node %s failed: %s" %
2234 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2235 self.proc.LogWarning(msg)
2238 result = self.rpc.call_node_start_master(master, False, False)
2239 msg = result.fail_msg
2241 self.LogWarning("Could not re-enable the master role on"
2242 " the master, please restart manually: %s", msg)
2245 def _RecursiveCheckIfLVMBased(disk):
2246 """Check if the given disk or its children are lvm-based.
2248 @type disk: L{objects.Disk}
2249 @param disk: the disk to check
2251 @return: boolean indicating whether a LD_LV dev_type was found or not
2255 for chdisk in disk.children:
2256 if _RecursiveCheckIfLVMBased(chdisk):
2258 return disk.dev_type == constants.LD_LV
2261 class LUSetClusterParams(LogicalUnit):
2262 """Change the parameters of the cluster.
2265 HPATH = "cluster-modify"
2266 HTYPE = constants.HTYPE_CLUSTER
2270 def CheckArguments(self):
2274 for attr in ["candidate_pool_size",
2275 "uid_pool", "add_uids", "remove_uids"]:
2276 if not hasattr(self.op, attr):
2277 setattr(self.op, attr, None)
2279 if self.op.candidate_pool_size is not None:
2281 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2282 except (ValueError, TypeError), err:
2283 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2284 str(err), errors.ECODE_INVAL)
2285 if self.op.candidate_pool_size < 1:
2286 raise errors.OpPrereqError("At least one master candidate needed",
2289 _CheckBooleanOpField(self.op, "maintain_node_health")
2291 if self.op.uid_pool:
2292 uidpool.CheckUidPool(self.op.uid_pool)
2294 if self.op.add_uids:
2295 uidpool.CheckUidPool(self.op.add_uids)
2297 if self.op.remove_uids:
2298 uidpool.CheckUidPool(self.op.remove_uids)
2300 def ExpandNames(self):
2301 # FIXME: in the future maybe other cluster params won't require checking on
2302 # all nodes to be modified.
2303 self.needed_locks = {
2304 locking.LEVEL_NODE: locking.ALL_SET,
2306 self.share_locks[locking.LEVEL_NODE] = 1
2308 def BuildHooksEnv(self):
2313 "OP_TARGET": self.cfg.GetClusterName(),
2314 "NEW_VG_NAME": self.op.vg_name,
2316 mn = self.cfg.GetMasterNode()
2317 return env, [mn], [mn]
2319 def CheckPrereq(self):
2320 """Check prerequisites.
2322 This checks whether the given params don't conflict and
2323 if the given volume group is valid.
2326 if self.op.vg_name is not None and not self.op.vg_name:
2327 instances = self.cfg.GetAllInstancesInfo().values()
2328 for inst in instances:
2329 for disk in inst.disks:
2330 if _RecursiveCheckIfLVMBased(disk):
2331 raise errors.OpPrereqError("Cannot disable lvm storage while"
2332 " lvm-based instances exist",
2335 node_list = self.acquired_locks[locking.LEVEL_NODE]
2337 # if vg_name not None, checks given volume group on all nodes
2339 vglist = self.rpc.call_vg_list(node_list)
2340 for node in node_list:
2341 msg = vglist[node].fail_msg
2343 # ignoring down node
2344 self.LogWarning("Error while gathering data on node %s"
2345 " (ignoring node): %s", node, msg)
2347 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2349 constants.MIN_VG_SIZE)
2351 raise errors.OpPrereqError("Error on node '%s': %s" %
2352 (node, vgstatus), errors.ECODE_ENVIRON)
2354 self.cluster = cluster = self.cfg.GetClusterInfo()
2355 # validate params changes
2356 if self.op.beparams:
2357 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2358 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2360 if self.op.nicparams:
2361 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2362 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2363 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2366 # check all instances for consistency
2367 for instance in self.cfg.GetAllInstancesInfo().values():
2368 for nic_idx, nic in enumerate(instance.nics):
2369 params_copy = copy.deepcopy(nic.nicparams)
2370 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2372 # check parameter syntax
2374 objects.NIC.CheckParameterSyntax(params_filled)
2375 except errors.ConfigurationError, err:
2376 nic_errors.append("Instance %s, nic/%d: %s" %
2377 (instance.name, nic_idx, err))
2379 # if we're moving instances to routed, check that they have an ip
2380 target_mode = params_filled[constants.NIC_MODE]
2381 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2382 nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2383 (instance.name, nic_idx))
2385 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2386 "\n".join(nic_errors))
2388 # hypervisor list/parameters
2389 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2390 if self.op.hvparams:
2391 if not isinstance(self.op.hvparams, dict):
2392 raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2394 for hv_name, hv_dict in self.op.hvparams.items():
2395 if hv_name not in self.new_hvparams:
2396 self.new_hvparams[hv_name] = hv_dict
2398 self.new_hvparams[hv_name].update(hv_dict)
2400 # os hypervisor parameters
2401 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2403 if not isinstance(self.op.os_hvp, dict):
2404 raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2406 for os_name, hvs in self.op.os_hvp.items():
2407 if not isinstance(hvs, dict):
2408 raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2409 " input"), errors.ECODE_INVAL)
2410 if os_name not in self.new_os_hvp:
2411 self.new_os_hvp[os_name] = hvs
2413 for hv_name, hv_dict in hvs.items():
2414 if hv_name not in self.new_os_hvp[os_name]:
2415 self.new_os_hvp[os_name][hv_name] = hv_dict
2417 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2419 # changes to the hypervisor list
2420 if self.op.enabled_hypervisors is not None:
2421 self.hv_list = self.op.enabled_hypervisors
2422 if not self.hv_list:
2423 raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2424 " least one member",
2426 invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2428 raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2430 utils.CommaJoin(invalid_hvs),
2432 for hv in self.hv_list:
2433 # if the hypervisor doesn't already exist in the cluster
2434 # hvparams, we initialize it to empty, and then (in both
2435 # cases) we make sure to fill the defaults, as we might not
2436 # have a complete defaults list if the hypervisor wasn't
2438 if hv not in new_hvp:
2440 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2441 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2443 self.hv_list = cluster.enabled_hypervisors
2445 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2446 # either the enabled list has changed, or the parameters have, validate
2447 for hv_name, hv_params in self.new_hvparams.items():
2448 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2449 (self.op.enabled_hypervisors and
2450 hv_name in self.op.enabled_hypervisors)):
2451 # either this is a new hypervisor, or its parameters have changed
2452 hv_class = hypervisor.GetHypervisor(hv_name)
2453 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2454 hv_class.CheckParameterSyntax(hv_params)
2455 _CheckHVParams(self, node_list, hv_name, hv_params)
2458 # no need to check any newly-enabled hypervisors, since the
2459 # defaults have already been checked in the above code-block
2460 for os_name, os_hvp in self.new_os_hvp.items():
2461 for hv_name, hv_params in os_hvp.items():
2462 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2463 # we need to fill in the new os_hvp on top of the actual hv_p
2464 cluster_defaults = self.new_hvparams.get(hv_name, {})
2465 new_osp = objects.FillDict(cluster_defaults, hv_params)
2466 hv_class = hypervisor.GetHypervisor(hv_name)
2467 hv_class.CheckParameterSyntax(new_osp)
2468 _CheckHVParams(self, node_list, hv_name, new_osp)
2471 def Exec(self, feedback_fn):
2472 """Change the parameters of the cluster.
2475 if self.op.vg_name is not None:
2476 new_volume = self.op.vg_name
2479 if new_volume != self.cfg.GetVGName():
2480 self.cfg.SetVGName(new_volume)
2482 feedback_fn("Cluster LVM configuration already in desired"
2483 " state, not changing")
2484 if self.op.hvparams:
2485 self.cluster.hvparams = self.new_hvparams
2487 self.cluster.os_hvp = self.new_os_hvp
2488 if self.op.enabled_hypervisors is not None:
2489 self.cluster.hvparams = self.new_hvparams
2490 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2491 if self.op.beparams:
2492 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2493 if self.op.nicparams:
2494 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2496 if self.op.candidate_pool_size is not None:
2497 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2498 # we need to update the pool size here, otherwise the save will fail
2499 _AdjustCandidatePool(self, [])
2501 if self.op.maintain_node_health is not None:
2502 self.cluster.maintain_node_health = self.op.maintain_node_health
2504 if self.op.add_uids is not None:
2505 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2507 if self.op.remove_uids is not None:
2508 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2510 if self.op.uid_pool is not None:
2511 self.cluster.uid_pool = self.op.uid_pool
2513 self.cfg.Update(self.cluster, feedback_fn)
2516 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2517 """Distribute additional files which are part of the cluster configuration.
2519 ConfigWriter takes care of distributing the config and ssconf files, but
2520 there are more files which should be distributed to all nodes. This function
2521 makes sure those are copied.
2523 @param lu: calling logical unit
2524 @param additional_nodes: list of nodes not in the config to distribute to
2527 # 1. Gather target nodes
2528 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2529 dist_nodes = lu.cfg.GetOnlineNodeList()
2530 if additional_nodes is not None:
2531 dist_nodes.extend(additional_nodes)
2532 if myself.name in dist_nodes:
2533 dist_nodes.remove(myself.name)
2535 # 2. Gather files to distribute
2536 dist_files = set([constants.ETC_HOSTS,
2537 constants.SSH_KNOWN_HOSTS_FILE,
2538 constants.RAPI_CERT_FILE,
2539 constants.RAPI_USERS_FILE,
2540 constants.CONFD_HMAC_KEY,
2541 constants.CLUSTER_DOMAIN_SECRET_FILE,
2544 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2545 for hv_name in enabled_hypervisors:
2546 hv_class = hypervisor.GetHypervisor(hv_name)
2547 dist_files.update(hv_class.GetAncillaryFiles())
2549 # 3. Perform the files upload
2550 for fname in dist_files:
2551 if os.path.exists(fname):
2552 result = lu.rpc.call_upload_file(dist_nodes, fname)
2553 for to_node, to_result in result.items():
2554 msg = to_result.fail_msg
2556 msg = ("Copy of file %s to node %s failed: %s" %
2557 (fname, to_node, msg))
2558 lu.proc.LogWarning(msg)
2561 class LURedistributeConfig(NoHooksLU):
2562 """Force the redistribution of cluster configuration.
2564 This is a very simple LU.
2570 def ExpandNames(self):
2571 self.needed_locks = {
2572 locking.LEVEL_NODE: locking.ALL_SET,
2574 self.share_locks[locking.LEVEL_NODE] = 1
2576 def CheckPrereq(self):
2577 """Check prerequisites.
2581 def Exec(self, feedback_fn):
2582 """Redistribute the configuration.
2585 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2586 _RedistributeAncillaryFiles(self)
2589 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2590 """Sleep and poll for an instance's disk to sync.
2593 if not instance.disks or disks is not None and not disks:
2596 disks = _ExpandCheckDisks(instance, disks)
2599 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2601 node = instance.primary_node
2604 lu.cfg.SetDiskID(dev, node)
2606 # TODO: Convert to utils.Retry
2609 degr_retries = 10 # in seconds, as we sleep 1 second each time
2613 cumul_degraded = False
2614 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2615 msg = rstats.fail_msg
2617 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2620 raise errors.RemoteError("Can't contact node %s for mirror data,"
2621 " aborting." % node)
2624 rstats = rstats.payload
2626 for i, mstat in enumerate(rstats):
2628 lu.LogWarning("Can't compute data for node %s/%s",
2629 node, disks[i].iv_name)
2632 cumul_degraded = (cumul_degraded or
2633 (mstat.is_degraded and mstat.sync_percent is None))
2634 if mstat.sync_percent is not None:
2636 if mstat.estimated_time is not None:
2637 rem_time = ("%s remaining (estimated)" %
2638 utils.FormatSeconds(mstat.estimated_time))
2639 max_time = mstat.estimated_time
2641 rem_time = "no time estimate"
2642 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2643 (disks[i].iv_name, mstat.sync_percent, rem_time))
2645 # if we're done but degraded, let's do a few small retries, to
2646 # make sure we see a stable and not transient situation; therefore
2647 # we force restart of the loop
2648 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2649 logging.info("Degraded disks found, %d retries left", degr_retries)
2657 time.sleep(min(60, max_time))
2660 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2661 return not cumul_degraded
2664 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2665 """Check that mirrors are not degraded.
2667 The ldisk parameter, if True, will change the test from the
2668 is_degraded attribute (which represents overall non-ok status for
2669 the device(s)) to the ldisk (representing the local storage status).
2672 lu.cfg.SetDiskID(dev, node)
2676 if on_primary or dev.AssembleOnSecondary():
2677 rstats = lu.rpc.call_blockdev_find(node, dev)
2678 msg = rstats.fail_msg
2680 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2682 elif not rstats.payload:
2683 lu.LogWarning("Can't find disk on node %s", node)
2687 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2689 result = result and not rstats.payload.is_degraded
2692 for child in dev.children:
2693 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2698 class LUDiagnoseOS(NoHooksLU):
2699 """Logical unit for OS diagnose/query.
2702 _OP_REQP = ["output_fields", "names"]
2704 _FIELDS_STATIC = utils.FieldSet()
2705 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2706 # Fields that need calculation of global os validity
2707 _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2709 def ExpandNames(self):
2711 raise errors.OpPrereqError("Selective OS query not supported",
2714 _CheckOutputFields(static=self._FIELDS_STATIC,
2715 dynamic=self._FIELDS_DYNAMIC,
2716 selected=self.op.output_fields)
2718 # Lock all nodes, in shared mode
2719 # Temporary removal of locks, should be reverted later
2720 # TODO: reintroduce locks when they are lighter-weight
2721 self.needed_locks = {}
2722 #self.share_locks[locking.LEVEL_NODE] = 1
2723 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2725 def CheckPrereq(self):
2726 """Check prerequisites.
2731 def _DiagnoseByOS(rlist):
2732 """Remaps a per-node return list into an a per-os per-node dictionary
2734 @param rlist: a map with node names as keys and OS objects as values
2737 @return: a dictionary with osnames as keys and as value another map, with
2738 nodes as keys and tuples of (path, status, diagnose) as values, eg::
2740 {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2741 (/srv/..., False, "invalid api")],
2742 "node2": [(/srv/..., True, "")]}
2747 # we build here the list of nodes that didn't fail the RPC (at RPC
2748 # level), so that nodes with a non-responding node daemon don't
2749 # make all OSes invalid
2750 good_nodes = [node_name for node_name in rlist
2751 if not rlist[node_name].fail_msg]
2752 for node_name, nr in rlist.items():
2753 if nr.fail_msg or not nr.payload:
2755 for name, path, status, diagnose, variants in nr.payload:
2756 if name not in all_os:
2757 # build a list of nodes for this os containing empty lists
2758 # for each node in node_list
2760 for nname in good_nodes:
2761 all_os[name][nname] = []
2762 all_os[name][node_name].append((path, status, diagnose, variants))
2765 def Exec(self, feedback_fn):
2766 """Compute the list of OSes.
2769 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2770 node_data = self.rpc.call_os_diagnose(valid_nodes)
2771 pol = self._DiagnoseByOS(node_data)
2773 calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2774 calc_variants = "variants" in self.op.output_fields
2776 for os_name, os_data in pol.items():
2781 for osl in os_data.values():
2782 valid = valid and osl and osl[0][1]
2787 node_variants = osl[0][3]
2788 if variants is None:
2789 variants = set(node_variants)
2791 variants.intersection_update(node_variants)
2793 for field in self.op.output_fields:
2796 elif field == "valid":
2798 elif field == "node_status":
2799 # this is just a copy of the dict
2801 for node_name, nos_list in os_data.items():
2802 val[node_name] = nos_list
2803 elif field == "variants":
2804 val = list(variants)
2806 raise errors.ParameterError(field)
2813 class LURemoveNode(LogicalUnit):
2814 """Logical unit for removing a node.
2817 HPATH = "node-remove"
2818 HTYPE = constants.HTYPE_NODE
2819 _OP_REQP = ["node_name"]
2821 def BuildHooksEnv(self):
2824 This doesn't run on the target node in the pre phase as a failed
2825 node would then be impossible to remove.
2829 "OP_TARGET": self.op.node_name,
2830 "NODE_NAME": self.op.node_name,
2832 all_nodes = self.cfg.GetNodeList()
2834 all_nodes.remove(self.op.node_name)
2836 logging.warning("Node %s which is about to be removed not found"
2837 " in the all nodes list", self.op.node_name)
2838 return env, all_nodes, all_nodes
2840 def CheckPrereq(self):
2841 """Check prerequisites.
2844 - the node exists in the configuration
2845 - it does not have primary or secondary instances
2846 - it's not the master
2848 Any errors are signaled by raising errors.OpPrereqError.
2851 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2852 node = self.cfg.GetNodeInfo(self.op.node_name)
2853 assert node is not None
2855 instance_list = self.cfg.GetInstanceList()
2857 masternode = self.cfg.GetMasterNode()
2858 if node.name == masternode:
2859 raise errors.OpPrereqError("Node is the master node,"
2860 " you need to failover first.",
2863 for instance_name in instance_list:
2864 instance = self.cfg.GetInstanceInfo(instance_name)
2865 if node.name in instance.all_nodes:
2866 raise errors.OpPrereqError("Instance %s is still running on the node,"
2867 " please remove first." % instance_name,
2869 self.op.node_name = node.name
2872 def Exec(self, feedback_fn):
2873 """Removes the node from the cluster.
2877 logging.info("Stopping the node daemon and removing configs from node %s",
2880 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2882 # Promote nodes to master candidate as needed
2883 _AdjustCandidatePool(self, exceptions=[node.name])
2884 self.context.RemoveNode(node.name)
2886 # Run post hooks on the node before it's removed
2887 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2889 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2891 # pylint: disable-msg=W0702
2892 self.LogWarning("Errors occurred running hooks on %s" % node.name)
2894 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2895 msg = result.fail_msg
2897 self.LogWarning("Errors encountered on the remote node while leaving"
2898 " the cluster: %s", msg)
2900 # Remove node from our /etc/hosts
2901 if self.cfg.GetClusterInfo().modify_etc_hosts:
2902 # FIXME: this should be done via an rpc call to node daemon
2903 utils.RemoveHostFromEtcHosts(node.name)
2904 _RedistributeAncillaryFiles(self)
2907 class LUQueryNodes(NoHooksLU):
2908 """Logical unit for querying nodes.
2911 # pylint: disable-msg=W0142
2912 _OP_REQP = ["output_fields", "names", "use_locking"]
2915 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2916 "master_candidate", "offline", "drained"]
2918 _FIELDS_DYNAMIC = utils.FieldSet(
2920 "mtotal", "mnode", "mfree",
2922 "ctotal", "cnodes", "csockets",
2925 _FIELDS_STATIC = utils.FieldSet(*[
2926 "pinst_cnt", "sinst_cnt",
2927 "pinst_list", "sinst_list",
2928 "pip", "sip", "tags",
2930 "role"] + _SIMPLE_FIELDS
2933 def ExpandNames(self):
2934 _CheckOutputFields(static=self._FIELDS_STATIC,
2935 dynamic=self._FIELDS_DYNAMIC,
2936 selected=self.op.output_fields)
2938 self.needed_locks = {}
2939 self.share_locks[locking.LEVEL_NODE] = 1
2942 self.wanted = _GetWantedNodes(self, self.op.names)
2944 self.wanted = locking.ALL_SET
2946 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2947 self.do_locking = self.do_node_query and self.op.use_locking
2949 # if we don't request only static fields, we need to lock the nodes
2950 self.needed_locks[locking.LEVEL_NODE] = self.wanted
2952 def CheckPrereq(self):
2953 """Check prerequisites.
2956 # The validation of the node list is done in the _GetWantedNodes,
2957 # if non empty, and if empty, there's no validation to do
2960 def Exec(self, feedback_fn):
2961 """Computes the list of nodes and their attributes.
2964 all_info = self.cfg.GetAllNodesInfo()
2966 nodenames = self.acquired_locks[locking.LEVEL_NODE]
2967 elif self.wanted != locking.ALL_SET:
2968 nodenames = self.wanted
2969 missing = set(nodenames).difference(all_info.keys())
2971 raise errors.OpExecError(
2972 "Some nodes were removed before retrieving their data: %s" % missing)
2974 nodenames = all_info.keys()
2976 nodenames = utils.NiceSort(nodenames)
2977 nodelist = [all_info[name] for name in nodenames]
2979 # begin data gathering
2981 if self.do_node_query:
2983 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2984 self.cfg.GetHypervisorType())
2985 for name in nodenames:
2986 nodeinfo = node_data[name]
2987 if not nodeinfo.fail_msg and nodeinfo.payload:
2988 nodeinfo = nodeinfo.payload
2989 fn = utils.TryConvert
2991 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2992 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2993 "mfree": fn(int, nodeinfo.get('memory_free', None)),
2994 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2995 "dfree": fn(int, nodeinfo.get('vg_free', None)),
2996 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2997 "bootid": nodeinfo.get('bootid', None),
2998 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2999 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3002 live_data[name] = {}
3004 live_data = dict.fromkeys(nodenames, {})
3006 node_to_primary = dict([(name, set()) for name in nodenames])
3007 node_to_secondary = dict([(name, set()) for name in nodenames])
3009 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3010 "sinst_cnt", "sinst_list"))
3011 if inst_fields & frozenset(self.op.output_fields):
3012 inst_data = self.cfg.GetAllInstancesInfo()
3014 for inst in inst_data.values():
3015 if inst.primary_node in node_to_primary:
3016 node_to_primary[inst.primary_node].add(inst.name)
3017 for secnode in inst.secondary_nodes:
3018 if secnode in node_to_secondary:
3019 node_to_secondary[secnode].add(inst.name)
3021 master_node = self.cfg.GetMasterNode()
3023 # end data gathering
3026 for node in nodelist:
3028 for field in self.op.output_fields:
3029 if field in self._SIMPLE_FIELDS:
3030 val = getattr(node, field)
3031 elif field == "pinst_list":
3032 val = list(node_to_primary[node.name])
3033 elif field == "sinst_list":
3034 val = list(node_to_secondary[node.name])
3035 elif field == "pinst_cnt":
3036 val = len(node_to_primary[node.name])
3037 elif field == "sinst_cnt":
3038 val = len(node_to_secondary[node.name])
3039 elif field == "pip":
3040 val = node.primary_ip
3041 elif field == "sip":
3042 val = node.secondary_ip
3043 elif field == "tags":
3044 val = list(node.GetTags())
3045 elif field == "master":
3046 val = node.name == master_node
3047 elif self._FIELDS_DYNAMIC.Matches(field):
3048 val = live_data[node.name].get(field, None)
3049 elif field == "role":
3050 if node.name == master_node:
3052 elif node.master_candidate:
3061 raise errors.ParameterError(field)
3062 node_output.append(val)
3063 output.append(node_output)
3068 class LUQueryNodeVolumes(NoHooksLU):
3069 """Logical unit for getting volumes on node(s).
3072 _OP_REQP = ["nodes", "output_fields"]
3074 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3075 _FIELDS_STATIC = utils.FieldSet("node")
3077 def ExpandNames(self):
3078 _CheckOutputFields(static=self._FIELDS_STATIC,
3079 dynamic=self._FIELDS_DYNAMIC,
3080 selected=self.op.output_fields)
3082 self.needed_locks = {}
3083 self.share_locks[locking.LEVEL_NODE] = 1
3084 if not self.op.nodes:
3085 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3087 self.needed_locks[locking.LEVEL_NODE] = \
3088 _GetWantedNodes(self, self.op.nodes)
3090 def CheckPrereq(self):
3091 """Check prerequisites.
3093 This checks that the fields required are valid output fields.
3096 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3098 def Exec(self, feedback_fn):
3099 """Computes the list of nodes and their attributes.
3102 nodenames = self.nodes
3103 volumes = self.rpc.call_node_volumes(nodenames)
3105 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3106 in self.cfg.GetInstanceList()]
3108 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3111 for node in nodenames:
3112 nresult = volumes[node]
3115 msg = nresult.fail_msg
3117 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3120 node_vols = nresult.payload[:]
3121 node_vols.sort(key=lambda vol: vol['dev'])
3123 for vol in node_vols:
3125 for field in self.op.output_fields:
3128 elif field == "phys":
3132 elif field == "name":
3134 elif field == "size":
3135 val = int(float(vol['size']))
3136 elif field == "instance":
3138 if node not in lv_by_node[inst]:
3140 if vol['name'] in lv_by_node[inst][node]:
3146 raise errors.ParameterError(field)
3147 node_output.append(str(val))
3149 output.append(node_output)
3154 class LUQueryNodeStorage(NoHooksLU):
3155 """Logical unit for getting information on storage units on node(s).
3158 _OP_REQP = ["nodes", "storage_type", "output_fields"]
3160 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3162 def CheckArguments(self):
3163 _CheckStorageType(self.op.storage_type)
3165 _CheckOutputFields(static=self._FIELDS_STATIC,
3166 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3167 selected=self.op.output_fields)
3169 def ExpandNames(self):
3170 self.needed_locks = {}
3171 self.share_locks[locking.LEVEL_NODE] = 1
3174 self.needed_locks[locking.LEVEL_NODE] = \
3175 _GetWantedNodes(self, self.op.nodes)
3177 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3179 def CheckPrereq(self):
3180 """Check prerequisites.
3182 This checks that the fields required are valid output fields.
3185 self.op.name = getattr(self.op, "name", None)
3187 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3189 def Exec(self, feedback_fn):
3190 """Computes the list of nodes and their attributes.
3193 # Always get name to sort by
3194 if constants.SF_NAME in self.op.output_fields:
3195 fields = self.op.output_fields[:]
3197 fields = [constants.SF_NAME] + self.op.output_fields
3199 # Never ask for node or type as it's only known to the LU
3200 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3201 while extra in fields:
3202 fields.remove(extra)
3204 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3205 name_idx = field_idx[constants.SF_NAME]
3207 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3208 data = self.rpc.call_storage_list(self.nodes,
3209 self.op.storage_type, st_args,
3210 self.op.name, fields)
3214 for node in utils.NiceSort(self.nodes):
3215 nresult = data[node]
3219 msg = nresult.fail_msg
3221 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3224 rows = dict([(row[name_idx], row) for row in nresult.payload])
3226 for name in utils.NiceSort(rows.keys()):
3231 for field in self.op.output_fields:
3232 if field == constants.SF_NODE:
3234 elif field == constants.SF_TYPE:
3235 val = self.op.storage_type
3236 elif field in field_idx:
3237 val = row[field_idx[field]]
3239 raise errors.ParameterError(field)
3248 class LUModifyNodeStorage(NoHooksLU):
3249 """Logical unit for modifying a storage volume on a node.
3252 _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3255 def CheckArguments(self):
3256 self.opnode_name = _ExpandNodeName(self.cfg, self.op.node_name)
3258 _CheckStorageType(self.op.storage_type)
3260 def ExpandNames(self):
3261 self.needed_locks = {
3262 locking.LEVEL_NODE: self.op.node_name,
3265 def CheckPrereq(self):
3266 """Check prerequisites.
3269 storage_type = self.op.storage_type
3272 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3274 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3275 " modified" % storage_type,
3278 diff = set(self.op.changes.keys()) - modifiable
3280 raise errors.OpPrereqError("The following fields can not be modified for"
3281 " storage units of type '%s': %r" %
3282 (storage_type, list(diff)),
3285 def Exec(self, feedback_fn):
3286 """Computes the list of nodes and their attributes.
3289 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3290 result = self.rpc.call_storage_modify(self.op.node_name,
3291 self.op.storage_type, st_args,
3292 self.op.name, self.op.changes)
3293 result.Raise("Failed to modify storage unit '%s' on %s" %
3294 (self.op.name, self.op.node_name))
3297 class LUAddNode(LogicalUnit):
3298 """Logical unit for adding node to the cluster.
3302 HTYPE = constants.HTYPE_NODE
3303 _OP_REQP = ["node_name"]
3305 def CheckArguments(self):
3306 # validate/normalize the node name
3307 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3309 def BuildHooksEnv(self):
3312 This will run on all nodes before, and on all nodes + the new node after.
3316 "OP_TARGET": self.op.node_name,
3317 "NODE_NAME": self.op.node_name,
3318 "NODE_PIP": self.op.primary_ip,
3319 "NODE_SIP": self.op.secondary_ip,
3321 nodes_0 = self.cfg.GetNodeList()
3322 nodes_1 = nodes_0 + [self.op.node_name, ]
3323 return env, nodes_0, nodes_1
3325 def CheckPrereq(self):
3326 """Check prerequisites.
3329 - the new node is not already in the config
3331 - its parameters (single/dual homed) matches the cluster
3333 Any errors are signaled by raising errors.OpPrereqError.
3336 node_name = self.op.node_name
3339 dns_data = utils.GetHostInfo(node_name)
3341 node = dns_data.name
3342 primary_ip = self.op.primary_ip = dns_data.ip
3343 secondary_ip = getattr(self.op, "secondary_ip", None)
3344 if secondary_ip is None:
3345 secondary_ip = primary_ip
3346 if not utils.IsValidIP(secondary_ip):
3347 raise errors.OpPrereqError("Invalid secondary IP given",
3349 self.op.secondary_ip = secondary_ip
3351 node_list = cfg.GetNodeList()
3352 if not self.op.readd and node in node_list:
3353 raise errors.OpPrereqError("Node %s is already in the configuration" %
3354 node, errors.ECODE_EXISTS)
3355 elif self.op.readd and node not in node_list:
3356 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3359 self.changed_primary_ip = False
3361 for existing_node_name in node_list:
3362 existing_node = cfg.GetNodeInfo(existing_node_name)
3364 if self.op.readd and node == existing_node_name:
3365 if existing_node.secondary_ip != secondary_ip:
3366 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3367 " address configuration as before",
3369 if existing_node.primary_ip != primary_ip:
3370 self.changed_primary_ip = True
3374 if (existing_node.primary_ip == primary_ip or
3375 existing_node.secondary_ip == primary_ip or
3376 existing_node.primary_ip == secondary_ip or
3377 existing_node.secondary_ip == secondary_ip):
3378 raise errors.OpPrereqError("New node ip address(es) conflict with"
3379 " existing node %s" % existing_node.name,
3380 errors.ECODE_NOTUNIQUE)
3382 # check that the type of the node (single versus dual homed) is the
3383 # same as for the master
3384 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3385 master_singlehomed = myself.secondary_ip == myself.primary_ip
3386 newbie_singlehomed = secondary_ip == primary_ip
3387 if master_singlehomed != newbie_singlehomed:
3388 if master_singlehomed:
3389 raise errors.OpPrereqError("The master has no private ip but the"
3390 " new node has one",
3393 raise errors.OpPrereqError("The master has a private ip but the"
3394 " new node doesn't have one",
3397 # checks reachability
3398 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3399 raise errors.OpPrereqError("Node not reachable by ping",
3400 errors.ECODE_ENVIRON)
3402 if not newbie_singlehomed:
3403 # check reachability from my secondary ip to newbie's secondary ip
3404 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3405 source=myself.secondary_ip):
3406 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3407 " based ping to noded port",
3408 errors.ECODE_ENVIRON)
3415 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3418 self.new_node = self.cfg.GetNodeInfo(node)
3419 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3421 self.new_node = objects.Node(name=node,
3422 primary_ip=primary_ip,
3423 secondary_ip=secondary_ip,
3424 master_candidate=self.master_candidate,
3425 offline=False, drained=False)
3427 def Exec(self, feedback_fn):
3428 """Adds the new node to the cluster.
3431 new_node = self.new_node
3432 node = new_node.name
3434 # for re-adds, reset the offline/drained/master-candidate flags;
3435 # we need to reset here, otherwise offline would prevent RPC calls
3436 # later in the procedure; this also means that if the re-add
3437 # fails, we are left with a non-offlined, broken node
3439 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3440 self.LogInfo("Readding a node, the offline/drained flags were reset")
3441 # if we demote the node, we do cleanup later in the procedure
3442 new_node.master_candidate = self.master_candidate
3443 if self.changed_primary_ip:
3444 new_node.primary_ip = self.op.primary_ip
3446 # notify the user about any possible mc promotion
3447 if new_node.master_candidate:
3448 self.LogInfo("Node will be a master candidate")
3450 # check connectivity
3451 result = self.rpc.call_version([node])[node]
3452 result.Raise("Can't get version information from node %s" % node)
3453 if constants.PROTOCOL_VERSION == result.payload:
3454 logging.info("Communication to node %s fine, sw version %s match",
3455 node, result.payload)
3457 raise errors.OpExecError("Version mismatch master version %s,"
3458 " node version %s" %
3459 (constants.PROTOCOL_VERSION, result.payload))
3462 if self.cfg.GetClusterInfo().modify_ssh_setup:
3463 logging.info("Copy ssh key to node %s", node)
3464 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3466 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3467 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3471 keyarray.append(utils.ReadFile(i))
3473 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3474 keyarray[2], keyarray[3], keyarray[4],
3476 result.Raise("Cannot transfer ssh keys to the new node")
3478 # Add node to our /etc/hosts, and add key to known_hosts
3479 if self.cfg.GetClusterInfo().modify_etc_hosts:
3480 # FIXME: this should be done via an rpc call to node daemon
3481 utils.AddHostToEtcHosts(new_node.name)
3483 if new_node.secondary_ip != new_node.primary_ip:
3484 result = self.rpc.call_node_has_ip_address(new_node.name,
3485 new_node.secondary_ip)
3486 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3487 prereq=True, ecode=errors.ECODE_ENVIRON)
3488 if not result.payload:
3489 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3490 " you gave (%s). Please fix and re-run this"
3491 " command." % new_node.secondary_ip)
3493 node_verify_list = [self.cfg.GetMasterNode()]
3494 node_verify_param = {
3495 constants.NV_NODELIST: [node],
3496 # TODO: do a node-net-test as well?
3499 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3500 self.cfg.GetClusterName())
3501 for verifier in node_verify_list:
3502 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3503 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3505 for failed in nl_payload:
3506 feedback_fn("ssh/hostname verification failed"
3507 " (checking from %s): %s" %
3508 (verifier, nl_payload[failed]))
3509 raise errors.OpExecError("ssh/hostname verification failed.")
3512 _RedistributeAncillaryFiles(self)
3513 self.context.ReaddNode(new_node)
3514 # make sure we redistribute the config
3515 self.cfg.Update(new_node, feedback_fn)
3516 # and make sure the new node will not have old files around
3517 if not new_node.master_candidate:
3518 result = self.rpc.call_node_demote_from_mc(new_node.name)
3519 msg = result.fail_msg
3521 self.LogWarning("Node failed to demote itself from master"
3522 " candidate status: %s" % msg)
3524 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3525 self.context.AddNode(new_node, self.proc.GetECId())
3528 class LUSetNodeParams(LogicalUnit):
3529 """Modifies the parameters of a node.
3532 HPATH = "node-modify"
3533 HTYPE = constants.HTYPE_NODE
3534 _OP_REQP = ["node_name"]
3537 def CheckArguments(self):
3538 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3539 _CheckBooleanOpField(self.op, 'master_candidate')
3540 _CheckBooleanOpField(self.op, 'offline')
3541 _CheckBooleanOpField(self.op, 'drained')
3542 _CheckBooleanOpField(self.op, 'auto_promote')
3543 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3544 if all_mods.count(None) == 3:
3545 raise errors.OpPrereqError("Please pass at least one modification",
3547 if all_mods.count(True) > 1:
3548 raise errors.OpPrereqError("Can't set the node into more than one"
3549 " state at the same time",
3552 # Boolean value that tells us whether we're offlining or draining the node
3553 self.offline_or_drain = (self.op.offline == True or
3554 self.op.drained == True)
3555 self.deoffline_or_drain = (self.op.offline == False or
3556 self.op.drained == False)
3557 self.might_demote = (self.op.master_candidate == False or
3558 self.offline_or_drain)
3560 self.lock_all = self.op.auto_promote and self.might_demote
3563 def ExpandNames(self):
3565 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3567 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3569 def BuildHooksEnv(self):
3572 This runs on the master node.
3576 "OP_TARGET": self.op.node_name,
3577 "MASTER_CANDIDATE": str(self.op.master_candidate),
3578 "OFFLINE": str(self.op.offline),
3579 "DRAINED": str(self.op.drained),
3581 nl = [self.cfg.GetMasterNode(),
3585 def CheckPrereq(self):
3586 """Check prerequisites.
3588 This only checks the instance list against the existing names.
3591 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3593 if (self.op.master_candidate is not None or
3594 self.op.drained is not None or
3595 self.op.offline is not None):
3596 # we can't change the master's node flags
3597 if self.op.node_name == self.cfg.GetMasterNode():
3598 raise errors.OpPrereqError("The master role can be changed"
3599 " only via masterfailover",
3603 if node.master_candidate and self.might_demote and not self.lock_all:
3604 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3605 # check if after removing the current node, we're missing master
3607 (mc_remaining, mc_should, _) = \
3608 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3609 if mc_remaining < mc_should:
3610 raise errors.OpPrereqError("Not enough master candidates, please"
3611 " pass auto_promote to allow promotion",
3614 if (self.op.master_candidate == True and
3615 ((node.offline and not self.op.offline == False) or
3616 (node.drained and not self.op.drained == False))):
3617 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3618 " to master_candidate" % node.name,
3621 # If we're being deofflined/drained, we'll MC ourself if needed
3622 if (self.deoffline_or_drain and not self.offline_or_drain and not
3623 self.op.master_candidate == True and not node.master_candidate):
3624 self.op.master_candidate = _DecideSelfPromotion(self)
3625 if self.op.master_candidate:
3626 self.LogInfo("Autopromoting node to master candidate")
3630 def Exec(self, feedback_fn):
3639 if self.op.offline is not None:
3640 node.offline = self.op.offline
3641 result.append(("offline", str(self.op.offline)))
3642 if self.op.offline == True:
3643 if node.master_candidate:
3644 node.master_candidate = False
3646 result.append(("master_candidate", "auto-demotion due to offline"))
3648 node.drained = False
3649 result.append(("drained", "clear drained status due to offline"))
3651 if self.op.master_candidate is not None:
3652 node.master_candidate = self.op.master_candidate
3654 result.append(("master_candidate", str(self.op.master_candidate)))
3655 if self.op.master_candidate == False:
3656 rrc = self.rpc.call_node_demote_from_mc(node.name)
3659 self.LogWarning("Node failed to demote itself: %s" % msg)
3661 if self.op.drained is not None:
3662 node.drained = self.op.drained
3663 result.append(("drained", str(self.op.drained)))
3664 if self.op.drained == True:
3665 if node.master_candidate:
3666 node.master_candidate = False
3668 result.append(("master_candidate", "auto-demotion due to drain"))
3669 rrc = self.rpc.call_node_demote_from_mc(node.name)
3672 self.LogWarning("Node failed to demote itself: %s" % msg)
3674 node.offline = False
3675 result.append(("offline", "clear offline status due to drain"))
3677 # we locked all nodes, we adjust the CP before updating this node
3679 _AdjustCandidatePool(self, [node.name])
3681 # this will trigger configuration file update, if needed
3682 self.cfg.Update(node, feedback_fn)
3684 # this will trigger job queue propagation or cleanup
3686 self.context.ReaddNode(node)
3691 class LUPowercycleNode(NoHooksLU):
3692 """Powercycles a node.
3695 _OP_REQP = ["node_name", "force"]
3698 def CheckArguments(self):
3699 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3700 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3701 raise errors.OpPrereqError("The node is the master and the force"
3702 " parameter was not set",
3705 def ExpandNames(self):
3706 """Locking for PowercycleNode.
3708 This is a last-resort option and shouldn't block on other
3709 jobs. Therefore, we grab no locks.
3712 self.needed_locks = {}
3714 def CheckPrereq(self):
3715 """Check prerequisites.
3717 This LU has no prereqs.
3722 def Exec(self, feedback_fn):
3726 result = self.rpc.call_node_powercycle(self.op.node_name,
3727 self.cfg.GetHypervisorType())
3728 result.Raise("Failed to schedule the reboot")
3729 return result.payload
3732 class LUQueryClusterInfo(NoHooksLU):
3733 """Query cluster configuration.
3739 def ExpandNames(self):
3740 self.needed_locks = {}
3742 def CheckPrereq(self):
3743 """No prerequsites needed for this LU.
3748 def Exec(self, feedback_fn):
3749 """Return cluster config.
3752 cluster = self.cfg.GetClusterInfo()
3755 # Filter just for enabled hypervisors
3756 for os_name, hv_dict in cluster.os_hvp.items():
3757 os_hvp[os_name] = {}
3758 for hv_name, hv_params in hv_dict.items():
3759 if hv_name in cluster.enabled_hypervisors:
3760 os_hvp[os_name][hv_name] = hv_params
3763 "software_version": constants.RELEASE_VERSION,
3764 "protocol_version": constants.PROTOCOL_VERSION,
3765 "config_version": constants.CONFIG_VERSION,
3766 "os_api_version": max(constants.OS_API_VERSIONS),
3767 "export_version": constants.EXPORT_VERSION,
3768 "architecture": (platform.architecture()[0], platform.machine()),
3769 "name": cluster.cluster_name,
3770 "master": cluster.master_node,
3771 "default_hypervisor": cluster.enabled_hypervisors[0],
3772 "enabled_hypervisors": cluster.enabled_hypervisors,
3773 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3774 for hypervisor_name in cluster.enabled_hypervisors]),
3776 "beparams": cluster.beparams,
3777 "nicparams": cluster.nicparams,
3778 "candidate_pool_size": cluster.candidate_pool_size,
3779 "master_netdev": cluster.master_netdev,
3780 "volume_group_name": cluster.volume_group_name,
3781 "file_storage_dir": cluster.file_storage_dir,
3782 "maintain_node_health": cluster.maintain_node_health,
3783 "ctime": cluster.ctime,
3784 "mtime": cluster.mtime,
3785 "uuid": cluster.uuid,
3786 "tags": list(cluster.GetTags()),
3787 "uid_pool": cluster.uid_pool,
3793 class LUQueryConfigValues(NoHooksLU):
3794 """Return configuration values.
3799 _FIELDS_DYNAMIC = utils.FieldSet()
3800 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3803 def ExpandNames(self):
3804 self.needed_locks = {}
3806 _CheckOutputFields(static=self._FIELDS_STATIC,
3807 dynamic=self._FIELDS_DYNAMIC,
3808 selected=self.op.output_fields)
3810 def CheckPrereq(self):
3811 """No prerequisites.
3816 def Exec(self, feedback_fn):
3817 """Dump a representation of the cluster config to the standard output.
3821 for field in self.op.output_fields:
3822 if field == "cluster_name":
3823 entry = self.cfg.GetClusterName()
3824 elif field == "master_node":
3825 entry = self.cfg.GetMasterNode()
3826 elif field == "drain_flag":
3827 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3828 elif field == "watcher_pause":
3829 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3831 raise errors.ParameterError(field)
3832 values.append(entry)
3836 class LUActivateInstanceDisks(NoHooksLU):
3837 """Bring up an instance's disks.
3840 _OP_REQP = ["instance_name"]
3843 def ExpandNames(self):
3844 self._ExpandAndLockInstance()
3845 self.needed_locks[locking.LEVEL_NODE] = []
3846 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3848 def DeclareLocks(self, level):
3849 if level == locking.LEVEL_NODE:
3850 self._LockInstancesNodes()
3852 def CheckPrereq(self):
3853 """Check prerequisites.
3855 This checks that the instance is in the cluster.
3858 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3859 assert self.instance is not None, \
3860 "Cannot retrieve locked instance %s" % self.op.instance_name
3861 _CheckNodeOnline(self, self.instance.primary_node)
3862 if not hasattr(self.op, "ignore_size"):
3863 self.op.ignore_size = False
3865 def Exec(self, feedback_fn):
3866 """Activate the disks.
3869 disks_ok, disks_info = \
3870 _AssembleInstanceDisks(self, self.instance,
3871 ignore_size=self.op.ignore_size)
3873 raise errors.OpExecError("Cannot activate block devices")
3878 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
3880 """Prepare the block devices for an instance.
3882 This sets up the block devices on all nodes.
3884 @type lu: L{LogicalUnit}
3885 @param lu: the logical unit on whose behalf we execute
3886 @type instance: L{objects.Instance}
3887 @param instance: the instance for whose disks we assemble
3888 @type disks: list of L{objects.Disk} or None
3889 @param disks: which disks to assemble (or all, if None)
3890 @type ignore_secondaries: boolean
3891 @param ignore_secondaries: if true, errors on secondary nodes
3892 won't result in an error return from the function
3893 @type ignore_size: boolean
3894 @param ignore_size: if true, the current known size of the disk
3895 will not be used during the disk activation, useful for cases
3896 when the size is wrong
3897 @return: False if the operation failed, otherwise a list of
3898 (host, instance_visible_name, node_visible_name)
3899 with the mapping from node devices to instance devices
3904 iname = instance.name
3905 disks = _ExpandCheckDisks(instance, disks)
3907 # With the two passes mechanism we try to reduce the window of
3908 # opportunity for the race condition of switching DRBD to primary
3909 # before handshaking occured, but we do not eliminate it
3911 # The proper fix would be to wait (with some limits) until the
3912 # connection has been made and drbd transitions from WFConnection
3913 # into any other network-connected state (Connected, SyncTarget,
3916 # 1st pass, assemble on all nodes in secondary mode
3917 for inst_disk in disks:
3918 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3920 node_disk = node_disk.Copy()
3921 node_disk.UnsetSize()
3922 lu.cfg.SetDiskID(node_disk, node)
3923 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3924 msg = result.fail_msg
3926 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3927 " (is_primary=False, pass=1): %s",
3928 inst_disk.iv_name, node, msg)
3929 if not ignore_secondaries:
3932 # FIXME: race condition on drbd migration to primary
3934 # 2nd pass, do only the primary node
3935 for inst_disk in disks:
3938 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3939 if node != instance.primary_node:
3942 node_disk = node_disk.Copy()
3943 node_disk.UnsetSize()
3944 lu.cfg.SetDiskID(node_disk, node)
3945 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3946 msg = result.fail_msg
3948 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3949 " (is_primary=True, pass=2): %s",
3950 inst_disk.iv_name, node, msg)
3953 dev_path = result.payload
3955 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3957 # leave the disks configured for the primary node
3958 # this is a workaround that would be fixed better by
3959 # improving the logical/physical id handling
3961 lu.cfg.SetDiskID(disk, instance.primary_node)
3963 return disks_ok, device_info
3966 def _StartInstanceDisks(lu, instance, force):
3967 """Start the disks of an instance.
3970 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3971 ignore_secondaries=force)
3973 _ShutdownInstanceDisks(lu, instance)
3974 if force is not None and not force:
3975 lu.proc.LogWarning("", hint="If the message above refers to a"
3977 " you can retry the operation using '--force'.")
3978 raise errors.OpExecError("Disk consistency error")
3981 class LUDeactivateInstanceDisks(NoHooksLU):
3982 """Shutdown an instance's disks.
3985 _OP_REQP = ["instance_name"]
3988 def ExpandNames(self):
3989 self._ExpandAndLockInstance()
3990 self.needed_locks[locking.LEVEL_NODE] = []
3991 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3993 def DeclareLocks(self, level):
3994 if level == locking.LEVEL_NODE:
3995 self._LockInstancesNodes()
3997 def CheckPrereq(self):
3998 """Check prerequisites.
4000 This checks that the instance is in the cluster.
4003 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4004 assert self.instance is not None, \
4005 "Cannot retrieve locked instance %s" % self.op.instance_name
4007 def Exec(self, feedback_fn):
4008 """Deactivate the disks
4011 instance = self.instance
4012 _SafeShutdownInstanceDisks(self, instance)
4015 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4016 """Shutdown block devices of an instance.
4018 This function checks if an instance is running, before calling
4019 _ShutdownInstanceDisks.
4022 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4023 _ShutdownInstanceDisks(lu, instance, disks=disks)
4026 def _ExpandCheckDisks(instance, disks):
4027 """Return the instance disks selected by the disks list
4029 @type disks: list of L{objects.Disk} or None
4030 @param disks: selected disks
4031 @rtype: list of L{objects.Disk}
4032 @return: selected instance disks to act on
4036 return instance.disks
4038 if not set(disks).issubset(instance.disks):
4039 raise errors.ProgrammerError("Can only act on disks belonging to the"
4044 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4045 """Shutdown block devices of an instance.
4047 This does the shutdown on all nodes of the instance.
4049 If the ignore_primary is false, errors on the primary node are
4054 disks = _ExpandCheckDisks(instance, disks)
4057 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4058 lu.cfg.SetDiskID(top_disk, node)
4059 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4060 msg = result.fail_msg
4062 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4063 disk.iv_name, node, msg)
4064 if not ignore_primary or node != instance.primary_node:
4069 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4070 """Checks if a node has enough free memory.
4072 This function check if a given node has the needed amount of free
4073 memory. In case the node has less memory or we cannot get the
4074 information from the node, this function raise an OpPrereqError
4077 @type lu: C{LogicalUnit}
4078 @param lu: a logical unit from which we get configuration data
4080 @param node: the node to check
4081 @type reason: C{str}
4082 @param reason: string to use in the error message
4083 @type requested: C{int}
4084 @param requested: the amount of memory in MiB to check for
4085 @type hypervisor_name: C{str}
4086 @param hypervisor_name: the hypervisor to ask for memory stats
4087 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4088 we cannot check the node
4091 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4092 nodeinfo[node].Raise("Can't get data from node %s" % node,
4093 prereq=True, ecode=errors.ECODE_ENVIRON)
4094 free_mem = nodeinfo[node].payload.get('memory_free', None)
4095 if not isinstance(free_mem, int):
4096 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4097 " was '%s'" % (node, free_mem),
4098 errors.ECODE_ENVIRON)
4099 if requested > free_mem:
4100 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4101 " needed %s MiB, available %s MiB" %
4102 (node, reason, requested, free_mem),
4106 def _CheckNodesFreeDisk(lu, nodenames, requested):
4107 """Checks if nodes have enough free disk space in the default VG.
4109 This function check if all given nodes have the needed amount of
4110 free disk. In case any node has less disk or we cannot get the
4111 information from the node, this function raise an OpPrereqError
4114 @type lu: C{LogicalUnit}
4115 @param lu: a logical unit from which we get configuration data
4116 @type nodenames: C{list}
4117 @param nodenames: the list of node names to check
4118 @type requested: C{int}
4119 @param requested: the amount of disk in MiB to check for
4120 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4121 we cannot check the node
4124 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4125 lu.cfg.GetHypervisorType())
4126 for node in nodenames:
4127 info = nodeinfo[node]
4128 info.Raise("Cannot get current information from node %s" % node,
4129 prereq=True, ecode=errors.ECODE_ENVIRON)
4130 vg_free = info.payload.get("vg_free", None)
4131 if not isinstance(vg_free, int):
4132 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4133 " result was '%s'" % (node, vg_free),
4134 errors.ECODE_ENVIRON)
4135 if requested > vg_free:
4136 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4137 " required %d MiB, available %d MiB" %
4138 (node, requested, vg_free),
4142 class LUStartupInstance(LogicalUnit):
4143 """Starts an instance.
4146 HPATH = "instance-start"
4147 HTYPE = constants.HTYPE_INSTANCE
4148 _OP_REQP = ["instance_name", "force"]
4151 def ExpandNames(self):
4152 self._ExpandAndLockInstance()
4154 def BuildHooksEnv(self):
4157 This runs on master, primary and secondary nodes of the instance.
4161 "FORCE": self.op.force,
4163 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4164 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4167 def CheckPrereq(self):
4168 """Check prerequisites.
4170 This checks that the instance is in the cluster.
4173 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4174 assert self.instance is not None, \
4175 "Cannot retrieve locked instance %s" % self.op.instance_name
4178 self.beparams = getattr(self.op, "beparams", {})
4180 if not isinstance(self.beparams, dict):
4181 raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4182 " dict" % (type(self.beparams), ),
4184 # fill the beparams dict
4185 utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
4186 self.op.beparams = self.beparams
4189 self.hvparams = getattr(self.op, "hvparams", {})
4191 if not isinstance(self.hvparams, dict):
4192 raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4193 " dict" % (type(self.hvparams), ),
4196 # check hypervisor parameter syntax (locally)
4197 cluster = self.cfg.GetClusterInfo()
4198 utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
4199 filled_hvp = cluster.FillHV(instance)
4200 filled_hvp.update(self.hvparams)
4201 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4202 hv_type.CheckParameterSyntax(filled_hvp)
4203 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4204 self.op.hvparams = self.hvparams
4206 _CheckNodeOnline(self, instance.primary_node)
4208 bep = self.cfg.GetClusterInfo().FillBE(instance)
4209 # check bridges existence
4210 _CheckInstanceBridgesExist(self, instance)
4212 remote_info = self.rpc.call_instance_info(instance.primary_node,
4214 instance.hypervisor)
4215 remote_info.Raise("Error checking node %s" % instance.primary_node,
4216 prereq=True, ecode=errors.ECODE_ENVIRON)
4217 if not remote_info.payload: # not running already
4218 _CheckNodeFreeMemory(self, instance.primary_node,
4219 "starting instance %s" % instance.name,
4220 bep[constants.BE_MEMORY], instance.hypervisor)
4222 def Exec(self, feedback_fn):
4223 """Start the instance.
4226 instance = self.instance
4227 force = self.op.force
4229 self.cfg.MarkInstanceUp(instance.name)
4231 node_current = instance.primary_node
4233 _StartInstanceDisks(self, instance, force)
4235 result = self.rpc.call_instance_start(node_current, instance,
4236 self.hvparams, self.beparams)
4237 msg = result.fail_msg
4239 _ShutdownInstanceDisks(self, instance)
4240 raise errors.OpExecError("Could not start instance: %s" % msg)
4243 class LURebootInstance(LogicalUnit):
4244 """Reboot an instance.
4247 HPATH = "instance-reboot"
4248 HTYPE = constants.HTYPE_INSTANCE
4249 _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
4252 def CheckArguments(self):
4253 """Check the arguments.
4256 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4257 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4259 def ExpandNames(self):
4260 if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
4261 constants.INSTANCE_REBOOT_HARD,
4262 constants.INSTANCE_REBOOT_FULL]:
4263 raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
4264 (constants.INSTANCE_REBOOT_SOFT,
4265 constants.INSTANCE_REBOOT_HARD,
4266 constants.INSTANCE_REBOOT_FULL))
4267 self._ExpandAndLockInstance()
4269 def BuildHooksEnv(self):
4272 This runs on master, primary and secondary nodes of the instance.
4276 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4277 "REBOOT_TYPE": self.op.reboot_type,
4278 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4280 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4281 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4284 def CheckPrereq(self):
4285 """Check prerequisites.
4287 This checks that the instance is in the cluster.
4290 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4291 assert self.instance is not None, \
4292 "Cannot retrieve locked instance %s" % self.op.instance_name
4294 _CheckNodeOnline(self, instance.primary_node)
4296 # check bridges existence
4297 _CheckInstanceBridgesExist(self, instance)
4299 def Exec(self, feedback_fn):
4300 """Reboot the instance.
4303 instance = self.instance
4304 ignore_secondaries = self.op.ignore_secondaries
4305 reboot_type = self.op.reboot_type
4307 node_current = instance.primary_node
4309 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4310 constants.INSTANCE_REBOOT_HARD]:
4311 for disk in instance.disks:
4312 self.cfg.SetDiskID(disk, node_current)
4313 result = self.rpc.call_instance_reboot(node_current, instance,
4315 self.shutdown_timeout)
4316 result.Raise("Could not reboot instance")
4318 result = self.rpc.call_instance_shutdown(node_current, instance,
4319 self.shutdown_timeout)
4320 result.Raise("Could not shutdown instance for full reboot")
4321 _ShutdownInstanceDisks(self, instance)
4322 _StartInstanceDisks(self, instance, ignore_secondaries)
4323 result = self.rpc.call_instance_start(node_current, instance, None, None)
4324 msg = result.fail_msg
4326 _ShutdownInstanceDisks(self, instance)
4327 raise errors.OpExecError("Could not start instance for"
4328 " full reboot: %s" % msg)
4330 self.cfg.MarkInstanceUp(instance.name)
4333 class LUShutdownInstance(LogicalUnit):
4334 """Shutdown an instance.
4337 HPATH = "instance-stop"
4338 HTYPE = constants.HTYPE_INSTANCE
4339 _OP_REQP = ["instance_name"]
4342 def CheckArguments(self):
4343 """Check the arguments.
4346 self.timeout = getattr(self.op, "timeout",
4347 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4349 def ExpandNames(self):
4350 self._ExpandAndLockInstance()
4352 def BuildHooksEnv(self):
4355 This runs on master, primary and secondary nodes of the instance.
4358 env = _BuildInstanceHookEnvByObject(self, self.instance)
4359 env["TIMEOUT"] = self.timeout
4360 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4363 def CheckPrereq(self):
4364 """Check prerequisites.
4366 This checks that the instance is in the cluster.
4369 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4370 assert self.instance is not None, \
4371 "Cannot retrieve locked instance %s" % self.op.instance_name
4372 _CheckNodeOnline(self, self.instance.primary_node)
4374 def Exec(self, feedback_fn):
4375 """Shutdown the instance.
4378 instance = self.instance
4379 node_current = instance.primary_node
4380 timeout = self.timeout
4381 self.cfg.MarkInstanceDown(instance.name)
4382 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4383 msg = result.fail_msg
4385 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4387 _ShutdownInstanceDisks(self, instance)
4390 class LUReinstallInstance(LogicalUnit):
4391 """Reinstall an instance.
4394 HPATH = "instance-reinstall"
4395 HTYPE = constants.HTYPE_INSTANCE
4396 _OP_REQP = ["instance_name"]
4399 def ExpandNames(self):
4400 self._ExpandAndLockInstance()
4402 def BuildHooksEnv(self):
4405 This runs on master, primary and secondary nodes of the instance.
4408 env = _BuildInstanceHookEnvByObject(self, self.instance)
4409 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4412 def CheckPrereq(self):
4413 """Check prerequisites.
4415 This checks that the instance is in the cluster and is not running.
4418 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4419 assert instance is not None, \
4420 "Cannot retrieve locked instance %s" % self.op.instance_name
4421 _CheckNodeOnline(self, instance.primary_node)
4423 if instance.disk_template == constants.DT_DISKLESS:
4424 raise errors.OpPrereqError("Instance '%s' has no disks" %
4425 self.op.instance_name,
4427 _CheckInstanceDown(self, instance, "cannot reinstall")
4429 self.op.os_type = getattr(self.op, "os_type", None)
4430 self.op.force_variant = getattr(self.op, "force_variant", False)
4431 if self.op.os_type is not None:
4433 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4434 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4436 self.instance = instance
4438 def Exec(self, feedback_fn):
4439 """Reinstall the instance.
4442 inst = self.instance
4444 if self.op.os_type is not None:
4445 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4446 inst.os = self.op.os_type
4447 self.cfg.Update(inst, feedback_fn)
4449 _StartInstanceDisks(self, inst, None)
4451 feedback_fn("Running the instance OS create scripts...")
4452 # FIXME: pass debug option from opcode to backend
4453 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4454 self.op.debug_level)
4455 result.Raise("Could not install OS for instance %s on node %s" %
4456 (inst.name, inst.primary_node))
4458 _ShutdownInstanceDisks(self, inst)
4461 class LURecreateInstanceDisks(LogicalUnit):
4462 """Recreate an instance's missing disks.
4465 HPATH = "instance-recreate-disks"
4466 HTYPE = constants.HTYPE_INSTANCE
4467 _OP_REQP = ["instance_name", "disks"]
4470 def CheckArguments(self):
4471 """Check the arguments.
4474 if not isinstance(self.op.disks, list):
4475 raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4476 for item in self.op.disks:
4477 if (not isinstance(item, int) or
4479 raise errors.OpPrereqError("Invalid disk specification '%s'" %
4480 str(item), errors.ECODE_INVAL)
4482 def ExpandNames(self):
4483 self._ExpandAndLockInstance()
4485 def BuildHooksEnv(self):
4488 This runs on master, primary and secondary nodes of the instance.
4491 env = _BuildInstanceHookEnvByObject(self, self.instance)
4492 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4495 def CheckPrereq(self):
4496 """Check prerequisites.
4498 This checks that the instance is in the cluster and is not running.
4501 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4502 assert instance is not None, \
4503 "Cannot retrieve locked instance %s" % self.op.instance_name
4504 _CheckNodeOnline(self, instance.primary_node)
4506 if instance.disk_template == constants.DT_DISKLESS:
4507 raise errors.OpPrereqError("Instance '%s' has no disks" %
4508 self.op.instance_name, errors.ECODE_INVAL)
4509 _CheckInstanceDown(self, instance, "cannot recreate disks")
4511 if not self.op.disks:
4512 self.op.disks = range(len(instance.disks))
4514 for idx in self.op.disks:
4515 if idx >= len(instance.disks):
4516 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4519 self.instance = instance
4521 def Exec(self, feedback_fn):
4522 """Recreate the disks.
4526 for idx, _ in enumerate(self.instance.disks):
4527 if idx not in self.op.disks: # disk idx has not been passed in
4531 _CreateDisks(self, self.instance, to_skip=to_skip)
4534 class LURenameInstance(LogicalUnit):
4535 """Rename an instance.
4538 HPATH = "instance-rename"
4539 HTYPE = constants.HTYPE_INSTANCE
4540 _OP_REQP = ["instance_name", "new_name"]
4542 def BuildHooksEnv(self):
4545 This runs on master, primary and secondary nodes of the instance.
4548 env = _BuildInstanceHookEnvByObject(self, self.instance)
4549 env["INSTANCE_NEW_NAME"] = self.op.new_name
4550 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4553 def CheckPrereq(self):
4554 """Check prerequisites.
4556 This checks that the instance is in the cluster and is not running.
4559 self.op.instance_name = _ExpandInstanceName(self.cfg,
4560 self.op.instance_name)
4561 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4562 assert instance is not None
4563 _CheckNodeOnline(self, instance.primary_node)
4564 _CheckInstanceDown(self, instance, "cannot rename")
4565 self.instance = instance
4567 # new name verification
4568 name_info = utils.GetHostInfo(self.op.new_name)
4570 self.op.new_name = new_name = name_info.name
4571 instance_list = self.cfg.GetInstanceList()
4572 if new_name in instance_list:
4573 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4574 new_name, errors.ECODE_EXISTS)
4576 if not getattr(self.op, "ignore_ip", False):
4577 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4578 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4579 (name_info.ip, new_name),
4580 errors.ECODE_NOTUNIQUE)
4583 def Exec(self, feedback_fn):
4584 """Reinstall the instance.
4587 inst = self.instance
4588 old_name = inst.name
4590 if inst.disk_template == constants.DT_FILE:
4591 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4593 self.cfg.RenameInstance(inst.name, self.op.new_name)
4594 # Change the instance lock. This is definitely safe while we hold the BGL
4595 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4596 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4598 # re-read the instance from the configuration after rename
4599 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4601 if inst.disk_template == constants.DT_FILE:
4602 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4603 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4604 old_file_storage_dir,
4605 new_file_storage_dir)
4606 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4607 " (but the instance has been renamed in Ganeti)" %
4608 (inst.primary_node, old_file_storage_dir,
4609 new_file_storage_dir))
4611 _StartInstanceDisks(self, inst, None)
4613 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4614 old_name, self.op.debug_level)
4615 msg = result.fail_msg
4617 msg = ("Could not run OS rename script for instance %s on node %s"
4618 " (but the instance has been renamed in Ganeti): %s" %
4619 (inst.name, inst.primary_node, msg))
4620 self.proc.LogWarning(msg)
4622 _ShutdownInstanceDisks(self, inst)
4625 class LURemoveInstance(LogicalUnit):
4626 """Remove an instance.
4629 HPATH = "instance-remove"
4630 HTYPE = constants.HTYPE_INSTANCE
4631 _OP_REQP = ["instance_name", "ignore_failures"]
4634 def CheckArguments(self):
4635 """Check the arguments.
4638 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4639 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4641 def ExpandNames(self):
4642 self._ExpandAndLockInstance()
4643 self.needed_locks[locking.LEVEL_NODE] = []
4644 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4646 def DeclareLocks(self, level):
4647 if level == locking.LEVEL_NODE:
4648 self._LockInstancesNodes()
4650 def BuildHooksEnv(self):
4653 This runs on master, primary and secondary nodes of the instance.
4656 env = _BuildInstanceHookEnvByObject(self, self.instance)
4657 env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4658 nl = [self.cfg.GetMasterNode()]
4659 nl_post = list(self.instance.all_nodes) + nl
4660 return env, nl, nl_post
4662 def CheckPrereq(self):
4663 """Check prerequisites.
4665 This checks that the instance is in the cluster.
4668 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4669 assert self.instance is not None, \
4670 "Cannot retrieve locked instance %s" % self.op.instance_name
4672 def Exec(self, feedback_fn):
4673 """Remove the instance.
4676 instance = self.instance
4677 logging.info("Shutting down instance %s on node %s",
4678 instance.name, instance.primary_node)
4680 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4681 self.shutdown_timeout)
4682 msg = result.fail_msg
4684 if self.op.ignore_failures:
4685 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4687 raise errors.OpExecError("Could not shutdown instance %s on"
4689 (instance.name, instance.primary_node, msg))
4691 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4694 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4695 """Utility function to remove an instance.
4698 logging.info("Removing block devices for instance %s", instance.name)
4700 if not _RemoveDisks(lu, instance):
4701 if not ignore_failures:
4702 raise errors.OpExecError("Can't remove instance's disks")
4703 feedback_fn("Warning: can't remove instance's disks")
4705 logging.info("Removing instance %s out of cluster config", instance.name)
4707 lu.cfg.RemoveInstance(instance.name)
4709 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4710 "Instance lock removal conflict"
4712 # Remove lock for the instance
4713 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4716 class LUQueryInstances(NoHooksLU):
4717 """Logical unit for querying instances.
4720 # pylint: disable-msg=W0142
4721 _OP_REQP = ["output_fields", "names", "use_locking"]
4723 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4724 "serial_no", "ctime", "mtime", "uuid"]
4725 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4727 "disk_template", "ip", "mac", "bridge",
4728 "nic_mode", "nic_link",
4729 "sda_size", "sdb_size", "vcpus", "tags",
4730 "network_port", "beparams",
4731 r"(disk)\.(size)/([0-9]+)",
4732 r"(disk)\.(sizes)", "disk_usage",
4733 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4734 r"(nic)\.(bridge)/([0-9]+)",
4735 r"(nic)\.(macs|ips|modes|links|bridges)",
4736 r"(disk|nic)\.(count)",
4738 ] + _SIMPLE_FIELDS +
4740 for name in constants.HVS_PARAMETERS
4741 if name not in constants.HVC_GLOBALS] +
4743 for name in constants.BES_PARAMETERS])
4744 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4747 def ExpandNames(self):
4748 _CheckOutputFields(static=self._FIELDS_STATIC,
4749 dynamic=self._FIELDS_DYNAMIC,
4750 selected=self.op.output_fields)
4752 self.needed_locks = {}
4753 self.share_locks[locking.LEVEL_INSTANCE] = 1
4754 self.share_locks[locking.LEVEL_NODE] = 1
4757 self.wanted = _GetWantedInstances(self, self.op.names)
4759 self.wanted = locking.ALL_SET
4761 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4762 self.do_locking = self.do_node_query and self.op.use_locking
4764 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4765 self.needed_locks[locking.LEVEL_NODE] = []
4766 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4768 def DeclareLocks(self, level):
4769 if level == locking.LEVEL_NODE and self.do_locking:
4770 self._LockInstancesNodes()
4772 def CheckPrereq(self):
4773 """Check prerequisites.
4778 def Exec(self, feedback_fn):
4779 """Computes the list of nodes and their attributes.
4782 # pylint: disable-msg=R0912
4783 # way too many branches here
4784 all_info = self.cfg.GetAllInstancesInfo()
4785 if self.wanted == locking.ALL_SET:
4786 # caller didn't specify instance names, so ordering is not important
4788 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4790 instance_names = all_info.keys()
4791 instance_names = utils.NiceSort(instance_names)
4793 # caller did specify names, so we must keep the ordering
4795 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4797 tgt_set = all_info.keys()
4798 missing = set(self.wanted).difference(tgt_set)
4800 raise errors.OpExecError("Some instances were removed before"
4801 " retrieving their data: %s" % missing)
4802 instance_names = self.wanted
4804 instance_list = [all_info[iname] for iname in instance_names]
4806 # begin data gathering
4808 nodes = frozenset([inst.primary_node for inst in instance_list])
4809 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4813 if self.do_node_query:
4815 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4817 result = node_data[name]
4819 # offline nodes will be in both lists
4820 off_nodes.append(name)
4822 bad_nodes.append(name)
4825 live_data.update(result.payload)
4826 # else no instance is alive
4828 live_data = dict([(name, {}) for name in instance_names])
4830 # end data gathering
4835 cluster = self.cfg.GetClusterInfo()
4836 for instance in instance_list:
4838 i_hv = cluster.FillHV(instance, skip_globals=True)
4839 i_be = cluster.FillBE(instance)
4840 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
4841 for field in self.op.output_fields:
4842 st_match = self._FIELDS_STATIC.Matches(field)
4843 if field in self._SIMPLE_FIELDS:
4844 val = getattr(instance, field)
4845 elif field == "pnode":
4846 val = instance.primary_node
4847 elif field == "snodes":
4848 val = list(instance.secondary_nodes)
4849 elif field == "admin_state":
4850 val = instance.admin_up
4851 elif field == "oper_state":
4852 if instance.primary_node in bad_nodes:
4855 val = bool(live_data.get(instance.name))
4856 elif field == "status":
4857 if instance.primary_node in off_nodes:
4858 val = "ERROR_nodeoffline"
4859 elif instance.primary_node in bad_nodes:
4860 val = "ERROR_nodedown"
4862 running = bool(live_data.get(instance.name))
4864 if instance.admin_up:
4869 if instance.admin_up:
4873 elif field == "oper_ram":
4874 if instance.primary_node in bad_nodes:
4876 elif instance.name in live_data:
4877 val = live_data[instance.name].get("memory", "?")
4880 elif field == "vcpus":
4881 val = i_be[constants.BE_VCPUS]
4882 elif field == "disk_template":
4883 val = instance.disk_template
4886 val = instance.nics[0].ip
4889 elif field == "nic_mode":
4891 val = i_nicp[0][constants.NIC_MODE]
4894 elif field == "nic_link":
4896 val = i_nicp[0][constants.NIC_LINK]
4899 elif field == "bridge":
4900 if (instance.nics and
4901 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4902 val = i_nicp[0][constants.NIC_LINK]
4905 elif field == "mac":
4907 val = instance.nics[0].mac
4910 elif field == "sda_size" or field == "sdb_size":
4911 idx = ord(field[2]) - ord('a')
4913 val = instance.FindDisk(idx).size
4914 except errors.OpPrereqError:
4916 elif field == "disk_usage": # total disk usage per node
4917 disk_sizes = [{'size': disk.size} for disk in instance.disks]
4918 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4919 elif field == "tags":
4920 val = list(instance.GetTags())
4921 elif field == "hvparams":
4923 elif (field.startswith(HVPREFIX) and
4924 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4925 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4926 val = i_hv.get(field[len(HVPREFIX):], None)
4927 elif field == "beparams":
4929 elif (field.startswith(BEPREFIX) and
4930 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4931 val = i_be.get(field[len(BEPREFIX):], None)
4932 elif st_match and st_match.groups():
4933 # matches a variable list
4934 st_groups = st_match.groups()
4935 if st_groups and st_groups[0] == "disk":
4936 if st_groups[1] == "count":
4937 val = len(instance.disks)
4938 elif st_groups[1] == "sizes":
4939 val = [disk.size for disk in instance.disks]
4940 elif st_groups[1] == "size":
4942 val = instance.FindDisk(st_groups[2]).size
4943 except errors.OpPrereqError:
4946 assert False, "Unhandled disk parameter"
4947 elif st_groups[0] == "nic":
4948 if st_groups[1] == "count":
4949 val = len(instance.nics)
4950 elif st_groups[1] == "macs":
4951 val = [nic.mac for nic in instance.nics]
4952 elif st_groups[1] == "ips":
4953 val = [nic.ip for nic in instance.nics]
4954 elif st_groups[1] == "modes":
4955 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4956 elif st_groups[1] == "links":
4957 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4958 elif st_groups[1] == "bridges":
4961 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4962 val.append(nicp[constants.NIC_LINK])
4967 nic_idx = int(st_groups[2])
4968 if nic_idx >= len(instance.nics):
4971 if st_groups[1] == "mac":
4972 val = instance.nics[nic_idx].mac
4973 elif st_groups[1] == "ip":
4974 val = instance.nics[nic_idx].ip
4975 elif st_groups[1] == "mode":
4976 val = i_nicp[nic_idx][constants.NIC_MODE]
4977 elif st_groups[1] == "link":
4978 val = i_nicp[nic_idx][constants.NIC_LINK]
4979 elif st_groups[1] == "bridge":
4980 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4981 if nic_mode == constants.NIC_MODE_BRIDGED:
4982 val = i_nicp[nic_idx][constants.NIC_LINK]
4986 assert False, "Unhandled NIC parameter"
4988 assert False, ("Declared but unhandled variable parameter '%s'" %
4991 assert False, "Declared but unhandled parameter '%s'" % field
4998 class LUFailoverInstance(LogicalUnit):
4999 """Failover an instance.
5002 HPATH = "instance-failover"
5003 HTYPE = constants.HTYPE_INSTANCE
5004 _OP_REQP = ["instance_name", "ignore_consistency"]
5007 def CheckArguments(self):
5008 """Check the arguments.
5011 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5012 constants.DEFAULT_SHUTDOWN_TIMEOUT)
5014 def ExpandNames(self):
5015 self._ExpandAndLockInstance()
5016 self.needed_locks[locking.LEVEL_NODE] = []
5017 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5019 def DeclareLocks(self, level):
5020 if level == locking.LEVEL_NODE:
5021 self._LockInstancesNodes()
5023 def BuildHooksEnv(self):
5026 This runs on master, primary and secondary nodes of the instance.
5029 instance = self.instance
5030 source_node = instance.primary_node
5031 target_node = instance.secondary_nodes[0]
5033 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5034 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5035 "OLD_PRIMARY": source_node,
5036 "OLD_SECONDARY": target_node,
5037 "NEW_PRIMARY": target_node,
5038 "NEW_SECONDARY": source_node,
5040 env.update(_BuildInstanceHookEnvByObject(self, instance))
5041 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5043 nl_post.append(source_node)
5044 return env, nl, nl_post
5046 def CheckPrereq(self):
5047 """Check prerequisites.
5049 This checks that the instance is in the cluster.
5052 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5053 assert self.instance is not None, \
5054 "Cannot retrieve locked instance %s" % self.op.instance_name
5056 bep = self.cfg.GetClusterInfo().FillBE(instance)
5057 if instance.disk_template not in constants.DTS_NET_MIRROR:
5058 raise errors.OpPrereqError("Instance's disk layout is not"
5059 " network mirrored, cannot failover.",
5062 secondary_nodes = instance.secondary_nodes
5063 if not secondary_nodes:
5064 raise errors.ProgrammerError("no secondary node but using "
5065 "a mirrored disk template")
5067 target_node = secondary_nodes[0]
5068 _CheckNodeOnline(self, target_node)
5069 _CheckNodeNotDrained(self, target_node)
5070 if instance.admin_up:
5071 # check memory requirements on the secondary node
5072 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5073 instance.name, bep[constants.BE_MEMORY],
5074 instance.hypervisor)
5076 self.LogInfo("Not checking memory on the secondary node as"
5077 " instance will not be started")
5079 # check bridge existance
5080 _CheckInstanceBridgesExist(self, instance, node=target_node)
5082 def Exec(self, feedback_fn):
5083 """Failover an instance.
5085 The failover is done by shutting it down on its present node and
5086 starting it on the secondary.
5089 instance = self.instance
5091 source_node = instance.primary_node
5092 target_node = instance.secondary_nodes[0]
5094 if instance.admin_up:
5095 feedback_fn("* checking disk consistency between source and target")
5096 for dev in instance.disks:
5097 # for drbd, these are drbd over lvm
5098 if not _CheckDiskConsistency(self, dev, target_node, False):
5099 if not self.op.ignore_consistency:
5100 raise errors.OpExecError("Disk %s is degraded on target node,"
5101 " aborting failover." % dev.iv_name)
5103 feedback_fn("* not checking disk consistency as instance is not running")
5105 feedback_fn("* shutting down instance on source node")
5106 logging.info("Shutting down instance %s on node %s",
5107 instance.name, source_node)
5109 result = self.rpc.call_instance_shutdown(source_node, instance,
5110 self.shutdown_timeout)
5111 msg = result.fail_msg
5113 if self.op.ignore_consistency:
5114 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5115 " Proceeding anyway. Please make sure node"
5116 " %s is down. Error details: %s",
5117 instance.name, source_node, source_node, msg)
5119 raise errors.OpExecError("Could not shutdown instance %s on"
5121 (instance.name, source_node, msg))
5123 feedback_fn("* deactivating the instance's disks on source node")
5124 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5125 raise errors.OpExecError("Can't shut down the instance's disks.")
5127 instance.primary_node = target_node
5128 # distribute new instance config to the other nodes
5129 self.cfg.Update(instance, feedback_fn)
5131 # Only start the instance if it's marked as up
5132 if instance.admin_up:
5133 feedback_fn("* activating the instance's disks on target node")
5134 logging.info("Starting instance %s on node %s",
5135 instance.name, target_node)
5137 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5138 ignore_secondaries=True)
5140 _ShutdownInstanceDisks(self, instance)
5141 raise errors.OpExecError("Can't activate the instance's disks")
5143 feedback_fn("* starting the instance on the target node")
5144 result = self.rpc.call_instance_start(target_node, instance, None, None)
5145 msg = result.fail_msg
5147 _ShutdownInstanceDisks(self, instance)
5148 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5149 (instance.name, target_node, msg))
5152 class LUMigrateInstance(LogicalUnit):
5153 """Migrate an instance.
5155 This is migration without shutting down, compared to the failover,
5156 which is done with shutdown.
5159 HPATH = "instance-migrate"
5160 HTYPE = constants.HTYPE_INSTANCE
5161 _OP_REQP = ["instance_name", "live", "cleanup"]
5165 def ExpandNames(self):
5166 self._ExpandAndLockInstance()
5168 self.needed_locks[locking.LEVEL_NODE] = []
5169 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5171 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5172 self.op.live, self.op.cleanup)
5173 self.tasklets = [self._migrater]
5175 def DeclareLocks(self, level):
5176 if level == locking.LEVEL_NODE:
5177 self._LockInstancesNodes()
5179 def BuildHooksEnv(self):
5182 This runs on master, primary and secondary nodes of the instance.
5185 instance = self._migrater.instance
5186 source_node = instance.primary_node
5187 target_node = instance.secondary_nodes[0]
5188 env = _BuildInstanceHookEnvByObject(self, instance)
5189 env["MIGRATE_LIVE"] = self.op.live
5190 env["MIGRATE_CLEANUP"] = self.op.cleanup
5192 "OLD_PRIMARY": source_node,
5193 "OLD_SECONDARY": target_node,
5194 "NEW_PRIMARY": target_node,
5195 "NEW_SECONDARY": source_node,
5197 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5199 nl_post.append(source_node)
5200 return env, nl, nl_post
5203 class LUMoveInstance(LogicalUnit):
5204 """Move an instance by data-copying.
5207 HPATH = "instance-move"
5208 HTYPE = constants.HTYPE_INSTANCE
5209 _OP_REQP = ["instance_name", "target_node"]
5212 def CheckArguments(self):
5213 """Check the arguments.
5216 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5217 constants.DEFAULT_SHUTDOWN_TIMEOUT)
5219 def ExpandNames(self):
5220 self._ExpandAndLockInstance()
5221 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5222 self.op.target_node = target_node
5223 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5224 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5226 def DeclareLocks(self, level):
5227 if level == locking.LEVEL_NODE:
5228 self._LockInstancesNodes(primary_only=True)
5230 def BuildHooksEnv(self):
5233 This runs on master, primary and secondary nodes of the instance.
5237 "TARGET_NODE": self.op.target_node,
5238 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5240 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5241 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5242 self.op.target_node]
5245 def CheckPrereq(self):
5246 """Check prerequisites.
5248 This checks that the instance is in the cluster.
5251 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5252 assert self.instance is not None, \
5253 "Cannot retrieve locked instance %s" % self.op.instance_name
5255 node = self.cfg.GetNodeInfo(self.op.target_node)
5256 assert node is not None, \
5257 "Cannot retrieve locked node %s" % self.op.target_node
5259 self.target_node = target_node = node.name
5261 if target_node == instance.primary_node:
5262 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5263 (instance.name, target_node),
5266 bep = self.cfg.GetClusterInfo().FillBE(instance)
5268 for idx, dsk in enumerate(instance.disks):
5269 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5270 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5271 " cannot copy" % idx, errors.ECODE_STATE)
5273 _CheckNodeOnline(self, target_node)
5274 _CheckNodeNotDrained(self, target_node)
5276 if instance.admin_up:
5277 # check memory requirements on the secondary node
5278 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5279 instance.name, bep[constants.BE_MEMORY],
5280 instance.hypervisor)
5282 self.LogInfo("Not checking memory on the secondary node as"
5283 " instance will not be started")
5285 # check bridge existance
5286 _CheckInstanceBridgesExist(self, instance, node=target_node)
5288 def Exec(self, feedback_fn):
5289 """Move an instance.
5291 The move is done by shutting it down on its present node, copying
5292 the data over (slow) and starting it on the new node.
5295 instance = self.instance
5297 source_node = instance.primary_node
5298 target_node = self.target_node
5300 self.LogInfo("Shutting down instance %s on source node %s",
5301 instance.name, source_node)
5303 result = self.rpc.call_instance_shutdown(source_node, instance,
5304 self.shutdown_timeout)
5305 msg = result.fail_msg
5307 if self.op.ignore_consistency:
5308 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5309 " Proceeding anyway. Please make sure node"
5310 " %s is down. Error details: %s",
5311 instance.name, source_node, source_node, msg)
5313 raise errors.OpExecError("Could not shutdown instance %s on"
5315 (instance.name, source_node, msg))
5317 # create the target disks
5319 _CreateDisks(self, instance, target_node=target_node)
5320 except errors.OpExecError:
5321 self.LogWarning("Device creation failed, reverting...")
5323 _RemoveDisks(self, instance, target_node=target_node)
5325 self.cfg.ReleaseDRBDMinors(instance.name)
5328 cluster_name = self.cfg.GetClusterInfo().cluster_name
5331 # activate, get path, copy the data over
5332 for idx, disk in enumerate(instance.disks):
5333 self.LogInfo("Copying data for disk %d", idx)
5334 result = self.rpc.call_blockdev_assemble(target_node, disk,
5335 instance.name, True)
5337 self.LogWarning("Can't assemble newly created disk %d: %s",
5338 idx, result.fail_msg)
5339 errs.append(result.fail_msg)
5341 dev_path = result.payload
5342 result = self.rpc.call_blockdev_export(source_node, disk,
5343 target_node, dev_path,
5346 self.LogWarning("Can't copy data over for disk %d: %s",
5347 idx, result.fail_msg)
5348 errs.append(result.fail_msg)
5352 self.LogWarning("Some disks failed to copy, aborting")
5354 _RemoveDisks(self, instance, target_node=target_node)
5356 self.cfg.ReleaseDRBDMinors(instance.name)
5357 raise errors.OpExecError("Errors during disk copy: %s" %
5360 instance.primary_node = target_node
5361 self.cfg.Update(instance, feedback_fn)
5363 self.LogInfo("Removing the disks on the original node")
5364 _RemoveDisks(self, instance, target_node=source_node)
5366 # Only start the instance if it's marked as up
5367 if instance.admin_up:
5368 self.LogInfo("Starting instance %s on node %s",
5369 instance.name, target_node)
5371 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5372 ignore_secondaries=True)
5374 _ShutdownInstanceDisks(self, instance)
5375 raise errors.OpExecError("Can't activate the instance's disks")
5377 result = self.rpc.call_instance_start(target_node, instance, None, None)
5378 msg = result.fail_msg
5380 _ShutdownInstanceDisks(self, instance)
5381 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5382 (instance.name, target_node, msg))
5385 class LUMigrateNode(LogicalUnit):
5386 """Migrate all instances from a node.
5389 HPATH = "node-migrate"
5390 HTYPE = constants.HTYPE_NODE
5391 _OP_REQP = ["node_name", "live"]
5394 def ExpandNames(self):
5395 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5397 self.needed_locks = {
5398 locking.LEVEL_NODE: [self.op.node_name],
5401 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5403 # Create tasklets for migrating instances for all instances on this node
5407 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5408 logging.debug("Migrating instance %s", inst.name)
5409 names.append(inst.name)
5411 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5413 self.tasklets = tasklets
5415 # Declare instance locks
5416 self.needed_locks[locking.LEVEL_INSTANCE] = names
5418 def DeclareLocks(self, level):
5419 if level == locking.LEVEL_NODE:
5420 self._LockInstancesNodes()
5422 def BuildHooksEnv(self):
5425 This runs on the master, the primary and all the secondaries.
5429 "NODE_NAME": self.op.node_name,
5432 nl = [self.cfg.GetMasterNode()]
5434 return (env, nl, nl)
5437 class TLMigrateInstance(Tasklet):
5438 def __init__(self, lu, instance_name, live, cleanup):
5439 """Initializes this class.
5442 Tasklet.__init__(self, lu)
5445 self.instance_name = instance_name
5447 self.cleanup = cleanup
5449 def CheckPrereq(self):
5450 """Check prerequisites.
5452 This checks that the instance is in the cluster.
5455 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5456 instance = self.cfg.GetInstanceInfo(instance_name)
5457 assert instance is not None
5459 if instance.disk_template != constants.DT_DRBD8:
5460 raise errors.OpPrereqError("Instance's disk layout is not"
5461 " drbd8, cannot migrate.", errors.ECODE_STATE)
5463 secondary_nodes = instance.secondary_nodes
5464 if not secondary_nodes:
5465 raise errors.ConfigurationError("No secondary node but using"
5466 " drbd8 disk template")
5468 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5470 target_node = secondary_nodes[0]
5471 # check memory requirements on the secondary node
5472 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5473 instance.name, i_be[constants.BE_MEMORY],
5474 instance.hypervisor)
5476 # check bridge existance
5477 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5479 if not self.cleanup:
5480 _CheckNodeNotDrained(self.lu, target_node)
5481 result = self.rpc.call_instance_migratable(instance.primary_node,
5483 result.Raise("Can't migrate, please use failover",
5484 prereq=True, ecode=errors.ECODE_STATE)
5486 self.instance = instance
5488 def _WaitUntilSync(self):
5489 """Poll with custom rpc for disk sync.
5491 This uses our own step-based rpc call.
5494 self.feedback_fn("* wait until resync is done")
5498 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5500 self.instance.disks)
5502 for node, nres in result.items():
5503 nres.Raise("Cannot resync disks on node %s" % node)
5504 node_done, node_percent = nres.payload
5505 all_done = all_done and node_done
5506 if node_percent is not None:
5507 min_percent = min(min_percent, node_percent)
5509 if min_percent < 100:
5510 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5513 def _EnsureSecondary(self, node):
5514 """Demote a node to secondary.
5517 self.feedback_fn("* switching node %s to secondary mode" % node)
5519 for dev in self.instance.disks:
5520 self.cfg.SetDiskID(dev, node)
5522 result = self.rpc.call_blockdev_close(node, self.instance.name,
5523 self.instance.disks)
5524 result.Raise("Cannot change disk to secondary on node %s" % node)
5526 def _GoStandalone(self):
5527 """Disconnect from the network.
5530 self.feedback_fn("* changing into standalone mode")
5531 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5532 self.instance.disks)
5533 for node, nres in result.items():
5534 nres.Raise("Cannot disconnect disks node %s" % node)
5536 def _GoReconnect(self, multimaster):
5537 """Reconnect to the network.
5543 msg = "single-master"
5544 self.feedback_fn("* changing disks into %s mode" % msg)
5545 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5546 self.instance.disks,
5547 self.instance.name, multimaster)
5548 for node, nres in result.items():
5549 nres.Raise("Cannot change disks config on node %s" % node)
5551 def _ExecCleanup(self):
5552 """Try to cleanup after a failed migration.
5554 The cleanup is done by:
5555 - check that the instance is running only on one node
5556 (and update the config if needed)
5557 - change disks on its secondary node to secondary
5558 - wait until disks are fully synchronized
5559 - disconnect from the network
5560 - change disks into single-master mode
5561 - wait again until disks are fully synchronized
5564 instance = self.instance
5565 target_node = self.target_node
5566 source_node = self.source_node
5568 # check running on only one node
5569 self.feedback_fn("* checking where the instance actually runs"
5570 " (if this hangs, the hypervisor might be in"
5572 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5573 for node, result in ins_l.items():
5574 result.Raise("Can't contact node %s" % node)
5576 runningon_source = instance.name in ins_l[source_node].payload
5577 runningon_target = instance.name in ins_l[target_node].payload
5579 if runningon_source and runningon_target:
5580 raise errors.OpExecError("Instance seems to be running on two nodes,"
5581 " or the hypervisor is confused. You will have"
5582 " to ensure manually that it runs only on one"
5583 " and restart this operation.")
5585 if not (runningon_source or runningon_target):
5586 raise errors.OpExecError("Instance does not seem to be running at all."
5587 " In this case, it's safer to repair by"
5588 " running 'gnt-instance stop' to ensure disk"
5589 " shutdown, and then restarting it.")
5591 if runningon_target:
5592 # the migration has actually succeeded, we need to update the config
5593 self.feedback_fn("* instance running on secondary node (%s),"
5594 " updating config" % target_node)
5595 instance.primary_node = target_node
5596 self.cfg.Update(instance, self.feedback_fn)
5597 demoted_node = source_node
5599 self.feedback_fn("* instance confirmed to be running on its"
5600 " primary node (%s)" % source_node)
5601 demoted_node = target_node
5603 self._EnsureSecondary(demoted_node)
5605 self._WaitUntilSync()
5606 except errors.OpExecError:
5607 # we ignore here errors, since if the device is standalone, it
5608 # won't be able to sync
5610 self._GoStandalone()
5611 self._GoReconnect(False)
5612 self._WaitUntilSync()
5614 self.feedback_fn("* done")
5616 def _RevertDiskStatus(self):
5617 """Try to revert the disk status after a failed migration.
5620 target_node = self.target_node
5622 self._EnsureSecondary(target_node)
5623 self._GoStandalone()
5624 self._GoReconnect(False)
5625 self._WaitUntilSync()
5626 except errors.OpExecError, err:
5627 self.lu.LogWarning("Migration failed and I can't reconnect the"
5628 " drives: error '%s'\n"
5629 "Please look and recover the instance status" %
5632 def _AbortMigration(self):
5633 """Call the hypervisor code to abort a started migration.
5636 instance = self.instance
5637 target_node = self.target_node
5638 migration_info = self.migration_info
5640 abort_result = self.rpc.call_finalize_migration(target_node,
5644 abort_msg = abort_result.fail_msg
5646 logging.error("Aborting migration failed on target node %s: %s",
5647 target_node, abort_msg)
5648 # Don't raise an exception here, as we stil have to try to revert the
5649 # disk status, even if this step failed.
5651 def _ExecMigration(self):
5652 """Migrate an instance.
5654 The migrate is done by:
5655 - change the disks into dual-master mode
5656 - wait until disks are fully synchronized again
5657 - migrate the instance
5658 - change disks on the new secondary node (the old primary) to secondary
5659 - wait until disks are fully synchronized
5660 - change disks into single-master mode
5663 instance = self.instance
5664 target_node = self.target_node
5665 source_node = self.source_node
5667 self.feedback_fn("* checking disk consistency between source and target")
5668 for dev in instance.disks:
5669 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5670 raise errors.OpExecError("Disk %s is degraded or not fully"
5671 " synchronized on target node,"
5672 " aborting migrate." % dev.iv_name)
5674 # First get the migration information from the remote node
5675 result = self.rpc.call_migration_info(source_node, instance)
5676 msg = result.fail_msg
5678 log_err = ("Failed fetching source migration information from %s: %s" %
5680 logging.error(log_err)
5681 raise errors.OpExecError(log_err)
5683 self.migration_info = migration_info = result.payload
5685 # Then switch the disks to master/master mode
5686 self._EnsureSecondary(target_node)
5687 self._GoStandalone()
5688 self._GoReconnect(True)
5689 self._WaitUntilSync()
5691 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5692 result = self.rpc.call_accept_instance(target_node,
5695 self.nodes_ip[target_node])
5697 msg = result.fail_msg
5699 logging.error("Instance pre-migration failed, trying to revert"
5700 " disk status: %s", msg)
5701 self.feedback_fn("Pre-migration failed, aborting")
5702 self._AbortMigration()
5703 self._RevertDiskStatus()
5704 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5705 (instance.name, msg))
5707 self.feedback_fn("* migrating instance to %s" % target_node)
5709 result = self.rpc.call_instance_migrate(source_node, instance,
5710 self.nodes_ip[target_node],
5712 msg = result.fail_msg
5714 logging.error("Instance migration failed, trying to revert"
5715 " disk status: %s", msg)
5716 self.feedback_fn("Migration failed, aborting")
5717 self._AbortMigration()
5718 self._RevertDiskStatus()
5719 raise errors.OpExecError("Could not migrate instance %s: %s" %
5720 (instance.name, msg))
5723 instance.primary_node = target_node
5724 # distribute new instance config to the other nodes
5725 self.cfg.Update(instance, self.feedback_fn)
5727 result = self.rpc.call_finalize_migration(target_node,
5731 msg = result.fail_msg
5733 logging.error("Instance migration succeeded, but finalization failed:"
5735 raise errors.OpExecError("Could not finalize instance migration: %s" %
5738 self._EnsureSecondary(source_node)
5739 self._WaitUntilSync()
5740 self._GoStandalone()
5741 self._GoReconnect(False)
5742 self._WaitUntilSync()
5744 self.feedback_fn("* done")
5746 def Exec(self, feedback_fn):
5747 """Perform the migration.
5750 feedback_fn("Migrating instance %s" % self.instance.name)
5752 self.feedback_fn = feedback_fn
5754 self.source_node = self.instance.primary_node
5755 self.target_node = self.instance.secondary_nodes[0]
5756 self.all_nodes = [self.source_node, self.target_node]
5758 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5759 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5763 return self._ExecCleanup()
5765 return self._ExecMigration()
5768 def _CreateBlockDev(lu, node, instance, device, force_create,
5770 """Create a tree of block devices on a given node.
5772 If this device type has to be created on secondaries, create it and
5775 If not, just recurse to children keeping the same 'force' value.
5777 @param lu: the lu on whose behalf we execute
5778 @param node: the node on which to create the device
5779 @type instance: L{objects.Instance}
5780 @param instance: the instance which owns the device
5781 @type device: L{objects.Disk}
5782 @param device: the device to create
5783 @type force_create: boolean
5784 @param force_create: whether to force creation of this device; this
5785 will be change to True whenever we find a device which has
5786 CreateOnSecondary() attribute
5787 @param info: the extra 'metadata' we should attach to the device
5788 (this will be represented as a LVM tag)
5789 @type force_open: boolean
5790 @param force_open: this parameter will be passes to the
5791 L{backend.BlockdevCreate} function where it specifies
5792 whether we run on primary or not, and it affects both
5793 the child assembly and the device own Open() execution
5796 if device.CreateOnSecondary():
5800 for child in device.children:
5801 _CreateBlockDev(lu, node, instance, child, force_create,
5804 if not force_create:
5807 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5810 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5811 """Create a single block device on a given node.
5813 This will not recurse over children of the device, so they must be
5816 @param lu: the lu on whose behalf we execute
5817 @param node: the node on which to create the device
5818 @type instance: L{objects.Instance}
5819 @param instance: the instance which owns the device
5820 @type device: L{objects.Disk}
5821 @param device: the device to create
5822 @param info: the extra 'metadata' we should attach to the device
5823 (this will be represented as a LVM tag)
5824 @type force_open: boolean
5825 @param force_open: this parameter will be passes to the
5826 L{backend.BlockdevCreate} function where it specifies
5827 whether we run on primary or not, and it affects both
5828 the child assembly and the device own Open() execution
5831 lu.cfg.SetDiskID(device, node)
5832 result = lu.rpc.call_blockdev_create(node, device, device.size,
5833 instance.name, force_open, info)
5834 result.Raise("Can't create block device %s on"
5835 " node %s for instance %s" % (device, node, instance.name))
5836 if device.physical_id is None:
5837 device.physical_id = result.payload
5840 def _GenerateUniqueNames(lu, exts):
5841 """Generate a suitable LV name.
5843 This will generate a logical volume name for the given instance.
5848 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5849 results.append("%s%s" % (new_id, val))
5853 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5855 """Generate a drbd8 device complete with its children.
5858 port = lu.cfg.AllocatePort()
5859 vgname = lu.cfg.GetVGName()
5860 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5861 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5862 logical_id=(vgname, names[0]))
5863 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5864 logical_id=(vgname, names[1]))
5865 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5866 logical_id=(primary, secondary, port,
5869 children=[dev_data, dev_meta],
5874 def _GenerateDiskTemplate(lu, template_name,
5875 instance_name, primary_node,
5876 secondary_nodes, disk_info,
5877 file_storage_dir, file_driver,
5879 """Generate the entire disk layout for a given template type.
5882 #TODO: compute space requirements
5884 vgname = lu.cfg.GetVGName()
5885 disk_count = len(disk_info)
5887 if template_name == constants.DT_DISKLESS:
5889 elif template_name == constants.DT_PLAIN:
5890 if len(secondary_nodes) != 0:
5891 raise errors.ProgrammerError("Wrong template configuration")
5893 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5894 for i in range(disk_count)])
5895 for idx, disk in enumerate(disk_info):
5896 disk_index = idx + base_index
5897 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5898 logical_id=(vgname, names[idx]),
5899 iv_name="disk/%d" % disk_index,
5901 disks.append(disk_dev)
5902 elif template_name == constants.DT_DRBD8:
5903 if len(secondary_nodes) != 1:
5904 raise errors.ProgrammerError("Wrong template configuration")
5905 remote_node = secondary_nodes[0]
5906 minors = lu.cfg.AllocateDRBDMinor(
5907 [primary_node, remote_node] * len(disk_info), instance_name)
5910 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5911 for i in range(disk_count)]):
5912 names.append(lv_prefix + "_data")
5913 names.append(lv_prefix + "_meta")
5914 for idx, disk in enumerate(disk_info):
5915 disk_index = idx + base_index
5916 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5917 disk["size"], names[idx*2:idx*2+2],
5918 "disk/%d" % disk_index,
5919 minors[idx*2], minors[idx*2+1])
5920 disk_dev.mode = disk["mode"]
5921 disks.append(disk_dev)
5922 elif template_name == constants.DT_FILE:
5923 if len(secondary_nodes) != 0:
5924 raise errors.ProgrammerError("Wrong template configuration")
5926 _RequireFileStorage()
5928 for idx, disk in enumerate(disk_info):
5929 disk_index = idx + base_index
5930 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5931 iv_name="disk/%d" % disk_index,
5932 logical_id=(file_driver,
5933 "%s/disk%d" % (file_storage_dir,
5936 disks.append(disk_dev)
5938 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5942 def _GetInstanceInfoText(instance):
5943 """Compute that text that should be added to the disk's metadata.
5946 return "originstname+%s" % instance.name
5949 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5950 """Create all disks for an instance.
5952 This abstracts away some work from AddInstance.
5954 @type lu: L{LogicalUnit}
5955 @param lu: the logical unit on whose behalf we execute
5956 @type instance: L{objects.Instance}
5957 @param instance: the instance whose disks we should create
5959 @param to_skip: list of indices to skip
5960 @type target_node: string
5961 @param target_node: if passed, overrides the target node for creation
5963 @return: the success of the creation
5966 info = _GetInstanceInfoText(instance)
5967 if target_node is None:
5968 pnode = instance.primary_node
5969 all_nodes = instance.all_nodes
5974 if instance.disk_template == constants.DT_FILE:
5975 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5976 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5978 result.Raise("Failed to create directory '%s' on"
5979 " node %s" % (file_storage_dir, pnode))
5981 # Note: this needs to be kept in sync with adding of disks in
5982 # LUSetInstanceParams
5983 for idx, device in enumerate(instance.disks):
5984 if to_skip and idx in to_skip:
5986 logging.info("Creating volume %s for instance %s",
5987 device.iv_name, instance.name)
5989 for node in all_nodes:
5990 f_create = node == pnode
5991 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5994 def _RemoveDisks(lu, instance, target_node=None):
5995 """Remove all disks for an instance.
5997 This abstracts away some work from `AddInstance()` and
5998 `RemoveInstance()`. Note that in case some of the devices couldn't
5999 be removed, the removal will continue with the other ones (compare
6000 with `_CreateDisks()`).
6002 @type lu: L{LogicalUnit}
6003 @param lu: the logical unit on whose behalf we execute
6004 @type instance: L{objects.Instance}
6005 @param instance: the instance whose disks we should remove
6006 @type target_node: string
6007 @param target_node: used to override the node on which to remove the disks
6009 @return: the success of the removal
6012 logging.info("Removing block devices for instance %s", instance.name)
6015 for device in instance.disks:
6017 edata = [(target_node, device)]
6019 edata = device.ComputeNodeTree(instance.primary_node)
6020 for node, disk in edata:
6021 lu.cfg.SetDiskID(disk, node)
6022 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6024 lu.LogWarning("Could not remove block device %s on node %s,"
6025 " continuing anyway: %s", device.iv_name, node, msg)
6028 if instance.disk_template == constants.DT_FILE:
6029 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6033 tgt = instance.primary_node
6034 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6036 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6037 file_storage_dir, instance.primary_node, result.fail_msg)
6043 def _ComputeDiskSize(disk_template, disks):
6044 """Compute disk size requirements in the volume group
6047 # Required free disk space as a function of disk and swap space
6049 constants.DT_DISKLESS: None,
6050 constants.DT_PLAIN: sum(d["size"] for d in disks),
6051 # 128 MB are added for drbd metadata for each disk
6052 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6053 constants.DT_FILE: None,
6056 if disk_template not in req_size_dict:
6057 raise errors.ProgrammerError("Disk template '%s' size requirement"
6058 " is unknown" % disk_template)
6060 return req_size_dict[disk_template]
6063 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6064 """Hypervisor parameter validation.
6066 This function abstract the hypervisor parameter validation to be
6067 used in both instance create and instance modify.
6069 @type lu: L{LogicalUnit}
6070 @param lu: the logical unit for which we check
6071 @type nodenames: list
6072 @param nodenames: the list of nodes on which we should check
6073 @type hvname: string
6074 @param hvname: the name of the hypervisor we should use
6075 @type hvparams: dict
6076 @param hvparams: the parameters which we need to check
6077 @raise errors.OpPrereqError: if the parameters are not valid
6080 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6083 for node in nodenames:
6087 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6090 class LUCreateInstance(LogicalUnit):
6091 """Create an instance.
6094 HPATH = "instance-add"
6095 HTYPE = constants.HTYPE_INSTANCE
6096 _OP_REQP = ["instance_name", "disks",
6098 "wait_for_sync", "ip_check", "nics",
6099 "hvparams", "beparams"]
6102 def CheckArguments(self):
6106 # set optional parameters to none if they don't exist
6107 for attr in ["pnode", "snode", "iallocator", "hypervisor",
6108 "disk_template", "identify_defaults"]:
6109 if not hasattr(self.op, attr):
6110 setattr(self.op, attr, None)
6112 # do not require name_check to ease forward/backward compatibility
6114 if not hasattr(self.op, "name_check"):
6115 self.op.name_check = True
6116 if not hasattr(self.op, "no_install"):
6117 self.op.no_install = False
6118 if self.op.no_install and self.op.start:
6119 self.LogInfo("No-installation mode selected, disabling startup")
6120 self.op.start = False
6121 # validate/normalize the instance name
6122 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6123 if self.op.ip_check and not self.op.name_check:
6124 # TODO: make the ip check more flexible and not depend on the name check
6125 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6128 # check nics' parameter names
6129 for nic in self.op.nics:
6130 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6132 # check disks. parameter names and consistent adopt/no-adopt strategy
6133 has_adopt = has_no_adopt = False
6134 for disk in self.op.disks:
6135 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6140 if has_adopt and has_no_adopt:
6141 raise errors.OpPrereqError("Either all disks are adopted or none is",
6144 if self.op.disk_template != constants.DT_PLAIN:
6145 raise errors.OpPrereqError("Disk adoption is only supported for the"
6146 " 'plain' disk template",
6148 if self.op.iallocator is not None:
6149 raise errors.OpPrereqError("Disk adoption not allowed with an"
6150 " iallocator script", errors.ECODE_INVAL)
6151 if self.op.mode == constants.INSTANCE_IMPORT:
6152 raise errors.OpPrereqError("Disk adoption not allowed for"
6153 " instance import", errors.ECODE_INVAL)
6155 self.adopt_disks = has_adopt
6157 # verify creation mode
6158 if self.op.mode not in constants.INSTANCE_CREATE_MODES:
6159 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6160 self.op.mode, errors.ECODE_INVAL)
6162 # instance name verification
6163 if self.op.name_check:
6164 self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6165 self.op.instance_name = self.hostname1.name
6166 # used in CheckPrereq for ip ping check
6167 self.check_ip = self.hostname1.ip
6168 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6169 raise errors.OpPrereqError("Remote imports require names to be checked" %
6172 self.check_ip = None
6174 # file storage checks
6175 if (self.op.file_driver and
6176 not self.op.file_driver in constants.FILE_DRIVER):
6177 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6178 self.op.file_driver, errors.ECODE_INVAL)
6180 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6181 raise errors.OpPrereqError("File storage directory path not absolute",
6184 ### Node/iallocator related checks
6185 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6186 raise errors.OpPrereqError("One and only one of iallocator and primary"
6187 " node must be given",
6190 self._cds = _GetClusterDomainSecret()
6192 if self.op.mode == constants.INSTANCE_IMPORT:
6193 # On import force_variant must be True, because if we forced it at
6194 # initial install, our only chance when importing it back is that it
6196 self.op.force_variant = True
6198 if self.op.no_install:
6199 self.LogInfo("No-installation mode has no effect during import")
6201 elif self.op.mode == constants.INSTANCE_CREATE:
6202 if getattr(self.op, "os_type", None) is None:
6203 raise errors.OpPrereqError("No guest OS specified",
6205 self.op.force_variant = getattr(self.op, "force_variant", False)
6206 if self.op.disk_template is None:
6207 raise errors.OpPrereqError("No disk template specified",
6210 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6211 # Check handshake to ensure both clusters have the same domain secret
6212 src_handshake = getattr(self.op, "source_handshake", None)
6213 if not src_handshake:
6214 raise errors.OpPrereqError("Missing source handshake",
6217 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6220 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6223 # Load and check source CA
6224 self.source_x509_ca_pem = getattr(self.op, "source_x509_ca", None)
6225 if not self.source_x509_ca_pem:
6226 raise errors.OpPrereqError("Missing source X509 CA",
6230 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6232 except OpenSSL.crypto.Error, err:
6233 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6234 (err, ), errors.ECODE_INVAL)
6236 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6237 if errcode is not None:
6238 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6241 self.source_x509_ca = cert
6243 src_instance_name = getattr(self.op, "source_instance_name", None)
6244 if not src_instance_name:
6245 raise errors.OpPrereqError("Missing source instance name",
6248 self.source_instance_name = \
6249 utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name
6252 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6253 self.op.mode, errors.ECODE_INVAL)
6255 def ExpandNames(self):
6256 """ExpandNames for CreateInstance.
6258 Figure out the right locks for instance creation.
6261 self.needed_locks = {}
6263 instance_name = self.op.instance_name
6264 # this is just a preventive check, but someone might still add this
6265 # instance in the meantime, and creation will fail at lock-add time
6266 if instance_name in self.cfg.GetInstanceList():
6267 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6268 instance_name, errors.ECODE_EXISTS)
6270 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6272 if self.op.iallocator:
6273 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6275 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6276 nodelist = [self.op.pnode]
6277 if self.op.snode is not None:
6278 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6279 nodelist.append(self.op.snode)
6280 self.needed_locks[locking.LEVEL_NODE] = nodelist
6282 # in case of import lock the source node too
6283 if self.op.mode == constants.INSTANCE_IMPORT:
6284 src_node = getattr(self.op, "src_node", None)
6285 src_path = getattr(self.op, "src_path", None)
6287 if src_path is None:
6288 self.op.src_path = src_path = self.op.instance_name
6290 if src_node is None:
6291 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6292 self.op.src_node = None
6293 if os.path.isabs(src_path):
6294 raise errors.OpPrereqError("Importing an instance from an absolute"
6295 " path requires a source node option.",
6298 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6299 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6300 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6301 if not os.path.isabs(src_path):
6302 self.op.src_path = src_path = \
6303 utils.PathJoin(constants.EXPORT_DIR, src_path)
6305 def _RunAllocator(self):
6306 """Run the allocator based on input opcode.
6309 nics = [n.ToDict() for n in self.nics]
6310 ial = IAllocator(self.cfg, self.rpc,
6311 mode=constants.IALLOCATOR_MODE_ALLOC,
6312 name=self.op.instance_name,
6313 disk_template=self.op.disk_template,
6316 vcpus=self.be_full[constants.BE_VCPUS],
6317 mem_size=self.be_full[constants.BE_MEMORY],
6320 hypervisor=self.op.hypervisor,
6323 ial.Run(self.op.iallocator)
6326 raise errors.OpPrereqError("Can't compute nodes using"
6327 " iallocator '%s': %s" %
6328 (self.op.iallocator, ial.info),
6330 if len(ial.result) != ial.required_nodes:
6331 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6332 " of nodes (%s), required %s" %
6333 (self.op.iallocator, len(ial.result),
6334 ial.required_nodes), errors.ECODE_FAULT)
6335 self.op.pnode = ial.result[0]
6336 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6337 self.op.instance_name, self.op.iallocator,
6338 utils.CommaJoin(ial.result))
6339 if ial.required_nodes == 2:
6340 self.op.snode = ial.result[1]
6342 def BuildHooksEnv(self):
6345 This runs on master, primary and secondary nodes of the instance.
6349 "ADD_MODE": self.op.mode,
6351 if self.op.mode == constants.INSTANCE_IMPORT:
6352 env["SRC_NODE"] = self.op.src_node
6353 env["SRC_PATH"] = self.op.src_path
6354 env["SRC_IMAGES"] = self.src_images
6356 env.update(_BuildInstanceHookEnv(
6357 name=self.op.instance_name,
6358 primary_node=self.op.pnode,
6359 secondary_nodes=self.secondaries,
6360 status=self.op.start,
6361 os_type=self.op.os_type,
6362 memory=self.be_full[constants.BE_MEMORY],
6363 vcpus=self.be_full[constants.BE_VCPUS],
6364 nics=_NICListToTuple(self, self.nics),
6365 disk_template=self.op.disk_template,
6366 disks=[(d["size"], d["mode"]) for d in self.disks],
6369 hypervisor_name=self.op.hypervisor,
6372 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6376 def _ReadExportInfo(self):
6377 """Reads the export information from disk.
6379 It will override the opcode source node and path with the actual
6380 information, if these two were not specified before.
6382 @return: the export information
6385 assert self.op.mode == constants.INSTANCE_IMPORT
6387 src_node = self.op.src_node
6388 src_path = self.op.src_path
6390 if src_node is None:
6391 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6392 exp_list = self.rpc.call_export_list(locked_nodes)
6394 for node in exp_list:
6395 if exp_list[node].fail_msg:
6397 if src_path in exp_list[node].payload:
6399 self.op.src_node = src_node = node
6400 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6404 raise errors.OpPrereqError("No export found for relative path %s" %
6405 src_path, errors.ECODE_INVAL)
6407 _CheckNodeOnline(self, src_node)
6408 result = self.rpc.call_export_info(src_node, src_path)
6409 result.Raise("No export or invalid export found in dir %s" % src_path)
6411 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6412 if not export_info.has_section(constants.INISECT_EXP):
6413 raise errors.ProgrammerError("Corrupted export config",
6414 errors.ECODE_ENVIRON)
6416 ei_version = export_info.get(constants.INISECT_EXP, "version")
6417 if (int(ei_version) != constants.EXPORT_VERSION):
6418 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6419 (ei_version, constants.EXPORT_VERSION),
6420 errors.ECODE_ENVIRON)
6423 def _ReadExportParams(self, einfo):
6424 """Use export parameters as defaults.
6426 In case the opcode doesn't specify (as in override) some instance
6427 parameters, then try to use them from the export information, if
6431 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6433 if self.op.disk_template is None:
6434 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6435 self.op.disk_template = einfo.get(constants.INISECT_INS,
6438 raise errors.OpPrereqError("No disk template specified and the export"
6439 " is missing the disk_template information",
6442 if not self.op.disks:
6443 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6445 # TODO: import the disk iv_name too
6446 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6447 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6448 disks.append({"size": disk_sz})
6449 self.op.disks = disks
6451 raise errors.OpPrereqError("No disk info specified and the export"
6452 " is missing the disk information",
6455 if (not self.op.nics and
6456 einfo.has_option(constants.INISECT_INS, "nic_count")):
6458 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6460 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6461 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6466 if (self.op.hypervisor is None and
6467 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6468 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6469 if einfo.has_section(constants.INISECT_HYP):
6470 # use the export parameters but do not override the ones
6471 # specified by the user
6472 for name, value in einfo.items(constants.INISECT_HYP):
6473 if name not in self.op.hvparams:
6474 self.op.hvparams[name] = value
6476 if einfo.has_section(constants.INISECT_BEP):
6477 # use the parameters, without overriding
6478 for name, value in einfo.items(constants.INISECT_BEP):
6479 if name not in self.op.beparams:
6480 self.op.beparams[name] = value
6482 # try to read the parameters old style, from the main section
6483 for name in constants.BES_PARAMETERS:
6484 if (name not in self.op.beparams and
6485 einfo.has_option(constants.INISECT_INS, name)):
6486 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6488 def _RevertToDefaults(self, cluster):
6489 """Revert the instance parameters to the default values.
6493 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6494 for name in self.op.hvparams.keys():
6495 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6496 del self.op.hvparams[name]
6498 be_defs = cluster.SimpleFillBE({})
6499 for name in self.op.beparams.keys():
6500 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6501 del self.op.beparams[name]
6503 nic_defs = cluster.SimpleFillNIC({})
6504 for nic in self.op.nics:
6505 for name in constants.NICS_PARAMETERS:
6506 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6509 def CheckPrereq(self):
6510 """Check prerequisites.
6513 if self.op.mode == constants.INSTANCE_IMPORT:
6514 export_info = self._ReadExportInfo()
6515 self._ReadExportParams(export_info)
6517 _CheckDiskTemplate(self.op.disk_template)
6519 if (not self.cfg.GetVGName() and
6520 self.op.disk_template not in constants.DTS_NOT_LVM):
6521 raise errors.OpPrereqError("Cluster does not support lvm-based"
6522 " instances", errors.ECODE_STATE)
6524 if self.op.hypervisor is None:
6525 self.op.hypervisor = self.cfg.GetHypervisorType()
6527 cluster = self.cfg.GetClusterInfo()
6528 enabled_hvs = cluster.enabled_hypervisors
6529 if self.op.hypervisor not in enabled_hvs:
6530 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6531 " cluster (%s)" % (self.op.hypervisor,
6532 ",".join(enabled_hvs)),
6535 # check hypervisor parameter syntax (locally)
6536 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6537 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6539 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6540 hv_type.CheckParameterSyntax(filled_hvp)
6541 self.hv_full = filled_hvp
6542 # check that we don't specify global parameters on an instance
6543 _CheckGlobalHvParams(self.op.hvparams)
6545 # fill and remember the beparams dict
6546 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6547 self.be_full = cluster.SimpleFillBE(self.op.beparams)
6549 # now that hvp/bep are in final format, let's reset to defaults,
6551 if self.op.identify_defaults:
6552 self._RevertToDefaults(cluster)
6556 for idx, nic in enumerate(self.op.nics):
6557 nic_mode_req = nic.get("mode", None)
6558 nic_mode = nic_mode_req
6559 if nic_mode is None:
6560 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6562 # in routed mode, for the first nic, the default ip is 'auto'
6563 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6564 default_ip_mode = constants.VALUE_AUTO
6566 default_ip_mode = constants.VALUE_NONE
6568 # ip validity checks
6569 ip = nic.get("ip", default_ip_mode)
6570 if ip is None or ip.lower() == constants.VALUE_NONE:
6572 elif ip.lower() == constants.VALUE_AUTO:
6573 if not self.op.name_check:
6574 raise errors.OpPrereqError("IP address set to auto but name checks"
6575 " have been skipped. Aborting.",
6577 nic_ip = self.hostname1.ip
6579 if not utils.IsValidIP(ip):
6580 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6581 " like a valid IP" % ip,
6585 # TODO: check the ip address for uniqueness
6586 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6587 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6590 # MAC address verification
6591 mac = nic.get("mac", constants.VALUE_AUTO)
6592 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6593 mac = utils.NormalizeAndValidateMac(mac)
6596 self.cfg.ReserveMAC(mac, self.proc.GetECId())
6597 except errors.ReservationError:
6598 raise errors.OpPrereqError("MAC address %s already in use"
6599 " in cluster" % mac,
6600 errors.ECODE_NOTUNIQUE)
6602 # bridge verification
6603 bridge = nic.get("bridge", None)
6604 link = nic.get("link", None)
6606 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6607 " at the same time", errors.ECODE_INVAL)
6608 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6609 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6616 nicparams[constants.NIC_MODE] = nic_mode_req
6618 nicparams[constants.NIC_LINK] = link
6620 check_params = cluster.SimpleFillNIC(nicparams)
6621 objects.NIC.CheckParameterSyntax(check_params)
6622 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6624 # disk checks/pre-build
6626 for disk in self.op.disks:
6627 mode = disk.get("mode", constants.DISK_RDWR)
6628 if mode not in constants.DISK_ACCESS_SET:
6629 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6630 mode, errors.ECODE_INVAL)
6631 size = disk.get("size", None)
6633 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6636 except (TypeError, ValueError):
6637 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6639 new_disk = {"size": size, "mode": mode}
6641 new_disk["adopt"] = disk["adopt"]
6642 self.disks.append(new_disk)
6644 if self.op.mode == constants.INSTANCE_IMPORT:
6646 # Check that the new instance doesn't have less disks than the export
6647 instance_disks = len(self.disks)
6648 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6649 if instance_disks < export_disks:
6650 raise errors.OpPrereqError("Not enough disks to import."
6651 " (instance: %d, export: %d)" %
6652 (instance_disks, export_disks),
6656 for idx in range(export_disks):
6657 option = 'disk%d_dump' % idx
6658 if export_info.has_option(constants.INISECT_INS, option):
6659 # FIXME: are the old os-es, disk sizes, etc. useful?
6660 export_name = export_info.get(constants.INISECT_INS, option)
6661 image = utils.PathJoin(self.op.src_path, export_name)
6662 disk_images.append(image)
6664 disk_images.append(False)
6666 self.src_images = disk_images
6668 old_name = export_info.get(constants.INISECT_INS, 'name')
6670 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6671 except (TypeError, ValueError), err:
6672 raise errors.OpPrereqError("Invalid export file, nic_count is not"
6673 " an integer: %s" % str(err),
6675 if self.op.instance_name == old_name:
6676 for idx, nic in enumerate(self.nics):
6677 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6678 nic_mac_ini = 'nic%d_mac' % idx
6679 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6681 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6683 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6684 if self.op.ip_check:
6685 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6686 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6687 (self.check_ip, self.op.instance_name),
6688 errors.ECODE_NOTUNIQUE)
6690 #### mac address generation
6691 # By generating here the mac address both the allocator and the hooks get
6692 # the real final mac address rather than the 'auto' or 'generate' value.
6693 # There is a race condition between the generation and the instance object
6694 # creation, which means that we know the mac is valid now, but we're not
6695 # sure it will be when we actually add the instance. If things go bad
6696 # adding the instance will abort because of a duplicate mac, and the
6697 # creation job will fail.
6698 for nic in self.nics:
6699 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6700 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6704 if self.op.iallocator is not None:
6705 self._RunAllocator()
6707 #### node related checks
6709 # check primary node
6710 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6711 assert self.pnode is not None, \
6712 "Cannot retrieve locked node %s" % self.op.pnode
6714 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6715 pnode.name, errors.ECODE_STATE)
6717 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6718 pnode.name, errors.ECODE_STATE)
6720 self.secondaries = []
6722 # mirror node verification
6723 if self.op.disk_template in constants.DTS_NET_MIRROR:
6724 if self.op.snode is None:
6725 raise errors.OpPrereqError("The networked disk templates need"
6726 " a mirror node", errors.ECODE_INVAL)
6727 if self.op.snode == pnode.name:
6728 raise errors.OpPrereqError("The secondary node cannot be the"
6729 " primary node.", errors.ECODE_INVAL)
6730 _CheckNodeOnline(self, self.op.snode)
6731 _CheckNodeNotDrained(self, self.op.snode)
6732 self.secondaries.append(self.op.snode)
6734 nodenames = [pnode.name] + self.secondaries
6736 req_size = _ComputeDiskSize(self.op.disk_template,
6739 # Check lv size requirements, if not adopting
6740 if req_size is not None and not self.adopt_disks:
6741 _CheckNodesFreeDisk(self, nodenames, req_size)
6743 if self.adopt_disks: # instead, we must check the adoption data
6744 all_lvs = set([i["adopt"] for i in self.disks])
6745 if len(all_lvs) != len(self.disks):
6746 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6748 for lv_name in all_lvs:
6750 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6751 except errors.ReservationError:
6752 raise errors.OpPrereqError("LV named %s used by another instance" %
6753 lv_name, errors.ECODE_NOTUNIQUE)
6755 node_lvs = self.rpc.call_lv_list([pnode.name],
6756 self.cfg.GetVGName())[pnode.name]
6757 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6758 node_lvs = node_lvs.payload
6759 delta = all_lvs.difference(node_lvs.keys())
6761 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6762 utils.CommaJoin(delta),
6764 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6766 raise errors.OpPrereqError("Online logical volumes found, cannot"
6767 " adopt: %s" % utils.CommaJoin(online_lvs),
6769 # update the size of disk based on what is found
6770 for dsk in self.disks:
6771 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6773 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6775 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6777 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6779 # memory check on primary node
6781 _CheckNodeFreeMemory(self, self.pnode.name,
6782 "creating instance %s" % self.op.instance_name,
6783 self.be_full[constants.BE_MEMORY],
6786 self.dry_run_result = list(nodenames)
6788 def Exec(self, feedback_fn):
6789 """Create and add the instance to the cluster.
6792 instance = self.op.instance_name
6793 pnode_name = self.pnode.name
6795 ht_kind = self.op.hypervisor
6796 if ht_kind in constants.HTS_REQ_PORT:
6797 network_port = self.cfg.AllocatePort()
6801 if constants.ENABLE_FILE_STORAGE:
6802 # this is needed because os.path.join does not accept None arguments
6803 if self.op.file_storage_dir is None:
6804 string_file_storage_dir = ""
6806 string_file_storage_dir = self.op.file_storage_dir
6808 # build the full file storage dir path
6809 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6810 string_file_storage_dir, instance)
6812 file_storage_dir = ""
6814 disks = _GenerateDiskTemplate(self,
6815 self.op.disk_template,
6816 instance, pnode_name,
6820 self.op.file_driver,
6823 iobj = objects.Instance(name=instance, os=self.op.os_type,
6824 primary_node=pnode_name,
6825 nics=self.nics, disks=disks,
6826 disk_template=self.op.disk_template,
6828 network_port=network_port,
6829 beparams=self.op.beparams,
6830 hvparams=self.op.hvparams,
6831 hypervisor=self.op.hypervisor,
6834 if self.adopt_disks:
6835 # rename LVs to the newly-generated names; we need to construct
6836 # 'fake' LV disks with the old data, plus the new unique_id
6837 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6839 for t_dsk, a_dsk in zip (tmp_disks, self.disks):
6840 rename_to.append(t_dsk.logical_id)
6841 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6842 self.cfg.SetDiskID(t_dsk, pnode_name)
6843 result = self.rpc.call_blockdev_rename(pnode_name,
6844 zip(tmp_disks, rename_to))
6845 result.Raise("Failed to rename adoped LVs")
6847 feedback_fn("* creating instance disks...")
6849 _CreateDisks(self, iobj)
6850 except errors.OpExecError:
6851 self.LogWarning("Device creation failed, reverting...")
6853 _RemoveDisks(self, iobj)
6855 self.cfg.ReleaseDRBDMinors(instance)
6858 feedback_fn("adding instance %s to cluster config" % instance)
6860 self.cfg.AddInstance(iobj, self.proc.GetECId())
6862 # Declare that we don't want to remove the instance lock anymore, as we've
6863 # added the instance to the config
6864 del self.remove_locks[locking.LEVEL_INSTANCE]
6865 # Unlock all the nodes
6866 if self.op.mode == constants.INSTANCE_IMPORT:
6867 nodes_keep = [self.op.src_node]
6868 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6869 if node != self.op.src_node]
6870 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6871 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6873 self.context.glm.release(locking.LEVEL_NODE)
6874 del self.acquired_locks[locking.LEVEL_NODE]
6876 if self.op.wait_for_sync:
6877 disk_abort = not _WaitForSync(self, iobj)
6878 elif iobj.disk_template in constants.DTS_NET_MIRROR:
6879 # make sure the disks are not degraded (still sync-ing is ok)
6881 feedback_fn("* checking mirrors status")
6882 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6887 _RemoveDisks(self, iobj)
6888 self.cfg.RemoveInstance(iobj.name)
6889 # Make sure the instance lock gets removed
6890 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6891 raise errors.OpExecError("There are some degraded disks for"
6894 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6895 if self.op.mode == constants.INSTANCE_CREATE:
6896 if not self.op.no_install:
6897 feedback_fn("* running the instance OS create scripts...")
6898 # FIXME: pass debug option from opcode to backend
6899 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6900 self.op.debug_level)
6901 result.Raise("Could not add os for instance %s"
6902 " on node %s" % (instance, pnode_name))
6904 elif self.op.mode == constants.INSTANCE_IMPORT:
6905 feedback_fn("* running the instance OS import scripts...")
6909 for idx, image in enumerate(self.src_images):
6913 # FIXME: pass debug option from opcode to backend
6914 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
6915 constants.IEIO_FILE, (image, ),
6916 constants.IEIO_SCRIPT,
6917 (iobj.disks[idx], idx),
6919 transfers.append(dt)
6922 masterd.instance.TransferInstanceData(self, feedback_fn,
6923 self.op.src_node, pnode_name,
6924 self.pnode.secondary_ip,
6926 if not compat.all(import_result):
6927 self.LogWarning("Some disks for instance %s on node %s were not"
6928 " imported successfully" % (instance, pnode_name))
6930 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6931 feedback_fn("* preparing remote import...")
6932 connect_timeout = constants.RIE_CONNECT_TIMEOUT
6933 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
6935 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
6936 self.source_x509_ca,
6937 self._cds, timeouts)
6938 if not compat.all(disk_results):
6939 # TODO: Should the instance still be started, even if some disks
6940 # failed to import (valid for local imports, too)?
6941 self.LogWarning("Some disks for instance %s on node %s were not"
6942 " imported successfully" % (instance, pnode_name))
6944 # Run rename script on newly imported instance
6945 assert iobj.name == instance
6946 feedback_fn("Running rename script for %s" % instance)
6947 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
6948 self.source_instance_name,
6949 self.op.debug_level)
6951 self.LogWarning("Failed to run rename script for %s on node"
6952 " %s: %s" % (instance, pnode_name, result.fail_msg))
6955 # also checked in the prereq part
6956 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6960 iobj.admin_up = True
6961 self.cfg.Update(iobj, feedback_fn)
6962 logging.info("Starting instance %s on node %s", instance, pnode_name)
6963 feedback_fn("* starting instance...")
6964 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6965 result.Raise("Could not start instance")
6967 return list(iobj.all_nodes)
6970 class LUConnectConsole(NoHooksLU):
6971 """Connect to an instance's console.
6973 This is somewhat special in that it returns the command line that
6974 you need to run on the master node in order to connect to the
6978 _OP_REQP = ["instance_name"]
6981 def ExpandNames(self):
6982 self._ExpandAndLockInstance()
6984 def CheckPrereq(self):
6985 """Check prerequisites.
6987 This checks that the instance is in the cluster.
6990 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6991 assert self.instance is not None, \
6992 "Cannot retrieve locked instance %s" % self.op.instance_name
6993 _CheckNodeOnline(self, self.instance.primary_node)
6995 def Exec(self, feedback_fn):
6996 """Connect to the console of an instance
6999 instance = self.instance
7000 node = instance.primary_node
7002 node_insts = self.rpc.call_instance_list([node],
7003 [instance.hypervisor])[node]
7004 node_insts.Raise("Can't get node information from %s" % node)
7006 if instance.name not in node_insts.payload:
7007 raise errors.OpExecError("Instance %s is not running." % instance.name)
7009 logging.debug("Connecting to console of %s on %s", instance.name, node)
7011 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7012 cluster = self.cfg.GetClusterInfo()
7013 # beparams and hvparams are passed separately, to avoid editing the
7014 # instance and then saving the defaults in the instance itself.
7015 hvparams = cluster.FillHV(instance)
7016 beparams = cluster.FillBE(instance)
7017 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7020 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7023 class LUReplaceDisks(LogicalUnit):
7024 """Replace the disks of an instance.
7027 HPATH = "mirrors-replace"
7028 HTYPE = constants.HTYPE_INSTANCE
7029 _OP_REQP = ["instance_name", "mode", "disks"]
7032 def CheckArguments(self):
7033 if not hasattr(self.op, "remote_node"):
7034 self.op.remote_node = None
7035 if not hasattr(self.op, "iallocator"):
7036 self.op.iallocator = None
7037 if not hasattr(self.op, "early_release"):
7038 self.op.early_release = False
7040 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7043 def ExpandNames(self):
7044 self._ExpandAndLockInstance()
7046 if self.op.iallocator is not None:
7047 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7049 elif self.op.remote_node is not None:
7050 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7051 self.op.remote_node = remote_node
7053 # Warning: do not remove the locking of the new secondary here
7054 # unless DRBD8.AddChildren is changed to work in parallel;
7055 # currently it doesn't since parallel invocations of
7056 # FindUnusedMinor will conflict
7057 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7058 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7061 self.needed_locks[locking.LEVEL_NODE] = []
7062 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7064 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7065 self.op.iallocator, self.op.remote_node,
7066 self.op.disks, False, self.op.early_release)
7068 self.tasklets = [self.replacer]
7070 def DeclareLocks(self, level):
7071 # If we're not already locking all nodes in the set we have to declare the
7072 # instance's primary/secondary nodes.
7073 if (level == locking.LEVEL_NODE and
7074 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7075 self._LockInstancesNodes()
7077 def BuildHooksEnv(self):
7080 This runs on the master, the primary and all the secondaries.
7083 instance = self.replacer.instance
7085 "MODE": self.op.mode,
7086 "NEW_SECONDARY": self.op.remote_node,
7087 "OLD_SECONDARY": instance.secondary_nodes[0],
7089 env.update(_BuildInstanceHookEnvByObject(self, instance))
7091 self.cfg.GetMasterNode(),
7092 instance.primary_node,
7094 if self.op.remote_node is not None:
7095 nl.append(self.op.remote_node)
7099 class LUEvacuateNode(LogicalUnit):
7100 """Relocate the secondary instances from a node.
7103 HPATH = "node-evacuate"
7104 HTYPE = constants.HTYPE_NODE
7105 _OP_REQP = ["node_name"]
7108 def CheckArguments(self):
7109 if not hasattr(self.op, "remote_node"):
7110 self.op.remote_node = None
7111 if not hasattr(self.op, "iallocator"):
7112 self.op.iallocator = None
7113 if not hasattr(self.op, "early_release"):
7114 self.op.early_release = False
7116 TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
7117 self.op.remote_node,
7120 def ExpandNames(self):
7121 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7123 self.needed_locks = {}
7125 # Declare node locks
7126 if self.op.iallocator is not None:
7127 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7129 elif self.op.remote_node is not None:
7130 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7132 # Warning: do not remove the locking of the new secondary here
7133 # unless DRBD8.AddChildren is changed to work in parallel;
7134 # currently it doesn't since parallel invocations of
7135 # FindUnusedMinor will conflict
7136 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
7137 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7140 raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
7142 # Create tasklets for replacing disks for all secondary instances on this
7147 for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
7148 logging.debug("Replacing disks for instance %s", inst.name)
7149 names.append(inst.name)
7151 replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
7152 self.op.iallocator, self.op.remote_node, [],
7153 True, self.op.early_release)
7154 tasklets.append(replacer)
7156 self.tasklets = tasklets
7157 self.instance_names = names
7159 # Declare instance locks
7160 self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
7162 def DeclareLocks(self, level):
7163 # If we're not already locking all nodes in the set we have to declare the
7164 # instance's primary/secondary nodes.
7165 if (level == locking.LEVEL_NODE and
7166 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7167 self._LockInstancesNodes()
7169 def BuildHooksEnv(self):
7172 This runs on the master, the primary and all the secondaries.
7176 "NODE_NAME": self.op.node_name,
7179 nl = [self.cfg.GetMasterNode()]
7181 if self.op.remote_node is not None:
7182 env["NEW_SECONDARY"] = self.op.remote_node
7183 nl.append(self.op.remote_node)
7185 return (env, nl, nl)
7188 class TLReplaceDisks(Tasklet):
7189 """Replaces disks for an instance.
7191 Note: Locking is not within the scope of this class.
7194 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7195 disks, delay_iallocator, early_release):
7196 """Initializes this class.
7199 Tasklet.__init__(self, lu)
7202 self.instance_name = instance_name
7204 self.iallocator_name = iallocator_name
7205 self.remote_node = remote_node
7207 self.delay_iallocator = delay_iallocator
7208 self.early_release = early_release
7211 self.instance = None
7212 self.new_node = None
7213 self.target_node = None
7214 self.other_node = None
7215 self.remote_node_info = None
7216 self.node_secondary_ip = None
7219 def CheckArguments(mode, remote_node, iallocator):
7220 """Helper function for users of this class.
7223 # check for valid parameter combination
7224 if mode == constants.REPLACE_DISK_CHG:
7225 if remote_node is None and iallocator is None:
7226 raise errors.OpPrereqError("When changing the secondary either an"
7227 " iallocator script must be used or the"
7228 " new node given", errors.ECODE_INVAL)
7230 if remote_node is not None and iallocator is not None:
7231 raise errors.OpPrereqError("Give either the iallocator or the new"
7232 " secondary, not both", errors.ECODE_INVAL)
7234 elif remote_node is not None or iallocator is not None:
7235 # Not replacing the secondary
7236 raise errors.OpPrereqError("The iallocator and new node options can"
7237 " only be used when changing the"
7238 " secondary node", errors.ECODE_INVAL)
7241 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7242 """Compute a new secondary node using an IAllocator.
7245 ial = IAllocator(lu.cfg, lu.rpc,
7246 mode=constants.IALLOCATOR_MODE_RELOC,
7248 relocate_from=relocate_from)
7250 ial.Run(iallocator_name)
7253 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7254 " %s" % (iallocator_name, ial.info),
7257 if len(ial.result) != ial.required_nodes:
7258 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7259 " of nodes (%s), required %s" %
7261 len(ial.result), ial.required_nodes),
7264 remote_node_name = ial.result[0]
7266 lu.LogInfo("Selected new secondary for instance '%s': %s",
7267 instance_name, remote_node_name)
7269 return remote_node_name
7271 def _FindFaultyDisks(self, node_name):
7272 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7275 def CheckPrereq(self):
7276 """Check prerequisites.
7278 This checks that the instance is in the cluster.
7281 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7282 assert instance is not None, \
7283 "Cannot retrieve locked instance %s" % self.instance_name
7285 if instance.disk_template != constants.DT_DRBD8:
7286 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7287 " instances", errors.ECODE_INVAL)
7289 if len(instance.secondary_nodes) != 1:
7290 raise errors.OpPrereqError("The instance has a strange layout,"
7291 " expected one secondary but found %d" %
7292 len(instance.secondary_nodes),
7295 if not self.delay_iallocator:
7296 self._CheckPrereq2()
7298 def _CheckPrereq2(self):
7299 """Check prerequisites, second part.
7301 This function should always be part of CheckPrereq. It was separated and is
7302 now called from Exec because during node evacuation iallocator was only
7303 called with an unmodified cluster model, not taking planned changes into
7307 instance = self.instance
7308 secondary_node = instance.secondary_nodes[0]
7310 if self.iallocator_name is None:
7311 remote_node = self.remote_node
7313 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7314 instance.name, instance.secondary_nodes)
7316 if remote_node is not None:
7317 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7318 assert self.remote_node_info is not None, \
7319 "Cannot retrieve locked node %s" % remote_node
7321 self.remote_node_info = None
7323 if remote_node == self.instance.primary_node:
7324 raise errors.OpPrereqError("The specified node is the primary node of"
7325 " the instance.", errors.ECODE_INVAL)
7327 if remote_node == secondary_node:
7328 raise errors.OpPrereqError("The specified node is already the"
7329 " secondary node of the instance.",
7332 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7333 constants.REPLACE_DISK_CHG):
7334 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7337 if self.mode == constants.REPLACE_DISK_AUTO:
7338 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7339 faulty_secondary = self._FindFaultyDisks(secondary_node)
7341 if faulty_primary and faulty_secondary:
7342 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7343 " one node and can not be repaired"
7344 " automatically" % self.instance_name,
7348 self.disks = faulty_primary
7349 self.target_node = instance.primary_node
7350 self.other_node = secondary_node
7351 check_nodes = [self.target_node, self.other_node]
7352 elif faulty_secondary:
7353 self.disks = faulty_secondary
7354 self.target_node = secondary_node
7355 self.other_node = instance.primary_node
7356 check_nodes = [self.target_node, self.other_node]
7362 # Non-automatic modes
7363 if self.mode == constants.REPLACE_DISK_PRI:
7364 self.target_node = instance.primary_node
7365 self.other_node = secondary_node
7366 check_nodes = [self.target_node, self.other_node]
7368 elif self.mode == constants.REPLACE_DISK_SEC:
7369 self.target_node = secondary_node
7370 self.other_node = instance.primary_node
7371 check_nodes = [self.target_node, self.other_node]
7373 elif self.mode == constants.REPLACE_DISK_CHG:
7374 self.new_node = remote_node
7375 self.other_node = instance.primary_node
7376 self.target_node = secondary_node
7377 check_nodes = [self.new_node, self.other_node]
7379 _CheckNodeNotDrained(self.lu, remote_node)
7381 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7382 assert old_node_info is not None
7383 if old_node_info.offline and not self.early_release:
7384 # doesn't make sense to delay the release
7385 self.early_release = True
7386 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7387 " early-release mode", secondary_node)
7390 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7393 # If not specified all disks should be replaced
7395 self.disks = range(len(self.instance.disks))
7397 for node in check_nodes:
7398 _CheckNodeOnline(self.lu, node)
7400 # Check whether disks are valid
7401 for disk_idx in self.disks:
7402 instance.FindDisk(disk_idx)
7404 # Get secondary node IP addresses
7407 for node_name in [self.target_node, self.other_node, self.new_node]:
7408 if node_name is not None:
7409 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7411 self.node_secondary_ip = node_2nd_ip
7413 def Exec(self, feedback_fn):
7414 """Execute disk replacement.
7416 This dispatches the disk replacement to the appropriate handler.
7419 if self.delay_iallocator:
7420 self._CheckPrereq2()
7423 feedback_fn("No disks need replacement")
7426 feedback_fn("Replacing disk(s) %s for %s" %
7427 (utils.CommaJoin(self.disks), self.instance.name))
7429 activate_disks = (not self.instance.admin_up)
7431 # Activate the instance disks if we're replacing them on a down instance
7433 _StartInstanceDisks(self.lu, self.instance, True)
7436 # Should we replace the secondary node?
7437 if self.new_node is not None:
7438 fn = self._ExecDrbd8Secondary
7440 fn = self._ExecDrbd8DiskOnly
7442 return fn(feedback_fn)
7445 # Deactivate the instance disks if we're replacing them on a
7448 _SafeShutdownInstanceDisks(self.lu, self.instance)
7450 def _CheckVolumeGroup(self, nodes):
7451 self.lu.LogInfo("Checking volume groups")
7453 vgname = self.cfg.GetVGName()
7455 # Make sure volume group exists on all involved nodes
7456 results = self.rpc.call_vg_list(nodes)
7458 raise errors.OpExecError("Can't list volume groups on the nodes")
7462 res.Raise("Error checking node %s" % node)
7463 if vgname not in res.payload:
7464 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7467 def _CheckDisksExistence(self, nodes):
7468 # Check disk existence
7469 for idx, dev in enumerate(self.instance.disks):
7470 if idx not in self.disks:
7474 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7475 self.cfg.SetDiskID(dev, node)
7477 result = self.rpc.call_blockdev_find(node, dev)
7479 msg = result.fail_msg
7480 if msg or not result.payload:
7482 msg = "disk not found"
7483 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7486 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7487 for idx, dev in enumerate(self.instance.disks):
7488 if idx not in self.disks:
7491 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7494 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7496 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7497 " replace disks for instance %s" %
7498 (node_name, self.instance.name))
7500 def _CreateNewStorage(self, node_name):
7501 vgname = self.cfg.GetVGName()
7504 for idx, dev in enumerate(self.instance.disks):
7505 if idx not in self.disks:
7508 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7510 self.cfg.SetDiskID(dev, node_name)
7512 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7513 names = _GenerateUniqueNames(self.lu, lv_names)
7515 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7516 logical_id=(vgname, names[0]))
7517 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7518 logical_id=(vgname, names[1]))
7520 new_lvs = [lv_data, lv_meta]
7521 old_lvs = dev.children
7522 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7524 # we pass force_create=True to force the LVM creation
7525 for new_lv in new_lvs:
7526 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7527 _GetInstanceInfoText(self.instance), False)
7531 def _CheckDevices(self, node_name, iv_names):
7532 for name, (dev, _, _) in iv_names.iteritems():
7533 self.cfg.SetDiskID(dev, node_name)
7535 result = self.rpc.call_blockdev_find(node_name, dev)
7537 msg = result.fail_msg
7538 if msg or not result.payload:
7540 msg = "disk not found"
7541 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7544 if result.payload.is_degraded:
7545 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7547 def _RemoveOldStorage(self, node_name, iv_names):
7548 for name, (_, old_lvs, _) in iv_names.iteritems():
7549 self.lu.LogInfo("Remove logical volumes for %s" % name)
7552 self.cfg.SetDiskID(lv, node_name)
7554 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7556 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7557 hint="remove unused LVs manually")
7559 def _ReleaseNodeLock(self, node_name):
7560 """Releases the lock for a given node."""
7561 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7563 def _ExecDrbd8DiskOnly(self, feedback_fn):
7564 """Replace a disk on the primary or secondary for DRBD 8.
7566 The algorithm for replace is quite complicated:
7568 1. for each disk to be replaced:
7570 1. create new LVs on the target node with unique names
7571 1. detach old LVs from the drbd device
7572 1. rename old LVs to name_replaced.<time_t>
7573 1. rename new LVs to old LVs
7574 1. attach the new LVs (with the old names now) to the drbd device
7576 1. wait for sync across all devices
7578 1. for each modified disk:
7580 1. remove old LVs (which have the name name_replaces.<time_t>)
7582 Failures are not very well handled.
7587 # Step: check device activation
7588 self.lu.LogStep(1, steps_total, "Check device existence")
7589 self._CheckDisksExistence([self.other_node, self.target_node])
7590 self._CheckVolumeGroup([self.target_node, self.other_node])
7592 # Step: check other node consistency
7593 self.lu.LogStep(2, steps_total, "Check peer consistency")
7594 self._CheckDisksConsistency(self.other_node,
7595 self.other_node == self.instance.primary_node,
7598 # Step: create new storage
7599 self.lu.LogStep(3, steps_total, "Allocate new storage")
7600 iv_names = self._CreateNewStorage(self.target_node)
7602 # Step: for each lv, detach+rename*2+attach
7603 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7604 for dev, old_lvs, new_lvs in iv_names.itervalues():
7605 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7607 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7609 result.Raise("Can't detach drbd from local storage on node"
7610 " %s for device %s" % (self.target_node, dev.iv_name))
7612 #cfg.Update(instance)
7614 # ok, we created the new LVs, so now we know we have the needed
7615 # storage; as such, we proceed on the target node to rename
7616 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7617 # using the assumption that logical_id == physical_id (which in
7618 # turn is the unique_id on that node)
7620 # FIXME(iustin): use a better name for the replaced LVs
7621 temp_suffix = int(time.time())
7622 ren_fn = lambda d, suff: (d.physical_id[0],
7623 d.physical_id[1] + "_replaced-%s" % suff)
7625 # Build the rename list based on what LVs exist on the node
7626 rename_old_to_new = []
7627 for to_ren in old_lvs:
7628 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7629 if not result.fail_msg and result.payload:
7631 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7633 self.lu.LogInfo("Renaming the old LVs on the target node")
7634 result = self.rpc.call_blockdev_rename(self.target_node,
7636 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7638 # Now we rename the new LVs to the old LVs
7639 self.lu.LogInfo("Renaming the new LVs on the target node")
7640 rename_new_to_old = [(new, old.physical_id)
7641 for old, new in zip(old_lvs, new_lvs)]
7642 result = self.rpc.call_blockdev_rename(self.target_node,
7644 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7646 for old, new in zip(old_lvs, new_lvs):
7647 new.logical_id = old.logical_id
7648 self.cfg.SetDiskID(new, self.target_node)
7650 for disk in old_lvs:
7651 disk.logical_id = ren_fn(disk, temp_suffix)
7652 self.cfg.SetDiskID(disk, self.target_node)
7654 # Now that the new lvs have the old name, we can add them to the device
7655 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7656 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7658 msg = result.fail_msg
7660 for new_lv in new_lvs:
7661 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7664 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7665 hint=("cleanup manually the unused logical"
7667 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7669 dev.children = new_lvs
7671 self.cfg.Update(self.instance, feedback_fn)
7674 if self.early_release:
7675 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7677 self._RemoveOldStorage(self.target_node, iv_names)
7678 # WARNING: we release both node locks here, do not do other RPCs
7679 # than WaitForSync to the primary node
7680 self._ReleaseNodeLock([self.target_node, self.other_node])
7683 # This can fail as the old devices are degraded and _WaitForSync
7684 # does a combined result over all disks, so we don't check its return value
7685 self.lu.LogStep(cstep, steps_total, "Sync devices")
7687 _WaitForSync(self.lu, self.instance)
7689 # Check all devices manually
7690 self._CheckDevices(self.instance.primary_node, iv_names)
7692 # Step: remove old storage
7693 if not self.early_release:
7694 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7696 self._RemoveOldStorage(self.target_node, iv_names)
7698 def _ExecDrbd8Secondary(self, feedback_fn):
7699 """Replace the secondary node for DRBD 8.
7701 The algorithm for replace is quite complicated:
7702 - for all disks of the instance:
7703 - create new LVs on the new node with same names
7704 - shutdown the drbd device on the old secondary
7705 - disconnect the drbd network on the primary
7706 - create the drbd device on the new secondary
7707 - network attach the drbd on the primary, using an artifice:
7708 the drbd code for Attach() will connect to the network if it
7709 finds a device which is connected to the good local disks but
7711 - wait for sync across all devices
7712 - remove all disks from the old secondary
7714 Failures are not very well handled.
7719 # Step: check device activation
7720 self.lu.LogStep(1, steps_total, "Check device existence")
7721 self._CheckDisksExistence([self.instance.primary_node])
7722 self._CheckVolumeGroup([self.instance.primary_node])
7724 # Step: check other node consistency
7725 self.lu.LogStep(2, steps_total, "Check peer consistency")
7726 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7728 # Step: create new storage
7729 self.lu.LogStep(3, steps_total, "Allocate new storage")
7730 for idx, dev in enumerate(self.instance.disks):
7731 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7732 (self.new_node, idx))
7733 # we pass force_create=True to force LVM creation
7734 for new_lv in dev.children:
7735 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7736 _GetInstanceInfoText(self.instance), False)
7738 # Step 4: dbrd minors and drbd setups changes
7739 # after this, we must manually remove the drbd minors on both the
7740 # error and the success paths
7741 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7742 minors = self.cfg.AllocateDRBDMinor([self.new_node
7743 for dev in self.instance.disks],
7745 logging.debug("Allocated minors %r", minors)
7748 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7749 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7750 (self.new_node, idx))
7751 # create new devices on new_node; note that we create two IDs:
7752 # one without port, so the drbd will be activated without
7753 # networking information on the new node at this stage, and one
7754 # with network, for the latter activation in step 4
7755 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7756 if self.instance.primary_node == o_node1:
7759 assert self.instance.primary_node == o_node2, "Three-node instance?"
7762 new_alone_id = (self.instance.primary_node, self.new_node, None,
7763 p_minor, new_minor, o_secret)
7764 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7765 p_minor, new_minor, o_secret)
7767 iv_names[idx] = (dev, dev.children, new_net_id)
7768 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7770 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7771 logical_id=new_alone_id,
7772 children=dev.children,
7775 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7776 _GetInstanceInfoText(self.instance), False)
7777 except errors.GenericError:
7778 self.cfg.ReleaseDRBDMinors(self.instance.name)
7781 # We have new devices, shutdown the drbd on the old secondary
7782 for idx, dev in enumerate(self.instance.disks):
7783 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7784 self.cfg.SetDiskID(dev, self.target_node)
7785 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7787 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7788 "node: %s" % (idx, msg),
7789 hint=("Please cleanup this device manually as"
7790 " soon as possible"))
7792 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7793 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7794 self.node_secondary_ip,
7795 self.instance.disks)\
7796 [self.instance.primary_node]
7798 msg = result.fail_msg
7800 # detaches didn't succeed (unlikely)
7801 self.cfg.ReleaseDRBDMinors(self.instance.name)
7802 raise errors.OpExecError("Can't detach the disks from the network on"
7803 " old node: %s" % (msg,))
7805 # if we managed to detach at least one, we update all the disks of
7806 # the instance to point to the new secondary
7807 self.lu.LogInfo("Updating instance configuration")
7808 for dev, _, new_logical_id in iv_names.itervalues():
7809 dev.logical_id = new_logical_id
7810 self.cfg.SetDiskID(dev, self.instance.primary_node)
7812 self.cfg.Update(self.instance, feedback_fn)
7814 # and now perform the drbd attach
7815 self.lu.LogInfo("Attaching primary drbds to new secondary"
7816 " (standalone => connected)")
7817 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7819 self.node_secondary_ip,
7820 self.instance.disks,
7823 for to_node, to_result in result.items():
7824 msg = to_result.fail_msg
7826 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7828 hint=("please do a gnt-instance info to see the"
7829 " status of disks"))
7831 if self.early_release:
7832 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7834 self._RemoveOldStorage(self.target_node, iv_names)
7835 # WARNING: we release all node locks here, do not do other RPCs
7836 # than WaitForSync to the primary node
7837 self._ReleaseNodeLock([self.instance.primary_node,
7842 # This can fail as the old devices are degraded and _WaitForSync
7843 # does a combined result over all disks, so we don't check its return value
7844 self.lu.LogStep(cstep, steps_total, "Sync devices")
7846 _WaitForSync(self.lu, self.instance)
7848 # Check all devices manually
7849 self._CheckDevices(self.instance.primary_node, iv_names)
7851 # Step: remove old storage
7852 if not self.early_release:
7853 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7854 self._RemoveOldStorage(self.target_node, iv_names)
7857 class LURepairNodeStorage(NoHooksLU):
7858 """Repairs the volume group on a node.
7861 _OP_REQP = ["node_name"]
7864 def CheckArguments(self):
7865 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7867 _CheckStorageType(self.op.storage_type)
7869 def ExpandNames(self):
7870 self.needed_locks = {
7871 locking.LEVEL_NODE: [self.op.node_name],
7874 def _CheckFaultyDisks(self, instance, node_name):
7875 """Ensure faulty disks abort the opcode or at least warn."""
7877 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7879 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7880 " node '%s'" % (instance.name, node_name),
7882 except errors.OpPrereqError, err:
7883 if self.op.ignore_consistency:
7884 self.proc.LogWarning(str(err.args[0]))
7888 def CheckPrereq(self):
7889 """Check prerequisites.
7892 storage_type = self.op.storage_type
7894 if (constants.SO_FIX_CONSISTENCY not in
7895 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7896 raise errors.OpPrereqError("Storage units of type '%s' can not be"
7897 " repaired" % storage_type,
7900 # Check whether any instance on this node has faulty disks
7901 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7902 if not inst.admin_up:
7904 check_nodes = set(inst.all_nodes)
7905 check_nodes.discard(self.op.node_name)
7906 for inst_node_name in check_nodes:
7907 self._CheckFaultyDisks(inst, inst_node_name)
7909 def Exec(self, feedback_fn):
7910 feedback_fn("Repairing storage unit '%s' on %s ..." %
7911 (self.op.name, self.op.node_name))
7913 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7914 result = self.rpc.call_storage_execute(self.op.node_name,
7915 self.op.storage_type, st_args,
7917 constants.SO_FIX_CONSISTENCY)
7918 result.Raise("Failed to repair storage unit '%s' on %s" %
7919 (self.op.name, self.op.node_name))
7922 class LUNodeEvacuationStrategy(NoHooksLU):
7923 """Computes the node evacuation strategy.
7926 _OP_REQP = ["nodes"]
7929 def CheckArguments(self):
7930 if not hasattr(self.op, "remote_node"):
7931 self.op.remote_node = None
7932 if not hasattr(self.op, "iallocator"):
7933 self.op.iallocator = None
7934 if self.op.remote_node is not None and self.op.iallocator is not None:
7935 raise errors.OpPrereqError("Give either the iallocator or the new"
7936 " secondary, not both", errors.ECODE_INVAL)
7938 def ExpandNames(self):
7939 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7940 self.needed_locks = locks = {}
7941 if self.op.remote_node is None:
7942 locks[locking.LEVEL_NODE] = locking.ALL_SET
7944 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7945 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7947 def CheckPrereq(self):
7950 def Exec(self, feedback_fn):
7951 if self.op.remote_node is not None:
7953 for node in self.op.nodes:
7954 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7957 if i.primary_node == self.op.remote_node:
7958 raise errors.OpPrereqError("Node %s is the primary node of"
7959 " instance %s, cannot use it as"
7961 (self.op.remote_node, i.name),
7963 result.append([i.name, self.op.remote_node])
7965 ial = IAllocator(self.cfg, self.rpc,
7966 mode=constants.IALLOCATOR_MODE_MEVAC,
7967 evac_nodes=self.op.nodes)
7968 ial.Run(self.op.iallocator, validate=True)
7970 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
7976 class LUGrowDisk(LogicalUnit):
7977 """Grow a disk of an instance.
7981 HTYPE = constants.HTYPE_INSTANCE
7982 _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7985 def ExpandNames(self):
7986 self._ExpandAndLockInstance()
7987 self.needed_locks[locking.LEVEL_NODE] = []
7988 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7990 def DeclareLocks(self, level):
7991 if level == locking.LEVEL_NODE:
7992 self._LockInstancesNodes()
7994 def BuildHooksEnv(self):
7997 This runs on the master, the primary and all the secondaries.
8001 "DISK": self.op.disk,
8002 "AMOUNT": self.op.amount,
8004 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8005 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8008 def CheckPrereq(self):
8009 """Check prerequisites.
8011 This checks that the instance is in the cluster.
8014 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8015 assert instance is not None, \
8016 "Cannot retrieve locked instance %s" % self.op.instance_name
8017 nodenames = list(instance.all_nodes)
8018 for node in nodenames:
8019 _CheckNodeOnline(self, node)
8022 self.instance = instance
8024 if instance.disk_template not in constants.DTS_GROWABLE:
8025 raise errors.OpPrereqError("Instance's disk layout does not support"
8026 " growing.", errors.ECODE_INVAL)
8028 self.disk = instance.FindDisk(self.op.disk)
8030 if instance.disk_template != constants.DT_FILE:
8031 # TODO: check the free disk space for file, when that feature will be
8033 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8035 def Exec(self, feedback_fn):
8036 """Execute disk grow.
8039 instance = self.instance
8042 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8044 raise errors.OpExecError("Cannot activate block device to grow")
8046 for node in instance.all_nodes:
8047 self.cfg.SetDiskID(disk, node)
8048 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8049 result.Raise("Grow request failed to node %s" % node)
8051 # TODO: Rewrite code to work properly
8052 # DRBD goes into sync mode for a short amount of time after executing the
8053 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8054 # calling "resize" in sync mode fails. Sleeping for a short amount of
8055 # time is a work-around.
8058 disk.RecordGrow(self.op.amount)
8059 self.cfg.Update(instance, feedback_fn)
8060 if self.op.wait_for_sync:
8061 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8063 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8064 " status.\nPlease check the instance.")
8065 if not instance.admin_up:
8066 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8067 elif not instance.admin_up:
8068 self.proc.LogWarning("Not shutting down the disk even if the instance is"
8069 " not supposed to be running because no wait for"
8070 " sync mode was requested.")
8073 class LUQueryInstanceData(NoHooksLU):
8074 """Query runtime instance data.
8077 _OP_REQP = ["instances", "static"]
8080 def ExpandNames(self):
8081 self.needed_locks = {}
8082 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8084 if not isinstance(self.op.instances, list):
8085 raise errors.OpPrereqError("Invalid argument type 'instances'",
8088 if self.op.instances:
8089 self.wanted_names = []
8090 for name in self.op.instances:
8091 full_name = _ExpandInstanceName(self.cfg, name)
8092 self.wanted_names.append(full_name)
8093 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8095 self.wanted_names = None
8096 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8098 self.needed_locks[locking.LEVEL_NODE] = []
8099 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8101 def DeclareLocks(self, level):
8102 if level == locking.LEVEL_NODE:
8103 self._LockInstancesNodes()
8105 def CheckPrereq(self):
8106 """Check prerequisites.
8108 This only checks the optional instance list against the existing names.
8111 if self.wanted_names is None:
8112 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8114 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8115 in self.wanted_names]
8118 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8119 """Returns the status of a block device
8122 if self.op.static or not node:
8125 self.cfg.SetDiskID(dev, node)
8127 result = self.rpc.call_blockdev_find(node, dev)
8131 result.Raise("Can't compute disk status for %s" % instance_name)
8133 status = result.payload
8137 return (status.dev_path, status.major, status.minor,
8138 status.sync_percent, status.estimated_time,
8139 status.is_degraded, status.ldisk_status)
8141 def _ComputeDiskStatus(self, instance, snode, dev):
8142 """Compute block device status.
8145 if dev.dev_type in constants.LDS_DRBD:
8146 # we change the snode then (otherwise we use the one passed in)
8147 if dev.logical_id[0] == instance.primary_node:
8148 snode = dev.logical_id[1]
8150 snode = dev.logical_id[0]
8152 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8154 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8157 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8158 for child in dev.children]
8163 "iv_name": dev.iv_name,
8164 "dev_type": dev.dev_type,
8165 "logical_id": dev.logical_id,
8166 "physical_id": dev.physical_id,
8167 "pstatus": dev_pstatus,
8168 "sstatus": dev_sstatus,
8169 "children": dev_children,
8176 def Exec(self, feedback_fn):
8177 """Gather and return data"""
8180 cluster = self.cfg.GetClusterInfo()
8182 for instance in self.wanted_instances:
8183 if not self.op.static:
8184 remote_info = self.rpc.call_instance_info(instance.primary_node,
8186 instance.hypervisor)
8187 remote_info.Raise("Error checking node %s" % instance.primary_node)
8188 remote_info = remote_info.payload
8189 if remote_info and "state" in remote_info:
8192 remote_state = "down"
8195 if instance.admin_up:
8198 config_state = "down"
8200 disks = [self._ComputeDiskStatus(instance, None, device)
8201 for device in instance.disks]
8204 "name": instance.name,
8205 "config_state": config_state,
8206 "run_state": remote_state,
8207 "pnode": instance.primary_node,
8208 "snodes": instance.secondary_nodes,
8210 # this happens to be the same format used for hooks
8211 "nics": _NICListToTuple(self, instance.nics),
8212 "disk_template": instance.disk_template,
8214 "hypervisor": instance.hypervisor,
8215 "network_port": instance.network_port,
8216 "hv_instance": instance.hvparams,
8217 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8218 "be_instance": instance.beparams,
8219 "be_actual": cluster.FillBE(instance),
8220 "serial_no": instance.serial_no,
8221 "mtime": instance.mtime,
8222 "ctime": instance.ctime,
8223 "uuid": instance.uuid,
8226 result[instance.name] = idict
8231 class LUSetInstanceParams(LogicalUnit):
8232 """Modifies an instances's parameters.
8235 HPATH = "instance-modify"
8236 HTYPE = constants.HTYPE_INSTANCE
8237 _OP_REQP = ["instance_name"]
8240 def CheckArguments(self):
8241 if not hasattr(self.op, 'nics'):
8243 if not hasattr(self.op, 'disks'):
8245 if not hasattr(self.op, 'beparams'):
8246 self.op.beparams = {}
8247 if not hasattr(self.op, 'hvparams'):
8248 self.op.hvparams = {}
8249 if not hasattr(self.op, "disk_template"):
8250 self.op.disk_template = None
8251 if not hasattr(self.op, "remote_node"):
8252 self.op.remote_node = None
8253 if not hasattr(self.op, "os_name"):
8254 self.op.os_name = None
8255 if not hasattr(self.op, "force_variant"):
8256 self.op.force_variant = False
8257 self.op.force = getattr(self.op, "force", False)
8258 if not (self.op.nics or self.op.disks or self.op.disk_template or
8259 self.op.hvparams or self.op.beparams or self.op.os_name):
8260 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8262 if self.op.hvparams:
8263 _CheckGlobalHvParams(self.op.hvparams)
8267 for disk_op, disk_dict in self.op.disks:
8268 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8269 if disk_op == constants.DDM_REMOVE:
8272 elif disk_op == constants.DDM_ADD:
8275 if not isinstance(disk_op, int):
8276 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8277 if not isinstance(disk_dict, dict):
8278 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8279 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8281 if disk_op == constants.DDM_ADD:
8282 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8283 if mode not in constants.DISK_ACCESS_SET:
8284 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8286 size = disk_dict.get('size', None)
8288 raise errors.OpPrereqError("Required disk parameter size missing",
8292 except (TypeError, ValueError), err:
8293 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8294 str(err), errors.ECODE_INVAL)
8295 disk_dict['size'] = size
8297 # modification of disk
8298 if 'size' in disk_dict:
8299 raise errors.OpPrereqError("Disk size change not possible, use"
8300 " grow-disk", errors.ECODE_INVAL)
8302 if disk_addremove > 1:
8303 raise errors.OpPrereqError("Only one disk add or remove operation"
8304 " supported at a time", errors.ECODE_INVAL)
8306 if self.op.disks and self.op.disk_template is not None:
8307 raise errors.OpPrereqError("Disk template conversion and other disk"
8308 " changes not supported at the same time",
8311 if self.op.disk_template:
8312 _CheckDiskTemplate(self.op.disk_template)
8313 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8314 self.op.remote_node is None):
8315 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8316 " one requires specifying a secondary node",
8321 for nic_op, nic_dict in self.op.nics:
8322 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8323 if nic_op == constants.DDM_REMOVE:
8326 elif nic_op == constants.DDM_ADD:
8329 if not isinstance(nic_op, int):
8330 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8331 if not isinstance(nic_dict, dict):
8332 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8333 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8335 # nic_dict should be a dict
8336 nic_ip = nic_dict.get('ip', None)
8337 if nic_ip is not None:
8338 if nic_ip.lower() == constants.VALUE_NONE:
8339 nic_dict['ip'] = None
8341 if not utils.IsValidIP(nic_ip):
8342 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8345 nic_bridge = nic_dict.get('bridge', None)
8346 nic_link = nic_dict.get('link', None)
8347 if nic_bridge and nic_link:
8348 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8349 " at the same time", errors.ECODE_INVAL)
8350 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8351 nic_dict['bridge'] = None
8352 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8353 nic_dict['link'] = None
8355 if nic_op == constants.DDM_ADD:
8356 nic_mac = nic_dict.get('mac', None)
8358 nic_dict['mac'] = constants.VALUE_AUTO
8360 if 'mac' in nic_dict:
8361 nic_mac = nic_dict['mac']
8362 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8363 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8365 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8366 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8367 " modifying an existing nic",
8370 if nic_addremove > 1:
8371 raise errors.OpPrereqError("Only one NIC add or remove operation"
8372 " supported at a time", errors.ECODE_INVAL)
8374 def ExpandNames(self):
8375 self._ExpandAndLockInstance()
8376 self.needed_locks[locking.LEVEL_NODE] = []
8377 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8379 def DeclareLocks(self, level):
8380 if level == locking.LEVEL_NODE:
8381 self._LockInstancesNodes()
8382 if self.op.disk_template and self.op.remote_node:
8383 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8384 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8386 def BuildHooksEnv(self):
8389 This runs on the master, primary and secondaries.
8393 if constants.BE_MEMORY in self.be_new:
8394 args['memory'] = self.be_new[constants.BE_MEMORY]
8395 if constants.BE_VCPUS in self.be_new:
8396 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8397 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8398 # information at all.
8401 nic_override = dict(self.op.nics)
8402 for idx, nic in enumerate(self.instance.nics):
8403 if idx in nic_override:
8404 this_nic_override = nic_override[idx]
8406 this_nic_override = {}
8407 if 'ip' in this_nic_override:
8408 ip = this_nic_override['ip']
8411 if 'mac' in this_nic_override:
8412 mac = this_nic_override['mac']
8415 if idx in self.nic_pnew:
8416 nicparams = self.nic_pnew[idx]
8418 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8419 mode = nicparams[constants.NIC_MODE]
8420 link = nicparams[constants.NIC_LINK]
8421 args['nics'].append((ip, mac, mode, link))
8422 if constants.DDM_ADD in nic_override:
8423 ip = nic_override[constants.DDM_ADD].get('ip', None)
8424 mac = nic_override[constants.DDM_ADD]['mac']
8425 nicparams = self.nic_pnew[constants.DDM_ADD]
8426 mode = nicparams[constants.NIC_MODE]
8427 link = nicparams[constants.NIC_LINK]
8428 args['nics'].append((ip, mac, mode, link))
8429 elif constants.DDM_REMOVE in nic_override:
8430 del args['nics'][-1]
8432 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8433 if self.op.disk_template:
8434 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8435 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8438 def CheckPrereq(self):
8439 """Check prerequisites.
8441 This only checks the instance list against the existing names.
8444 self.force = self.op.force
8446 # checking the new params on the primary/secondary nodes
8448 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8449 cluster = self.cluster = self.cfg.GetClusterInfo()
8450 assert self.instance is not None, \
8451 "Cannot retrieve locked instance %s" % self.op.instance_name
8452 pnode = instance.primary_node
8453 nodelist = list(instance.all_nodes)
8455 if self.op.disk_template:
8456 if instance.disk_template == self.op.disk_template:
8457 raise errors.OpPrereqError("Instance already has disk template %s" %
8458 instance.disk_template, errors.ECODE_INVAL)
8460 if (instance.disk_template,
8461 self.op.disk_template) not in self._DISK_CONVERSIONS:
8462 raise errors.OpPrereqError("Unsupported disk template conversion from"
8463 " %s to %s" % (instance.disk_template,
8464 self.op.disk_template),
8466 if self.op.disk_template in constants.DTS_NET_MIRROR:
8467 _CheckNodeOnline(self, self.op.remote_node)
8468 _CheckNodeNotDrained(self, self.op.remote_node)
8469 disks = [{"size": d.size} for d in instance.disks]
8470 required = _ComputeDiskSize(self.op.disk_template, disks)
8471 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8472 _CheckInstanceDown(self, instance, "cannot change disk template")
8474 # hvparams processing
8475 if self.op.hvparams:
8476 hv_type = instance.hypervisor
8477 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8478 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8479 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8482 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8483 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8484 self.hv_new = hv_new # the new actual values
8485 self.hv_inst = i_hvdict # the new dict (without defaults)
8487 self.hv_new = self.hv_inst = {}
8489 # beparams processing
8490 if self.op.beparams:
8491 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams)
8492 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8493 be_new = cluster.SimpleFillBE(i_bedict)
8494 self.be_new = be_new # the new actual values
8495 self.be_inst = i_bedict # the new dict (without defaults)
8497 self.be_new = self.be_inst = {}
8501 if constants.BE_MEMORY in self.op.beparams and not self.force:
8502 mem_check_list = [pnode]
8503 if be_new[constants.BE_AUTO_BALANCE]:
8504 # either we changed auto_balance to yes or it was from before
8505 mem_check_list.extend(instance.secondary_nodes)
8506 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8507 instance.hypervisor)
8508 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8509 instance.hypervisor)
8510 pninfo = nodeinfo[pnode]
8511 msg = pninfo.fail_msg
8513 # Assume the primary node is unreachable and go ahead
8514 self.warn.append("Can't get info from primary node %s: %s" %
8516 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8517 self.warn.append("Node data from primary node %s doesn't contain"
8518 " free memory information" % pnode)
8519 elif instance_info.fail_msg:
8520 self.warn.append("Can't get instance runtime information: %s" %
8521 instance_info.fail_msg)
8523 if instance_info.payload:
8524 current_mem = int(instance_info.payload['memory'])
8526 # Assume instance not running
8527 # (there is a slight race condition here, but it's not very probable,
8528 # and we have no other way to check)
8530 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8531 pninfo.payload['memory_free'])
8533 raise errors.OpPrereqError("This change will prevent the instance"
8534 " from starting, due to %d MB of memory"
8535 " missing on its primary node" % miss_mem,
8538 if be_new[constants.BE_AUTO_BALANCE]:
8539 for node, nres in nodeinfo.items():
8540 if node not in instance.secondary_nodes:
8544 self.warn.append("Can't get info from secondary node %s: %s" %
8546 elif not isinstance(nres.payload.get('memory_free', None), int):
8547 self.warn.append("Secondary node %s didn't return free"
8548 " memory information" % node)
8549 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8550 self.warn.append("Not enough memory to failover instance to"
8551 " secondary node %s" % node)
8556 for nic_op, nic_dict in self.op.nics:
8557 if nic_op == constants.DDM_REMOVE:
8558 if not instance.nics:
8559 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8562 if nic_op != constants.DDM_ADD:
8564 if not instance.nics:
8565 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8566 " no NICs" % nic_op,
8568 if nic_op < 0 or nic_op >= len(instance.nics):
8569 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8571 (nic_op, len(instance.nics) - 1),
8573 old_nic_params = instance.nics[nic_op].nicparams
8574 old_nic_ip = instance.nics[nic_op].ip
8579 update_params_dict = dict([(key, nic_dict[key])
8580 for key in constants.NICS_PARAMETERS
8581 if key in nic_dict])
8583 if 'bridge' in nic_dict:
8584 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8586 new_nic_params = _GetUpdatedParams(old_nic_params,
8588 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8589 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8590 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8591 self.nic_pinst[nic_op] = new_nic_params
8592 self.nic_pnew[nic_op] = new_filled_nic_params
8593 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8595 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8596 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8597 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8599 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8601 self.warn.append(msg)
8603 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8604 if new_nic_mode == constants.NIC_MODE_ROUTED:
8605 if 'ip' in nic_dict:
8606 nic_ip = nic_dict['ip']
8610 raise errors.OpPrereqError('Cannot set the nic ip to None'
8611 ' on a routed nic', errors.ECODE_INVAL)
8612 if 'mac' in nic_dict:
8613 nic_mac = nic_dict['mac']
8615 raise errors.OpPrereqError('Cannot set the nic mac to None',
8617 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8618 # otherwise generate the mac
8619 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8621 # or validate/reserve the current one
8623 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8624 except errors.ReservationError:
8625 raise errors.OpPrereqError("MAC address %s already in use"
8626 " in cluster" % nic_mac,
8627 errors.ECODE_NOTUNIQUE)
8630 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8631 raise errors.OpPrereqError("Disk operations not supported for"
8632 " diskless instances",
8634 for disk_op, _ in self.op.disks:
8635 if disk_op == constants.DDM_REMOVE:
8636 if len(instance.disks) == 1:
8637 raise errors.OpPrereqError("Cannot remove the last disk of"
8638 " an instance", errors.ECODE_INVAL)
8639 _CheckInstanceDown(self, instance, "cannot remove disks")
8641 if (disk_op == constants.DDM_ADD and
8642 len(instance.nics) >= constants.MAX_DISKS):
8643 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8644 " add more" % constants.MAX_DISKS,
8646 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8648 if disk_op < 0 or disk_op >= len(instance.disks):
8649 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8651 (disk_op, len(instance.disks)),
8655 if self.op.os_name and not self.op.force:
8656 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8657 self.op.force_variant)
8661 def _ConvertPlainToDrbd(self, feedback_fn):
8662 """Converts an instance from plain to drbd.
8665 feedback_fn("Converting template to drbd")
8666 instance = self.instance
8667 pnode = instance.primary_node
8668 snode = self.op.remote_node
8670 # create a fake disk info for _GenerateDiskTemplate
8671 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8672 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8673 instance.name, pnode, [snode],
8674 disk_info, None, None, 0)
8675 info = _GetInstanceInfoText(instance)
8676 feedback_fn("Creating aditional volumes...")
8677 # first, create the missing data and meta devices
8678 for disk in new_disks:
8679 # unfortunately this is... not too nice
8680 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8682 for child in disk.children:
8683 _CreateSingleBlockDev(self, snode, instance, child, info, True)
8684 # at this stage, all new LVs have been created, we can rename the
8686 feedback_fn("Renaming original volumes...")
8687 rename_list = [(o, n.children[0].logical_id)
8688 for (o, n) in zip(instance.disks, new_disks)]
8689 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8690 result.Raise("Failed to rename original LVs")
8692 feedback_fn("Initializing DRBD devices...")
8693 # all child devices are in place, we can now create the DRBD devices
8694 for disk in new_disks:
8695 for node in [pnode, snode]:
8696 f_create = node == pnode
8697 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8699 # at this point, the instance has been modified
8700 instance.disk_template = constants.DT_DRBD8
8701 instance.disks = new_disks
8702 self.cfg.Update(instance, feedback_fn)
8704 # disks are created, waiting for sync
8705 disk_abort = not _WaitForSync(self, instance)
8707 raise errors.OpExecError("There are some degraded disks for"
8708 " this instance, please cleanup manually")
8710 def _ConvertDrbdToPlain(self, feedback_fn):
8711 """Converts an instance from drbd to plain.
8714 instance = self.instance
8715 assert len(instance.secondary_nodes) == 1
8716 pnode = instance.primary_node
8717 snode = instance.secondary_nodes[0]
8718 feedback_fn("Converting template to plain")
8720 old_disks = instance.disks
8721 new_disks = [d.children[0] for d in old_disks]
8723 # copy over size and mode
8724 for parent, child in zip(old_disks, new_disks):
8725 child.size = parent.size
8726 child.mode = parent.mode
8728 # update instance structure
8729 instance.disks = new_disks
8730 instance.disk_template = constants.DT_PLAIN
8731 self.cfg.Update(instance, feedback_fn)
8733 feedback_fn("Removing volumes on the secondary node...")
8734 for disk in old_disks:
8735 self.cfg.SetDiskID(disk, snode)
8736 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8738 self.LogWarning("Could not remove block device %s on node %s,"
8739 " continuing anyway: %s", disk.iv_name, snode, msg)
8741 feedback_fn("Removing unneeded volumes on the primary node...")
8742 for idx, disk in enumerate(old_disks):
8743 meta = disk.children[1]
8744 self.cfg.SetDiskID(meta, pnode)
8745 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8747 self.LogWarning("Could not remove metadata for disk %d on node %s,"
8748 " continuing anyway: %s", idx, pnode, msg)
8751 def Exec(self, feedback_fn):
8752 """Modifies an instance.
8754 All parameters take effect only at the next restart of the instance.
8757 # Process here the warnings from CheckPrereq, as we don't have a
8758 # feedback_fn there.
8759 for warn in self.warn:
8760 feedback_fn("WARNING: %s" % warn)
8763 instance = self.instance
8765 for disk_op, disk_dict in self.op.disks:
8766 if disk_op == constants.DDM_REMOVE:
8767 # remove the last disk
8768 device = instance.disks.pop()
8769 device_idx = len(instance.disks)
8770 for node, disk in device.ComputeNodeTree(instance.primary_node):
8771 self.cfg.SetDiskID(disk, node)
8772 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8774 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8775 " continuing anyway", device_idx, node, msg)
8776 result.append(("disk/%d" % device_idx, "remove"))
8777 elif disk_op == constants.DDM_ADD:
8779 if instance.disk_template == constants.DT_FILE:
8780 file_driver, file_path = instance.disks[0].logical_id
8781 file_path = os.path.dirname(file_path)
8783 file_driver = file_path = None
8784 disk_idx_base = len(instance.disks)
8785 new_disk = _GenerateDiskTemplate(self,
8786 instance.disk_template,
8787 instance.name, instance.primary_node,
8788 instance.secondary_nodes,
8793 instance.disks.append(new_disk)
8794 info = _GetInstanceInfoText(instance)
8796 logging.info("Creating volume %s for instance %s",
8797 new_disk.iv_name, instance.name)
8798 # Note: this needs to be kept in sync with _CreateDisks
8800 for node in instance.all_nodes:
8801 f_create = node == instance.primary_node
8803 _CreateBlockDev(self, node, instance, new_disk,
8804 f_create, info, f_create)
8805 except errors.OpExecError, err:
8806 self.LogWarning("Failed to create volume %s (%s) on"
8808 new_disk.iv_name, new_disk, node, err)
8809 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8810 (new_disk.size, new_disk.mode)))
8812 # change a given disk
8813 instance.disks[disk_op].mode = disk_dict['mode']
8814 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8816 if self.op.disk_template:
8817 r_shut = _ShutdownInstanceDisks(self, instance)
8819 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8820 " proceed with disk template conversion")
8821 mode = (instance.disk_template, self.op.disk_template)
8823 self._DISK_CONVERSIONS[mode](self, feedback_fn)
8825 self.cfg.ReleaseDRBDMinors(instance.name)
8827 result.append(("disk_template", self.op.disk_template))
8830 for nic_op, nic_dict in self.op.nics:
8831 if nic_op == constants.DDM_REMOVE:
8832 # remove the last nic
8833 del instance.nics[-1]
8834 result.append(("nic.%d" % len(instance.nics), "remove"))
8835 elif nic_op == constants.DDM_ADD:
8836 # mac and bridge should be set, by now
8837 mac = nic_dict['mac']
8838 ip = nic_dict.get('ip', None)
8839 nicparams = self.nic_pinst[constants.DDM_ADD]
8840 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8841 instance.nics.append(new_nic)
8842 result.append(("nic.%d" % (len(instance.nics) - 1),
8843 "add:mac=%s,ip=%s,mode=%s,link=%s" %
8844 (new_nic.mac, new_nic.ip,
8845 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8846 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8849 for key in 'mac', 'ip':
8851 setattr(instance.nics[nic_op], key, nic_dict[key])
8852 if nic_op in self.nic_pinst:
8853 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8854 for key, val in nic_dict.iteritems():
8855 result.append(("nic.%s/%d" % (key, nic_op), val))
8858 if self.op.hvparams:
8859 instance.hvparams = self.hv_inst
8860 for key, val in self.op.hvparams.iteritems():
8861 result.append(("hv/%s" % key, val))
8864 if self.op.beparams:
8865 instance.beparams = self.be_inst
8866 for key, val in self.op.beparams.iteritems():
8867 result.append(("be/%s" % key, val))
8871 instance.os = self.op.os_name
8873 self.cfg.Update(instance, feedback_fn)
8877 _DISK_CONVERSIONS = {
8878 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8879 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8883 class LUQueryExports(NoHooksLU):
8884 """Query the exports list
8887 _OP_REQP = ['nodes']
8890 def ExpandNames(self):
8891 self.needed_locks = {}
8892 self.share_locks[locking.LEVEL_NODE] = 1
8893 if not self.op.nodes:
8894 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8896 self.needed_locks[locking.LEVEL_NODE] = \
8897 _GetWantedNodes(self, self.op.nodes)
8899 def CheckPrereq(self):
8900 """Check prerequisites.
8903 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8905 def Exec(self, feedback_fn):
8906 """Compute the list of all the exported system images.
8909 @return: a dictionary with the structure node->(export-list)
8910 where export-list is a list of the instances exported on
8914 rpcresult = self.rpc.call_export_list(self.nodes)
8916 for node in rpcresult:
8917 if rpcresult[node].fail_msg:
8918 result[node] = False
8920 result[node] = rpcresult[node].payload
8925 class LUPrepareExport(NoHooksLU):
8926 """Prepares an instance for an export and returns useful information.
8929 _OP_REQP = ["instance_name", "mode"]
8932 def CheckArguments(self):
8933 """Check the arguments.
8936 if self.op.mode not in constants.EXPORT_MODES:
8937 raise errors.OpPrereqError("Invalid export mode %r" % self.op.mode,
8940 def ExpandNames(self):
8941 self._ExpandAndLockInstance()
8943 def CheckPrereq(self):
8944 """Check prerequisites.
8947 instance_name = self.op.instance_name
8949 self.instance = self.cfg.GetInstanceInfo(instance_name)
8950 assert self.instance is not None, \
8951 "Cannot retrieve locked instance %s" % self.op.instance_name
8952 _CheckNodeOnline(self, self.instance.primary_node)
8954 self._cds = _GetClusterDomainSecret()
8956 def Exec(self, feedback_fn):
8957 """Prepares an instance for an export.
8960 instance = self.instance
8962 if self.op.mode == constants.EXPORT_MODE_REMOTE:
8963 salt = utils.GenerateSecret(8)
8965 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
8966 result = self.rpc.call_x509_cert_create(instance.primary_node,
8967 constants.RIE_CERT_VALIDITY)
8968 result.Raise("Can't create X509 key and certificate on %s" % result.node)
8970 (name, cert_pem) = result.payload
8972 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
8976 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
8977 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
8979 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
8985 class LUExportInstance(LogicalUnit):
8986 """Export an instance to an image in the cluster.
8989 HPATH = "instance-export"
8990 HTYPE = constants.HTYPE_INSTANCE
8991 _OP_REQP = ["instance_name", "target_node", "shutdown"]
8994 def CheckArguments(self):
8995 """Check the arguments.
8998 _CheckBooleanOpField(self.op, "remove_instance")
8999 _CheckBooleanOpField(self.op, "ignore_remove_failures")
9001 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
9002 constants.DEFAULT_SHUTDOWN_TIMEOUT)
9003 self.remove_instance = getattr(self.op, "remove_instance", False)
9004 self.ignore_remove_failures = getattr(self.op, "ignore_remove_failures",
9006 self.export_mode = getattr(self.op, "mode", constants.EXPORT_MODE_LOCAL)
9007 self.x509_key_name = getattr(self.op, "x509_key_name", None)
9008 self.dest_x509_ca_pem = getattr(self.op, "destination_x509_ca", None)
9010 if self.remove_instance and not self.op.shutdown:
9011 raise errors.OpPrereqError("Can not remove instance without shutting it"
9014 if self.export_mode not in constants.EXPORT_MODES:
9015 raise errors.OpPrereqError("Invalid export mode %r" % self.export_mode,
9018 if self.export_mode == constants.EXPORT_MODE_REMOTE:
9019 if not self.x509_key_name:
9020 raise errors.OpPrereqError("Missing X509 key name for encryption",
9023 if not self.dest_x509_ca_pem:
9024 raise errors.OpPrereqError("Missing destination X509 CA",
9027 def ExpandNames(self):
9028 self._ExpandAndLockInstance()
9030 # Lock all nodes for local exports
9031 if self.export_mode == constants.EXPORT_MODE_LOCAL:
9032 # FIXME: lock only instance primary and destination node
9034 # Sad but true, for now we have do lock all nodes, as we don't know where
9035 # the previous export might be, and in this LU we search for it and
9036 # remove it from its current node. In the future we could fix this by:
9037 # - making a tasklet to search (share-lock all), then create the new one,
9038 # then one to remove, after
9039 # - removing the removal operation altogether
9040 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9042 def DeclareLocks(self, level):
9043 """Last minute lock declaration."""
9044 # All nodes are locked anyway, so nothing to do here.
9046 def BuildHooksEnv(self):
9049 This will run on the master, primary node and target node.
9053 "EXPORT_MODE": self.export_mode,
9054 "EXPORT_NODE": self.op.target_node,
9055 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9056 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
9057 # TODO: Generic function for boolean env variables
9058 "REMOVE_INSTANCE": str(bool(self.remove_instance)),
9061 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9063 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9065 if self.export_mode == constants.EXPORT_MODE_LOCAL:
9066 nl.append(self.op.target_node)
9070 def CheckPrereq(self):
9071 """Check prerequisites.
9073 This checks that the instance and node names are valid.
9076 instance_name = self.op.instance_name
9078 self.instance = self.cfg.GetInstanceInfo(instance_name)
9079 assert self.instance is not None, \
9080 "Cannot retrieve locked instance %s" % self.op.instance_name
9081 _CheckNodeOnline(self, self.instance.primary_node)
9083 if self.export_mode == constants.EXPORT_MODE_LOCAL:
9084 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9085 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9086 assert self.dst_node is not None
9088 _CheckNodeOnline(self, self.dst_node.name)
9089 _CheckNodeNotDrained(self, self.dst_node.name)
9092 self.dest_disk_info = None
9093 self.dest_x509_ca = None
9095 elif self.export_mode == constants.EXPORT_MODE_REMOTE:
9096 self.dst_node = None
9098 if len(self.op.target_node) != len(self.instance.disks):
9099 raise errors.OpPrereqError(("Received destination information for %s"
9100 " disks, but instance %s has %s disks") %
9101 (len(self.op.target_node), instance_name,
9102 len(self.instance.disks)),
9105 cds = _GetClusterDomainSecret()
9107 # Check X509 key name
9109 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9110 except (TypeError, ValueError), err:
9111 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9113 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9114 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9117 # Load and verify CA
9119 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9120 except OpenSSL.crypto.Error, err:
9121 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9122 (err, ), errors.ECODE_INVAL)
9124 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9125 if errcode is not None:
9126 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" % (msg, ),
9129 self.dest_x509_ca = cert
9131 # Verify target information
9133 for idx, disk_data in enumerate(self.op.target_node):
9135 (host, port, magic) = \
9136 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9137 except errors.GenericError, err:
9138 raise errors.OpPrereqError("Target info for disk %s: %s" % (idx, err),
9141 disk_info.append((host, port, magic))
9143 assert len(disk_info) == len(self.op.target_node)
9144 self.dest_disk_info = disk_info
9147 raise errors.ProgrammerError("Unhandled export mode %r" %
9150 # instance disk type verification
9151 # TODO: Implement export support for file-based disks
9152 for disk in self.instance.disks:
9153 if disk.dev_type == constants.LD_FILE:
9154 raise errors.OpPrereqError("Export not supported for instances with"
9155 " file-based disks", errors.ECODE_INVAL)
9157 def _CleanupExports(self, feedback_fn):
9158 """Removes exports of current instance from all other nodes.
9160 If an instance in a cluster with nodes A..D was exported to node C, its
9161 exports will be removed from the nodes A, B and D.
9164 assert self.export_mode != constants.EXPORT_MODE_REMOTE
9166 nodelist = self.cfg.GetNodeList()
9167 nodelist.remove(self.dst_node.name)
9169 # on one-node clusters nodelist will be empty after the removal
9170 # if we proceed the backup would be removed because OpQueryExports
9171 # substitutes an empty list with the full cluster node list.
9172 iname = self.instance.name
9174 feedback_fn("Removing old exports for instance %s" % iname)
9175 exportlist = self.rpc.call_export_list(nodelist)
9176 for node in exportlist:
9177 if exportlist[node].fail_msg:
9179 if iname in exportlist[node].payload:
9180 msg = self.rpc.call_export_remove(node, iname).fail_msg
9182 self.LogWarning("Could not remove older export for instance %s"
9183 " on node %s: %s", iname, node, msg)
9185 def Exec(self, feedback_fn):
9186 """Export an instance to an image in the cluster.
9189 assert self.export_mode in constants.EXPORT_MODES
9191 instance = self.instance
9192 src_node = instance.primary_node
9194 if self.op.shutdown:
9195 # shutdown the instance, but not the disks
9196 feedback_fn("Shutting down instance %s" % instance.name)
9197 result = self.rpc.call_instance_shutdown(src_node, instance,
9198 self.shutdown_timeout)
9199 # TODO: Maybe ignore failures if ignore_remove_failures is set
9200 result.Raise("Could not shutdown instance %s on"
9201 " node %s" % (instance.name, src_node))
9203 # set the disks ID correctly since call_instance_start needs the
9204 # correct drbd minor to create the symlinks
9205 for disk in instance.disks:
9206 self.cfg.SetDiskID(disk, src_node)
9208 activate_disks = (not instance.admin_up)
9211 # Activate the instance disks if we'exporting a stopped instance
9212 feedback_fn("Activating disks for %s" % instance.name)
9213 _StartInstanceDisks(self, instance, None)
9216 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9219 helper.CreateSnapshots()
9221 if (self.op.shutdown and instance.admin_up and
9222 not self.remove_instance):
9223 assert not activate_disks
9224 feedback_fn("Starting instance %s" % instance.name)
9225 result = self.rpc.call_instance_start(src_node, instance, None, None)
9226 msg = result.fail_msg
9228 feedback_fn("Failed to start instance: %s" % msg)
9229 _ShutdownInstanceDisks(self, instance)
9230 raise errors.OpExecError("Could not start instance: %s" % msg)
9232 if self.export_mode == constants.EXPORT_MODE_LOCAL:
9233 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9234 elif self.export_mode == constants.EXPORT_MODE_REMOTE:
9235 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9236 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9238 (key_name, _, _) = self.x509_key_name
9241 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9244 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9245 key_name, dest_ca_pem,
9250 # Check for backwards compatibility
9251 assert len(dresults) == len(instance.disks)
9252 assert compat.all(isinstance(i, bool) for i in dresults), \
9253 "Not all results are boolean: %r" % dresults
9257 feedback_fn("Deactivating disks for %s" % instance.name)
9258 _ShutdownInstanceDisks(self, instance)
9260 # Remove instance if requested
9261 if self.remove_instance:
9262 if not (compat.all(dresults) and fin_resu):
9263 feedback_fn("Not removing instance %s as parts of the export failed" %
9266 feedback_fn("Removing instance %s" % instance.name)
9267 _RemoveInstance(self, feedback_fn, instance,
9268 self.ignore_remove_failures)
9270 if self.export_mode == constants.EXPORT_MODE_LOCAL:
9271 self._CleanupExports(feedback_fn)
9273 return fin_resu, dresults
9276 class LURemoveExport(NoHooksLU):
9277 """Remove exports related to the named instance.
9280 _OP_REQP = ["instance_name"]
9283 def ExpandNames(self):
9284 self.needed_locks = {}
9285 # We need all nodes to be locked in order for RemoveExport to work, but we
9286 # don't need to lock the instance itself, as nothing will happen to it (and
9287 # we can remove exports also for a removed instance)
9288 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9290 def CheckPrereq(self):
9291 """Check prerequisites.
9295 def Exec(self, feedback_fn):
9296 """Remove any export.
9299 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9300 # If the instance was not found we'll try with the name that was passed in.
9301 # This will only work if it was an FQDN, though.
9303 if not instance_name:
9305 instance_name = self.op.instance_name
9307 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9308 exportlist = self.rpc.call_export_list(locked_nodes)
9310 for node in exportlist:
9311 msg = exportlist[node].fail_msg
9313 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9315 if instance_name in exportlist[node].payload:
9317 result = self.rpc.call_export_remove(node, instance_name)
9318 msg = result.fail_msg
9320 logging.error("Could not remove export for instance %s"
9321 " on node %s: %s", instance_name, node, msg)
9323 if fqdn_warn and not found:
9324 feedback_fn("Export not found. If trying to remove an export belonging"
9325 " to a deleted instance please use its Fully Qualified"
9329 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9332 This is an abstract class which is the parent of all the other tags LUs.
9336 def ExpandNames(self):
9337 self.needed_locks = {}
9338 if self.op.kind == constants.TAG_NODE:
9339 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9340 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9341 elif self.op.kind == constants.TAG_INSTANCE:
9342 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9343 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9345 def CheckPrereq(self):
9346 """Check prerequisites.
9349 if self.op.kind == constants.TAG_CLUSTER:
9350 self.target = self.cfg.GetClusterInfo()
9351 elif self.op.kind == constants.TAG_NODE:
9352 self.target = self.cfg.GetNodeInfo(self.op.name)
9353 elif self.op.kind == constants.TAG_INSTANCE:
9354 self.target = self.cfg.GetInstanceInfo(self.op.name)
9356 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9357 str(self.op.kind), errors.ECODE_INVAL)
9360 class LUGetTags(TagsLU):
9361 """Returns the tags of a given object.
9364 _OP_REQP = ["kind", "name"]
9367 def Exec(self, feedback_fn):
9368 """Returns the tag list.
9371 return list(self.target.GetTags())
9374 class LUSearchTags(NoHooksLU):
9375 """Searches the tags for a given pattern.
9378 _OP_REQP = ["pattern"]
9381 def ExpandNames(self):
9382 self.needed_locks = {}
9384 def CheckPrereq(self):
9385 """Check prerequisites.
9387 This checks the pattern passed for validity by compiling it.
9391 self.re = re.compile(self.op.pattern)
9392 except re.error, err:
9393 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9394 (self.op.pattern, err), errors.ECODE_INVAL)
9396 def Exec(self, feedback_fn):
9397 """Returns the tag list.
9401 tgts = [("/cluster", cfg.GetClusterInfo())]
9402 ilist = cfg.GetAllInstancesInfo().values()
9403 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9404 nlist = cfg.GetAllNodesInfo().values()
9405 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9407 for path, target in tgts:
9408 for tag in target.GetTags():
9409 if self.re.search(tag):
9410 results.append((path, tag))
9414 class LUAddTags(TagsLU):
9415 """Sets a tag on a given object.
9418 _OP_REQP = ["kind", "name", "tags"]
9421 def CheckPrereq(self):
9422 """Check prerequisites.
9424 This checks the type and length of the tag name and value.
9427 TagsLU.CheckPrereq(self)
9428 for tag in self.op.tags:
9429 objects.TaggableObject.ValidateTag(tag)
9431 def Exec(self, feedback_fn):
9436 for tag in self.op.tags:
9437 self.target.AddTag(tag)
9438 except errors.TagError, err:
9439 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9440 self.cfg.Update(self.target, feedback_fn)
9443 class LUDelTags(TagsLU):
9444 """Delete a list of tags from a given object.
9447 _OP_REQP = ["kind", "name", "tags"]
9450 def CheckPrereq(self):
9451 """Check prerequisites.
9453 This checks that we have the given tag.
9456 TagsLU.CheckPrereq(self)
9457 for tag in self.op.tags:
9458 objects.TaggableObject.ValidateTag(tag)
9459 del_tags = frozenset(self.op.tags)
9460 cur_tags = self.target.GetTags()
9461 if not del_tags <= cur_tags:
9462 diff_tags = del_tags - cur_tags
9463 diff_names = ["'%s'" % tag for tag in diff_tags]
9465 raise errors.OpPrereqError("Tag(s) %s not found" %
9466 (",".join(diff_names)), errors.ECODE_NOENT)
9468 def Exec(self, feedback_fn):
9469 """Remove the tag from the object.
9472 for tag in self.op.tags:
9473 self.target.RemoveTag(tag)
9474 self.cfg.Update(self.target, feedback_fn)
9477 class LUTestDelay(NoHooksLU):
9478 """Sleep for a specified amount of time.
9480 This LU sleeps on the master and/or nodes for a specified amount of
9484 _OP_REQP = ["duration", "on_master", "on_nodes"]
9487 def ExpandNames(self):
9488 """Expand names and set required locks.
9490 This expands the node list, if any.
9493 self.needed_locks = {}
9494 if self.op.on_nodes:
9495 # _GetWantedNodes can be used here, but is not always appropriate to use
9496 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9498 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9499 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9501 def CheckPrereq(self):
9502 """Check prerequisites.
9506 def Exec(self, feedback_fn):
9507 """Do the actual sleep.
9510 if self.op.on_master:
9511 if not utils.TestDelay(self.op.duration):
9512 raise errors.OpExecError("Error during master delay test")
9513 if self.op.on_nodes:
9514 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9515 for node, node_result in result.items():
9516 node_result.Raise("Failure during rpc call to node %s" % node)
9519 class IAllocator(object):
9520 """IAllocator framework.
9522 An IAllocator instance has three sets of attributes:
9523 - cfg that is needed to query the cluster
9524 - input data (all members of the _KEYS class attribute are required)
9525 - four buffer attributes (in|out_data|text), that represent the
9526 input (to the external script) in text and data structure format,
9527 and the output from it, again in two formats
9528 - the result variables from the script (success, info, nodes) for
9532 # pylint: disable-msg=R0902
9533 # lots of instance attributes
9535 "name", "mem_size", "disks", "disk_template",
9536 "os", "tags", "nics", "vcpus", "hypervisor",
9539 "name", "relocate_from",
9545 def __init__(self, cfg, rpc, mode, **kwargs):
9548 # init buffer variables
9549 self.in_text = self.out_text = self.in_data = self.out_data = None
9550 # init all input fields so that pylint is happy
9552 self.mem_size = self.disks = self.disk_template = None
9553 self.os = self.tags = self.nics = self.vcpus = None
9554 self.hypervisor = None
9555 self.relocate_from = None
9557 self.evac_nodes = None
9559 self.required_nodes = None
9560 # init result fields
9561 self.success = self.info = self.result = None
9562 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9563 keyset = self._ALLO_KEYS
9564 fn = self._AddNewInstance
9565 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9566 keyset = self._RELO_KEYS
9567 fn = self._AddRelocateInstance
9568 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9569 keyset = self._EVAC_KEYS
9570 fn = self._AddEvacuateNodes
9572 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9573 " IAllocator" % self.mode)
9575 if key not in keyset:
9576 raise errors.ProgrammerError("Invalid input parameter '%s' to"
9577 " IAllocator" % key)
9578 setattr(self, key, kwargs[key])
9581 if key not in kwargs:
9582 raise errors.ProgrammerError("Missing input parameter '%s' to"
9583 " IAllocator" % key)
9584 self._BuildInputData(fn)
9586 def _ComputeClusterData(self):
9587 """Compute the generic allocator input data.
9589 This is the data that is independent of the actual operation.
9593 cluster_info = cfg.GetClusterInfo()
9596 "version": constants.IALLOCATOR_VERSION,
9597 "cluster_name": cfg.GetClusterName(),
9598 "cluster_tags": list(cluster_info.GetTags()),
9599 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9600 # we don't have job IDs
9602 iinfo = cfg.GetAllInstancesInfo().values()
9603 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9607 node_list = cfg.GetNodeList()
9609 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9610 hypervisor_name = self.hypervisor
9611 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9612 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9613 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9614 hypervisor_name = cluster_info.enabled_hypervisors[0]
9616 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9619 self.rpc.call_all_instances_info(node_list,
9620 cluster_info.enabled_hypervisors)
9621 for nname, nresult in node_data.items():
9622 # first fill in static (config-based) values
9623 ninfo = cfg.GetNodeInfo(nname)
9625 "tags": list(ninfo.GetTags()),
9626 "primary_ip": ninfo.primary_ip,
9627 "secondary_ip": ninfo.secondary_ip,
9628 "offline": ninfo.offline,
9629 "drained": ninfo.drained,
9630 "master_candidate": ninfo.master_candidate,
9633 if not (ninfo.offline or ninfo.drained):
9634 nresult.Raise("Can't get data for node %s" % nname)
9635 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9637 remote_info = nresult.payload
9639 for attr in ['memory_total', 'memory_free', 'memory_dom0',
9640 'vg_size', 'vg_free', 'cpu_total']:
9641 if attr not in remote_info:
9642 raise errors.OpExecError("Node '%s' didn't return attribute"
9643 " '%s'" % (nname, attr))
9644 if not isinstance(remote_info[attr], int):
9645 raise errors.OpExecError("Node '%s' returned invalid value"
9647 (nname, attr, remote_info[attr]))
9648 # compute memory used by primary instances
9649 i_p_mem = i_p_up_mem = 0
9650 for iinfo, beinfo in i_list:
9651 if iinfo.primary_node == nname:
9652 i_p_mem += beinfo[constants.BE_MEMORY]
9653 if iinfo.name not in node_iinfo[nname].payload:
9656 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9657 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9658 remote_info['memory_free'] -= max(0, i_mem_diff)
9661 i_p_up_mem += beinfo[constants.BE_MEMORY]
9663 # compute memory used by instances
9665 "total_memory": remote_info['memory_total'],
9666 "reserved_memory": remote_info['memory_dom0'],
9667 "free_memory": remote_info['memory_free'],
9668 "total_disk": remote_info['vg_size'],
9669 "free_disk": remote_info['vg_free'],
9670 "total_cpus": remote_info['cpu_total'],
9671 "i_pri_memory": i_p_mem,
9672 "i_pri_up_memory": i_p_up_mem,
9676 node_results[nname] = pnr
9677 data["nodes"] = node_results
9681 for iinfo, beinfo in i_list:
9683 for nic in iinfo.nics:
9684 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
9685 nic_dict = {"mac": nic.mac,
9687 "mode": filled_params[constants.NIC_MODE],
9688 "link": filled_params[constants.NIC_LINK],
9690 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9691 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9692 nic_data.append(nic_dict)
9694 "tags": list(iinfo.GetTags()),
9695 "admin_up": iinfo.admin_up,
9696 "vcpus": beinfo[constants.BE_VCPUS],
9697 "memory": beinfo[constants.BE_MEMORY],
9699 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9701 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9702 "disk_template": iinfo.disk_template,
9703 "hypervisor": iinfo.hypervisor,
9705 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9707 instance_data[iinfo.name] = pir
9709 data["instances"] = instance_data
9713 def _AddNewInstance(self):
9714 """Add new instance data to allocator structure.
9716 This in combination with _AllocatorGetClusterData will create the
9717 correct structure needed as input for the allocator.
9719 The checks for the completeness of the opcode must have already been
9723 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9725 if self.disk_template in constants.DTS_NET_MIRROR:
9726 self.required_nodes = 2
9728 self.required_nodes = 1
9731 "disk_template": self.disk_template,
9734 "vcpus": self.vcpus,
9735 "memory": self.mem_size,
9736 "disks": self.disks,
9737 "disk_space_total": disk_space,
9739 "required_nodes": self.required_nodes,
9743 def _AddRelocateInstance(self):
9744 """Add relocate instance data to allocator structure.
9746 This in combination with _IAllocatorGetClusterData will create the
9747 correct structure needed as input for the allocator.
9749 The checks for the completeness of the opcode must have already been
9753 instance = self.cfg.GetInstanceInfo(self.name)
9754 if instance is None:
9755 raise errors.ProgrammerError("Unknown instance '%s' passed to"
9756 " IAllocator" % self.name)
9758 if instance.disk_template not in constants.DTS_NET_MIRROR:
9759 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9762 if len(instance.secondary_nodes) != 1:
9763 raise errors.OpPrereqError("Instance has not exactly one secondary node",
9766 self.required_nodes = 1
9767 disk_sizes = [{'size': disk.size} for disk in instance.disks]
9768 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9772 "disk_space_total": disk_space,
9773 "required_nodes": self.required_nodes,
9774 "relocate_from": self.relocate_from,
9778 def _AddEvacuateNodes(self):
9779 """Add evacuate nodes data to allocator structure.
9783 "evac_nodes": self.evac_nodes
9787 def _BuildInputData(self, fn):
9788 """Build input data structures.
9791 self._ComputeClusterData()
9794 request["type"] = self.mode
9795 self.in_data["request"] = request
9797 self.in_text = serializer.Dump(self.in_data)
9799 def Run(self, name, validate=True, call_fn=None):
9800 """Run an instance allocator and return the results.
9804 call_fn = self.rpc.call_iallocator_runner
9806 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9807 result.Raise("Failure while running the iallocator script")
9809 self.out_text = result.payload
9811 self._ValidateResult()
9813 def _ValidateResult(self):
9814 """Process the allocator results.
9816 This will process and if successful save the result in
9817 self.out_data and the other parameters.
9821 rdict = serializer.Load(self.out_text)
9822 except Exception, err:
9823 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9825 if not isinstance(rdict, dict):
9826 raise errors.OpExecError("Can't parse iallocator results: not a dict")
9828 # TODO: remove backwards compatiblity in later versions
9829 if "nodes" in rdict and "result" not in rdict:
9830 rdict["result"] = rdict["nodes"]
9833 for key in "success", "info", "result":
9834 if key not in rdict:
9835 raise errors.OpExecError("Can't parse iallocator results:"
9836 " missing key '%s'" % key)
9837 setattr(self, key, rdict[key])
9839 if not isinstance(rdict["result"], list):
9840 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9842 self.out_data = rdict
9845 class LUTestAllocator(NoHooksLU):
9846 """Run allocator tests.
9848 This LU runs the allocator tests
9851 _OP_REQP = ["direction", "mode", "name"]
9853 def CheckPrereq(self):
9854 """Check prerequisites.
9856 This checks the opcode parameters depending on the director and mode test.
9859 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9860 for attr in ["name", "mem_size", "disks", "disk_template",
9861 "os", "tags", "nics", "vcpus"]:
9862 if not hasattr(self.op, attr):
9863 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
9864 attr, errors.ECODE_INVAL)
9865 iname = self.cfg.ExpandInstanceName(self.op.name)
9866 if iname is not None:
9867 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
9868 iname, errors.ECODE_EXISTS)
9869 if not isinstance(self.op.nics, list):
9870 raise errors.OpPrereqError("Invalid parameter 'nics'",
9872 for row in self.op.nics:
9873 if (not isinstance(row, dict) or
9876 "bridge" not in row):
9877 raise errors.OpPrereqError("Invalid contents of the 'nics'"
9878 " parameter", errors.ECODE_INVAL)
9879 if not isinstance(self.op.disks, list):
9880 raise errors.OpPrereqError("Invalid parameter 'disks'",
9882 for row in self.op.disks:
9883 if (not isinstance(row, dict) or
9884 "size" not in row or
9885 not isinstance(row["size"], int) or
9886 "mode" not in row or
9887 row["mode"] not in ['r', 'w']):
9888 raise errors.OpPrereqError("Invalid contents of the 'disks'"
9889 " parameter", errors.ECODE_INVAL)
9890 if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
9891 self.op.hypervisor = self.cfg.GetHypervisorType()
9892 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9893 if not hasattr(self.op, "name"):
9894 raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
9896 fname = _ExpandInstanceName(self.cfg, self.op.name)
9897 self.op.name = fname
9898 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
9899 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9900 if not hasattr(self.op, "evac_nodes"):
9901 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
9902 " opcode input", errors.ECODE_INVAL)
9904 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
9905 self.op.mode, errors.ECODE_INVAL)
9907 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
9908 if not hasattr(self.op, "allocator") or self.op.allocator is None:
9909 raise errors.OpPrereqError("Missing allocator name",
9911 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
9912 raise errors.OpPrereqError("Wrong allocator test '%s'" %
9913 self.op.direction, errors.ECODE_INVAL)
9915 def Exec(self, feedback_fn):
9916 """Run the allocator test.
9919 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9920 ial = IAllocator(self.cfg, self.rpc,
9923 mem_size=self.op.mem_size,
9924 disks=self.op.disks,
9925 disk_template=self.op.disk_template,
9929 vcpus=self.op.vcpus,
9930 hypervisor=self.op.hypervisor,
9932 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9933 ial = IAllocator(self.cfg, self.rpc,
9936 relocate_from=list(self.relocate_from),
9938 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9939 ial = IAllocator(self.cfg, self.rpc,
9941 evac_nodes=self.op.evac_nodes)
9943 raise errors.ProgrammerError("Uncatched mode %s in"
9944 " LUTestAllocator.Exec", self.op.mode)
9946 if self.op.direction == constants.IALLOCATOR_DIR_IN:
9947 result = ial.in_text
9949 ial.Run(self.op.allocator, validate=False)
9950 result = ial.out_text