4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
import time
import re
import logging
import OpenSSL

from ganeti import ssh
39 from ganeti import utils
40 from ganeti import errors
41 from ganeti import hypervisor
42 from ganeti import locking
43 from ganeti import constants
44 from ganeti import objects
45 from ganeti import serializer
46 from ganeti import ssconf
47 from ganeti import uidpool
48 from ganeti import compat
49 from ganeti import masterd
51 import ganeti.masterd.instance # pylint: disable-msg=W0611
54 class LogicalUnit(object):
55 """Logical Unit base class.
57 Subclasses must follow these rules:
58 - implement ExpandNames
59 - implement CheckPrereq (except when tasklets are used)
60 - implement Exec (except when tasklets are used)
61 - implement BuildHooksEnv
62 - redefine HPATH and HTYPE
63 - optionally redefine their run requirements:
64 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
66 Note that all commands require root permissions.
68 @ivar dry_run_result: the value (if any) that will be returned to the caller
69 in dry-run mode (signalled by opcode dry_run parameter)
77 def __init__(self, processor, op, context, rpc):
78 """Constructor for LogicalUnit.
    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
89 # Dicts used to declare locking needs to mcpu
90 self.needed_locks = None
91 self.acquired_locks = {}
92 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
94 self.remove_locks = {}
95 # Used to force good behavior when calling helper functions
96 self.recalculate_locks = {}
99 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
100 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
101 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
102 # support for dry-run
103 self.dry_run_result = None
104 # support for generic debug attribute
105 if (not hasattr(self.op, "debug_level") or
106 not isinstance(self.op.debug_level, int)):
107 self.op.debug_level = 0
    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)
118 self.CheckArguments()
121 """Returns the SshRunner object
125 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
128 ssh = property(fget=__GetSSH)
130 def CheckArguments(self):
131 """Check syntactic validity for the opcode arguments.
    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left purely as a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
148 def ExpandNames(self):
149 """Expand names for this LU.
151 This method is called before starting to execute the opcode, and it should
152 update all the parameters of the opcode to their canonical form (e.g. a
153 short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.
156 LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock
    names as values. Rules:
160 - use an empty dict if you don't need any lock
161 - if you don't need any lock at a particular level omit that level
162 - don't put anything for the BGL level
163 - if you want all locks at a level use locking.ALL_SET as a value
165 If you need to share locks (rather than acquire them exclusively) at one
166 level you can modify self.share_locks, setting a true value (usually 1) for
167 that level. By default locks are not shared.
169 This function can also define a list of tasklets, which then will be
170 executed in order instead of the usual LU-level CheckPrereq and Exec
171 functions, if those are not defined by the LU.
    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError
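  # Illustrative sketch (not from a specific LU): a read-only LU that wants
  # all node locks in shared mode, as described in the docstring above, could
  # do something like:
  #
  #   def ExpandNames(self):
  #     self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
  #     self.share_locks[locking.LEVEL_NODE] = 1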
196 def DeclareLocks(self, level):
197 """Declare LU locking needs for a level
199 While most LUs can just declare their locking needs at ExpandNames time,
200 sometimes there's the need to calculate some locks after having acquired
201 the ones before. This function is called just before acquiring locks at a
202 particular level, but after acquiring the ones at lower levels, and permits
203 such calculations. It can be used to modify self.needed_locks, and by
204 default it does nothing.
206 This function is only called if you have something already set in
207 self.needed_locks for the level.
209 @param level: Locking level which is going to be locked
210 @type level: member of ganeti.locking.LEVELS
214 def CheckPrereq(self):
215 """Check prerequisites for this LU.
217 This method should check that the prerequisites for the execution
218 of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.
222 The method should raise errors.OpPrereqError in case something is
223 not fulfilled. Its return value is ignored.
225 This method should also update all the parameters of the opcode to
226 their canonical form if it hasn't been done by ExpandNames before.
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError
  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError
252 def BuildHooksEnv(self):
253 """Build hooks environment for this LU.
    This method should return a three-element tuple consisting of: a dict
256 containing the environment that will be used for running the
257 specific hook for this LU, a list of node names on which the hook
258 should run before the execution, and a list of node names on which
259 the hook should run after the execution.
    The keys of the dict must not be prefixed with 'GANETI_', as this
    will be handled in the hooks runner. Also note that additional keys
    will be added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.
266 No nodes should be returned as an empty list (and not None).
    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
272 raise NotImplementedError
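  # Illustrative sketch (hypothetical LU, not taken from this module): the
  # returned triple is (env dict, nodes to run the pre-hook on, nodes to run
  # the post-hook on), for example:
  #
  #   def BuildHooksEnv(self):
  #     env = {"OP_TARGET": self.op.instance_name}
  #     nl = [self.cfg.GetMasterNode()]
  #     return env, nl, nl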
274 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
275 """Notify the LU about the results of its hooks.
277 This method is called every time a hooks phase is executed, and notifies
278 the Logical Unit about the hooks' result. The LU can then use it to alter
279 its result based on the hooks. By default the method does nothing and the
280 previous result is passed back unchanged but any LU can define it if it
281 wants to use the local cluster hook-scripts somehow.
283 @param phase: one of L{constants.HOOKS_PHASE_POST} or
284 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
285 @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
287 @param lu_result: the previous Exec result this LU had, or None
289 @return: the new Exec result, based on the previous result
    # API must be kept, thus we ignore the unused argument and the "could
    # be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result
298 def _ExpandAndLockInstance(self):
299 """Helper function to expand and lock an instance.
301 Many LUs that work on an instance take its name in self.op.instance_name
302 and need to expand it and then declare the expanded name for locking. This
303 function does it, and then updates self.op.instance_name to the expanded
304 name. It also initializes needed_locks as a dict, if this hasn't been done
308 if self.needed_locks is None:
309 self.needed_locks = {}
311 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
312 "_ExpandAndLockInstance called with instance-level locks set"
313 self.op.instance_name = _ExpandInstanceName(self.cfg,
314 self.op.instance_name)
315 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
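  # Illustrative usage (assumed pattern, not tied to a particular LU): an
  # instance-level LU would typically combine this helper with
  # _LockInstancesNodes below:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE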
317 def _LockInstancesNodes(self, primary_only=False):
318 """Helper function to declare instances' nodes for locking.
320 This function should be called after locking one or more instances to lock
321 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
322 with all primary or secondary nodes for instances already locked and
323 present in self.needed_locks[locking.LEVEL_INSTANCE].
325 It should be called from DeclareLocks, and for safety only works if
326 self.recalculate_locks[locking.LEVEL_NODE] is set.
328 In the future it may grow parameters to just lock some instance's nodes, or
329 to just lock primaries or secondary nodes, if needed.
    It should be called from DeclareLocks in a way similar to::
333 if level == locking.LEVEL_NODE:
334 self._LockInstancesNodes()
336 @type primary_only: boolean
337 @param primary_only: only lock primary nodes of locked instances
340 assert locking.LEVEL_NODE in self.recalculate_locks, \
341 "_LockInstancesNodes helper function called with no nodes to recalculate"
    # TODO: check if we're really being called with the instance locks held
345 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
346 # future we might want to have different behaviors depending on the value
347 # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)
355 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
356 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
357 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
358 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
360 del self.recalculate_locks[locking.LEVEL_NODE]
363 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
364 """Simple LU which runs no hooks.
366 This LU is intended as a parent for other LogicalUnits which will
367 run no hooks, in order to reduce duplicate code.
373 def BuildHooksEnv(self):
374 """Empty BuildHooksEnv for NoHooksLu.
376 This just raises an error.
379 assert False, "BuildHooksEnv called for NoHooksLUs"
383 """Tasklet base class.
385 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
386 they can mix legacy code with tasklets. Locking needs to be done in the LU,
387 tasklets know nothing about locks.
389 Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec
394 def __init__(self, lu):
401 def CheckPrereq(self):
402 """Check prerequisites for this tasklets.
404 This method should check whether the prerequisites for the execution of
405 this tasklet are fulfilled. It can do internode communication, but it
406 should be idempotent - no cluster or system changes are allowed.
408 The method should raise errors.OpPrereqError in case something is not
409 fulfilled. Its return value is ignored.
411 This method should also update all parameters to their canonical form if it
412 hasn't been done before.
415 raise NotImplementedError
417 def Exec(self, feedback_fn):
418 """Execute the tasklet.
420 This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
425 raise NotImplementedError
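  # Illustrative sketch: an LU built from tasklets (see the LogicalUnit
  # docstring) would wire them up in ExpandNames, e.g. with a hypothetical
  # TLExample tasklet:
  #
  #   def ExpandNames(self):
  #     self.needed_locks = {}
  #     self.tasklets = [TLExample(self)]
  #
  # The LU-level CheckPrereq and Exec then iterate over self.tasklets instead
  # of providing their own implementations.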
428 def _GetWantedNodes(lu, nodes):
429 """Returns list of checked and expanded node names.
431 @type lu: L{LogicalUnit}
432 @param lu: the logical unit on whose behalf we execute
434 @param nodes: list of node names or None for all nodes
436 @return: the list of nodes, sorted
437 @raise errors.ProgrammerError: if the nodes parameter is wrong type
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")
448 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
449 return utils.NiceSort(wanted)
452 def _GetWantedInstances(lu, instances):
453 """Returns list of checked and expanded instance names.
455 @type lu: L{LogicalUnit}
456 @param lu: the logical unit on whose behalf we execute
457 @type instances: list
458 @param instances: list of instance names or None for all instances
460 @return: the list of instances, sorted
461 @raise errors.OpPrereqError: if the instances parameter is wrong type
462 @raise errors.OpPrereqError: if any of the passed instances is not found
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())

  return wanted
476 def _CheckOutputFields(static, dynamic, selected):
477 """Checks whether all selected fields are valid.
479 @type static: L{utils.FieldSet}
480 @param static: static fields set
481 @type dynamic: L{utils.FieldSet}
482 @param dynamic: dynamic fields set
  f = dynamic.Copy()
  f.Extend(static)
  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
495 def _CheckBooleanOpField(op, name):
496 """Validates boolean opcode parameters.
498 This will ensure that an opcode parameter is either a boolean value,
499 or None (but that it always exists).
502 val = getattr(op, name, None)
503 if not (val is None or isinstance(val, bool)):
504 raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
505 (name, str(val)), errors.ECODE_INVAL)
506 setattr(op, name, val)
509 def _CheckGlobalHvParams(params):
510 """Validates that given hypervisor params are not global ones.
  This will ensure that instances don't get customised versions of
  global parameters.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
519 " be customized at instance level, please modify them at"
520 " cluster level: %s" % utils.CommaJoin(used_globals))
521 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
524 def _CheckNodeOnline(lu, node):
525 """Ensure that a given node is online.
527 @param lu: the LU on behalf of which we make the check
528 @param node: the node to check
529 @raise errors.OpPrereqError: if the node is offline
532 if lu.cfg.GetNodeInfo(node).offline:
533 raise errors.OpPrereqError("Can't use offline node %s" % node,
537 def _CheckNodeNotDrained(lu, node):
538 """Ensure that a given node is not drained.
540 @param lu: the LU on behalf of which we make the check
541 @param node: the node to check
542 @raise errors.OpPrereqError: if the node is drained
545 if lu.cfg.GetNodeInfo(node).drained:
546 raise errors.OpPrereqError("Can't use drained node %s" % node,
550 def _CheckNodeHasOS(lu, node, os_name, force_variant):
551 """Ensure that a node supports a given OS.
553 @param lu: the LU on behalf of which we make the check
554 @param node: the node to check
555 @param os_name: the OS to query about
556 @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS
560 result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
564 if not force_variant:
565 _CheckOSVariant(result.payload, os_name)
568 def _RequireFileStorage():
569 """Checks that file storage is enabled.
571 @raise errors.OpPrereqError: when file storage is disabled
574 if not constants.ENABLE_FILE_STORAGE:
575 raise errors.OpPrereqError("File storage disabled at configure time",
579 def _CheckDiskTemplate(template):
580 """Ensure a given disk template is valid.
583 if template not in constants.DISK_TEMPLATES:
584 msg = ("Invalid disk template name '%s', valid templates are: %s" %
585 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
586 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
587 if template == constants.DT_FILE:
588 _RequireFileStorage()
591 def _CheckStorageType(storage_type):
592 """Ensure a given storage type is valid.
595 if storage_type not in constants.VALID_STORAGE_TYPES:
596 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
598 if storage_type == constants.ST_FILE:
599 _RequireFileStorage()
602 def _GetClusterDomainSecret():
603 """Reads the cluster domain secret.
606 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
610 def _CheckInstanceDown(lu, instance, reason):
611 """Ensure that an instance is not running."""
612 if instance.admin_up:
613 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
614 (instance.name, reason), errors.ECODE_STATE)
616 pnode = instance.primary_node
617 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
618 ins_l.Raise("Can't contact node %s for instance information" % pnode,
619 prereq=True, ecode=errors.ECODE_ENVIRON)
621 if instance.name in ins_l.payload:
622 raise errors.OpPrereqError("Instance %s is running, %s" %
623 (instance.name, reason), errors.ECODE_STATE)
626 def _ExpandItemName(fn, name, kind):
627 """Expand an item name.
629 @param fn: the function to use for expansion
630 @param name: requested item name
631 @param kind: text description ('Node' or 'Instance')
632 @return: the resolved (full) name
633 @raise errors.OpPrereqError: if the item is not found
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name
643 def _ExpandNodeName(cfg, name):
644 """Wrapper over L{_ExpandItemName} for nodes."""
645 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
648 def _ExpandInstanceName(cfg, name):
649 """Wrapper over L{_ExpandItemName} for instance."""
650 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
653 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
654 memory, vcpus, nics, disk_template, disks,
655 bep, hvp, hypervisor_name):
656 """Builds instance related env variables for hooks
658 This builds the hook environment from individual variables.
661 @param name: the name of the instance
662 @type primary_node: string
663 @param primary_node: the name of the instance's primary node
664 @type secondary_nodes: list
665 @param secondary_nodes: list of secondary nodes as strings
666 @type os_type: string
667 @param os_type: the name of the instance's OS
668 @type status: boolean
669 @param status: the should_run status of the instance
671 @param memory: the memory size of the instance
673 @param vcpus: the count of VCPUs the instance has
675 @param nics: list of tuples (ip, mac, mode, link) representing
676 the NICs the instance has
677 @type disk_template: string
678 @param disk_template: the disk template of the instance
680 @param disks: the list of (size, mode) pairs
682 @param bep: the backend parameters for the instance
684 @param hvp: the hypervisor parameters for the instance
685 @type hypervisor_name: string
686 @param hypervisor_name: the hypervisor for the instance
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
698 "INSTANCE_PRIMARY": primary_node,
699 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
700 "INSTANCE_OS_TYPE": os_type,
701 "INSTANCE_STATUS": str_status,
702 "INSTANCE_MEMORY": memory,
703 "INSTANCE_VCPUS": vcpus,
704 "INSTANCE_DISK_TEMPLATE": disk_template,
705 "INSTANCE_HYPERVISOR": hypervisor_name,
709 nic_count = len(nics)
710 for idx, (ip, mac, mode, link) in enumerate(nics):
713 env["INSTANCE_NIC%d_IP" % idx] = ip
714 env["INSTANCE_NIC%d_MAC" % idx] = mac
715 env["INSTANCE_NIC%d_MODE" % idx] = mode
716 env["INSTANCE_NIC%d_LINK" % idx] = link
717 if mode == constants.NIC_MODE_BRIDGED:
718 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
722 env["INSTANCE_NIC_COUNT"] = nic_count
725 disk_count = len(disks)
726 for idx, (size, mode) in enumerate(disks):
727 env["INSTANCE_DISK%d_SIZE" % idx] = size
728 env["INSTANCE_DISK%d_MODE" % idx] = mode
732 env["INSTANCE_DISK_COUNT"] = disk_count
734 for source, kind in [(bep, "BE"), (hvp, "HV")]:
735 for key, value in source.items():
736 env["INSTANCE_%s_%s" % (kind, key)] = value
741 def _NICListToTuple(lu, nics):
742 """Build a list of nic information tuples.
744 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
745 value in LUQueryInstanceData.
747 @type lu: L{LogicalUnit}
748 @param lu: the logical unit on whose behalf we execute
749 @type nics: list of L{objects.NIC}
750 @param nics: list of nics to convert to hooks tuples
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics
765 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
766 """Builds instance related env variables for hooks from an object.
768 @type lu: L{LogicalUnit}
769 @param lu: the logical unit on whose behalf we execute
770 @type instance: L{objects.Instance}
771 @param instance: the instance for which we should build the
774 @param override: dictionary with key/values that will override
777 @return: the hook environment dictionary
780 cluster = lu.cfg.GetClusterInfo()
781 bep = cluster.FillBE(instance)
782 hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
785 'primary_node': instance.primary_node,
786 'secondary_nodes': instance.secondary_nodes,
787 'os_type': instance.os,
788 'status': instance.admin_up,
789 'memory': bep[constants.BE_MEMORY],
790 'vcpus': bep[constants.BE_VCPUS],
791 'nics': _NICListToTuple(lu, instance.nics),
792 'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
803 def _AdjustCandidatePool(lu, exceptions):
804 """Adjust the candidate pool after node operations.
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))
819 def _DecideSelfPromotion(lu, exceptions=None):
820 """Decide whether I should promote myself as a master candidate.
823 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
824 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
825 # the new node will increase mc_max with one, so:
826 mc_should = min(mc_should + 1, cp_size)
827 return mc_now < mc_should
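# Worked example (illustrative numbers only): with candidate_pool_size = 10
# and GetMasterCandidateStats reporting mc_now = 3, mc_should = 3, adding the
# new node gives mc_should = min(3 + 1, 10) = 4; since 3 < 4 the node decides
# to promote itself to master candidate.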
830 def _CheckNicsBridgesExist(lu, target_nics, target_node,
831 profile=constants.PP_DEFAULT):
832 """Check that the brigdes needed by a list of nics exist.
835 c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
836 paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
837 for nic in target_nics]
838 brlist = [params[constants.NIC_LINK] for params in paramslist
839 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
841 result = lu.rpc.call_bridges_exist(target_node, brlist)
842 result.Raise("Error checking bridges on destination node '%s'" %
843 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
846 def _CheckInstanceBridgesExist(lu, instance, node=None):
847 """Check that the brigdes needed by an instance exist.
851 node = instance.primary_node
852 _CheckNicsBridgesExist(lu, instance.nics, node)
855 def _CheckOSVariant(os_obj, name):
856 """Check whether an OS name conforms to the os variants specification.
858 @type os_obj: L{objects.OS}
859 @param os_obj: OS object to check
861 @param name: OS name passed by the user, to check for validity
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)
872 if variant not in os_obj.supported_variants:
873 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
876 def _GetNodeInstancesInner(cfg, fn):
877 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
880 def _GetNodeInstances(cfg, node_name):
881 """Returns a list of all primary and secondary instances on a node.
885 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
888 def _GetNodePrimaryInstances(cfg, node_name):
889 """Returns primary instances on a node.
892 return _GetNodeInstancesInner(cfg,
893 lambda inst: node_name == inst.primary_node)
896 def _GetNodeSecondaryInstances(cfg, node_name):
897 """Returns secondary instances on a node.
900 return _GetNodeInstancesInner(cfg,
901 lambda inst: node_name in inst.secondary_nodes)
904 def _GetStorageTypeArgs(cfg, storage_type):
905 """Returns the arguments for a storage type.
908 # Special case for file storage
909 if storage_type == constants.ST_FILE:
910 # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []
916 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)
922 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
923 result.Raise("Failed to get disk status from node %s" % node_name,
924 prereq=prereq, ecode=errors.ECODE_ENVIRON)
  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
933 class LUPostInitCluster(LogicalUnit):
934 """Logical unit for running hooks after cluster initialization.
937 HPATH = "cluster-init"
938 HTYPE = constants.HTYPE_CLUSTER
941 def BuildHooksEnv(self):
945 env = {"OP_TARGET": self.cfg.GetClusterName()}
946 mn = self.cfg.GetMasterNode()
949 def CheckPrereq(self):
950 """No prerequisites to check.
955 def Exec(self, feedback_fn):
962 class LUDestroyCluster(LogicalUnit):
963 """Logical unit for destroying the cluster.
966 HPATH = "cluster-destroy"
967 HTYPE = constants.HTYPE_CLUSTER
970 def BuildHooksEnv(self):
974 env = {"OP_TARGET": self.cfg.GetClusterName()}
977 def CheckPrereq(self):
978 """Check prerequisites.
980 This checks whether the cluster is empty.
982 Any errors are signaled by raising errors.OpPrereqError.
985 master = self.cfg.GetMasterNode()
987 nodelist = self.cfg.GetNodeList()
988 if len(nodelist) != 1 or nodelist[0] != master:
989 raise errors.OpPrereqError("There are still %d node(s) in"
990 " this cluster." % (len(nodelist) - 1),
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
998 def Exec(self, feedback_fn):
999 """Destroys the cluster.
1002 master = self.cfg.GetMasterNode()
1003 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)
1013 result = self.rpc.call_node_stop_master(master, False)
1014 result.Raise("Could not disable the master role")
1016 if modify_ssh_setup:
1017 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1018 utils.CreateBackup(priv_key)
1019 utils.CreateBackup(pub_key)
1024 def _VerifyCertificate(filename):
1025 """Verifies a certificate for LUVerifyCluster.
1027 @type filename: string
1028 @param filename: Path to PEM file
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
1035 return (LUVerifyCluster.ETYPE_ERROR,
1036 "Failed to load X509 certificate %s: %s" % (filename, err))
  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
1049 elif errcode == utils.CERT_WARNING:
1050 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1051 elif errcode == utils.CERT_ERROR:
1052 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1054 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1057 class LUVerifyCluster(LogicalUnit):
1058 """Verifies the cluster status.
1061 HPATH = "cluster-verify"
1062 HTYPE = constants.HTYPE_CLUSTER
1063 _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  TCLUSTER = "cluster"
  TNODE = "node"
1068 TINSTANCE = "instance"
1070 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1071 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1072 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1073 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1074 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1077 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1078 ENODEDRBD = (TNODE, "ENODEDRBD")
1079 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1080 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1081 ENODEHV = (TNODE, "ENODEHV")
1082 ENODELVM = (TNODE, "ENODELVM")
1083 ENODEN1 = (TNODE, "ENODEN1")
1084 ENODENET = (TNODE, "ENODENET")
1085 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1086 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1087 ENODERPC = (TNODE, "ENODERPC")
1088 ENODESSH = (TNODE, "ENODESSH")
1089 ENODEVERSION = (TNODE, "ENODEVERSION")
1090 ENODESETUP = (TNODE, "ENODESETUP")
1091 ENODETIME = (TNODE, "ENODETIME")
1093 ETYPE_FIELD = "code"
1094 ETYPE_ERROR = "ERROR"
1095 ETYPE_WARNING = "WARNING"
1097 class NodeImage(object):
1098 """A class representing the logical and physical status of a node.
1100 @ivar volumes: a structure as returned from
1101 L{ganeti.backend.GetVolumeList} (runtime)
1102 @ivar instances: a list of running instances (runtime)
1103 @ivar pinst: list of configured primary instances (config)
1104 @ivar sinst: list of configured secondary instances (config)
  @ivar sbp: dictionary of {primary-node: list of instances} of all peers
1106 of this node (config)
1107 @ivar mfree: free memory, as reported by hypervisor (runtime)
1108 @ivar dfree: free disk, as reported by the node (runtime)
1109 @ivar offline: the offline status (config)
1110 @type rpc_fail: boolean
  @ivar rpc_fail: whether the RPC verify call failed (overall, not
      whether the individual keys were correct) (runtime)
1113 @type lvm_fail: boolean
1114 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1115 @type hyp_fail: boolean
1116 @ivar hyp_fail: whether the RPC call didn't return the instance list
1117 @type ghost: boolean
1118 @ivar ghost: whether this is a known node or not (config)
    def __init__(self, offline=False):
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
1135 def ExpandNames(self):
1136 self.needed_locks = {
1137 locking.LEVEL_NODE: locking.ALL_SET,
1138 locking.LEVEL_INSTANCE: locking.ALL_SET,
1140 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1142 def _Error(self, ecode, item, msg, *args, **kwargs):
1143 """Format an error message.
1145 Based on the opcode's error_codes parameter, either format a
1146 parseable error code, or a simpler error string.
1148 This must be called only from Exec and functions called from Exec.
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + str(item)
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg)
1168 def _ErrorIf(self, cond, *args, **kwargs):
1169 """Log an error message if the passed condition is True.
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
1175 # do not mark the operation as failed for WARN cases only
1176 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1177 self.bad = self.bad or cond
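  # Illustrative output (assumed node name and message): with
  # self.op.error_codes set, _Error emits a parseable line such as
  #   ERROR:ENODELVM:node:node1.example.com:unable to check volume groups
  # while otherwise the same condition is reported as
  #   ERROR: node node1.example.com: unable to check volume groups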
1179 def _VerifyNode(self, ninfo, nresult):
1180 """Run multiple tests against a node.
1184 - compares ganeti version
1185 - checks vg existence and size > 20G
1186 - checks config file checksum
1187 - checks ssh to other nodes
1189 @type ninfo: L{objects.Node}
1190 @param ninfo: the node to check
1191 @param nresult: the results from the node
1193 @return: whether overall this call was successful (and we can expect
        reasonable values in the response)
1198 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1200 # main result, nresult should be a non-empty dict
1201 test = not nresult or not isinstance(nresult, dict)
1202 _ErrorIf(test, self.ENODERPC, node,
1203 "unable to verify node: no data returned")
1207 # compares ganeti version
1208 local_version = constants.PROTOCOL_VERSION
1209 remote_version = nresult.get("version", None)
1210 test = not (remote_version and
1211 isinstance(remote_version, (list, tuple)) and
1212 len(remote_version) == 2)
1213 _ErrorIf(test, self.ENODERPC, node,
1214 "connection to node returned invalid data")
1218 test = local_version != remote_version[0]
1219 _ErrorIf(test, self.ENODEVERSION, node,
1220 "incompatible protocol versions: master %s,"
1221 " node %s", local_version, remote_version[0])
1225 # node seems compatible, we can actually try to look into its results
1227 # full package version
1228 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1229 self.ENODEVERSION, node,
1230 "software version mismatch: master %s, node %s",
1231 constants.RELEASE_VERSION, remote_version[1],
1232 code=self.ETYPE_WARNING)
1234 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1235 if isinstance(hyp_result, dict):
1236 for hv_name, hv_result in hyp_result.iteritems():
1237 test = hv_result is not None
1238 _ErrorIf(test, self.ENODEHV, node,
1239 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1242 test = nresult.get(constants.NV_NODESETUP,
1243 ["Missing NODESETUP results"])
1244 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1249 def _VerifyNodeTime(self, ninfo, nresult,
1250 nvinfo_starttime, nvinfo_endtime):
1251 """Check the node time.
1253 @type ninfo: L{objects.Node}
1254 @param ninfo: the node to check
1255 @param nresult: the remote results for the node
1256 @param nvinfo_starttime: the start time of the RPC call
1257 @param nvinfo_endtime: the end time of the RPC call
1261 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return
1270 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1271 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1272 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
1281 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1282 """Check the node time.
1284 @type ninfo: L{objects.Node}
1285 @param ninfo: the node to check
1286 @param nresult: the remote results for the node
1287 @param vg_name: the configured VG name
1294 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1296 # checks vg existence and size > 20G
1297 vglist = nresult.get(constants.NV_VGLIST, None)
1299 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1301 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1302 constants.MIN_VG_SIZE)
1303 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1306 pvlist = nresult.get(constants.NV_PVLIST, None)
1307 test = pvlist is None
1308 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1310 # check that ':' is not present in PV names, since it's a
1311 # special character for lvcreate (denotes the range of PEs to
1313 for _, pvname, owner_vg in pvlist:
1314 test = ":" in pvname
1315 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1316 " '%s' of VG '%s'", pvname, owner_vg)
1318 def _VerifyNodeNetwork(self, ninfo, nresult):
1319 """Check the node time.
1321 @type ninfo: L{objects.Node}
1322 @param ninfo: the node to check
1323 @param nresult: the remote results for the node
1327 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1329 test = constants.NV_NODELIST not in nresult
1330 _ErrorIf(test, self.ENODESSH, node,
1331 "node hasn't returned node ssh connectivity data")
1333 if nresult[constants.NV_NODELIST]:
1334 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1335 _ErrorIf(True, self.ENODESSH, node,
1336 "ssh communication with node '%s': %s", a_node, a_msg)
1338 test = constants.NV_NODENETTEST not in nresult
1339 _ErrorIf(test, self.ENODENET, node,
1340 "node hasn't returned node tcp connectivity data")
1342 if nresult[constants.NV_NODENETTEST]:
      nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
      for anode in nlist:
        _ErrorIf(True, self.ENODENET, node,
1346 "tcp communication with node '%s': %s",
1347 anode, nresult[constants.NV_NODENETTEST][anode])
1349 test = constants.NV_MASTERIP not in nresult
1350 _ErrorIf(test, self.ENODENET, node,
1351 "node hasn't returned node master IP reachability data")
1353 if not nresult[constants.NV_MASTERIP]:
1354 if node == self.master_node:
        msg = "the master node cannot reach the master IP (not configured?)"
      else:
        msg = "cannot reach the master IP"
1358 _ErrorIf(True, self.ENODENET, node, msg)
1361 def _VerifyInstance(self, instance, instanceconfig, node_image):
1362 """Verify an instance.
1364 This function checks to see if the required block devices are
1365 available on the instance's node.
1368 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1369 node_current = instanceconfig.primary_node
1371 node_vol_should = {}
1372 instanceconfig.MapLVsByNode(node_vol_should)
1374 for node in node_vol_should:
1375 n_img = node_image[node]
1376 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
1380 test = volume not in n_img.volumes
1381 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1382 "volume %s missing on node %s", volume, node)
1384 if instanceconfig.admin_up:
1385 pri_img = node_image[node_current]
1386 test = instance not in pri_img.instances and not pri_img.offline
1387 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1388 "instance not running on its primary node %s",
1391 for node, n_img in node_image.items():
1392 if (not node == node_current):
1393 test = instance in n_img.instances
1394 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1395 "instance should not run on node %s", node)
1397 def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1398 """Verify if there are any unknown volumes in the cluster.
1400 The .os, .swap and backup volumes are ignored. All other volumes are
1401 reported as unknown.
1404 for node, n_img in node_image.items():
1405 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
1409 test = (node not in node_vol_should or
1410 volume not in node_vol_should[node])
1411 self._ErrorIf(test, self.ENODEORPHANLV, node,
1412 "volume %s is unknown", volume)
1414 def _VerifyOrphanInstances(self, instancelist, node_image):
1415 """Verify the list of running instances.
1417 This checks what instances are running but unknown to the cluster.
1420 for node, n_img in node_image.items():
1421 for o_inst in n_img.instances:
1422 test = o_inst not in instancelist
1423 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1424 "instance %s on node %s should not exist", o_inst, node)
1426 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1427 """Verify N+1 Memory Resilience.
1429 Check that if one single node dies we can still start all the
1430 instances it was primary for.
1433 for node, n_img in node_image.items():
1434 # This code checks that every node which is now listed as
1435 # secondary has enough memory to host all instances it is
1436 # supposed to should a single other node in the cluster fail.
1437 # FIXME: not ready for failover to an arbitrary node
1438 # FIXME: does not support file-backed instances
1439 # WARNING: we currently take into account down instances as well
1440 # as up ones, considering that even if they're down someone
1441 # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
1445 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1446 if bep[constants.BE_AUTO_BALANCE]:
1447 needed_mem += bep[constants.BE_MEMORY]
1448 test = n_img.mfree < needed_mem
1449 self._ErrorIf(test, self.ENODEN1, node,
1450 "not enough memory on to accommodate"
1451 " failovers should peer node %s fail", prinode)
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
1455 """Verifies and computes the node required file checksums.
1457 @type ninfo: L{objects.Node}
1458 @param ninfo: the node to check
1459 @param nresult: the remote results for the node
1460 @param file_list: required list of files
1461 @param local_cksum: dictionary of local files and their checksums
1462 @param master_files: list of files that only masters should have
1466 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1468 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1469 test = not isinstance(remote_cksum, dict)
1470 _ErrorIf(test, self.ENODEFILECHECK, node,
1471 "node hasn't returned file checksum data")
1475 for file_name in file_list:
1476 node_is_mc = ninfo.master_candidate
1477 must_have = (file_name not in master_files) or node_is_mc
1479 test1 = file_name not in remote_cksum
1481 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1483 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1484 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1485 "file '%s' missing", file_name)
1486 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1487 "file '%s' has wrong checksum", file_name)
1488 # not candidate and this is not a must-have file
1489 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1490 "file '%s' should not exist on non master"
1491 " candidates (and the file is outdated)", file_name)
1492 # all good, except non-master/non-must have combination
1493 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1494 "file '%s' should not exist"
1495 " on non master candidates", file_name)
1497 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1498 """Verifies and the node DRBD status.
1500 @type ninfo: L{objects.Node}
1501 @param ninfo: the node to check
1502 @param nresult: the remote results for the node
1503 @param instanceinfo: the dict of instances
1504 @param drbd_map: the DRBD map as returned by
1505 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1509 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
1514 test = instance not in instanceinfo
1515 _ErrorIf(test, self.ECLUSTERCFG, None,
1516 "ghost instance '%s' in temporary DRBD map", instance)
1517 # ghost instance should not be running, but otherwise we
1518 # don't give double warnings (both ghost instance and
        # unallocated minor in use)
        if test:
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)
1526 # and now check them
1527 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1528 test = not isinstance(used_minors, (tuple, list))
1529 _ErrorIf(test, self.ENODEDRBD, node,
1530 "cannot parse drbd status file: %s", str(used_minors))
1532 # we cannot check drbd status
1535 for minor, (iname, must_exist) in node_drbd.items():
1536 test = minor not in used_minors and must_exist
1537 _ErrorIf(test, self.ENODEDRBD, node,
1538 "drbd minor %d of instance %s is not active", minor, iname)
1539 for minor in used_minors:
1540 test = minor not in node_drbd
1541 _ErrorIf(test, self.ENODEDRBD, node,
1542 "unallocated drbd minor %d is in use", minor)
1544 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1545 """Verifies and updates the node volume data.
1547 This function will update a L{NodeImage}'s internal structures
1548 with data from the remote call.
1550 @type ninfo: L{objects.Node}
1551 @param ninfo: the node to check
1552 @param nresult: the remote results for the node
1553 @param nimg: the node image object
1554 @param vg_name: the configured VG name
1558 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False
1573 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1574 """Verifies and updates the node instance list.
1576 If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.
1580 @type ninfo: L{objects.Node}
1581 @param ninfo: the node to check
1582 @param nresult: the remote results for the node
1583 @param nimg: the node image object
1586 idata = nresult.get(constants.NV_INSTANCELIST, None)
1587 test = not isinstance(idata, list)
1588 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1589 " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata
1595 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1596 """Verifies and computes a node information map
1598 @type ninfo: L{objects.Node}
1599 @param ninfo: the node to check
1600 @param nresult: the remote results for the node
1601 @param nimg: the node image object
1602 @param vg_name: the configured VG name
1606 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1608 # try to read free memory (from the hypervisor)
1609 hv_info = nresult.get(constants.NV_HVINFO, None)
1610 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
1616 _ErrorIf(True, self.ENODERPC, node,
1617 "node returned invalid nodeinfo, check hypervisor")
1619 # FIXME: devise a free space model for file based instances as well
1620 if vg_name is not None:
1621 test = (constants.NV_VGLIST not in nresult or
1622 vg_name not in nresult[constants.NV_VGLIST])
1623 _ErrorIf(test, self.ENODELVM, node,
1624 "node didn't return data for the volume group '%s'"
1625 " - it is either missing or broken", vg_name)
1628 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1629 except (ValueError, TypeError):
1630 _ErrorIf(True, self.ENODERPC, node,
1631 "node returned invalid LVM info, check LVM status")
1633 def CheckPrereq(self):
1634 """Check prerequisites.
1636 Transform the list of checks we're going to skip into a set and check that
1637 all its members are valid.
1640 self.skip_set = frozenset(self.op.skip_checks)
1641 if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1642 raise errors.OpPrereqError("Invalid checks to be skipped specified",
1645 def BuildHooksEnv(self):
    Cluster-Verify hooks are run in the post phase; their failure is
    logged in the verify output and makes the verification fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    for node in self.cfg.GetAllNodesInfo().values():
1657 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1659 return env, [], all_nodes
1661 def Exec(self, feedback_fn):
1662 """Verify integrity of cluster, performing various test on nodes.
1666 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1667 verbose = self.op.verbose
1668 self._feedback_fn = feedback_fn
1669 feedback_fn("* Verifying global settings")
1670 for msg in self.cfg.VerifyConfig():
1671 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1673 # Check the cluster certificates
1674 for cert_filename in constants.ALL_CERT_FILES:
1675 (errcode, msg) = _VerifyCertificate(cert_filename)
1676 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1678 vg_name = self.cfg.GetVGName()
1679 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1680 cluster = self.cfg.GetClusterInfo()
1681 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1682 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1683 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1684 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1685 for iname in instancelist)
1686 i_non_redundant = [] # Non redundant instances
1687 i_non_a_balanced = [] # Non auto-balanced instances
1688 n_offline = 0 # Count of offline nodes
1689 n_drained = 0 # Count of nodes being drained
1690 node_vol_should = {}
1692 # FIXME: verify OS list
1693 # do local checksums
1694 master_files = [constants.CLUSTER_CONF_FILE]
1695 master_node = self.master_node = self.cfg.GetMasterNode()
1696 master_ip = self.cfg.GetMasterIP()
1698 file_names = ssconf.SimpleStore().GetFileList()
1699 file_names.extend(constants.ALL_CERT_FILES)
1700 file_names.extend(master_files)
1701 if cluster.modify_etc_hosts:
1702 file_names.append(constants.ETC_HOSTS)
1704 local_checksums = utils.FingerprintFiles(file_names)
1706 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1707 node_verify_param = {
1708 constants.NV_FILELIST: file_names,
1709 constants.NV_NODELIST: [node.name for node in nodeinfo
1710 if not node.offline],
1711 constants.NV_HYPERVISOR: hypervisors,
1712 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1713 node.secondary_ip) for node in nodeinfo
1714 if not node.offline],
1715 constants.NV_INSTANCELIST: hypervisors,
1716 constants.NV_VERSION: None,
1717 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1718 constants.NV_NODESETUP: None,
1719 constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      }
1723 if vg_name is not None:
1724 node_verify_param[constants.NV_VGLIST] = None
1725 node_verify_param[constants.NV_LVLIST] = vg_name
1726 node_verify_param[constants.NV_PVLIST] = [vg_name]
1727 node_verify_param[constants.NV_DRBDLIST] = None
1729 # Build our expected cluster state
1730 node_image = dict((node.name, self.NodeImage(offline=node.offline))
1731 for node in nodeinfo)
1733 for instance in instancelist:
1734 inst_config = instanceinfo[instance]
1736 for nname in inst_config.all_nodes:
1737 if nname not in node_image:
          gnode = self.NodeImage()
          gnode.ghost = True
          node_image[nname] = gnode
1743 inst_config.MapLVsByNode(node_vol_should)
1745 pnode = inst_config.primary_node
1746 node_image[pnode].pinst.append(instance)
1748 for snode in inst_config.secondary_nodes:
1749 nimg = node_image[snode]
1750 nimg.sinst.append(instance)
1751 if pnode not in nimg.sbp:
1752 nimg.sbp[pnode] = []
1753 nimg.sbp[pnode].append(instance)
1755 # At this point, we have the in-memory data structures complete,
1756 # except for the runtime information, which we'll gather next
1758 # Due to the way our RPC system works, exact response times cannot be
1759 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1760 # time before and after executing the request, we can at least have a time
1762 nvinfo_starttime = time.time()
1763 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1764 self.cfg.GetClusterName())
1765 nvinfo_endtime = time.time()
1767 all_drbd_map = self.cfg.ComputeDRBDMap()
1769 feedback_fn("* Verifying node status")
1770 for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]
1776 feedback_fn("* Skipping offline node %s" % (node,))
1780 if node == master_node:
1782 elif node_i.master_candidate:
1783 ntype = "master candidate"
1784 elif node_i.drained:
1790 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1792 msg = all_nvinfo[node].fail_msg
1793 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1795 nimg.rpc_fail = True
1798 nresult = all_nvinfo[node].payload
1800 nimg.call_ok = self._VerifyNode(node_i, nresult)
1801 self._VerifyNodeNetwork(node_i, nresult)
1802 self._VerifyNodeLVM(node_i, nresult, vg_name)
1803 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1805 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1806 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1808 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1809 self._UpdateNodeInstances(node_i, nresult, nimg)
1810 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1812 feedback_fn("* Verifying instance status")
1813 for instance in instancelist:
1815 feedback_fn("* Verifying instance %s" % instance)
1816 inst_config = instanceinfo[instance]
1817 self._VerifyInstance(instance, inst_config, node_image)
1818 inst_nodes_offline = []
1820 pnode = inst_config.primary_node
1821 pnode_img = node_image[pnode]
1822 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1823 self.ENODERPC, pnode, "instance %s, connection to"
1824 " primary node failed", instance)
1826 if pnode_img.offline:
1827 inst_nodes_offline.append(pnode)
1829 # If the instance is non-redundant we cannot survive losing its primary
1830 # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
1833 # FIXME: does not support file-backed instances
1834 if not inst_config.secondary_nodes:
1835 i_non_redundant.append(instance)
1836 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
1837 instance, "instance has multiple secondary nodes: %s",
1838 utils.CommaJoin(inst_config.secondary_nodes),
1839 code=self.ETYPE_WARNING)
1841 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1842 i_non_a_balanced.append(instance)
1844 for snode in inst_config.secondary_nodes:
1845 s_img = node_image[snode]
1846 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
1847 "instance %s, connection to secondary node failed", instance)
        if s_img.offline:
          inst_nodes_offline.append(snode)
1852 # warn that the instance lives on offline nodes
1853 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1854 "instance lives on offline node(s) %s",
1855 utils.CommaJoin(inst_nodes_offline))
1856 # ... or ghost nodes
1857 for node in inst_config.all_nodes:
1858 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
1859 "instance lives on ghost node %s", node)
1861 feedback_fn("* Verifying orphan volumes")
1862 self._VerifyOrphanVolumes(node_vol_should, node_image)
1864 feedback_fn("* Verifying orphan instances")
1865 self._VerifyOrphanInstances(instancelist, node_image)
1867 if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1868 feedback_fn("* Verifying N+1 Memory redundancy")
1869 self._VerifyNPlusOneMemory(node_image, instanceinfo)
1871 feedback_fn("* Other Notes")
1873 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
1874 % len(i_non_redundant))
1876 if i_non_a_balanced:
1877 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
1878 % len(i_non_a_balanced))
1881 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
1884 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
1888 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1889 """Analyze the post-hooks' result
1891 This method analyzes the hook result, handles it, and sends some
1892 nicely-formatted feedback back to the user.
1894 @param phase: one of L{constants.HOOKS_PHASE_POST} or
1895 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1896 @param hooks_results: the results of the multi-node hooks rpc call
1897 @param feedback_fn: function used to send feedback back to the caller
1898 @param lu_result: previous Exec result
1899 @return: the new Exec result, based on the previous result
1903 # We only really run POST phase hooks, and are only interested in their results
1905 if phase == constants.HOOKS_PHASE_POST:
1906 # Used to change hooks' output to proper indentation
1907 indent_re = re.compile('^', re.M)
1908 feedback_fn("* Hooks Results")
1909 assert hooks_results, "invalid result from hooks"
1911 for node_name in hooks_results:
1912 res = hooks_results[node_name]
1914 test = msg and not res.offline
1915 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1916 "Communication failure in hooks execution: %s", msg)
1917 if res.offline or msg:
1918 # No need to investigate payload if node is offline or gave an error.
1919 # override manually lu_result here as _ErrorIf only
1920 # overrides self.bad
1923 for script, hkr, output in res.payload:
1924 test = hkr == constants.HKR_FAIL
1925 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1926 "Script %s failed, output:", script)
1928 output = indent_re.sub(' ', output)
1929 feedback_fn("%s" % output)
1935 class LUVerifyDisks(NoHooksLU):
1936 """Verifies the cluster disks status.
1942 def ExpandNames(self):
1943 self.needed_locks = {
1944 locking.LEVEL_NODE: locking.ALL_SET,
1945 locking.LEVEL_INSTANCE: locking.ALL_SET,
1947 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1949 def CheckPrereq(self):
1950 """Check prerequisites.
1952 This has no prerequisites.
1957 def Exec(self, feedback_fn):
1958 """Verify integrity of cluster disks.
1960 @rtype: tuple of three items
1961 @return: a tuple of (dict of node-to-node_error, list of instances
1962 which need activate-disks, dict of instance: (node, volume) for missing volumes)
1966 result = res_nodes, res_instances, res_missing = {}, [], {}
1968 vg_name = self.cfg.GetVGName()
1969 nodes = utils.NiceSort(self.cfg.GetNodeList())
1970 instances = [self.cfg.GetInstanceInfo(name)
1971 for name in self.cfg.GetInstanceList()]
1974 for inst in instances:
1976 if (not inst.admin_up or
1977 inst.disk_template not in constants.DTS_NET_MIRROR):
1979 inst.MapLVsByNode(inst_lvs)
1980 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1981 for node, vol_list in inst_lvs.iteritems():
1982 for vol in vol_list:
1983 nv_dict[(node, vol)] = inst
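# Illustrative sketch of the transform above (instance and volume names are
# hypothetical): if inst_lvs came back as
#   {"node1.example.com": ["xenvg/lv_data", "xenvg/lv_meta"]}
# the loop produces
#   nv_dict == {("node1.example.com", "xenvg/lv_data"): inst,
#               ("node1.example.com", "xenvg/lv_meta"): inst}
# i.e. a flat (node, volume) -> instance map that is easy to prune as the
# per-node LV listings come back below.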
1988 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
1992 node_res = node_lvs[node]
1993 if node_res.offline:
1995 msg = node_res.fail_msg
1997 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
1998 res_nodes[node] = msg
2001 lvs = node_res.payload
2002 for lv_name, (_, _, lv_online) in lvs.items():
2003 inst = nv_dict.pop((node, lv_name), None)
2004 if (not lv_online and inst is not None
2005 and inst.name not in res_instances):
2006 res_instances.append(inst.name)
2008 # any leftover items in nv_dict are missing LVs, let's arrange the data better
2010 for key, inst in nv_dict.iteritems():
2011 if inst.name not in res_missing:
2012 res_missing[inst.name] = []
2013 res_missing[inst.name].append(key)
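# A hedged sketch of what this Exec would return (all names are hypothetical):
#   res_nodes     == {"node3": "rpc error text"}
#   res_instances == ["instance1"]   # instances needing activate-disks
#   res_missing   == {"instance2": [("node1", "xenvg/lv_abcdef")]}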
2018 class LURepairDiskSizes(NoHooksLU):
2019 """Verifies the cluster disks sizes.
2022 _OP_REQP = ["instances"]
2025 def ExpandNames(self):
2026 if not isinstance(self.op.instances, list):
2027 raise errors.OpPrereqError("Invalid argument type 'instances'",
2030 if self.op.instances:
2031 self.wanted_names = []
2032 for name in self.op.instances:
2033 full_name = _ExpandInstanceName(self.cfg, name)
2034 self.wanted_names.append(full_name)
2035 self.needed_locks = {
2036 locking.LEVEL_NODE: [],
2037 locking.LEVEL_INSTANCE: self.wanted_names,
2039 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2041 self.wanted_names = None
2042 self.needed_locks = {
2043 locking.LEVEL_NODE: locking.ALL_SET,
2044 locking.LEVEL_INSTANCE: locking.ALL_SET,
2046 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2048 def DeclareLocks(self, level):
2049 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2050 self._LockInstancesNodes(primary_only=True)
2052 def CheckPrereq(self):
2053 """Check prerequisites.
2055 This only checks the optional instance list against the existing names.
2058 if self.wanted_names is None:
2059 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2061 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2062 in self.wanted_names]
2064 def _EnsureChildSizes(self, disk):
2065 """Ensure children of the disk have the needed disk size.
2067 This is valid mainly for DRBD8 and fixes an issue where the
2068 children have a smaller disk size than the parent.
2070 @param disk: an L{ganeti.objects.Disk} object
2073 if disk.dev_type == constants.LD_DRBD8:
2074 assert disk.children, "Empty children for DRBD8?"
2075 fchild = disk.children[0]
2076 mismatch = fchild.size < disk.size
2078 self.LogInfo("Child disk has size %d, parent %d, fixing",
2079 fchild.size, disk.size)
2080 fchild.size = disk.size
2082 # and we recurse on this child only, not on the metadev
2083 return self._EnsureChildSizes(fchild) or mismatch
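# Worked example of the fix above (sizes in MiB are hypothetical): for a
# DRBD8 disk of size 10240 whose data child reports 10200, mismatch is True,
# the child is bumped to 10240 and the recursion returns True, so the caller
# knows the instance configuration must be written back.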
2087 def Exec(self, feedback_fn):
2088 """Verify the size of cluster disks.
2091 # TODO: check child disks too
2092 # TODO: check differences in size between primary/secondary nodes
2094 for instance in self.wanted_instances:
2095 pnode = instance.primary_node
2096 if pnode not in per_node_disks:
2097 per_node_disks[pnode] = []
2098 for idx, disk in enumerate(instance.disks):
2099 per_node_disks[pnode].append((instance, idx, disk))
2102 for node, dskl in per_node_disks.items():
2103 newl = [v[2].Copy() for v in dskl]
2105 self.cfg.SetDiskID(dsk, node)
2106 result = self.rpc.call_blockdev_getsizes(node, newl)
2108 self.LogWarning("Failure in blockdev_getsizes call to node"
2109 " %s, ignoring", node)
2111 if len(result.data) != len(dskl):
2112 self.LogWarning("Invalid result from node %s, ignoring node results",
2115 for ((instance, idx, disk), size) in zip(dskl, result.data):
2117 self.LogWarning("Disk %d of instance %s did not return size"
2118 " information, ignoring", idx, instance.name)
2120 if not isinstance(size, (int, long)):
2121 self.LogWarning("Disk %d of instance %s did not return valid"
2122 " size information, ignoring", idx, instance.name)
2125 if size != disk.size:
2126 self.LogInfo("Disk %d of instance %s has mismatched size,"
2127 " correcting: recorded %d, actual %d", idx,
2128 instance.name, disk.size, size)
2130 self.cfg.Update(instance, feedback_fn)
2131 changed.append((instance.name, idx, size))
2132 if self._EnsureChildSizes(disk):
2133 self.cfg.Update(instance, feedback_fn)
2134 changed.append((instance.name, idx, disk.size))
2138 class LURenameCluster(LogicalUnit):
2139 """Rename the cluster.
2142 HPATH = "cluster-rename"
2143 HTYPE = constants.HTYPE_CLUSTER
2146 def BuildHooksEnv(self):
2151 "OP_TARGET": self.cfg.GetClusterName(),
2152 "NEW_NAME": self.op.name,
2154 mn = self.cfg.GetMasterNode()
2155 all_nodes = self.cfg.GetNodeList()
2156 return env, [mn], all_nodes
2158 def CheckPrereq(self):
2159 """Verify that the passed name is a valid one.
2162 hostname = utils.GetHostInfo(self.op.name)
2164 new_name = hostname.name
2165 self.ip = new_ip = hostname.ip
2166 old_name = self.cfg.GetClusterName()
2167 old_ip = self.cfg.GetMasterIP()
2168 if new_name == old_name and new_ip == old_ip:
2169 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2170 " cluster has changed",
2172 if new_ip != old_ip:
2173 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2174 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2175 " reachable on the network. Aborting." %
2176 new_ip, errors.ECODE_NOTUNIQUE)
2178 self.op.name = new_name
2180 def Exec(self, feedback_fn):
2181 """Rename the cluster.
2184 clustername = self.op.name
2187 # shutdown the master IP
2188 master = self.cfg.GetMasterNode()
2189 result = self.rpc.call_node_stop_master(master, False)
2190 result.Raise("Could not disable the master role")
2193 cluster = self.cfg.GetClusterInfo()
2194 cluster.cluster_name = clustername
2195 cluster.master_ip = ip
2196 self.cfg.Update(cluster, feedback_fn)
2198 # update the known hosts file
2199 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2200 node_list = self.cfg.GetNodeList()
2202 node_list.remove(master)
2205 result = self.rpc.call_upload_file(node_list,
2206 constants.SSH_KNOWN_HOSTS_FILE)
2207 for to_node, to_result in result.iteritems():
2208 msg = to_result.fail_msg
2210 msg = ("Copy of file %s to node %s failed: %s" %
2211 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2212 self.proc.LogWarning(msg)
2215 result = self.rpc.call_node_start_master(master, False, False)
2216 msg = result.fail_msg
2218 self.LogWarning("Could not re-enable the master role on"
2219 " the master, please restart manually: %s", msg)
2222 def _RecursiveCheckIfLVMBased(disk):
2223 """Check if the given disk or its children are lvm-based.
2225 @type disk: L{objects.Disk}
2226 @param disk: the disk to check
2228 @return: boolean indicating whether an LD_LV dev_type was found or not
2232 for chdisk in disk.children:
2233 if _RecursiveCheckIfLVMBased(chdisk):
2235 return disk.dev_type == constants.LD_LV
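# A minimal sketch of the recursion (disk layout is hypothetical): a DRBD8
# disk whose children are two LD_LV devices makes the loop return True on
# the first child, while a file-based disk with no children falls through
# to the final comparison and returns False.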
2238 class LUSetClusterParams(LogicalUnit):
2239 """Change the parameters of the cluster.
2242 HPATH = "cluster-modify"
2243 HTYPE = constants.HTYPE_CLUSTER
2247 def CheckArguments(self):
2251 for attr in ["candidate_pool_size",
2252 "uid_pool", "add_uids", "remove_uids"]:
2253 if not hasattr(self.op, attr):
2254 setattr(self.op, attr, None)
2256 if self.op.candidate_pool_size is not None:
2258 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2259 except (ValueError, TypeError), err:
2260 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2261 str(err), errors.ECODE_INVAL)
2262 if self.op.candidate_pool_size < 1:
2263 raise errors.OpPrereqError("At least one master candidate needed",
2266 _CheckBooleanOpField(self.op, "maintain_node_health")
2268 if self.op.uid_pool:
2269 uidpool.CheckUidPool(self.op.uid_pool)
2271 if self.op.add_uids:
2272 uidpool.CheckUidPool(self.op.add_uids)
2274 if self.op.remove_uids:
2275 uidpool.CheckUidPool(self.op.remove_uids)
2277 def ExpandNames(self):
2278 # FIXME: in the future maybe other cluster params won't require checking on
2279 # all nodes to be modified.
2280 self.needed_locks = {
2281 locking.LEVEL_NODE: locking.ALL_SET,
2283 self.share_locks[locking.LEVEL_NODE] = 1
2285 def BuildHooksEnv(self):
2290 "OP_TARGET": self.cfg.GetClusterName(),
2291 "NEW_VG_NAME": self.op.vg_name,
2293 mn = self.cfg.GetMasterNode()
2294 return env, [mn], [mn]
2296 def CheckPrereq(self):
2297 """Check prerequisites.
2299 This checks whether the given params don't conflict and whether
2300 the given volume group is valid.
2303 if self.op.vg_name is not None and not self.op.vg_name:
2304 instances = self.cfg.GetAllInstancesInfo().values()
2305 for inst in instances:
2306 for disk in inst.disks:
2307 if _RecursiveCheckIfLVMBased(disk):
2308 raise errors.OpPrereqError("Cannot disable lvm storage while"
2309 " lvm-based instances exist",
2312 node_list = self.acquired_locks[locking.LEVEL_NODE]
2314 # if vg_name not None, checks given volume group on all nodes
2316 vglist = self.rpc.call_vg_list(node_list)
2317 for node in node_list:
2318 msg = vglist[node].fail_msg
2320 # ignoring down node
2321 self.LogWarning("Error while gathering data on node %s"
2322 " (ignoring node): %s", node, msg)
2324 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2326 constants.MIN_VG_SIZE)
2328 raise errors.OpPrereqError("Error on node '%s': %s" %
2329 (node, vgstatus), errors.ECODE_ENVIRON)
2331 self.cluster = cluster = self.cfg.GetClusterInfo()
2332 # validate params changes
2333 if self.op.beparams:
2334 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2335 self.new_beparams = objects.FillDict(
2336 cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2338 if self.op.nicparams:
2339 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2340 self.new_nicparams = objects.FillDict(
2341 cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2342 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2345 # check all instances for consistency
2346 for instance in self.cfg.GetAllInstancesInfo().values():
2347 for nic_idx, nic in enumerate(instance.nics):
2348 params_copy = copy.deepcopy(nic.nicparams)
2349 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2351 # check parameter syntax
2353 objects.NIC.CheckParameterSyntax(params_filled)
2354 except errors.ConfigurationError, err:
2355 nic_errors.append("Instance %s, nic/%d: %s" %
2356 (instance.name, nic_idx, err))
2358 # if we're moving instances to routed, check that they have an ip
2359 target_mode = params_filled[constants.NIC_MODE]
2360 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2361 nic_errors.append("Instance %s, nic/%d: routed nic with no ip" %
2362 (instance.name, nic_idx))
2364 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2365 "\n".join(nic_errors))
2367 # hypervisor list/parameters
2368 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2369 if self.op.hvparams:
2370 if not isinstance(self.op.hvparams, dict):
2371 raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2373 for hv_name, hv_dict in self.op.hvparams.items():
2374 if hv_name not in self.new_hvparams:
2375 self.new_hvparams[hv_name] = hv_dict
2377 self.new_hvparams[hv_name].update(hv_dict)
2379 # os hypervisor parameters
2380 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2382 if not isinstance(self.op.os_hvp, dict):
2383 raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2385 for os_name, hvs in self.op.os_hvp.items():
2386 if not isinstance(hvs, dict):
2387 raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2388 " input"), errors.ECODE_INVAL)
2389 if os_name not in self.new_os_hvp:
2390 self.new_os_hvp[os_name] = hvs
2392 for hv_name, hv_dict in hvs.items():
2393 if hv_name not in self.new_os_hvp[os_name]:
2394 self.new_os_hvp[os_name][hv_name] = hv_dict
2396 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2398 # changes to the hypervisor list
2399 if self.op.enabled_hypervisors is not None:
2400 self.hv_list = self.op.enabled_hypervisors
2401 if not self.hv_list:
2402 raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2403 " least one member",
2405 invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2407 raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2409 utils.CommaJoin(invalid_hvs),
2411 for hv in self.hv_list:
2412 # if the hypervisor doesn't already exist in the cluster
2413 # hvparams, we initialize it to empty, and then (in both
2414 # cases) we make sure to fill the defaults, as we might not
2415 # have a complete defaults list if the hypervisor wasn't enabled before
2417 if hv not in new_hvp:
2419 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2420 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2422 self.hv_list = cluster.enabled_hypervisors
2424 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2425 # either the enabled list has changed, or the parameters have, validate
2426 for hv_name, hv_params in self.new_hvparams.items():
2427 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2428 (self.op.enabled_hypervisors and
2429 hv_name in self.op.enabled_hypervisors)):
2430 # either this is a new hypervisor, or its parameters have changed
2431 hv_class = hypervisor.GetHypervisor(hv_name)
2432 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2433 hv_class.CheckParameterSyntax(hv_params)
2434 _CheckHVParams(self, node_list, hv_name, hv_params)
2437 # no need to check any newly-enabled hypervisors, since the
2438 # defaults have already been checked in the above code-block
2439 for os_name, os_hvp in self.new_os_hvp.items():
2440 for hv_name, hv_params in os_hvp.items():
2441 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2442 # we need to fill in the new os_hvp on top of the actual hv_p
2443 cluster_defaults = self.new_hvparams.get(hv_name, {})
2444 new_osp = objects.FillDict(cluster_defaults, hv_params)
2445 hv_class = hypervisor.GetHypervisor(hv_name)
2446 hv_class.CheckParameterSyntax(new_osp)
2447 _CheckHVParams(self, node_list, hv_name, new_osp)
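# Hedged sketch of the layering validated above (parameter values are
# hypothetical): starting from cluster defaults
#   {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda1"}
# an os_hvp override of {"root_path": "/dev/xvda1"} yields
#   new_osp == {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/xvda1"}
# which is then checked both syntactically and on the locked nodes.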
2450 def Exec(self, feedback_fn):
2451 """Change the parameters of the cluster.
2454 if self.op.vg_name is not None:
2455 new_volume = self.op.vg_name
2458 if new_volume != self.cfg.GetVGName():
2459 self.cfg.SetVGName(new_volume)
2461 feedback_fn("Cluster LVM configuration already in desired"
2462 " state, not changing")
2463 if self.op.hvparams:
2464 self.cluster.hvparams = self.new_hvparams
2466 self.cluster.os_hvp = self.new_os_hvp
2467 if self.op.enabled_hypervisors is not None:
2468 self.cluster.hvparams = self.new_hvparams
2469 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2470 if self.op.beparams:
2471 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2472 if self.op.nicparams:
2473 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2475 if self.op.candidate_pool_size is not None:
2476 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2477 # we need to update the pool size here, otherwise the save will fail
2478 _AdjustCandidatePool(self, [])
2480 if self.op.maintain_node_health is not None:
2481 self.cluster.maintain_node_health = self.op.maintain_node_health
2483 if self.op.add_uids is not None:
2484 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2486 if self.op.remove_uids is not None:
2487 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2489 if self.op.uid_pool is not None:
2490 self.cluster.uid_pool = self.op.uid_pool
2492 self.cfg.Update(self.cluster, feedback_fn)
2495 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2496 """Distribute additional files which are part of the cluster configuration.
2498 ConfigWriter takes care of distributing the config and ssconf files, but
2499 there are more files which should be distributed to all nodes. This function
2500 makes sure those are copied.
2502 @param lu: calling logical unit
2503 @param additional_nodes: list of nodes not in the config to distribute to
2506 # 1. Gather target nodes
2507 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2508 dist_nodes = lu.cfg.GetOnlineNodeList()
2509 if additional_nodes is not None:
2510 dist_nodes.extend(additional_nodes)
2511 if myself.name in dist_nodes:
2512 dist_nodes.remove(myself.name)
2514 # 2. Gather files to distribute
2515 dist_files = set([constants.ETC_HOSTS,
2516 constants.SSH_KNOWN_HOSTS_FILE,
2517 constants.RAPI_CERT_FILE,
2518 constants.RAPI_USERS_FILE,
2519 constants.CONFD_HMAC_KEY,
2520 constants.CLUSTER_DOMAIN_SECRET_FILE,
2523 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2524 for hv_name in enabled_hypervisors:
2525 hv_class = hypervisor.GetHypervisor(hv_name)
2526 dist_files.update(hv_class.GetAncillaryFiles())
2528 # 3. Perform the files upload
2529 for fname in dist_files:
2530 if os.path.exists(fname):
2531 result = lu.rpc.call_upload_file(dist_nodes, fname)
2532 for to_node, to_result in result.items():
2533 msg = to_result.fail_msg
2535 msg = ("Copy of file %s to node %s failed: %s" %
2536 (fname, to_node, msg))
2537 lu.proc.LogWarning(msg)
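# A minimal usage sketch, assuming an LU that has just added a node named
# "node4.example.com" to the configuration (the name is hypothetical):
#   _RedistributeAncillaryFiles(self, additional_nodes=["node4.example.com"])
# pushes /etc/hosts, the known_hosts file, the certificates and the
# hypervisor ancillary files to all online nodes plus the new node.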
2540 class LURedistributeConfig(NoHooksLU):
2541 """Force the redistribution of cluster configuration.
2543 This is a very simple LU.
2549 def ExpandNames(self):
2550 self.needed_locks = {
2551 locking.LEVEL_NODE: locking.ALL_SET,
2553 self.share_locks[locking.LEVEL_NODE] = 1
2555 def CheckPrereq(self):
2556 """Check prerequisites.
2560 def Exec(self, feedback_fn):
2561 """Redistribute the configuration.
2564 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2565 _RedistributeAncillaryFiles(self)
2568 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2569 """Sleep and poll for an instance's disk to sync.
2572 if not instance.disks or disks is not None and not disks:
2575 disks = _ExpandCheckDisks(instance, disks)
2578 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2580 node = instance.primary_node
2583 lu.cfg.SetDiskID(dev, node)
2585 # TODO: Convert to utils.Retry
2588 degr_retries = 10 # in seconds, as we sleep 1 second each time
2592 cumul_degraded = False
2593 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2594 msg = rstats.fail_msg
2596 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2599 raise errors.RemoteError("Can't contact node %s for mirror data,"
2600 " aborting." % node)
2603 rstats = rstats.payload
2605 for i, mstat in enumerate(rstats):
2607 lu.LogWarning("Can't compute data for node %s/%s",
2608 node, disks[i].iv_name)
2611 cumul_degraded = (cumul_degraded or
2612 (mstat.is_degraded and mstat.sync_percent is None))
2613 if mstat.sync_percent is not None:
2615 if mstat.estimated_time is not None:
2616 rem_time = ("%s remaining (estimated)" %
2617 utils.FormatSeconds(mstat.estimated_time))
2618 max_time = mstat.estimated_time
2620 rem_time = "no time estimate"
2621 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2622 (disks[i].iv_name, mstat.sync_percent, rem_time))
2624 # if we're done but degraded, let's do a few small retries, to
2625 # make sure we see a stable and not transient situation; therefore
2626 # we force restart of the loop
2627 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2628 logging.info("Degraded disks found, %d retries left", degr_retries)
2636 time.sleep(min(60, max_time))
2639 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2640 return not cumul_degraded
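# A hedged usage sketch (the instance object is assumed to come from the
# configuration, as in the callers of this helper):
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disks are degraded after sync")
# i.e. a False return (still degraded after the retries above) is treated
# as an error, while oneshot=True gives a single non-blocking status poll.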
2643 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2644 """Check that mirrors are not degraded.
2646 The ldisk parameter, if True, will change the test from the
2647 is_degraded attribute (which represents overall non-ok status for
2648 the device(s)) to the ldisk (representing the local storage status).
2651 lu.cfg.SetDiskID(dev, node)
2655 if on_primary or dev.AssembleOnSecondary():
2656 rstats = lu.rpc.call_blockdev_find(node, dev)
2657 msg = rstats.fail_msg
2659 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2661 elif not rstats.payload:
2662 lu.LogWarning("Can't find disk on node %s", node)
2666 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2668 result = result and not rstats.payload.is_degraded
2671 for child in dev.children:
2672 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2677 class LUDiagnoseOS(NoHooksLU):
2678 """Logical unit for OS diagnose/query.
2681 _OP_REQP = ["output_fields", "names"]
2683 _FIELDS_STATIC = utils.FieldSet()
2684 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2685 # Fields that need calculation of global os validity
2686 _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2688 def ExpandNames(self):
2690 raise errors.OpPrereqError("Selective OS query not supported",
2693 _CheckOutputFields(static=self._FIELDS_STATIC,
2694 dynamic=self._FIELDS_DYNAMIC,
2695 selected=self.op.output_fields)
2697 # Lock all nodes, in shared mode
2698 # Temporary removal of locks, should be reverted later
2699 # TODO: reintroduce locks when they are lighter-weight
2700 self.needed_locks = {}
2701 #self.share_locks[locking.LEVEL_NODE] = 1
2702 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2704 def CheckPrereq(self):
2705 """Check prerequisites.
2710 def _DiagnoseByOS(rlist):
2711 """Remaps a per-node return list into an a per-os per-node dictionary
2713 @param rlist: a map with node names as keys and OS objects as values
2716 @return: a dictionary with osnames as keys and as value another map, with
2717 nodes as keys and tuples of (path, status, diagnose) as values, eg::
2719 {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2720 (/srv/..., False, "invalid api")],
2721 "node2": [(/srv/..., True, "")]}
2726 # we build here the list of nodes that didn't fail the RPC (at RPC
2727 # level), so that nodes with a non-responding node daemon don't
2728 # make all OSes invalid
2729 good_nodes = [node_name for node_name in rlist
2730 if not rlist[node_name].fail_msg]
2731 for node_name, nr in rlist.items():
2732 if nr.fail_msg or not nr.payload:
2734 for name, path, status, diagnose, variants in nr.payload:
2735 if name not in all_os:
2736 # build a list of nodes for this os containing empty lists
2737 # for each node in node_list
2739 for nname in good_nodes:
2740 all_os[name][nname] = []
2741 all_os[name][node_name].append((path, status, diagnose, variants))
2744 def Exec(self, feedback_fn):
2745 """Compute the list of OSes.
2748 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2749 node_data = self.rpc.call_os_diagnose(valid_nodes)
2750 pol = self._DiagnoseByOS(node_data)
2752 calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2753 calc_variants = "variants" in self.op.output_fields
2755 for os_name, os_data in pol.items():
2760 for osl in os_data.values():
2761 valid = valid and osl and osl[0][1]
2766 node_variants = osl[0][3]
2767 if variants is None:
2768 variants = node_variants
2770 variants = [v for v in variants if v in node_variants]
2772 for field in self.op.output_fields:
2775 elif field == "valid":
2777 elif field == "node_status":
2778 # this is just a copy of the dict
2780 for node_name, nos_list in os_data.items():
2781 val[node_name] = nos_list
2782 elif field == "variants":
2785 raise errors.ParameterError(field)
2792 class LURemoveNode(LogicalUnit):
2793 """Logical unit for removing a node.
2796 HPATH = "node-remove"
2797 HTYPE = constants.HTYPE_NODE
2798 _OP_REQP = ["node_name"]
2800 def BuildHooksEnv(self):
2803 This doesn't run on the target node in the pre phase as a failed
2804 node would then be impossible to remove.
2808 "OP_TARGET": self.op.node_name,
2809 "NODE_NAME": self.op.node_name,
2811 all_nodes = self.cfg.GetNodeList()
2813 all_nodes.remove(self.op.node_name)
2815 logging.warning("Node %s which is about to be removed not found"
2816 " in the all nodes list", self.op.node_name)
2817 return env, all_nodes, all_nodes
2819 def CheckPrereq(self):
2820 """Check prerequisites.
2823 - the node exists in the configuration
2824 - it does not have primary or secondary instances
2825 - it's not the master
2827 Any errors are signaled by raising errors.OpPrereqError.
2830 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2831 node = self.cfg.GetNodeInfo(self.op.node_name)
2832 assert node is not None
2834 instance_list = self.cfg.GetInstanceList()
2836 masternode = self.cfg.GetMasterNode()
2837 if node.name == masternode:
2838 raise errors.OpPrereqError("Node is the master node,"
2839 " you need to failover first.",
2842 for instance_name in instance_list:
2843 instance = self.cfg.GetInstanceInfo(instance_name)
2844 if node.name in instance.all_nodes:
2845 raise errors.OpPrereqError("Instance %s is still running on the node,"
2846 " please remove first." % instance_name,
2848 self.op.node_name = node.name
2851 def Exec(self, feedback_fn):
2852 """Removes the node from the cluster.
2856 logging.info("Stopping the node daemon and removing configs from node %s",
2859 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2861 # Promote nodes to master candidate as needed
2862 _AdjustCandidatePool(self, exceptions=[node.name])
2863 self.context.RemoveNode(node.name)
2865 # Run post hooks on the node before it's removed
2866 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2868 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2870 # pylint: disable-msg=W0702
2871 self.LogWarning("Errors occurred running hooks on %s" % node.name)
2873 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2874 msg = result.fail_msg
2876 self.LogWarning("Errors encountered on the remote node while leaving"
2877 " the cluster: %s", msg)
2879 # Remove node from our /etc/hosts
2880 if self.cfg.GetClusterInfo().modify_etc_hosts:
2881 # FIXME: this should be done via an rpc call to node daemon
2882 utils.RemoveHostFromEtcHosts(node.name)
2883 _RedistributeAncillaryFiles(self)
2886 class LUQueryNodes(NoHooksLU):
2887 """Logical unit for querying nodes.
2890 # pylint: disable-msg=W0142
2891 _OP_REQP = ["output_fields", "names", "use_locking"]
2894 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2895 "master_candidate", "offline", "drained"]
2897 _FIELDS_DYNAMIC = utils.FieldSet(
2899 "mtotal", "mnode", "mfree",
2901 "ctotal", "cnodes", "csockets",
2904 _FIELDS_STATIC = utils.FieldSet(*[
2905 "pinst_cnt", "sinst_cnt",
2906 "pinst_list", "sinst_list",
2907 "pip", "sip", "tags",
2909 "role"] + _SIMPLE_FIELDS
2912 def ExpandNames(self):
2913 _CheckOutputFields(static=self._FIELDS_STATIC,
2914 dynamic=self._FIELDS_DYNAMIC,
2915 selected=self.op.output_fields)
2917 self.needed_locks = {}
2918 self.share_locks[locking.LEVEL_NODE] = 1
2921 self.wanted = _GetWantedNodes(self, self.op.names)
2923 self.wanted = locking.ALL_SET
2925 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2926 self.do_locking = self.do_node_query and self.op.use_locking
2928 # if we don't request only static fields, we need to lock the nodes
2929 self.needed_locks[locking.LEVEL_NODE] = self.wanted
2931 def CheckPrereq(self):
2932 """Check prerequisites.
2935 # The validation of the node list is done in _GetWantedNodes if the list is
2936 # non-empty; if it is empty, there's no validation to do
2939 def Exec(self, feedback_fn):
2940 """Computes the list of nodes and their attributes.
2943 all_info = self.cfg.GetAllNodesInfo()
2945 nodenames = self.acquired_locks[locking.LEVEL_NODE]
2946 elif self.wanted != locking.ALL_SET:
2947 nodenames = self.wanted
2948 missing = set(nodenames).difference(all_info.keys())
2950 raise errors.OpExecError(
2951 "Some nodes were removed before retrieving their data: %s" % missing)
2953 nodenames = all_info.keys()
2955 nodenames = utils.NiceSort(nodenames)
2956 nodelist = [all_info[name] for name in nodenames]
2958 # begin data gathering
2960 if self.do_node_query:
2962 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2963 self.cfg.GetHypervisorType())
2964 for name in nodenames:
2965 nodeinfo = node_data[name]
2966 if not nodeinfo.fail_msg and nodeinfo.payload:
2967 nodeinfo = nodeinfo.payload
2968 fn = utils.TryConvert
2970 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2971 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2972 "mfree": fn(int, nodeinfo.get('memory_free', None)),
2973 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2974 "dfree": fn(int, nodeinfo.get('vg_free', None)),
2975 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2976 "bootid": nodeinfo.get('bootid', None),
2977 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2978 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2981 live_data[name] = {}
2983 live_data = dict.fromkeys(nodenames, {})
2985 node_to_primary = dict([(name, set()) for name in nodenames])
2986 node_to_secondary = dict([(name, set()) for name in nodenames])
2988 inst_fields = frozenset(("pinst_cnt", "pinst_list",
2989 "sinst_cnt", "sinst_list"))
2990 if inst_fields & frozenset(self.op.output_fields):
2991 inst_data = self.cfg.GetAllInstancesInfo()
2993 for inst in inst_data.values():
2994 if inst.primary_node in node_to_primary:
2995 node_to_primary[inst.primary_node].add(inst.name)
2996 for secnode in inst.secondary_nodes:
2997 if secnode in node_to_secondary:
2998 node_to_secondary[secnode].add(inst.name)
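# Illustrative shape of the reverse maps built above (names hypothetical):
#   node_to_primary   == {"node1": set(["inst1", "inst2"]), "node2": set()}
#   node_to_secondary == {"node1": set(), "node2": set(["inst1"])}
# so the pinst_*/sinst_* fields below become simple per-node lookups.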
3000 master_node = self.cfg.GetMasterNode()
3002 # end data gathering
3005 for node in nodelist:
3007 for field in self.op.output_fields:
3008 if field in self._SIMPLE_FIELDS:
3009 val = getattr(node, field)
3010 elif field == "pinst_list":
3011 val = list(node_to_primary[node.name])
3012 elif field == "sinst_list":
3013 val = list(node_to_secondary[node.name])
3014 elif field == "pinst_cnt":
3015 val = len(node_to_primary[node.name])
3016 elif field == "sinst_cnt":
3017 val = len(node_to_secondary[node.name])
3018 elif field == "pip":
3019 val = node.primary_ip
3020 elif field == "sip":
3021 val = node.secondary_ip
3022 elif field == "tags":
3023 val = list(node.GetTags())
3024 elif field == "master":
3025 val = node.name == master_node
3026 elif self._FIELDS_DYNAMIC.Matches(field):
3027 val = live_data[node.name].get(field, None)
3028 elif field == "role":
3029 if node.name == master_node:
3031 elif node.master_candidate:
3040 raise errors.ParameterError(field)
3041 node_output.append(val)
3042 output.append(node_output)
3047 class LUQueryNodeVolumes(NoHooksLU):
3048 """Logical unit for getting volumes on node(s).
3051 _OP_REQP = ["nodes", "output_fields"]
3053 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3054 _FIELDS_STATIC = utils.FieldSet("node")
3056 def ExpandNames(self):
3057 _CheckOutputFields(static=self._FIELDS_STATIC,
3058 dynamic=self._FIELDS_DYNAMIC,
3059 selected=self.op.output_fields)
3061 self.needed_locks = {}
3062 self.share_locks[locking.LEVEL_NODE] = 1
3063 if not self.op.nodes:
3064 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3066 self.needed_locks[locking.LEVEL_NODE] = \
3067 _GetWantedNodes(self, self.op.nodes)
3069 def CheckPrereq(self):
3070 """Check prerequisites.
3072 This checks that the fields required are valid output fields.
3075 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3077 def Exec(self, feedback_fn):
3078 """Computes the list of nodes and their attributes.
3081 nodenames = self.nodes
3082 volumes = self.rpc.call_node_volumes(nodenames)
3084 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3085 in self.cfg.GetInstanceList()]
3087 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3090 for node in nodenames:
3091 nresult = volumes[node]
3094 msg = nresult.fail_msg
3096 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3099 node_vols = nresult.payload[:]
3100 node_vols.sort(key=lambda vol: vol['dev'])
3102 for vol in node_vols:
3104 for field in self.op.output_fields:
3107 elif field == "phys":
3111 elif field == "name":
3113 elif field == "size":
3114 val = int(float(vol['size']))
3115 elif field == "instance":
3117 if node not in lv_by_node[inst]:
3119 if vol['name'] in lv_by_node[inst][node]:
3125 raise errors.ParameterError(field)
3126 node_output.append(str(val))
3128 output.append(node_output)
3133 class LUQueryNodeStorage(NoHooksLU):
3134 """Logical unit for getting information on storage units on node(s).
3137 _OP_REQP = ["nodes", "storage_type", "output_fields"]
3139 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3141 def CheckArguments(self):
3142 _CheckStorageType(self.op.storage_type)
3144 _CheckOutputFields(static=self._FIELDS_STATIC,
3145 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3146 selected=self.op.output_fields)
3148 def ExpandNames(self):
3149 self.needed_locks = {}
3150 self.share_locks[locking.LEVEL_NODE] = 1
3153 self.needed_locks[locking.LEVEL_NODE] = \
3154 _GetWantedNodes(self, self.op.nodes)
3156 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3158 def CheckPrereq(self):
3159 """Check prerequisites.
3161 This checks that the fields required are valid output fields.
3164 self.op.name = getattr(self.op, "name", None)
3166 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3168 def Exec(self, feedback_fn):
3169 """Computes the list of nodes and their attributes.
3172 # Always get name to sort by
3173 if constants.SF_NAME in self.op.output_fields:
3174 fields = self.op.output_fields[:]
3176 fields = [constants.SF_NAME] + self.op.output_fields
3178 # Never ask for node or type as it's only known to the LU
3179 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3180 while extra in fields:
3181 fields.remove(extra)
3183 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3184 name_idx = field_idx[constants.SF_NAME]
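# Example of the index map (the fields list is hypothetical): for
# fields == ["name", "size", "free"], field_idx == {"name": 0, "size": 1,
# "free": 2} and name_idx == 0, later used to key and sort the result rows.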
3186 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3187 data = self.rpc.call_storage_list(self.nodes,
3188 self.op.storage_type, st_args,
3189 self.op.name, fields)
3193 for node in utils.NiceSort(self.nodes):
3194 nresult = data[node]
3198 msg = nresult.fail_msg
3200 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3203 rows = dict([(row[name_idx], row) for row in nresult.payload])
3205 for name in utils.NiceSort(rows.keys()):
3210 for field in self.op.output_fields:
3211 if field == constants.SF_NODE:
3213 elif field == constants.SF_TYPE:
3214 val = self.op.storage_type
3215 elif field in field_idx:
3216 val = row[field_idx[field]]
3218 raise errors.ParameterError(field)
3227 class LUModifyNodeStorage(NoHooksLU):
3228 """Logical unit for modifying a storage volume on a node.
3231 _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3234 def CheckArguments(self):
3235 self.opnode_name = _ExpandNodeName(self.cfg, self.op.node_name)
3237 _CheckStorageType(self.op.storage_type)
3239 def ExpandNames(self):
3240 self.needed_locks = {
3241 locking.LEVEL_NODE: self.op.node_name,
3244 def CheckPrereq(self):
3245 """Check prerequisites.
3248 storage_type = self.op.storage_type
3251 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3253 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3254 " modified" % storage_type,
3257 diff = set(self.op.changes.keys()) - modifiable
3259 raise errors.OpPrereqError("The following fields can not be modified for"
3260 " storage units of type '%s': %r" %
3261 (storage_type, list(diff)),
3264 def Exec(self, feedback_fn):
3265 """Computes the list of nodes and their attributes.
3268 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3269 result = self.rpc.call_storage_modify(self.op.node_name,
3270 self.op.storage_type, st_args,
3271 self.op.name, self.op.changes)
3272 result.Raise("Failed to modify storage unit '%s' on %s" %
3273 (self.op.name, self.op.node_name))
3276 class LUAddNode(LogicalUnit):
3277 """Logical unit for adding node to the cluster.
3281 HTYPE = constants.HTYPE_NODE
3282 _OP_REQP = ["node_name"]
3284 def CheckArguments(self):
3285 # validate/normalize the node name
3286 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3288 def BuildHooksEnv(self):
3291 This will run on all nodes before, and on all nodes + the new node after.
3295 "OP_TARGET": self.op.node_name,
3296 "NODE_NAME": self.op.node_name,
3297 "NODE_PIP": self.op.primary_ip,
3298 "NODE_SIP": self.op.secondary_ip,
3300 nodes_0 = self.cfg.GetNodeList()
3301 nodes_1 = nodes_0 + [self.op.node_name, ]
3302 return env, nodes_0, nodes_1
3304 def CheckPrereq(self):
3305 """Check prerequisites.
3308 - the new node is not already in the config
3310 - its parameters (single/dual homed) match the cluster
3312 Any errors are signaled by raising errors.OpPrereqError.
3315 node_name = self.op.node_name
3318 dns_data = utils.GetHostInfo(node_name)
3320 node = dns_data.name
3321 primary_ip = self.op.primary_ip = dns_data.ip
3322 secondary_ip = getattr(self.op, "secondary_ip", None)
3323 if secondary_ip is None:
3324 secondary_ip = primary_ip
3325 if not utils.IsValidIP(secondary_ip):
3326 raise errors.OpPrereqError("Invalid secondary IP given",
3328 self.op.secondary_ip = secondary_ip
3330 node_list = cfg.GetNodeList()
3331 if not self.op.readd and node in node_list:
3332 raise errors.OpPrereqError("Node %s is already in the configuration" %
3333 node, errors.ECODE_EXISTS)
3334 elif self.op.readd and node not in node_list:
3335 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3338 self.changed_primary_ip = False
3340 for existing_node_name in node_list:
3341 existing_node = cfg.GetNodeInfo(existing_node_name)
3343 if self.op.readd and node == existing_node_name:
3344 if existing_node.secondary_ip != secondary_ip:
3345 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3346 " address configuration as before",
3348 if existing_node.primary_ip != primary_ip:
3349 self.changed_primary_ip = True
3353 if (existing_node.primary_ip == primary_ip or
3354 existing_node.secondary_ip == primary_ip or
3355 existing_node.primary_ip == secondary_ip or
3356 existing_node.secondary_ip == secondary_ip):
3357 raise errors.OpPrereqError("New node ip address(es) conflict with"
3358 " existing node %s" % existing_node.name,
3359 errors.ECODE_NOTUNIQUE)
3361 # check that the type of the node (single versus dual homed) is the
3362 # same as for the master
3363 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3364 master_singlehomed = myself.secondary_ip == myself.primary_ip
3365 newbie_singlehomed = secondary_ip == primary_ip
3366 if master_singlehomed != newbie_singlehomed:
3367 if master_singlehomed:
3368 raise errors.OpPrereqError("The master has no private ip but the"
3369 " new node has one",
3372 raise errors.OpPrereqError("The master has a private ip but the"
3373 " new node doesn't have one",
3376 # checks reachability
3377 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3378 raise errors.OpPrereqError("Node not reachable by ping",
3379 errors.ECODE_ENVIRON)
3381 if not newbie_singlehomed:
3382 # check reachability from my secondary ip to newbie's secondary ip
3383 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3384 source=myself.secondary_ip):
3385 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3386 " based ping to noded port",
3387 errors.ECODE_ENVIRON)
3394 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3397 self.new_node = self.cfg.GetNodeInfo(node)
3398 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3400 self.new_node = objects.Node(name=node,
3401 primary_ip=primary_ip,
3402 secondary_ip=secondary_ip,
3403 master_candidate=self.master_candidate,
3404 offline=False, drained=False)
3406 def Exec(self, feedback_fn):
3407 """Adds the new node to the cluster.
3410 new_node = self.new_node
3411 node = new_node.name
3413 # for re-adds, reset the offline/drained/master-candidate flags;
3414 # we need to reset here, otherwise offline would prevent RPC calls
3415 # later in the procedure; this also means that if the re-add
3416 # fails, we are left with a non-offlined, broken node
3418 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3419 self.LogInfo("Readding a node, the offline/drained flags were reset")
3420 # if we demote the node, we do cleanup later in the procedure
3421 new_node.master_candidate = self.master_candidate
3422 if self.changed_primary_ip:
3423 new_node.primary_ip = self.op.primary_ip
3425 # notify the user about any possible mc promotion
3426 if new_node.master_candidate:
3427 self.LogInfo("Node will be a master candidate")
3429 # check connectivity
3430 result = self.rpc.call_version([node])[node]
3431 result.Raise("Can't get version information from node %s" % node)
3432 if constants.PROTOCOL_VERSION == result.payload:
3433 logging.info("Communication to node %s fine, sw version %s match",
3434 node, result.payload)
3436 raise errors.OpExecError("Version mismatch master version %s,"
3437 " node version %s" %
3438 (constants.PROTOCOL_VERSION, result.payload))
3441 if self.cfg.GetClusterInfo().modify_ssh_setup:
3442 logging.info("Copy ssh key to node %s", node)
3443 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3445 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3446 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3450 keyarray.append(utils.ReadFile(i))
3452 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3453 keyarray[2], keyarray[3], keyarray[4],
3455 result.Raise("Cannot transfer ssh keys to the new node")
3457 # Add node to our /etc/hosts, and add key to known_hosts
3458 if self.cfg.GetClusterInfo().modify_etc_hosts:
3459 # FIXME: this should be done via an rpc call to node daemon
3460 utils.AddHostToEtcHosts(new_node.name)
3462 if new_node.secondary_ip != new_node.primary_ip:
3463 result = self.rpc.call_node_has_ip_address(new_node.name,
3464 new_node.secondary_ip)
3465 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3466 prereq=True, ecode=errors.ECODE_ENVIRON)
3467 if not result.payload:
3468 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3469 " you gave (%s). Please fix and re-run this"
3470 " command." % new_node.secondary_ip)
3472 node_verify_list = [self.cfg.GetMasterNode()]
3473 node_verify_param = {
3474 constants.NV_NODELIST: [node],
3475 # TODO: do a node-net-test as well?
3478 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3479 self.cfg.GetClusterName())
3480 for verifier in node_verify_list:
3481 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3482 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3484 for failed in nl_payload:
3485 feedback_fn("ssh/hostname verification failed"
3486 " (checking from %s): %s" %
3487 (verifier, nl_payload[failed]))
3488 raise errors.OpExecError("ssh/hostname verification failed.")
3491 _RedistributeAncillaryFiles(self)
3492 self.context.ReaddNode(new_node)
3493 # make sure we redistribute the config
3494 self.cfg.Update(new_node, feedback_fn)
3495 # and make sure the new node will not have old files around
3496 if not new_node.master_candidate:
3497 result = self.rpc.call_node_demote_from_mc(new_node.name)
3498 msg = result.fail_msg
3500 self.LogWarning("Node failed to demote itself from master"
3501 " candidate status: %s" % msg)
3503 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3504 self.context.AddNode(new_node, self.proc.GetECId())
3507 class LUSetNodeParams(LogicalUnit):
3508 """Modifies the parameters of a node.
3511 HPATH = "node-modify"
3512 HTYPE = constants.HTYPE_NODE
3513 _OP_REQP = ["node_name"]
3516 def CheckArguments(self):
3517 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3518 _CheckBooleanOpField(self.op, 'master_candidate')
3519 _CheckBooleanOpField(self.op, 'offline')
3520 _CheckBooleanOpField(self.op, 'drained')
3521 _CheckBooleanOpField(self.op, 'auto_promote')
3522 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3523 if all_mods.count(None) == 3:
3524 raise errors.OpPrereqError("Please pass at least one modification",
3526 if all_mods.count(True) > 1:
3527 raise errors.OpPrereqError("Can't set the node into more than one"
3528 " state at the same time",
3531 # Boolean value that tells us whether we're offlining or draining the node
3532 self.offline_or_drain = (self.op.offline == True or
3533 self.op.drained == True)
3534 self.deoffline_or_drain = (self.op.offline == False or
3535 self.op.drained == False)
3536 self.might_demote = (self.op.master_candidate == False or
3537 self.offline_or_drain)
3539 self.lock_all = self.op.auto_promote and self.might_demote
3542 def ExpandNames(self):
3544 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3546 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3548 def BuildHooksEnv(self):
3551 This runs on the master node.
3555 "OP_TARGET": self.op.node_name,
3556 "MASTER_CANDIDATE": str(self.op.master_candidate),
3557 "OFFLINE": str(self.op.offline),
3558 "DRAINED": str(self.op.drained),
3560 nl = [self.cfg.GetMasterNode(),
3564 def CheckPrereq(self):
3565 """Check prerequisites.
3567 This only checks the instance list against the existing names.
3570 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3572 if (self.op.master_candidate is not None or
3573 self.op.drained is not None or
3574 self.op.offline is not None):
3575 # we can't change the master's node flags
3576 if self.op.node_name == self.cfg.GetMasterNode():
3577 raise errors.OpPrereqError("The master role can be changed"
3578 " only via masterfailover",
3582 if node.master_candidate and self.might_demote and not self.lock_all:
3583 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3584 # check if after removing the current node, we're missing master
3586 (mc_remaining, mc_should, _) = \
3587 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3588 if mc_remaining < mc_should:
3589 raise errors.OpPrereqError("Not enough master candidates, please"
3590 " pass auto_promote to allow promotion",
3593 if (self.op.master_candidate == True and
3594 ((node.offline and not self.op.offline == False) or
3595 (node.drained and not self.op.drained == False))):
3596 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3597 " to master_candidate" % node.name,
3600 # If we're being deofflined/drained, we'll MC ourself if needed
3601 if (self.deoffline_or_drain and not self.offline_or_drain and not
3602 self.op.master_candidate == True and not node.master_candidate):
3603 self.op.master_candidate = _DecideSelfPromotion(self)
3604 if self.op.master_candidate:
3605 self.LogInfo("Autopromoting node to master candidate")
3609 def Exec(self, feedback_fn):
3618 if self.op.offline is not None:
3619 node.offline = self.op.offline
3620 result.append(("offline", str(self.op.offline)))
3621 if self.op.offline == True:
3622 if node.master_candidate:
3623 node.master_candidate = False
3625 result.append(("master_candidate", "auto-demotion due to offline"))
3627 node.drained = False
3628 result.append(("drained", "clear drained status due to offline"))
3630 if self.op.master_candidate is not None:
3631 node.master_candidate = self.op.master_candidate
3633 result.append(("master_candidate", str(self.op.master_candidate)))
3634 if self.op.master_candidate == False:
3635 rrc = self.rpc.call_node_demote_from_mc(node.name)
3638 self.LogWarning("Node failed to demote itself: %s" % msg)
3640 if self.op.drained is not None:
3641 node.drained = self.op.drained
3642 result.append(("drained", str(self.op.drained)))
3643 if self.op.drained == True:
3644 if node.master_candidate:
3645 node.master_candidate = False
3647 result.append(("master_candidate", "auto-demotion due to drain"))
3648 rrc = self.rpc.call_node_demote_from_mc(node.name)
3651 self.LogWarning("Node failed to demote itself: %s" % msg)
3653 node.offline = False
3654 result.append(("offline", "clear offline status due to drain"))
3656 # we locked all nodes, we adjust the CP before updating this node
3658 _AdjustCandidatePool(self, [node.name])
3660 # this will trigger configuration file update, if needed
3661 self.cfg.Update(node, feedback_fn)
3663 # this will trigger job queue propagation or cleanup
3665 self.context.ReaddNode(node)
3670 class LUPowercycleNode(NoHooksLU):
3671 """Powercycles a node.
3674 _OP_REQP = ["node_name", "force"]
3677 def CheckArguments(self):
3678 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3679 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3680 raise errors.OpPrereqError("The node is the master and the force"
3681 " parameter was not set",
3684 def ExpandNames(self):
3685 """Locking for PowercycleNode.
3687 This is a last-resort option and shouldn't block on other
3688 jobs. Therefore, we grab no locks.
3691 self.needed_locks = {}
3693 def CheckPrereq(self):
3694 """Check prerequisites.
3696 This LU has no prereqs.
3701 def Exec(self, feedback_fn):
3705 result = self.rpc.call_node_powercycle(self.op.node_name,
3706 self.cfg.GetHypervisorType())
3707 result.Raise("Failed to schedule the reboot")
3708 return result.payload
3711 class LUQueryClusterInfo(NoHooksLU):
3712 """Query cluster configuration.
3718 def ExpandNames(self):
3719 self.needed_locks = {}
3721 def CheckPrereq(self):
3722 """No prerequsites needed for this LU.
3727 def Exec(self, feedback_fn):
3728 """Return cluster config.
3731 cluster = self.cfg.GetClusterInfo()
3734 # Filter just for enabled hypervisors
3735 for os_name, hv_dict in cluster.os_hvp.items():
3736 os_hvp[os_name] = {}
3737 for hv_name, hv_params in hv_dict.items():
3738 if hv_name in cluster.enabled_hypervisors:
3739 os_hvp[os_name][hv_name] = hv_params
3742 "software_version": constants.RELEASE_VERSION,
3743 "protocol_version": constants.PROTOCOL_VERSION,
3744 "config_version": constants.CONFIG_VERSION,
3745 "os_api_version": max(constants.OS_API_VERSIONS),
3746 "export_version": constants.EXPORT_VERSION,
3747 "architecture": (platform.architecture()[0], platform.machine()),
3748 "name": cluster.cluster_name,
3749 "master": cluster.master_node,
3750 "default_hypervisor": cluster.enabled_hypervisors[0],
3751 "enabled_hypervisors": cluster.enabled_hypervisors,
3752 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3753 for hypervisor_name in cluster.enabled_hypervisors]),
3755 "beparams": cluster.beparams,
3756 "nicparams": cluster.nicparams,
3757 "candidate_pool_size": cluster.candidate_pool_size,
3758 "master_netdev": cluster.master_netdev,
3759 "volume_group_name": cluster.volume_group_name,
3760 "file_storage_dir": cluster.file_storage_dir,
3761 "maintain_node_health": cluster.maintain_node_health,
3762 "ctime": cluster.ctime,
3763 "mtime": cluster.mtime,
3764 "uuid": cluster.uuid,
3765 "tags": list(cluster.GetTags()),
3766 "uid_pool": cluster.uid_pool,
3772 class LUQueryConfigValues(NoHooksLU):
3773 """Return configuration values.
3778 _FIELDS_DYNAMIC = utils.FieldSet()
3779 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3782 def ExpandNames(self):
3783 self.needed_locks = {}
3785 _CheckOutputFields(static=self._FIELDS_STATIC,
3786 dynamic=self._FIELDS_DYNAMIC,
3787 selected=self.op.output_fields)
3789 def CheckPrereq(self):
3790 """No prerequisites.
3795 def Exec(self, feedback_fn):
3796 """Dump a representation of the cluster config to the standard output.
3800 for field in self.op.output_fields:
3801 if field == "cluster_name":
3802 entry = self.cfg.GetClusterName()
3803 elif field == "master_node":
3804 entry = self.cfg.GetMasterNode()
3805 elif field == "drain_flag":
3806 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3807 elif field == "watcher_pause":
3808 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3810 raise errors.ParameterError(field)
3811 values.append(entry)
3815 class LUActivateInstanceDisks(NoHooksLU):
3816 """Bring up an instance's disks.
3819 _OP_REQP = ["instance_name"]
3822 def ExpandNames(self):
3823 self._ExpandAndLockInstance()
3824 self.needed_locks[locking.LEVEL_NODE] = []
3825 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3827 def DeclareLocks(self, level):
3828 if level == locking.LEVEL_NODE:
3829 self._LockInstancesNodes()
3831 def CheckPrereq(self):
3832 """Check prerequisites.
3834 This checks that the instance is in the cluster.
3837 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3838 assert self.instance is not None, \
3839 "Cannot retrieve locked instance %s" % self.op.instance_name
3840 _CheckNodeOnline(self, self.instance.primary_node)
3841 if not hasattr(self.op, "ignore_size"):
3842 self.op.ignore_size = False
3844 def Exec(self, feedback_fn):
3845 """Activate the disks.
3848 disks_ok, disks_info = \
3849 _AssembleInstanceDisks(self, self.instance,
3850 ignore_size=self.op.ignore_size)
3852 raise errors.OpExecError("Cannot activate block devices")
3857 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
3859 """Prepare the block devices for an instance.
3861 This sets up the block devices on all nodes.
3863 @type lu: L{LogicalUnit}
3864 @param lu: the logical unit on whose behalf we execute
3865 @type instance: L{objects.Instance}
3866 @param instance: the instance for whose disks we assemble
3867 @type disks: list of L{objects.Disk} or None
3868 @param disks: which disks to assemble (or all, if None)
3869 @type ignore_secondaries: boolean
3870 @param ignore_secondaries: if true, errors on secondary nodes
3871 won't result in an error return from the function
3872 @type ignore_size: boolean
3873 @param ignore_size: if true, the current known size of the disk
3874 will not be used during the disk activation, useful for cases
3875 when the size is wrong
3876   @return: a tuple of (disks_ok, device_info), where device_info is a list
3877       of (host, instance_visible_name, node_visible_name) tuples
3878       with the mapping from node devices to instance devices
3883 iname = instance.name
3884 disks = _ExpandCheckDisks(instance, disks)
3886 # With the two passes mechanism we try to reduce the window of
3887 # opportunity for the race condition of switching DRBD to primary
3888   # before handshaking occurred, but we do not eliminate it
3890 # The proper fix would be to wait (with some limits) until the
3891 # connection has been made and drbd transitions from WFConnection
3892 # into any other network-connected state (Connected, SyncTarget,
3895 # 1st pass, assemble on all nodes in secondary mode
3896 for inst_disk in disks:
3897 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3899 node_disk = node_disk.Copy()
3900 node_disk.UnsetSize()
3901 lu.cfg.SetDiskID(node_disk, node)
3902 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3903 msg = result.fail_msg
3905 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3906 " (is_primary=False, pass=1): %s",
3907 inst_disk.iv_name, node, msg)
3908 if not ignore_secondaries:
3911 # FIXME: race condition on drbd migration to primary
3913 # 2nd pass, do only the primary node
3914 for inst_disk in disks:
3917 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3918 if node != instance.primary_node:
3921 node_disk = node_disk.Copy()
3922 node_disk.UnsetSize()
3923 lu.cfg.SetDiskID(node_disk, node)
3924 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3925 msg = result.fail_msg
3927 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3928 " (is_primary=True, pass=2): %s",
3929 inst_disk.iv_name, node, msg)
3932 dev_path = result.payload
3934 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3936 # leave the disks configured for the primary node
3937 # this is a workaround that would be fixed better by
3938 # improving the logical/physical id handling
3940 lu.cfg.SetDiskID(disk, instance.primary_node)
3942 return disks_ok, device_info
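# The helper below is an illustrative sketch only: it is not part of the
# original module, is never called, and its name is hypothetical.  It shows
# the usual caller pattern for _AssembleInstanceDisks - check the overall
# status and roll the devices back on failure - which _StartInstanceDisks
# below implements for real.
def _ExampleActivateDisksOrFail(lu, instance):
  """Sketch: assemble all of an instance's disks, failing hard on error.

  """
  disks_ok, device_info = _AssembleInstanceDisks(lu, instance,
                                                 ignore_secondaries=False)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    raise errors.OpExecError("Cannot activate block devices")
  return device_info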
3945 def _StartInstanceDisks(lu, instance, force):
3946 """Start the disks of an instance.
3949 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3950 ignore_secondaries=force)
3952 _ShutdownInstanceDisks(lu, instance)
3953 if force is not None and not force:
3954 lu.proc.LogWarning("", hint="If the message above refers to a"
3956 " you can retry the operation using '--force'.")
3957 raise errors.OpExecError("Disk consistency error")
3960 class LUDeactivateInstanceDisks(NoHooksLU):
3961 """Shutdown an instance's disks.
3964 _OP_REQP = ["instance_name"]
3967 def ExpandNames(self):
3968 self._ExpandAndLockInstance()
3969 self.needed_locks[locking.LEVEL_NODE] = []
3970 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3972 def DeclareLocks(self, level):
3973 if level == locking.LEVEL_NODE:
3974 self._LockInstancesNodes()
3976 def CheckPrereq(self):
3977 """Check prerequisites.
3979 This checks that the instance is in the cluster.
3982 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3983 assert self.instance is not None, \
3984 "Cannot retrieve locked instance %s" % self.op.instance_name
3986 def Exec(self, feedback_fn):
3987 """Deactivate the disks
3990 instance = self.instance
3991 _SafeShutdownInstanceDisks(self, instance)
3994 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
3995 """Shutdown block devices of an instance.
3997   This function checks that the instance is not running before calling
3998 _ShutdownInstanceDisks.
4001 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4002 _ShutdownInstanceDisks(lu, instance, disks=disks)
4005 def _ExpandCheckDisks(instance, disks):
4006 """Return the instance disks selected by the disks list
4008 @type disks: list of L{objects.Disk} or None
4009 @param disks: selected disks
4010 @rtype: list of L{objects.Disk}
4011 @return: selected instance disks to act on
4015 return instance.disks
4017 if not set(disks).issubset(instance.disks):
4018 raise errors.ProgrammerError("Can only act on disks belonging to the"
4023 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4024 """Shutdown block devices of an instance.
4026 This does the shutdown on all nodes of the instance.
4028   If ignore_primary is false, errors on the primary node are
4033 disks = _ExpandCheckDisks(instance, disks)
4036 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4037 lu.cfg.SetDiskID(top_disk, node)
4038 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4039 msg = result.fail_msg
4041 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4042 disk.iv_name, node, msg)
4043 if not ignore_primary or node != instance.primary_node:
4048 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4049 """Checks if a node has enough free memory.
4051   This function checks if a given node has the needed amount of free
4052   memory. In case the node has less memory or we cannot get the
4053   information from the node, this function raises an OpPrereqError
4056 @type lu: C{LogicalUnit}
4057 @param lu: a logical unit from which we get configuration data
4059 @param node: the node to check
4060 @type reason: C{str}
4061 @param reason: string to use in the error message
4062 @type requested: C{int}
4063 @param requested: the amount of memory in MiB to check for
4064 @type hypervisor_name: C{str}
4065 @param hypervisor_name: the hypervisor to ask for memory stats
4066 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4067 we cannot check the node
4070 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4071 nodeinfo[node].Raise("Can't get data from node %s" % node,
4072 prereq=True, ecode=errors.ECODE_ENVIRON)
4073 free_mem = nodeinfo[node].payload.get('memory_free', None)
4074 if not isinstance(free_mem, int):
4075 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4076 " was '%s'" % (node, free_mem),
4077 errors.ECODE_ENVIRON)
4078 if requested > free_mem:
4079 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4080 " needed %s MiB, available %s MiB" %
4081 (node, reason, requested, free_mem),
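# Illustrative sketch only: the helper below is not part of the original
# module and is never called; its name and the explicit mem_mib argument are
# hypothetical.  It shows how a logical unit typically guards an instance
# start with the memory check above; LUStartupInstance below derives the
# amount from the instance's filled beparams instead.
def _ExampleCheckStartupMemory(lu, instance, mem_mib):
  """Sketch: verify the primary node can hold mem_mib MiB before starting.

  """
  _CheckNodeFreeMemory(lu, instance.primary_node,
                       "starting instance %s" % instance.name,
                       mem_mib, instance.hypervisor)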
4085 def _CheckNodesFreeDisk(lu, nodenames, requested):
4086 """Checks if nodes have enough free disk space in the default VG.
4088   This function checks if all given nodes have the needed amount of
4089   free disk. In case any node has less disk or we cannot get the
4090   information from the node, this function raises an OpPrereqError
4093 @type lu: C{LogicalUnit}
4094 @param lu: a logical unit from which we get configuration data
4095 @type nodenames: C{list}
4096 @param nodenames: the list of node names to check
4097 @type requested: C{int}
4098 @param requested: the amount of disk in MiB to check for
4099 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4100 we cannot check the node
4103 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4104 lu.cfg.GetHypervisorType())
4105 for node in nodenames:
4106 info = nodeinfo[node]
4107 info.Raise("Cannot get current information from node %s" % node,
4108 prereq=True, ecode=errors.ECODE_ENVIRON)
4109 vg_free = info.payload.get("vg_free", None)
4110 if not isinstance(vg_free, int):
4111 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4112 " result was '%s'" % (node, vg_free),
4113 errors.ECODE_ENVIRON)
4114 if requested > vg_free:
4115 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4116 " required %d MiB, available %d MiB" %
4117 (node, requested, vg_free),
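# Illustrative sketch only: the helper below is not part of the original
# module and is never called; its name and the required_mib argument are
# hypothetical.  It shows the matching usage pattern for the disk-space
# check above, run over all nodes that will hold the instance's data.
def _ExampleCheckDiskSpace(lu, instance, required_mib):
  """Sketch: verify every node of an instance has required_mib MiB free.

  """
  _CheckNodesFreeDisk(lu, instance.all_nodes, required_mib)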
4121 class LUStartupInstance(LogicalUnit):
4122 """Starts an instance.
4125 HPATH = "instance-start"
4126 HTYPE = constants.HTYPE_INSTANCE
4127 _OP_REQP = ["instance_name", "force"]
4130 def ExpandNames(self):
4131 self._ExpandAndLockInstance()
4133 def BuildHooksEnv(self):
4136 This runs on master, primary and secondary nodes of the instance.
4140 "FORCE": self.op.force,
4142 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4143 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4146 def CheckPrereq(self):
4147 """Check prerequisites.
4149 This checks that the instance is in the cluster.
4152 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4153 assert self.instance is not None, \
4154 "Cannot retrieve locked instance %s" % self.op.instance_name
4157 self.beparams = getattr(self.op, "beparams", {})
4159 if not isinstance(self.beparams, dict):
4160 raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4161 " dict" % (type(self.beparams), ),
4163 # fill the beparams dict
4164 utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
4165 self.op.beparams = self.beparams
4168 self.hvparams = getattr(self.op, "hvparams", {})
4170 if not isinstance(self.hvparams, dict):
4171 raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4172 " dict" % (type(self.hvparams), ),
4175 # check hypervisor parameter syntax (locally)
4176 cluster = self.cfg.GetClusterInfo()
4177 utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
4178 filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
4180 filled_hvp.update(self.hvparams)
4181 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4182 hv_type.CheckParameterSyntax(filled_hvp)
4183 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4184 self.op.hvparams = self.hvparams
4186 _CheckNodeOnline(self, instance.primary_node)
4188 bep = self.cfg.GetClusterInfo().FillBE(instance)
4189 # check bridges existence
4190 _CheckInstanceBridgesExist(self, instance)
4192 remote_info = self.rpc.call_instance_info(instance.primary_node,
4194 instance.hypervisor)
4195 remote_info.Raise("Error checking node %s" % instance.primary_node,
4196 prereq=True, ecode=errors.ECODE_ENVIRON)
4197 if not remote_info.payload: # not running already
4198 _CheckNodeFreeMemory(self, instance.primary_node,
4199 "starting instance %s" % instance.name,
4200 bep[constants.BE_MEMORY], instance.hypervisor)
4202 def Exec(self, feedback_fn):
4203 """Start the instance.
4206 instance = self.instance
4207 force = self.op.force
4209 self.cfg.MarkInstanceUp(instance.name)
4211 node_current = instance.primary_node
4213 _StartInstanceDisks(self, instance, force)
4215 result = self.rpc.call_instance_start(node_current, instance,
4216 self.hvparams, self.beparams)
4217 msg = result.fail_msg
4219 _ShutdownInstanceDisks(self, instance)
4220 raise errors.OpExecError("Could not start instance: %s" % msg)
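# Illustrative sketch only: the helper below is not part of the original
# module, is never called, and its name is hypothetical.  It roughly mirrors
# the hvparams handling in LUStartupInstance.CheckPrereq above: type-check
# the start-time override, layer it over the cluster- and instance-level
# parameters, then run the hypervisor's syntax check on the result.
def _ExampleFillStartupHvParams(lu, instance, override):
  """Sketch: validate and fill a start-time hypervisor parameter override.

  """
  cluster = lu.cfg.GetClusterInfo()
  utils.ForceDictType(override, constants.HVS_PARAMETER_TYPES)
  filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                instance.hvparams)
  filled_hvp.update(override)
  hypervisor.GetHypervisor(instance.hypervisor).CheckParameterSyntax(filled_hvp)
  return filled_hvp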
4223 class LURebootInstance(LogicalUnit):
4224 """Reboot an instance.
4227 HPATH = "instance-reboot"
4228 HTYPE = constants.HTYPE_INSTANCE
4229 _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
4232 def CheckArguments(self):
4233 """Check the arguments.
4236 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4237 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4239 def ExpandNames(self):
4240 if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
4241 constants.INSTANCE_REBOOT_HARD,
4242 constants.INSTANCE_REBOOT_FULL]:
4243 raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
4244 (constants.INSTANCE_REBOOT_SOFT,
4245 constants.INSTANCE_REBOOT_HARD,
4246 constants.INSTANCE_REBOOT_FULL))
4247 self._ExpandAndLockInstance()
4249 def BuildHooksEnv(self):
4252 This runs on master, primary and secondary nodes of the instance.
4256 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4257 "REBOOT_TYPE": self.op.reboot_type,
4258 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4260 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4261 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4264 def CheckPrereq(self):
4265 """Check prerequisites.
4267 This checks that the instance is in the cluster.
4270 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4271 assert self.instance is not None, \
4272 "Cannot retrieve locked instance %s" % self.op.instance_name
4274 _CheckNodeOnline(self, instance.primary_node)
4276 # check bridges existence
4277 _CheckInstanceBridgesExist(self, instance)
4279 def Exec(self, feedback_fn):
4280 """Reboot the instance.
4283 instance = self.instance
4284 ignore_secondaries = self.op.ignore_secondaries
4285 reboot_type = self.op.reboot_type
4287 node_current = instance.primary_node
4289 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4290 constants.INSTANCE_REBOOT_HARD]:
4291 for disk in instance.disks:
4292 self.cfg.SetDiskID(disk, node_current)
4293 result = self.rpc.call_instance_reboot(node_current, instance,
4295 self.shutdown_timeout)
4296 result.Raise("Could not reboot instance")
4298 result = self.rpc.call_instance_shutdown(node_current, instance,
4299 self.shutdown_timeout)
4300 result.Raise("Could not shutdown instance for full reboot")
4301 _ShutdownInstanceDisks(self, instance)
4302 _StartInstanceDisks(self, instance, ignore_secondaries)
4303 result = self.rpc.call_instance_start(node_current, instance, None, None)
4304 msg = result.fail_msg
4306 _ShutdownInstanceDisks(self, instance)
4307 raise errors.OpExecError("Could not start instance for"
4308 " full reboot: %s" % msg)
4310 self.cfg.MarkInstanceUp(instance.name)
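# Note on LURebootInstance.Exec above (comment only): the three reboot types
# map to different code paths:
#   - INSTANCE_REBOOT_SOFT and INSTANCE_REBOOT_HARD are delegated to the
#     hypervisor via call_instance_reboot, with the disks left assembled;
#   - INSTANCE_REBOOT_FULL shuts the instance down, cycles its disks with
#     _ShutdownInstanceDisks/_StartInstanceDisks and starts it again.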
4313 class LUShutdownInstance(LogicalUnit):
4314 """Shutdown an instance.
4317 HPATH = "instance-stop"
4318 HTYPE = constants.HTYPE_INSTANCE
4319 _OP_REQP = ["instance_name"]
4322 def CheckArguments(self):
4323 """Check the arguments.
4326 self.timeout = getattr(self.op, "timeout",
4327 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4329 def ExpandNames(self):
4330 self._ExpandAndLockInstance()
4332 def BuildHooksEnv(self):
4335 This runs on master, primary and secondary nodes of the instance.
4338 env = _BuildInstanceHookEnvByObject(self, self.instance)
4339 env["TIMEOUT"] = self.timeout
4340 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4343 def CheckPrereq(self):
4344 """Check prerequisites.
4346 This checks that the instance is in the cluster.
4349 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4350 assert self.instance is not None, \
4351 "Cannot retrieve locked instance %s" % self.op.instance_name
4352 _CheckNodeOnline(self, self.instance.primary_node)
4354 def Exec(self, feedback_fn):
4355 """Shutdown the instance.
4358 instance = self.instance
4359 node_current = instance.primary_node
4360 timeout = self.timeout
4361 self.cfg.MarkInstanceDown(instance.name)
4362 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4363 msg = result.fail_msg
4365 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4367 _ShutdownInstanceDisks(self, instance)
4370 class LUReinstallInstance(LogicalUnit):
4371 """Reinstall an instance.
4374 HPATH = "instance-reinstall"
4375 HTYPE = constants.HTYPE_INSTANCE
4376 _OP_REQP = ["instance_name"]
4379 def ExpandNames(self):
4380 self._ExpandAndLockInstance()
4382 def BuildHooksEnv(self):
4385 This runs on master, primary and secondary nodes of the instance.
4388 env = _BuildInstanceHookEnvByObject(self, self.instance)
4389 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4392 def CheckPrereq(self):
4393 """Check prerequisites.
4395 This checks that the instance is in the cluster and is not running.
4398 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4399 assert instance is not None, \
4400 "Cannot retrieve locked instance %s" % self.op.instance_name
4401 _CheckNodeOnline(self, instance.primary_node)
4403 if instance.disk_template == constants.DT_DISKLESS:
4404 raise errors.OpPrereqError("Instance '%s' has no disks" %
4405 self.op.instance_name,
4407 _CheckInstanceDown(self, instance, "cannot reinstall")
4409 self.op.os_type = getattr(self.op, "os_type", None)
4410 self.op.force_variant = getattr(self.op, "force_variant", False)
4411 if self.op.os_type is not None:
4413 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4414 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4416 self.instance = instance
4418 def Exec(self, feedback_fn):
4419 """Reinstall the instance.
4422 inst = self.instance
4424 if self.op.os_type is not None:
4425 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4426 inst.os = self.op.os_type
4427 self.cfg.Update(inst, feedback_fn)
4429 _StartInstanceDisks(self, inst, None)
4431 feedback_fn("Running the instance OS create scripts...")
4432 # FIXME: pass debug option from opcode to backend
4433 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4434 self.op.debug_level)
4435 result.Raise("Could not install OS for instance %s on node %s" %
4436 (inst.name, inst.primary_node))
4438 _ShutdownInstanceDisks(self, inst)
4441 class LURecreateInstanceDisks(LogicalUnit):
4442 """Recreate an instance's missing disks.
4445 HPATH = "instance-recreate-disks"
4446 HTYPE = constants.HTYPE_INSTANCE
4447 _OP_REQP = ["instance_name", "disks"]
4450 def CheckArguments(self):
4451 """Check the arguments.
4454 if not isinstance(self.op.disks, list):
4455 raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4456 for item in self.op.disks:
4457 if (not isinstance(item, int) or
4459 raise errors.OpPrereqError("Invalid disk specification '%s'" %
4460 str(item), errors.ECODE_INVAL)
4462 def ExpandNames(self):
4463 self._ExpandAndLockInstance()
4465 def BuildHooksEnv(self):
4468 This runs on master, primary and secondary nodes of the instance.
4471 env = _BuildInstanceHookEnvByObject(self, self.instance)
4472 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4475 def CheckPrereq(self):
4476 """Check prerequisites.
4478 This checks that the instance is in the cluster and is not running.
4481 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4482 assert instance is not None, \
4483 "Cannot retrieve locked instance %s" % self.op.instance_name
4484 _CheckNodeOnline(self, instance.primary_node)
4486 if instance.disk_template == constants.DT_DISKLESS:
4487 raise errors.OpPrereqError("Instance '%s' has no disks" %
4488 self.op.instance_name, errors.ECODE_INVAL)
4489 _CheckInstanceDown(self, instance, "cannot recreate disks")
4491 if not self.op.disks:
4492 self.op.disks = range(len(instance.disks))
4494 for idx in self.op.disks:
4495 if idx >= len(instance.disks):
4496 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4499 self.instance = instance
4501 def Exec(self, feedback_fn):
4502 """Recreate the disks.
4506 for idx, _ in enumerate(self.instance.disks):
4507 if idx not in self.op.disks: # disk idx has not been passed in
4511 _CreateDisks(self, self.instance, to_skip=to_skip)
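# Worked example for LURecreateInstanceDisks.Exec above (comment only, the
# numbers are hypothetical): for an instance with three disks and
# self.op.disks == [1], the loop collects the indices that were not requested,
# giving to_skip == [0, 2], so _CreateDisks recreates only disk 1.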
4514 class LURenameInstance(LogicalUnit):
4515 """Rename an instance.
4518 HPATH = "instance-rename"
4519 HTYPE = constants.HTYPE_INSTANCE
4520 _OP_REQP = ["instance_name", "new_name"]
4522 def BuildHooksEnv(self):
4525 This runs on master, primary and secondary nodes of the instance.
4528 env = _BuildInstanceHookEnvByObject(self, self.instance)
4529 env["INSTANCE_NEW_NAME"] = self.op.new_name
4530 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4533 def CheckPrereq(self):
4534 """Check prerequisites.
4536 This checks that the instance is in the cluster and is not running.
4539 self.op.instance_name = _ExpandInstanceName(self.cfg,
4540 self.op.instance_name)
4541 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4542 assert instance is not None
4543 _CheckNodeOnline(self, instance.primary_node)
4544 _CheckInstanceDown(self, instance, "cannot rename")
4545 self.instance = instance
4547 # new name verification
4548 name_info = utils.GetHostInfo(self.op.new_name)
4550 self.op.new_name = new_name = name_info.name
4551 instance_list = self.cfg.GetInstanceList()
4552 if new_name in instance_list:
4553 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4554 new_name, errors.ECODE_EXISTS)
4556 if not getattr(self.op, "ignore_ip", False):
4557 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4558 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4559 (name_info.ip, new_name),
4560 errors.ECODE_NOTUNIQUE)
4563 def Exec(self, feedback_fn):
4564 """Reinstall the instance.
4567 inst = self.instance
4568 old_name = inst.name
4570 if inst.disk_template == constants.DT_FILE:
4571 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4573 self.cfg.RenameInstance(inst.name, self.op.new_name)
4574 # Change the instance lock. This is definitely safe while we hold the BGL
4575 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4576 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4578 # re-read the instance from the configuration after rename
4579 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4581 if inst.disk_template == constants.DT_FILE:
4582 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4583 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4584 old_file_storage_dir,
4585 new_file_storage_dir)
4586 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4587 " (but the instance has been renamed in Ganeti)" %
4588 (inst.primary_node, old_file_storage_dir,
4589 new_file_storage_dir))
4591 _StartInstanceDisks(self, inst, None)
4593 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4594 old_name, self.op.debug_level)
4595 msg = result.fail_msg
4597 msg = ("Could not run OS rename script for instance %s on node %s"
4598 " (but the instance has been renamed in Ganeti): %s" %
4599 (inst.name, inst.primary_node, msg))
4600 self.proc.LogWarning(msg)
4602 _ShutdownInstanceDisks(self, inst)
4605 class LURemoveInstance(LogicalUnit):
4606 """Remove an instance.
4609 HPATH = "instance-remove"
4610 HTYPE = constants.HTYPE_INSTANCE
4611 _OP_REQP = ["instance_name", "ignore_failures"]
4614 def CheckArguments(self):
4615 """Check the arguments.
4618 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4619 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4621 def ExpandNames(self):
4622 self._ExpandAndLockInstance()
4623 self.needed_locks[locking.LEVEL_NODE] = []
4624 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4626 def DeclareLocks(self, level):
4627 if level == locking.LEVEL_NODE:
4628 self._LockInstancesNodes()
4630 def BuildHooksEnv(self):
4633 This runs on master, primary and secondary nodes of the instance.
4636 env = _BuildInstanceHookEnvByObject(self, self.instance)
4637 env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4638 nl = [self.cfg.GetMasterNode()]
4639 nl_post = list(self.instance.all_nodes) + nl
4640 return env, nl, nl_post
4642 def CheckPrereq(self):
4643 """Check prerequisites.
4645 This checks that the instance is in the cluster.
4648 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4649 assert self.instance is not None, \
4650 "Cannot retrieve locked instance %s" % self.op.instance_name
4652 def Exec(self, feedback_fn):
4653 """Remove the instance.
4656 instance = self.instance
4657 logging.info("Shutting down instance %s on node %s",
4658 instance.name, instance.primary_node)
4660 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4661 self.shutdown_timeout)
4662 msg = result.fail_msg
4664 if self.op.ignore_failures:
4665 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4667 raise errors.OpExecError("Could not shutdown instance %s on"
4669 (instance.name, instance.primary_node, msg))
4671 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4674 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4675 """Utility function to remove an instance.
4678 logging.info("Removing block devices for instance %s", instance.name)
4680 if not _RemoveDisks(lu, instance):
4681 if not ignore_failures:
4682 raise errors.OpExecError("Can't remove instance's disks")
4683 feedback_fn("Warning: can't remove instance's disks")
4685 logging.info("Removing instance %s out of cluster config", instance.name)
4687 lu.cfg.RemoveInstance(instance.name)
4689 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4690 "Instance lock removal conflict"
4692 # Remove lock for the instance
4693 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4696 class LUQueryInstances(NoHooksLU):
4697 """Logical unit for querying instances.
4700 # pylint: disable-msg=W0142
4701 _OP_REQP = ["output_fields", "names", "use_locking"]
4703 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4704 "serial_no", "ctime", "mtime", "uuid"]
4705 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4707 "disk_template", "ip", "mac", "bridge",
4708 "nic_mode", "nic_link",
4709 "sda_size", "sdb_size", "vcpus", "tags",
4710 "network_port", "beparams",
4711 r"(disk)\.(size)/([0-9]+)",
4712 r"(disk)\.(sizes)", "disk_usage",
4713 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4714 r"(nic)\.(bridge)/([0-9]+)",
4715 r"(nic)\.(macs|ips|modes|links|bridges)",
4716 r"(disk|nic)\.(count)",
4718 ] + _SIMPLE_FIELDS +
4720 for name in constants.HVS_PARAMETERS
4721 if name not in constants.HVC_GLOBALS] +
4723 for name in constants.BES_PARAMETERS])
4724 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4727 def ExpandNames(self):
4728 _CheckOutputFields(static=self._FIELDS_STATIC,
4729 dynamic=self._FIELDS_DYNAMIC,
4730 selected=self.op.output_fields)
4732 self.needed_locks = {}
4733 self.share_locks[locking.LEVEL_INSTANCE] = 1
4734 self.share_locks[locking.LEVEL_NODE] = 1
4737 self.wanted = _GetWantedInstances(self, self.op.names)
4739 self.wanted = locking.ALL_SET
4741 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4742 self.do_locking = self.do_node_query and self.op.use_locking
4744 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4745 self.needed_locks[locking.LEVEL_NODE] = []
4746 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4748 def DeclareLocks(self, level):
4749 if level == locking.LEVEL_NODE and self.do_locking:
4750 self._LockInstancesNodes()
4752 def CheckPrereq(self):
4753 """Check prerequisites.
4758 def Exec(self, feedback_fn):
4759 """Computes the list of nodes and their attributes.
4762 # pylint: disable-msg=R0912
4763 # way too many branches here
4764 all_info = self.cfg.GetAllInstancesInfo()
4765 if self.wanted == locking.ALL_SET:
4766 # caller didn't specify instance names, so ordering is not important
4768 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4770 instance_names = all_info.keys()
4771 instance_names = utils.NiceSort(instance_names)
4773 # caller did specify names, so we must keep the ordering
4775 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4777 tgt_set = all_info.keys()
4778 missing = set(self.wanted).difference(tgt_set)
4780 raise errors.OpExecError("Some instances were removed before"
4781 " retrieving their data: %s" % missing)
4782 instance_names = self.wanted
4784 instance_list = [all_info[iname] for iname in instance_names]
4786 # begin data gathering
4788 nodes = frozenset([inst.primary_node for inst in instance_list])
4789 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4793 if self.do_node_query:
4795 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4797 result = node_data[name]
4799 # offline nodes will be in both lists
4800 off_nodes.append(name)
4802 bad_nodes.append(name)
4805 live_data.update(result.payload)
4806 # else no instance is alive
4808 live_data = dict([(name, {}) for name in instance_names])
4810 # end data gathering
4815 cluster = self.cfg.GetClusterInfo()
4816 for instance in instance_list:
4818 i_hv = cluster.FillHV(instance, skip_globals=True)
4819 i_be = cluster.FillBE(instance)
4820 i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4821 nic.nicparams) for nic in instance.nics]
4822 for field in self.op.output_fields:
4823 st_match = self._FIELDS_STATIC.Matches(field)
4824 if field in self._SIMPLE_FIELDS:
4825 val = getattr(instance, field)
4826 elif field == "pnode":
4827 val = instance.primary_node
4828 elif field == "snodes":
4829 val = list(instance.secondary_nodes)
4830 elif field == "admin_state":
4831 val = instance.admin_up
4832 elif field == "oper_state":
4833 if instance.primary_node in bad_nodes:
4836 val = bool(live_data.get(instance.name))
4837 elif field == "status":
4838 if instance.primary_node in off_nodes:
4839 val = "ERROR_nodeoffline"
4840 elif instance.primary_node in bad_nodes:
4841 val = "ERROR_nodedown"
4843 running = bool(live_data.get(instance.name))
4845 if instance.admin_up:
4850 if instance.admin_up:
4854 elif field == "oper_ram":
4855 if instance.primary_node in bad_nodes:
4857 elif instance.name in live_data:
4858 val = live_data[instance.name].get("memory", "?")
4861 elif field == "vcpus":
4862 val = i_be[constants.BE_VCPUS]
4863 elif field == "disk_template":
4864 val = instance.disk_template
4867 val = instance.nics[0].ip
4870 elif field == "nic_mode":
4872 val = i_nicp[0][constants.NIC_MODE]
4875 elif field == "nic_link":
4877 val = i_nicp[0][constants.NIC_LINK]
4880 elif field == "bridge":
4881 if (instance.nics and
4882 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4883 val = i_nicp[0][constants.NIC_LINK]
4886 elif field == "mac":
4888 val = instance.nics[0].mac
4891 elif field == "sda_size" or field == "sdb_size":
4892 idx = ord(field[2]) - ord('a')
4894 val = instance.FindDisk(idx).size
4895 except errors.OpPrereqError:
4897 elif field == "disk_usage": # total disk usage per node
4898 disk_sizes = [{'size': disk.size} for disk in instance.disks]
4899 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4900 elif field == "tags":
4901 val = list(instance.GetTags())
4902 elif field == "hvparams":
4904 elif (field.startswith(HVPREFIX) and
4905 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4906 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4907 val = i_hv.get(field[len(HVPREFIX):], None)
4908 elif field == "beparams":
4910 elif (field.startswith(BEPREFIX) and
4911 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4912 val = i_be.get(field[len(BEPREFIX):], None)
4913 elif st_match and st_match.groups():
4914 # matches a variable list
4915 st_groups = st_match.groups()
4916 if st_groups and st_groups[0] == "disk":
4917 if st_groups[1] == "count":
4918 val = len(instance.disks)
4919 elif st_groups[1] == "sizes":
4920 val = [disk.size for disk in instance.disks]
4921 elif st_groups[1] == "size":
4923 val = instance.FindDisk(st_groups[2]).size
4924 except errors.OpPrereqError:
4927 assert False, "Unhandled disk parameter"
4928 elif st_groups[0] == "nic":
4929 if st_groups[1] == "count":
4930 val = len(instance.nics)
4931 elif st_groups[1] == "macs":
4932 val = [nic.mac for nic in instance.nics]
4933 elif st_groups[1] == "ips":
4934 val = [nic.ip for nic in instance.nics]
4935 elif st_groups[1] == "modes":
4936 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4937 elif st_groups[1] == "links":
4938 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4939 elif st_groups[1] == "bridges":
4942 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4943 val.append(nicp[constants.NIC_LINK])
4948 nic_idx = int(st_groups[2])
4949 if nic_idx >= len(instance.nics):
4952 if st_groups[1] == "mac":
4953 val = instance.nics[nic_idx].mac
4954 elif st_groups[1] == "ip":
4955 val = instance.nics[nic_idx].ip
4956 elif st_groups[1] == "mode":
4957 val = i_nicp[nic_idx][constants.NIC_MODE]
4958 elif st_groups[1] == "link":
4959 val = i_nicp[nic_idx][constants.NIC_LINK]
4960 elif st_groups[1] == "bridge":
4961 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4962 if nic_mode == constants.NIC_MODE_BRIDGED:
4963 val = i_nicp[nic_idx][constants.NIC_LINK]
4967 assert False, "Unhandled NIC parameter"
4969 assert False, ("Declared but unhandled variable parameter '%s'" %
4972 assert False, "Declared but unhandled parameter '%s'" % field
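# Worked example for the field matching in LUQueryInstances.Exec above
# (comment only, the field names are taken from _FIELDS_STATIC): the field
# "nic.mac/0" matches r"(nic)\.(mac|ip|mode|link)/([0-9]+)", so st_groups is
# ("nic", "mac", "0") and the value is instance.nics[0].mac, while a field
# such as "disk.sizes" yields the list of all disk sizes.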
4979 class LUFailoverInstance(LogicalUnit):
4980 """Failover an instance.
4983 HPATH = "instance-failover"
4984 HTYPE = constants.HTYPE_INSTANCE
4985 _OP_REQP = ["instance_name", "ignore_consistency"]
4988 def CheckArguments(self):
4989 """Check the arguments.
4992 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4993 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4995 def ExpandNames(self):
4996 self._ExpandAndLockInstance()
4997 self.needed_locks[locking.LEVEL_NODE] = []
4998 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5000 def DeclareLocks(self, level):
5001 if level == locking.LEVEL_NODE:
5002 self._LockInstancesNodes()
5004 def BuildHooksEnv(self):
5007 This runs on master, primary and secondary nodes of the instance.
5010 instance = self.instance
5011 source_node = instance.primary_node
5012 target_node = instance.secondary_nodes[0]
5014 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5015 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5016 "OLD_PRIMARY": source_node,
5017 "OLD_SECONDARY": target_node,
5018 "NEW_PRIMARY": target_node,
5019 "NEW_SECONDARY": source_node,
5021 env.update(_BuildInstanceHookEnvByObject(self, instance))
5022 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5024 nl_post.append(source_node)
5025 return env, nl, nl_post
5027 def CheckPrereq(self):
5028 """Check prerequisites.
5030 This checks that the instance is in the cluster.
5033 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5034 assert self.instance is not None, \
5035 "Cannot retrieve locked instance %s" % self.op.instance_name
5037 bep = self.cfg.GetClusterInfo().FillBE(instance)
5038 if instance.disk_template not in constants.DTS_NET_MIRROR:
5039 raise errors.OpPrereqError("Instance's disk layout is not"
5040 " network mirrored, cannot failover.",
5043 secondary_nodes = instance.secondary_nodes
5044 if not secondary_nodes:
5045 raise errors.ProgrammerError("no secondary node but using "
5046 "a mirrored disk template")
5048 target_node = secondary_nodes[0]
5049 _CheckNodeOnline(self, target_node)
5050 _CheckNodeNotDrained(self, target_node)
5051 if instance.admin_up:
5052 # check memory requirements on the secondary node
5053 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5054 instance.name, bep[constants.BE_MEMORY],
5055 instance.hypervisor)
5057 self.LogInfo("Not checking memory on the secondary node as"
5058 " instance will not be started")
5060     # check bridge existence
5061 _CheckInstanceBridgesExist(self, instance, node=target_node)
5063 def Exec(self, feedback_fn):
5064 """Failover an instance.
5066 The failover is done by shutting it down on its present node and
5067 starting it on the secondary.
5070 instance = self.instance
5072 source_node = instance.primary_node
5073 target_node = instance.secondary_nodes[0]
5075 if instance.admin_up:
5076 feedback_fn("* checking disk consistency between source and target")
5077 for dev in instance.disks:
5078 # for drbd, these are drbd over lvm
5079 if not _CheckDiskConsistency(self, dev, target_node, False):
5080 if not self.op.ignore_consistency:
5081 raise errors.OpExecError("Disk %s is degraded on target node,"
5082 " aborting failover." % dev.iv_name)
5084 feedback_fn("* not checking disk consistency as instance is not running")
5086 feedback_fn("* shutting down instance on source node")
5087 logging.info("Shutting down instance %s on node %s",
5088 instance.name, source_node)
5090 result = self.rpc.call_instance_shutdown(source_node, instance,
5091 self.shutdown_timeout)
5092 msg = result.fail_msg
5094 if self.op.ignore_consistency:
5095 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5096 " Proceeding anyway. Please make sure node"
5097 " %s is down. Error details: %s",
5098 instance.name, source_node, source_node, msg)
5100 raise errors.OpExecError("Could not shutdown instance %s on"
5102 (instance.name, source_node, msg))
5104 feedback_fn("* deactivating the instance's disks on source node")
5105 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5106 raise errors.OpExecError("Can't shut down the instance's disks.")
5108 instance.primary_node = target_node
5109 # distribute new instance config to the other nodes
5110 self.cfg.Update(instance, feedback_fn)
5112 # Only start the instance if it's marked as up
5113 if instance.admin_up:
5114 feedback_fn("* activating the instance's disks on target node")
5115 logging.info("Starting instance %s on node %s",
5116 instance.name, target_node)
5118 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5119 ignore_secondaries=True)
5121 _ShutdownInstanceDisks(self, instance)
5122 raise errors.OpExecError("Can't activate the instance's disks")
5124 feedback_fn("* starting the instance on the target node")
5125 result = self.rpc.call_instance_start(target_node, instance, None, None)
5126 msg = result.fail_msg
5128 _ShutdownInstanceDisks(self, instance)
5129 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5130 (instance.name, target_node, msg))
5133 class LUMigrateInstance(LogicalUnit):
5134 """Migrate an instance.
5136   This is migration without shutting the instance down, as opposed to
5137   failover, which requires a shutdown.
5140 HPATH = "instance-migrate"
5141 HTYPE = constants.HTYPE_INSTANCE
5142 _OP_REQP = ["instance_name", "live", "cleanup"]
5146 def ExpandNames(self):
5147 self._ExpandAndLockInstance()
5149 self.needed_locks[locking.LEVEL_NODE] = []
5150 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5152 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5153 self.op.live, self.op.cleanup)
5154 self.tasklets = [self._migrater]
5156 def DeclareLocks(self, level):
5157 if level == locking.LEVEL_NODE:
5158 self._LockInstancesNodes()
5160 def BuildHooksEnv(self):
5163 This runs on master, primary and secondary nodes of the instance.
5166 instance = self._migrater.instance
5167 source_node = instance.primary_node
5168 target_node = instance.secondary_nodes[0]
5169 env = _BuildInstanceHookEnvByObject(self, instance)
5170 env["MIGRATE_LIVE"] = self.op.live
5171 env["MIGRATE_CLEANUP"] = self.op.cleanup
5173 "OLD_PRIMARY": source_node,
5174 "OLD_SECONDARY": target_node,
5175 "NEW_PRIMARY": target_node,
5176 "NEW_SECONDARY": source_node,
5178 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5180 nl_post.append(source_node)
5181 return env, nl, nl_post
5184 class LUMoveInstance(LogicalUnit):
5185 """Move an instance by data-copying.
5188 HPATH = "instance-move"
5189 HTYPE = constants.HTYPE_INSTANCE
5190 _OP_REQP = ["instance_name", "target_node"]
5193 def CheckArguments(self):
5194 """Check the arguments.
5197 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5198 constants.DEFAULT_SHUTDOWN_TIMEOUT)
5200 def ExpandNames(self):
5201 self._ExpandAndLockInstance()
5202 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5203 self.op.target_node = target_node
5204 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5205 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5207 def DeclareLocks(self, level):
5208 if level == locking.LEVEL_NODE:
5209 self._LockInstancesNodes(primary_only=True)
5211 def BuildHooksEnv(self):
5214 This runs on master, primary and secondary nodes of the instance.
5218 "TARGET_NODE": self.op.target_node,
5219 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5221 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5222 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5223 self.op.target_node]
5226 def CheckPrereq(self):
5227 """Check prerequisites.
5229 This checks that the instance is in the cluster.
5232 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5233 assert self.instance is not None, \
5234 "Cannot retrieve locked instance %s" % self.op.instance_name
5236 node = self.cfg.GetNodeInfo(self.op.target_node)
5237 assert node is not None, \
5238 "Cannot retrieve locked node %s" % self.op.target_node
5240 self.target_node = target_node = node.name
5242 if target_node == instance.primary_node:
5243 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5244 (instance.name, target_node),
5247 bep = self.cfg.GetClusterInfo().FillBE(instance)
5249 for idx, dsk in enumerate(instance.disks):
5250 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5251 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5252 " cannot copy" % idx, errors.ECODE_STATE)
5254 _CheckNodeOnline(self, target_node)
5255 _CheckNodeNotDrained(self, target_node)
5257 if instance.admin_up:
5258 # check memory requirements on the secondary node
5259 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5260 instance.name, bep[constants.BE_MEMORY],
5261 instance.hypervisor)
5263 self.LogInfo("Not checking memory on the secondary node as"
5264 " instance will not be started")
5266     # check bridge existence
5267 _CheckInstanceBridgesExist(self, instance, node=target_node)
5269 def Exec(self, feedback_fn):
5270 """Move an instance.
5272 The move is done by shutting it down on its present node, copying
5273 the data over (slow) and starting it on the new node.
5276 instance = self.instance
5278 source_node = instance.primary_node
5279 target_node = self.target_node
5281 self.LogInfo("Shutting down instance %s on source node %s",
5282 instance.name, source_node)
5284 result = self.rpc.call_instance_shutdown(source_node, instance,
5285 self.shutdown_timeout)
5286 msg = result.fail_msg
5288 if self.op.ignore_consistency:
5289 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5290 " Proceeding anyway. Please make sure node"
5291 " %s is down. Error details: %s",
5292 instance.name, source_node, source_node, msg)
5294 raise errors.OpExecError("Could not shutdown instance %s on"
5296 (instance.name, source_node, msg))
5298 # create the target disks
5300 _CreateDisks(self, instance, target_node=target_node)
5301 except errors.OpExecError:
5302 self.LogWarning("Device creation failed, reverting...")
5304 _RemoveDisks(self, instance, target_node=target_node)
5306 self.cfg.ReleaseDRBDMinors(instance.name)
5309 cluster_name = self.cfg.GetClusterInfo().cluster_name
5312 # activate, get path, copy the data over
5313 for idx, disk in enumerate(instance.disks):
5314 self.LogInfo("Copying data for disk %d", idx)
5315 result = self.rpc.call_blockdev_assemble(target_node, disk,
5316 instance.name, True)
5318 self.LogWarning("Can't assemble newly created disk %d: %s",
5319 idx, result.fail_msg)
5320 errs.append(result.fail_msg)
5322 dev_path = result.payload
5323 result = self.rpc.call_blockdev_export(source_node, disk,
5324 target_node, dev_path,
5327 self.LogWarning("Can't copy data over for disk %d: %s",
5328 idx, result.fail_msg)
5329 errs.append(result.fail_msg)
5333 self.LogWarning("Some disks failed to copy, aborting")
5335 _RemoveDisks(self, instance, target_node=target_node)
5337 self.cfg.ReleaseDRBDMinors(instance.name)
5338 raise errors.OpExecError("Errors during disk copy: %s" %
5341 instance.primary_node = target_node
5342 self.cfg.Update(instance, feedback_fn)
5344 self.LogInfo("Removing the disks on the original node")
5345 _RemoveDisks(self, instance, target_node=source_node)
5347 # Only start the instance if it's marked as up
5348 if instance.admin_up:
5349 self.LogInfo("Starting instance %s on node %s",
5350 instance.name, target_node)
5352 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5353 ignore_secondaries=True)
5355 _ShutdownInstanceDisks(self, instance)
5356 raise errors.OpExecError("Can't activate the instance's disks")
5358 result = self.rpc.call_instance_start(target_node, instance, None, None)
5359 msg = result.fail_msg
5361 _ShutdownInstanceDisks(self, instance)
5362 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5363 (instance.name, target_node, msg))
5366 class LUMigrateNode(LogicalUnit):
5367 """Migrate all instances from a node.
5370 HPATH = "node-migrate"
5371 HTYPE = constants.HTYPE_NODE
5372 _OP_REQP = ["node_name", "live"]
5375 def ExpandNames(self):
5376 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5378 self.needed_locks = {
5379 locking.LEVEL_NODE: [self.op.node_name],
5382 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5384 # Create tasklets for migrating instances for all instances on this node
5388 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5389 logging.debug("Migrating instance %s", inst.name)
5390 names.append(inst.name)
5392 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5394 self.tasklets = tasklets
5396 # Declare instance locks
5397 self.needed_locks[locking.LEVEL_INSTANCE] = names
5399 def DeclareLocks(self, level):
5400 if level == locking.LEVEL_NODE:
5401 self._LockInstancesNodes()
5403 def BuildHooksEnv(self):
5406 This runs on the master, the primary and all the secondaries.
5410 "NODE_NAME": self.op.node_name,
5413 nl = [self.cfg.GetMasterNode()]
5415 return (env, nl, nl)
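# Note on LUMigrateNode above (comment only, the instance names are
# hypothetical): evacuating a node's primary instances is expressed as one
# TLMigrateInstance tasklet per instance, e.g. a node with primaries "inst1"
# and "inst2" yields
#   self.tasklets == [TLMigrateInstance(self, "inst1", self.op.live, False),
#                     TLMigrateInstance(self, "inst2", self.op.live, False)]
# and the processor then runs CheckPrereq and Exec on each tasklet in turn.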
5418 class TLMigrateInstance(Tasklet):
5419 def __init__(self, lu, instance_name, live, cleanup):
5420 """Initializes this class.
5423 Tasklet.__init__(self, lu)
5426 self.instance_name = instance_name
5428 self.cleanup = cleanup
5430 def CheckPrereq(self):
5431 """Check prerequisites.
5433 This checks that the instance is in the cluster.
5436 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5437 instance = self.cfg.GetInstanceInfo(instance_name)
5438 assert instance is not None
5440 if instance.disk_template != constants.DT_DRBD8:
5441 raise errors.OpPrereqError("Instance's disk layout is not"
5442 " drbd8, cannot migrate.", errors.ECODE_STATE)
5444 secondary_nodes = instance.secondary_nodes
5445 if not secondary_nodes:
5446 raise errors.ConfigurationError("No secondary node but using"
5447 " drbd8 disk template")
5449 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5451 target_node = secondary_nodes[0]
5452 # check memory requirements on the secondary node
5453 _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5454 instance.name, i_be[constants.BE_MEMORY],
5455 instance.hypervisor)
5457     # check bridge existence
5458 _CheckInstanceBridgesExist(self, instance, node=target_node)
5460 if not self.cleanup:
5461 _CheckNodeNotDrained(self, target_node)
5462 result = self.rpc.call_instance_migratable(instance.primary_node,
5464 result.Raise("Can't migrate, please use failover",
5465 prereq=True, ecode=errors.ECODE_STATE)
5467 self.instance = instance
5469 def _WaitUntilSync(self):
5470 """Poll with custom rpc for disk sync.
5472 This uses our own step-based rpc call.
5475 self.feedback_fn("* wait until resync is done")
5479 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5481 self.instance.disks)
5483 for node, nres in result.items():
5484 nres.Raise("Cannot resync disks on node %s" % node)
5485 node_done, node_percent = nres.payload
5486 all_done = all_done and node_done
5487 if node_percent is not None:
5488 min_percent = min(min_percent, node_percent)
5490 if min_percent < 100:
5491 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5494 def _EnsureSecondary(self, node):
5495 """Demote a node to secondary.
5498 self.feedback_fn("* switching node %s to secondary mode" % node)
5500 for dev in self.instance.disks:
5501 self.cfg.SetDiskID(dev, node)
5503 result = self.rpc.call_blockdev_close(node, self.instance.name,
5504 self.instance.disks)
5505 result.Raise("Cannot change disk to secondary on node %s" % node)
5507 def _GoStandalone(self):
5508 """Disconnect from the network.
5511 self.feedback_fn("* changing into standalone mode")
5512 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5513 self.instance.disks)
5514 for node, nres in result.items():
5515 nres.Raise("Cannot disconnect disks node %s" % node)
5517 def _GoReconnect(self, multimaster):
5518 """Reconnect to the network.
5524 msg = "single-master"
5525 self.feedback_fn("* changing disks into %s mode" % msg)
5526 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5527 self.instance.disks,
5528 self.instance.name, multimaster)
5529 for node, nres in result.items():
5530 nres.Raise("Cannot change disks config on node %s" % node)
5532 def _ExecCleanup(self):
5533 """Try to cleanup after a failed migration.
5535 The cleanup is done by:
5536 - check that the instance is running only on one node
5537 (and update the config if needed)
5538 - change disks on its secondary node to secondary
5539 - wait until disks are fully synchronized
5540 - disconnect from the network
5541 - change disks into single-master mode
5542 - wait again until disks are fully synchronized
5545 instance = self.instance
5546 target_node = self.target_node
5547 source_node = self.source_node
5549 # check running on only one node
5550 self.feedback_fn("* checking where the instance actually runs"
5551 " (if this hangs, the hypervisor might be in"
5553 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5554 for node, result in ins_l.items():
5555 result.Raise("Can't contact node %s" % node)
5557 runningon_source = instance.name in ins_l[source_node].payload
5558 runningon_target = instance.name in ins_l[target_node].payload
5560 if runningon_source and runningon_target:
5561 raise errors.OpExecError("Instance seems to be running on two nodes,"
5562 " or the hypervisor is confused. You will have"
5563 " to ensure manually that it runs only on one"
5564 " and restart this operation.")
5566 if not (runningon_source or runningon_target):
5567 raise errors.OpExecError("Instance does not seem to be running at all."
5568 " In this case, it's safer to repair by"
5569 " running 'gnt-instance stop' to ensure disk"
5570 " shutdown, and then restarting it.")
5572 if runningon_target:
5573 # the migration has actually succeeded, we need to update the config
5574 self.feedback_fn("* instance running on secondary node (%s),"
5575 " updating config" % target_node)
5576 instance.primary_node = target_node
5577 self.cfg.Update(instance, self.feedback_fn)
5578 demoted_node = source_node
5580 self.feedback_fn("* instance confirmed to be running on its"
5581 " primary node (%s)" % source_node)
5582 demoted_node = target_node
5584 self._EnsureSecondary(demoted_node)
5586 self._WaitUntilSync()
5587 except errors.OpExecError:
5588       # we ignore errors here, since if the device is standalone, it
5589 # won't be able to sync
5591 self._GoStandalone()
5592 self._GoReconnect(False)
5593 self._WaitUntilSync()
5595 self.feedback_fn("* done")
5597 def _RevertDiskStatus(self):
5598 """Try to revert the disk status after a failed migration.
5601 target_node = self.target_node
5603 self._EnsureSecondary(target_node)
5604 self._GoStandalone()
5605 self._GoReconnect(False)
5606 self._WaitUntilSync()
5607 except errors.OpExecError, err:
5608 self.lu.LogWarning("Migration failed and I can't reconnect the"
5609 " drives: error '%s'\n"
5610 "Please look and recover the instance status" %
5613 def _AbortMigration(self):
5614 """Call the hypervisor code to abort a started migration.
5617 instance = self.instance
5618 target_node = self.target_node
5619 migration_info = self.migration_info
5621 abort_result = self.rpc.call_finalize_migration(target_node,
5625 abort_msg = abort_result.fail_msg
5627 logging.error("Aborting migration failed on target node %s: %s",
5628 target_node, abort_msg)
5629       # Don't raise an exception here, as we still have to try to revert the
5630 # disk status, even if this step failed.
5632 def _ExecMigration(self):
5633 """Migrate an instance.
5635 The migrate is done by:
5636 - change the disks into dual-master mode
5637 - wait until disks are fully synchronized again
5638 - migrate the instance
5639 - change disks on the new secondary node (the old primary) to secondary
5640 - wait until disks are fully synchronized
5641 - change disks into single-master mode
5644 instance = self.instance
5645 target_node = self.target_node
5646 source_node = self.source_node
5648 self.feedback_fn("* checking disk consistency between source and target")
5649 for dev in instance.disks:
5650 if not _CheckDiskConsistency(self, dev, target_node, False):
5651 raise errors.OpExecError("Disk %s is degraded or not fully"
5652 " synchronized on target node,"
5653 " aborting migrate." % dev.iv_name)
5655 # First get the migration information from the remote node
5656 result = self.rpc.call_migration_info(source_node, instance)
5657 msg = result.fail_msg
5659 log_err = ("Failed fetching source migration information from %s: %s" %
5661 logging.error(log_err)
5662 raise errors.OpExecError(log_err)
5664 self.migration_info = migration_info = result.payload
5666 # Then switch the disks to master/master mode
5667 self._EnsureSecondary(target_node)
5668 self._GoStandalone()
5669 self._GoReconnect(True)
5670 self._WaitUntilSync()
5672 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5673 result = self.rpc.call_accept_instance(target_node,
5676 self.nodes_ip[target_node])
5678 msg = result.fail_msg
5680 logging.error("Instance pre-migration failed, trying to revert"
5681 " disk status: %s", msg)
5682 self.feedback_fn("Pre-migration failed, aborting")
5683 self._AbortMigration()
5684 self._RevertDiskStatus()
5685 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5686 (instance.name, msg))
5688 self.feedback_fn("* migrating instance to %s" % target_node)
5690 result = self.rpc.call_instance_migrate(source_node, instance,
5691 self.nodes_ip[target_node],
5693 msg = result.fail_msg
5695 logging.error("Instance migration failed, trying to revert"
5696 " disk status: %s", msg)
5697 self.feedback_fn("Migration failed, aborting")
5698 self._AbortMigration()
5699 self._RevertDiskStatus()
5700 raise errors.OpExecError("Could not migrate instance %s: %s" %
5701 (instance.name, msg))
5704 instance.primary_node = target_node
5705 # distribute new instance config to the other nodes
5706 self.cfg.Update(instance, self.feedback_fn)
5708 result = self.rpc.call_finalize_migration(target_node,
5712 msg = result.fail_msg
5714 logging.error("Instance migration succeeded, but finalization failed:"
5716 raise errors.OpExecError("Could not finalize instance migration: %s" %
5719 self._EnsureSecondary(source_node)
5720 self._WaitUntilSync()
5721 self._GoStandalone()
5722 self._GoReconnect(False)
5723 self._WaitUntilSync()
5725 self.feedback_fn("* done")
5727 def Exec(self, feedback_fn):
5728 """Perform the migration.
5731 feedback_fn("Migrating instance %s" % self.instance.name)
5733 self.feedback_fn = feedback_fn
5735 self.source_node = self.instance.primary_node
5736 self.target_node = self.instance.secondary_nodes[0]
5737 self.all_nodes = [self.source_node, self.target_node]
5739 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5740 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5744 return self._ExecCleanup()
5746 return self._ExecMigration()
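# Summary of the DRBD reconfiguration sequence used by
# TLMigrateInstance._ExecMigration above (comment only; error handling and
# the hypervisor migration calls are omitted):
#
#   self._EnsureSecondary(target_node)  # close the devices on the target
#   self._GoStandalone()                # disconnect both sides from the net
#   self._GoReconnect(True)             # reconnect in dual-master mode
#   self._WaitUntilSync()               # wait for the resync to finish
#   ...                                 # the live migration runs here
#   self._EnsureSecondary(source_node)  # demote the old primary
#   self._WaitUntilSync()
#   self._GoStandalone()
#   self._GoReconnect(False)            # back to single-master mode
#   self._WaitUntilSync()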
5749 def _CreateBlockDev(lu, node, instance, device, force_create,
5751 """Create a tree of block devices on a given node.
5753 If this device type has to be created on secondaries, create it and
5756 If not, just recurse to children keeping the same 'force' value.
5758 @param lu: the lu on whose behalf we execute
5759 @param node: the node on which to create the device
5760 @type instance: L{objects.Instance}
5761 @param instance: the instance which owns the device
5762 @type device: L{objects.Disk}
5763 @param device: the device to create
5764 @type force_create: boolean
5765 @param force_create: whether to force creation of this device; this
5766       will be changed to True whenever we find a device which has
5767 CreateOnSecondary() attribute
5768 @param info: the extra 'metadata' we should attach to the device
5769 (this will be represented as a LVM tag)
5770 @type force_open: boolean
5771   @param force_open: this parameter will be passed to the
5772 L{backend.BlockdevCreate} function where it specifies
5773 whether we run on primary or not, and it affects both
5774       the child assembly and the device's own Open() execution
5777 if device.CreateOnSecondary():
5781 for child in device.children:
5782 _CreateBlockDev(lu, node, instance, child, force_create,
5785 if not force_create:
5788 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
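# Note on _CreateBlockDev above (comment only): force_create typically starts
# out False when creating disks on a secondary node and flips to True as soon
# as a device in the tree reports CreateOnSecondary(); from that point on the
# whole child subtree (e.g. the data and metadata LVs under a DRBD8 device)
# is created as well, while devices above that point are only recursed into.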
5791 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5792 """Create a single block device on a given node.
5794 This will not recurse over children of the device, so they must be created in advance.
5797 @param lu: the lu on whose behalf we execute
5798 @param node: the node on which to create the device
5799 @type instance: L{objects.Instance}
5800 @param instance: the instance which owns the device
5801 @type device: L{objects.Disk}
5802 @param device: the device to create
5803 @param info: the extra 'metadata' we should attach to the device
5804 (this will be represented as a LVM tag)
5805 @type force_open: boolean
5806 @param force_open: this parameter will be passed to the
5807 L{backend.BlockdevCreate} function where it specifies
5808 whether we run on primary or not, and it affects both
5809 the child assembly and the device's own Open() execution
5812 lu.cfg.SetDiskID(device, node)
5813 result = lu.rpc.call_blockdev_create(node, device, device.size,
5814 instance.name, force_open, info)
5815 result.Raise("Can't create block device %s on"
5816 " node %s for instance %s" % (device, node, instance.name))
5817 if device.physical_id is None:
5818 device.physical_id = result.payload
5821 def _GenerateUniqueNames(lu, exts):
5822 """Generate a suitable LV name.
5824 This will generate a logical volume name for the given instance.
5829 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5830 results.append("%s%s" % (new_id, val))
5834 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5836 """Generate a drbd8 device complete with its children.
5839 port = lu.cfg.AllocatePort()
5840 vgname = lu.cfg.GetVGName()
5841 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5842 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5843 logical_id=(vgname, names[0]))
5844 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5845 logical_id=(vgname, names[1]))
5846 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5847 logical_id=(primary, secondary, port,
5850 children=[dev_data, dev_meta],
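# Illustrative sketch of the disk tree built by _GenerateDRBD8Branch above,
# written out as plain dictionaries.  Sizes are in MiB; "port", "minor_p",
# "minor_s" and "secret" stand for the values the real code obtains from the
# cluster configuration, and "xenvg" is only an example volume group name.
def _sketch_drbd8_branch(primary, secondary, size, names, port,
                         minor_p, minor_s, secret, vgname="xenvg"):
  data_lv = {"dev_type": "lvm", "size": size,
             "logical_id": (vgname, names[0])}
  meta_lv = {"dev_type": "lvm", "size": 128,          # DRBD metadata LV
             "logical_id": (vgname, names[1])}
  return {"dev_type": "drbd8", "size": size,
          "logical_id": (primary, secondary, port, minor_p, minor_s, secret),
          "children": [data_lv, meta_lv]}

branch = _sketch_drbd8_branch("node1", "node2", 1024,
                              ("uuid.disk0_data", "uuid.disk0_meta"),
                              11000, 0, 0, "secret")
assert branch["children"][1]["size"] == 128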
5855 def _GenerateDiskTemplate(lu, template_name,
5856 instance_name, primary_node,
5857 secondary_nodes, disk_info,
5858 file_storage_dir, file_driver,
5860 """Generate the entire disk layout for a given template type.
5863 #TODO: compute space requirements
5865 vgname = lu.cfg.GetVGName()
5866 disk_count = len(disk_info)
5868 if template_name == constants.DT_DISKLESS:
5870 elif template_name == constants.DT_PLAIN:
5871 if len(secondary_nodes) != 0:
5872 raise errors.ProgrammerError("Wrong template configuration")
5874 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5875 for i in range(disk_count)])
5876 for idx, disk in enumerate(disk_info):
5877 disk_index = idx + base_index
5878 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5879 logical_id=(vgname, names[idx]),
5880 iv_name="disk/%d" % disk_index,
5882 disks.append(disk_dev)
5883 elif template_name == constants.DT_DRBD8:
5884 if len(secondary_nodes) != 1:
5885 raise errors.ProgrammerError("Wrong template configuration")
5886 remote_node = secondary_nodes[0]
5887 minors = lu.cfg.AllocateDRBDMinor(
5888 [primary_node, remote_node] * len(disk_info), instance_name)
5891 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5892 for i in range(disk_count)]):
5893 names.append(lv_prefix + "_data")
5894 names.append(lv_prefix + "_meta")
5895 for idx, disk in enumerate(disk_info):
5896 disk_index = idx + base_index
5897 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5898 disk["size"], names[idx*2:idx*2+2],
5899 "disk/%d" % disk_index,
5900 minors[idx*2], minors[idx*2+1])
5901 disk_dev.mode = disk["mode"]
5902 disks.append(disk_dev)
5903 elif template_name == constants.DT_FILE:
5904 if len(secondary_nodes) != 0:
5905 raise errors.ProgrammerError("Wrong template configuration")
5907 _RequireFileStorage()
5909 for idx, disk in enumerate(disk_info):
5910 disk_index = idx + base_index
5911 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5912 iv_name="disk/%d" % disk_index,
5913 logical_id=(file_driver,
5914 "%s/disk%d" % (file_storage_dir,
5917 disks.append(disk_dev)
5919 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
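# Worked example (sketch) of the index arithmetic used above for DT_DRBD8,
# assuming base_index is 0: minors are allocated for the node pattern
# [primary, remote] * len(disks), so disk "idx" uses minors[idx*2] on the
# primary and minors[idx*2+1] on the remote node, plus a "_data"/"_meta" LV
# name pair per disk.
def _sketch_drbd8_pairing(disk_count):
  node_pattern = ["primary", "remote"] * disk_count   # AllocateDRBDMinor input
  pairing = []
  for idx in range(disk_count):
    pairing.append({"iv_name": "disk/%d" % idx,
                    "minor_slots": (idx * 2, idx * 2 + 1),
                    "lv_suffixes": (".disk%d_data" % idx,
                                    ".disk%d_meta" % idx)})
  return node_pattern, pairing

nodes, pairs = _sketch_drbd8_pairing(2)
assert nodes == ["primary", "remote", "primary", "remote"]
assert pairs[1]["minor_slots"] == (2, 3)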
5923 def _GetInstanceInfoText(instance):
5924 """Compute that text that should be added to the disk's metadata.
5927 return "originstname+%s" % instance.name
5930 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5931 """Create all disks for an instance.
5933 This abstracts away some work from AddInstance.
5935 @type lu: L{LogicalUnit}
5936 @param lu: the logical unit on whose behalf we execute
5937 @type instance: L{objects.Instance}
5938 @param instance: the instance whose disks we should create
5940 @param to_skip: list of indices to skip
5941 @type target_node: string
5942 @param target_node: if passed, overrides the target node for creation
5944 @return: the success of the creation
5947 info = _GetInstanceInfoText(instance)
5948 if target_node is None:
5949 pnode = instance.primary_node
5950 all_nodes = instance.all_nodes
5955 if instance.disk_template == constants.DT_FILE:
5956 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5957 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5959 result.Raise("Failed to create directory '%s' on"
5960 " node %s" % (file_storage_dir, pnode))
5962 # Note: this needs to be kept in sync with adding of disks in
5963 # LUSetInstanceParams
5964 for idx, device in enumerate(instance.disks):
5965 if to_skip and idx in to_skip:
5967 logging.info("Creating volume %s for instance %s",
5968 device.iv_name, instance.name)
5970 for node in all_nodes:
5971 f_create = node == pnode
5972 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5975 def _RemoveDisks(lu, instance, target_node=None):
5976 """Remove all disks for an instance.
5978 This abstracts away some work from `AddInstance()` and
5979 `RemoveInstance()`. Note that in case some of the devices couldn't
5980 be removed, the removal will continue with the other ones (compare
5981 with `_CreateDisks()`).
5983 @type lu: L{LogicalUnit}
5984 @param lu: the logical unit on whose behalf we execute
5985 @type instance: L{objects.Instance}
5986 @param instance: the instance whose disks we should remove
5987 @type target_node: string
5988 @param target_node: used to override the node on which to remove the disks
5990 @return: the success of the removal
5993 logging.info("Removing block devices for instance %s", instance.name)
5996 for device in instance.disks:
5998 edata = [(target_node, device)]
6000 edata = device.ComputeNodeTree(instance.primary_node)
6001 for node, disk in edata:
6002 lu.cfg.SetDiskID(disk, node)
6003 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6005 lu.LogWarning("Could not remove block device %s on node %s,"
6006 " continuing anyway: %s", device.iv_name, node, msg)
6009 if instance.disk_template == constants.DT_FILE:
6010 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6014 tgt = instance.primary_node
6015 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6017 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6018 file_storage_dir, instance.primary_node, result.fail_msg)
6024 def _ComputeDiskSize(disk_template, disks):
6025 """Compute disk size requirements in the volume group
6028 # Required free disk space as a function of disk and swap space
6030 constants.DT_DISKLESS: None,
6031 constants.DT_PLAIN: sum(d["size"] for d in disks),
6032 # 128 MB are added for drbd metadata for each disk
6033 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6034 constants.DT_FILE: None,
6037 if disk_template not in req_size_dict:
6038 raise errors.ProgrammerError("Disk template '%s' size requirement"
6039 " is unknown" % disk_template)
6041 return req_size_dict[disk_template]
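# Worked example for the size computation above: a two-disk instance with
# 1024 MiB and 2048 MiB disks needs 3072 MiB in the volume group with the
# 'plain' template, and 3072 + 2 * 128 = 3328 MiB with 'drbd8' (one 128 MiB
# metadata volume per disk); diskless and file-based instances need no VG
# space at all.  This mirrors _ComputeDiskSize without using the constants
# module, so the template strings here are only stand-ins.
def _sketch_required_vg_space(template, disks):
  if template in ("diskless", "file"):
    return None
  if template == "plain":
    return sum(d["size"] for d in disks)
  if template == "drbd8":
    return sum(d["size"] + 128 for d in disks)
  raise ValueError("unknown template %r" % template)

assert _sketch_required_vg_space("plain",
                                 [{"size": 1024}, {"size": 2048}]) == 3072
assert _sketch_required_vg_space("drbd8",
                                 [{"size": 1024}, {"size": 2048}]) == 3328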
6044 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6045 """Hypervisor parameter validation.
6047 This function abstracts the hypervisor parameter validation to be
6048 used in both instance create and instance modify.
6050 @type lu: L{LogicalUnit}
6051 @param lu: the logical unit for which we check
6052 @type nodenames: list
6053 @param nodenames: the list of nodes on which we should check
6054 @type hvname: string
6055 @param hvname: the name of the hypervisor we should use
6056 @type hvparams: dict
6057 @param hvparams: the parameters which we need to check
6058 @raise errors.OpPrereqError: if the parameters are not valid
6061 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6064 for node in nodenames:
6068 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6071 class LUCreateInstance(LogicalUnit):
6072 """Create an instance.
6075 HPATH = "instance-add"
6076 HTYPE = constants.HTYPE_INSTANCE
6077 _OP_REQP = ["instance_name", "disks",
6079 "wait_for_sync", "ip_check", "nics",
6080 "hvparams", "beparams"]
6083 def CheckArguments(self):
6087 # set optional parameters to none if they don't exist
6088 for attr in ["pnode", "snode", "iallocator", "hypervisor",
6089 "disk_template", "identify_defaults"]:
6090 if not hasattr(self.op, attr):
6091 setattr(self.op, attr, None)
6093 # do not require name_check to ease forward/backward compatibility
6095 if not hasattr(self.op, "name_check"):
6096 self.op.name_check = True
6097 if not hasattr(self.op, "no_install"):
6098 self.op.no_install = False
6099 if self.op.no_install and self.op.start:
6100 self.LogInfo("No-installation mode selected, disabling startup")
6101 self.op.start = False
6102 # validate/normalize the instance name
6103 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6104 if self.op.ip_check and not self.op.name_check:
6105 # TODO: make the ip check more flexible and not depend on the name check
6106 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6108 # check disk information: either all adopt, or no adopt
6109 has_adopt = has_no_adopt = False
6110 for disk in self.op.disks:
6115 if has_adopt and has_no_adopt:
6116 raise errors.OpPrereqError("Either all disks are adopted or none is",
6119 if self.op.disk_template != constants.DT_PLAIN:
6120 raise errors.OpPrereqError("Disk adoption is only supported for the"
6121 " 'plain' disk template",
6123 if self.op.iallocator is not None:
6124 raise errors.OpPrereqError("Disk adoption not allowed with an"
6125 " iallocator script", errors.ECODE_INVAL)
6126 if self.op.mode == constants.INSTANCE_IMPORT:
6127 raise errors.OpPrereqError("Disk adoption not allowed for"
6128 " instance import", errors.ECODE_INVAL)
6130 self.adopt_disks = has_adopt
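# Sketch of the all-or-none adoption rule enforced above: every disk dict
# either carries an "adopt" key (the name of an existing LV to take over)
# or none of them does.  Mixing the two is rejected, as is adoption with a
# non-'plain' template, an iallocator, or an import.
def _sketch_check_adoption(disks):
  has_adopt = any("adopt" in d for d in disks)
  has_no_adopt = any("adopt" not in d for d in disks)
  if has_adopt and has_no_adopt:
    raise ValueError("Either all disks are adopted or none is")
  return has_adopt

assert _sketch_check_adoption([{"size": 1024, "adopt": "lv0"},
                               {"size": 2048, "adopt": "lv1"}])
assert not _sketch_check_adoption([{"size": 1024}])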
6132 # verify creation mode
6133 if self.op.mode not in constants.INSTANCE_CREATE_MODES:
6134 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6135 self.op.mode, errors.ECODE_INVAL)
6137 # instance name verification
6138 if self.op.name_check:
6139 self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6140 self.op.instance_name = self.hostname1.name
6141 # used in CheckPrereq for ip ping check
6142 self.check_ip = self.hostname1.ip
6143 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6144 raise errors.OpPrereqError("Remote imports require names to be checked" %
6147 self.check_ip = None
6149 # file storage checks
6150 if (self.op.file_driver and
6151 not self.op.file_driver in constants.FILE_DRIVER):
6152 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6153 self.op.file_driver, errors.ECODE_INVAL)
6155 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6156 raise errors.OpPrereqError("File storage directory path not absolute",
6159 ### Node/iallocator related checks
6160 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6161 raise errors.OpPrereqError("One and only one of iallocator and primary"
6162 " node must be given",
6165 self._cds = _GetClusterDomainSecret()
6167 if self.op.mode == constants.INSTANCE_IMPORT:
6168 # On import force_variant must be True, because if we forced it at
6169 # initial install, our only chance when importing it back is that it
6171 self.op.force_variant = True
6173 if self.op.no_install:
6174 self.LogInfo("No-installation mode has no effect during import")
6176 elif self.op.mode == constants.INSTANCE_CREATE:
6177 if getattr(self.op, "os_type", None) is None:
6178 raise errors.OpPrereqError("No guest OS specified",
6180 self.op.force_variant = getattr(self.op, "force_variant", False)
6181 if self.op.disk_template is None:
6182 raise errors.OpPrereqError("No disk template specified",
6185 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6186 # Check handshake to ensure both clusters have the same domain secret
6187 src_handshake = getattr(self.op, "source_handshake", None)
6188 if not src_handshake:
6189 raise errors.OpPrereqError("Missing source handshake",
6192 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6195 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6198 # Load and check source CA
6199 self.source_x509_ca_pem = getattr(self.op, "source_x509_ca", None)
6200 if not self.source_x509_ca_pem:
6201 raise errors.OpPrereqError("Missing source X509 CA",
6205 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6207 except OpenSSL.crypto.Error, err:
6208 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6209 (err, ), errors.ECODE_INVAL)
6211 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6212 if errcode is not None:
6213 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6216 self.source_x509_ca = cert
6218 src_instance_name = getattr(self.op, "source_instance_name", None)
6219 if not src_instance_name:
6220 raise errors.OpPrereqError("Missing source instance name",
6223 self.source_instance_name = \
6224 utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name
6227 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6228 self.op.mode, errors.ECODE_INVAL)
6230 def ExpandNames(self):
6231 """ExpandNames for CreateInstance.
6233 Figure out the right locks for instance creation.
6236 self.needed_locks = {}
6238 instance_name = self.op.instance_name
6239 # this is just a preventive check, but someone might still add this
6240 # instance in the meantime, and creation will fail at lock-add time
6241 if instance_name in self.cfg.GetInstanceList():
6242 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6243 instance_name, errors.ECODE_EXISTS)
6245 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6247 if self.op.iallocator:
6248 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6250 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6251 nodelist = [self.op.pnode]
6252 if self.op.snode is not None:
6253 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6254 nodelist.append(self.op.snode)
6255 self.needed_locks[locking.LEVEL_NODE] = nodelist
6257 # in case of import lock the source node too
6258 if self.op.mode == constants.INSTANCE_IMPORT:
6259 src_node = getattr(self.op, "src_node", None)
6260 src_path = getattr(self.op, "src_path", None)
6262 if src_path is None:
6263 self.op.src_path = src_path = self.op.instance_name
6265 if src_node is None:
6266 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6267 self.op.src_node = None
6268 if os.path.isabs(src_path):
6269 raise errors.OpPrereqError("Importing an instance from an absolute"
6270 " path requires a source node option.",
6273 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6274 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6275 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6276 if not os.path.isabs(src_path):
6277 self.op.src_path = src_path = \
6278 utils.PathJoin(constants.EXPORT_DIR, src_path)
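# Sketch of the node-lock sets computed by ExpandNames above, using plain
# strings in place of the locking.LEVEL_NODE / locking.ALL_SET constants.
# With an iallocator (or an import without an explicit source node) the LU
# must lock all nodes, because it does not yet know where the instance will
# be placed; otherwise it locks exactly the named nodes.
def _sketch_node_locks(iallocator, pnode, snode=None, src_node=None):
  if iallocator:
    return "ALL_SET"
  nodes = [pnode]
  if snode:
    nodes.append(snode)
  if src_node:
    nodes.append(src_node)
  return nodes

assert _sketch_node_locks("hail", None) == "ALL_SET"
assert _sketch_node_locks(None, "node1", snode="node2") == ["node1", "node2"]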
6280 def _RunAllocator(self):
6281 """Run the allocator based on input opcode.
6284 nics = [n.ToDict() for n in self.nics]
6285 ial = IAllocator(self.cfg, self.rpc,
6286 mode=constants.IALLOCATOR_MODE_ALLOC,
6287 name=self.op.instance_name,
6288 disk_template=self.op.disk_template,
6291 vcpus=self.be_full[constants.BE_VCPUS],
6292 mem_size=self.be_full[constants.BE_MEMORY],
6295 hypervisor=self.op.hypervisor,
6298 ial.Run(self.op.iallocator)
6301 raise errors.OpPrereqError("Can't compute nodes using"
6302 " iallocator '%s': %s" %
6303 (self.op.iallocator, ial.info),
6305 if len(ial.result) != ial.required_nodes:
6306 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6307 " of nodes (%s), required %s" %
6308 (self.op.iallocator, len(ial.result),
6309 ial.required_nodes), errors.ECODE_FAULT)
6310 self.op.pnode = ial.result[0]
6311 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6312 self.op.instance_name, self.op.iallocator,
6313 utils.CommaJoin(ial.result))
6314 if ial.required_nodes == 2:
6315 self.op.snode = ial.result[1]
6317 def BuildHooksEnv(self):
6320 This runs on master, primary and secondary nodes of the instance.
6324 "ADD_MODE": self.op.mode,
6326 if self.op.mode == constants.INSTANCE_IMPORT:
6327 env["SRC_NODE"] = self.op.src_node
6328 env["SRC_PATH"] = self.op.src_path
6329 env["SRC_IMAGES"] = self.src_images
6331 env.update(_BuildInstanceHookEnv(
6332 name=self.op.instance_name,
6333 primary_node=self.op.pnode,
6334 secondary_nodes=self.secondaries,
6335 status=self.op.start,
6336 os_type=self.op.os_type,
6337 memory=self.be_full[constants.BE_MEMORY],
6338 vcpus=self.be_full[constants.BE_VCPUS],
6339 nics=_NICListToTuple(self, self.nics),
6340 disk_template=self.op.disk_template,
6341 disks=[(d["size"], d["mode"]) for d in self.disks],
6344 hypervisor_name=self.op.hypervisor,
6347 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6351 def _ReadExportInfo(self):
6352 """Reads the export information from disk.
6354 It will override the opcode source node and path with the actual
6355 information, if these two were not specified before.
6357 @return: the export information
6360 assert self.op.mode == constants.INSTANCE_IMPORT
6362 src_node = self.op.src_node
6363 src_path = self.op.src_path
6365 if src_node is None:
6366 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6367 exp_list = self.rpc.call_export_list(locked_nodes)
6369 for node in exp_list:
6370 if exp_list[node].fail_msg:
6372 if src_path in exp_list[node].payload:
6374 self.op.src_node = src_node = node
6375 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6379 raise errors.OpPrereqError("No export found for relative path %s" %
6380 src_path, errors.ECODE_INVAL)
6382 _CheckNodeOnline(self, src_node)
6383 result = self.rpc.call_export_info(src_node, src_path)
6384 result.Raise("No export or invalid export found in dir %s" % src_path)
6386 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6387 if not export_info.has_section(constants.INISECT_EXP):
6388 raise errors.ProgrammerError("Corrupted export config",
6389 errors.ECODE_ENVIRON)
6391 ei_version = export_info.get(constants.INISECT_EXP, "version")
6392 if (int(ei_version) != constants.EXPORT_VERSION):
6393 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6394 (ei_version, constants.EXPORT_VERSION),
6395 errors.ECODE_ENVIRON)
6398 def _ReadExportParams(self, einfo):
6399 """Use export parameters as defaults.
6401 In case the opcode doesn't specify (i.e. override) some instance
6402 parameters, then try to use them from the export information, if the export declares them.
6406 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6408 if self.op.disk_template is None:
6409 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6410 self.op.disk_template = einfo.get(constants.INISECT_INS,
6413 raise errors.OpPrereqError("No disk template specified and the export"
6414 " is missing the disk_template information",
6417 if not self.op.disks:
6418 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6420 # TODO: import the disk iv_name too
6421 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6422 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6423 disks.append({"size": disk_sz})
6424 self.op.disks = disks
6426 raise errors.OpPrereqError("No disk info specified and the export"
6427 " is missing the disk information",
6430 if (not self.op.nics and
6431 einfo.has_option(constants.INISECT_INS, "nic_count")):
6433 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6435 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6436 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6441 if (self.op.hypervisor is None and
6442 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6443 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6444 if einfo.has_section(constants.INISECT_HYP):
6445 # use the export parameters but do not override the ones
6446 # specified by the user
6447 for name, value in einfo.items(constants.INISECT_HYP):
6448 if name not in self.op.hvparams:
6449 self.op.hvparams[name] = value
6451 if einfo.has_section(constants.INISECT_BEP):
6452 # use the parameters, without overriding
6453 for name, value in einfo.items(constants.INISECT_BEP):
6454 if name not in self.op.beparams:
6455 self.op.beparams[name] = value
6457 # try to read the parameters old style, from the main section
6458 for name in constants.BES_PARAMETERS:
6459 if (name not in self.op.beparams and
6460 einfo.has_option(constants.INISECT_INS, name)):
6461 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
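# Illustrative example of the export information consumed by
# _ReadExportParams above.  The section names shown ("instance",
# "hypervisor", "backend") are stand-ins for constants.INISECT_INS,
# constants.INISECT_HYP and constants.INISECT_BEP, and all values are made
# up; only the keys actually read by the code are listed.
_SKETCH_EXPORT_INFO = """
[instance]
os = debootstrap
disk_template = drbd
disk_count = 2
disk0_size = 1024
disk1_size = 2048
nic_count = 1
nic0_mac = aa:00:00:12:34:56
nic0_ip = none
hypervisor = xen-pvm

[hypervisor]
kernel_path = /boot/vmlinuz-xenU

[backend]
memory = 512
vcpus = 1
"""
# Values from the [hypervisor] and [backend] sections are merged into
# op.hvparams and op.beparams only for keys the user did not specify, so
# explicit opcode parameters always win over the export defaults.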
6463 def _RevertToDefaults(self, cluster):
6464 """Revert the instance parameters to the default values.
6468 hv_defs = cluster.GetHVDefaults(self.op.hypervisor, self.op.os_type)
6469 for name in self.op.hvparams.keys():
6470 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6471 del self.op.hvparams[name]
6473 be_defs = cluster.beparams.get(constants.PP_DEFAULT, {})
6474 for name in self.op.beparams.keys():
6475 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6476 del self.op.beparams[name]
6478 nic_defs = cluster.nicparams.get(constants.PP_DEFAULT, {})
6479 for nic in self.op.nics:
6480 for name in constants.NICS_PARAMETERS:
6481 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
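# Small sketch of the "identify defaults" step above: parameters whose value
# equals the cluster default are dropped from the opcode, so the instance
# only records genuine overrides and keeps tracking future changes to the
# cluster-level defaults.
def _sketch_strip_defaults(params, defaults):
  return dict((k, v) for k, v in params.items()
              if k not in defaults or defaults[k] != v)

assert _sketch_strip_defaults({"memory": 512, "vcpus": 4},
                              {"memory": 512, "vcpus": 1}) == {"vcpus": 4}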
6484 def CheckPrereq(self):
6485 """Check prerequisites.
6488 if self.op.mode == constants.INSTANCE_IMPORT:
6489 export_info = self._ReadExportInfo()
6490 self._ReadExportParams(export_info)
6492 _CheckDiskTemplate(self.op.disk_template)
6494 if (not self.cfg.GetVGName() and
6495 self.op.disk_template not in constants.DTS_NOT_LVM):
6496 raise errors.OpPrereqError("Cluster does not support lvm-based"
6497 " instances", errors.ECODE_STATE)
6499 if self.op.hypervisor is None:
6500 self.op.hypervisor = self.cfg.GetHypervisorType()
6502 cluster = self.cfg.GetClusterInfo()
6503 enabled_hvs = cluster.enabled_hypervisors
6504 if self.op.hypervisor not in enabled_hvs:
6505 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6506 " cluster (%s)" % (self.op.hypervisor,
6507 ",".join(enabled_hvs)),
6510 # check hypervisor parameter syntax (locally)
6511 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6512 filled_hvp = objects.FillDict(cluster.GetHVDefaults(self.op.hypervisor,
6515 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6516 hv_type.CheckParameterSyntax(filled_hvp)
6517 self.hv_full = filled_hvp
6518 # check that we don't specify global parameters on an instance
6519 _CheckGlobalHvParams(self.op.hvparams)
6521 # fill and remember the beparams dict
6522 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6523 self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6526 # now that hvp/bep are in final format, let's reset to defaults,
6528 if self.op.identify_defaults:
6529 self._RevertToDefaults(cluster)
6533 for idx, nic in enumerate(self.op.nics):
6534 nic_mode_req = nic.get("mode", None)
6535 nic_mode = nic_mode_req
6536 if nic_mode is None:
6537 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6539 # in routed mode, for the first nic, the default ip is 'auto'
6540 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6541 default_ip_mode = constants.VALUE_AUTO
6543 default_ip_mode = constants.VALUE_NONE
6545 # ip validity checks
6546 ip = nic.get("ip", default_ip_mode)
6547 if ip is None or ip.lower() == constants.VALUE_NONE:
6549 elif ip.lower() == constants.VALUE_AUTO:
6550 if not self.op.name_check:
6551 raise errors.OpPrereqError("IP address set to auto but name checks"
6552 " have been skipped. Aborting.",
6554 nic_ip = self.hostname1.ip
6556 if not utils.IsValidIP(ip):
6557 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6558 " like a valid IP" % ip,
6562 # TODO: check the ip address for uniqueness
6563 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6564 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6567 # MAC address verification
6568 mac = nic.get("mac", constants.VALUE_AUTO)
6569 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6570 mac = utils.NormalizeAndValidateMac(mac)
6573 self.cfg.ReserveMAC(mac, self.proc.GetECId())
6574 except errors.ReservationError:
6575 raise errors.OpPrereqError("MAC address %s already in use"
6576 " in cluster" % mac,
6577 errors.ECODE_NOTUNIQUE)
6579 # bridge verification
6580 bridge = nic.get("bridge", None)
6581 link = nic.get("link", None)
6583 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6584 " at the same time", errors.ECODE_INVAL)
6585 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6586 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6593 nicparams[constants.NIC_MODE] = nic_mode_req
6595 nicparams[constants.NIC_LINK] = link
6597 check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6599 objects.NIC.CheckParameterSyntax(check_params)
6600 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6602 # disk checks/pre-build
6604 for disk in self.op.disks:
6605 mode = disk.get("mode", constants.DISK_RDWR)
6606 if mode not in constants.DISK_ACCESS_SET:
6607 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6608 mode, errors.ECODE_INVAL)
6609 size = disk.get("size", None)
6611 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6614 except (TypeError, ValueError):
6615 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6617 new_disk = {"size": size, "mode": mode}
6619 new_disk["adopt"] = disk["adopt"]
6620 self.disks.append(new_disk)
6622 if self.op.mode == constants.INSTANCE_IMPORT:
6624 # Check that the new instance doesn't have less disks than the export
6625 instance_disks = len(self.disks)
6626 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6627 if instance_disks < export_disks:
6628 raise errors.OpPrereqError("Not enough disks to import."
6629 " (instance: %d, export: %d)" %
6630 (instance_disks, export_disks),
6634 for idx in range(export_disks):
6635 option = 'disk%d_dump' % idx
6636 if export_info.has_option(constants.INISECT_INS, option):
6637 # FIXME: are the old OSes, disk sizes, etc. useful?
6638 export_name = export_info.get(constants.INISECT_INS, option)
6639 image = utils.PathJoin(self.op.src_path, export_name)
6640 disk_images.append(image)
6642 disk_images.append(False)
6644 self.src_images = disk_images
6646 old_name = export_info.get(constants.INISECT_INS, 'name')
6648 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6649 except (TypeError, ValueError), err:
6650 raise errors.OpPrereqError("Invalid export file, nic_count is not"
6651 " an integer: %s" % str(err),
6653 if self.op.instance_name == old_name:
6654 for idx, nic in enumerate(self.nics):
6655 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6656 nic_mac_ini = 'nic%d_mac' % idx
6657 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6659 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6661 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6662 if self.op.ip_check:
6663 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6664 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6665 (self.check_ip, self.op.instance_name),
6666 errors.ECODE_NOTUNIQUE)
6668 #### mac address generation
6669 # By generating the MAC address here, both the allocator and the hooks get
6670 # the real final MAC address rather than the 'auto' or 'generate' value.
6671 # There is a race condition between the generation and the instance object
6672 # creation, which means that we know the mac is valid now, but we're not
6673 # sure it will be when we actually add the instance. If things go bad
6674 # adding the instance will abort because of a duplicate mac, and the
6675 # creation job will fail.
6676 for nic in self.nics:
6677 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6678 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6682 if self.op.iallocator is not None:
6683 self._RunAllocator()
6685 #### node related checks
6687 # check primary node
6688 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6689 assert self.pnode is not None, \
6690 "Cannot retrieve locked node %s" % self.op.pnode
6692 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6693 pnode.name, errors.ECODE_STATE)
6695 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6696 pnode.name, errors.ECODE_STATE)
6698 self.secondaries = []
6700 # mirror node verification
6701 if self.op.disk_template in constants.DTS_NET_MIRROR:
6702 if self.op.snode is None:
6703 raise errors.OpPrereqError("The networked disk templates need"
6704 " a mirror node", errors.ECODE_INVAL)
6705 if self.op.snode == pnode.name:
6706 raise errors.OpPrereqError("The secondary node cannot be the"
6707 " primary node.", errors.ECODE_INVAL)
6708 _CheckNodeOnline(self, self.op.snode)
6709 _CheckNodeNotDrained(self, self.op.snode)
6710 self.secondaries.append(self.op.snode)
6712 nodenames = [pnode.name] + self.secondaries
6714 req_size = _ComputeDiskSize(self.op.disk_template,
6717 # Check lv size requirements, if not adopting
6718 if req_size is not None and not self.adopt_disks:
6719 _CheckNodesFreeDisk(self, nodenames, req_size)
6721 if self.adopt_disks: # instead, we must check the adoption data
6722 all_lvs = set([i["adopt"] for i in self.disks])
6723 if len(all_lvs) != len(self.disks):
6724 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6726 for lv_name in all_lvs:
6728 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6729 except errors.ReservationError:
6730 raise errors.OpPrereqError("LV named %s used by another instance" %
6731 lv_name, errors.ECODE_NOTUNIQUE)
6733 node_lvs = self.rpc.call_lv_list([pnode.name],
6734 self.cfg.GetVGName())[pnode.name]
6735 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6736 node_lvs = node_lvs.payload
6737 delta = all_lvs.difference(node_lvs.keys())
6739 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6740 utils.CommaJoin(delta),
6742 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6744 raise errors.OpPrereqError("Online logical volumes found, cannot"
6745 " adopt: %s" % utils.CommaJoin(online_lvs),
6747 # update the size of disk based on what is found
6748 for dsk in self.disks:
6749 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6751 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6753 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6755 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6757 # memory check on primary node
6759 _CheckNodeFreeMemory(self, self.pnode.name,
6760 "creating instance %s" % self.op.instance_name,
6761 self.be_full[constants.BE_MEMORY],
6764 self.dry_run_result = list(nodenames)
6766 def Exec(self, feedback_fn):
6767 """Create and add the instance to the cluster.
6770 instance = self.op.instance_name
6771 pnode_name = self.pnode.name
6773 ht_kind = self.op.hypervisor
6774 if ht_kind in constants.HTS_REQ_PORT:
6775 network_port = self.cfg.AllocatePort()
6779 if constants.ENABLE_FILE_STORAGE:
6780 # this is needed because os.path.join does not accept None arguments
6781 if self.op.file_storage_dir is None:
6782 string_file_storage_dir = ""
6784 string_file_storage_dir = self.op.file_storage_dir
6786 # build the full file storage dir path
6787 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6788 string_file_storage_dir, instance)
6790 file_storage_dir = ""
6792 disks = _GenerateDiskTemplate(self,
6793 self.op.disk_template,
6794 instance, pnode_name,
6798 self.op.file_driver,
6801 iobj = objects.Instance(name=instance, os=self.op.os_type,
6802 primary_node=pnode_name,
6803 nics=self.nics, disks=disks,
6804 disk_template=self.op.disk_template,
6806 network_port=network_port,
6807 beparams=self.op.beparams,
6808 hvparams=self.op.hvparams,
6809 hypervisor=self.op.hypervisor,
6812 if self.adopt_disks:
6813 # rename LVs to the newly-generated names; we need to construct
6814 # 'fake' LV disks with the old data, plus the new unique_id
6815 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6817 for t_dsk, a_dsk in zip (tmp_disks, self.disks):
6818 rename_to.append(t_dsk.logical_id)
6819 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6820 self.cfg.SetDiskID(t_dsk, pnode_name)
6821 result = self.rpc.call_blockdev_rename(pnode_name,
6822 zip(tmp_disks, rename_to))
6823 result.Raise("Failed to rename adopted LVs")
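# Sketch of the adoption rename above: each temporary disk object keeps the
# volume group from the newly generated logical_id but points at the
# existing (adopted) LV, and the rename list maps that existing LV to the
# generated name.  "xenvg" and the LV names are only example values.
def _sketch_adoption_renames(generated_ids, adopted_names):
  """generated_ids: list of (vg, new_lv_name); adopted_names: existing LVs."""
  rename_to = []
  lookup_ids = []
  for (vg, new_name), old_name in zip(generated_ids, adopted_names):
    rename_to.append((vg, new_name))       # target of the rename
    lookup_ids.append((vg, old_name))      # where the LV currently lives
  return zip(lookup_ids, rename_to)

assert list(_sketch_adoption_renames([("xenvg", "uuid.disk0")],
                                     ["existing_lv"])) == \
    [(("xenvg", "existing_lv"), ("xenvg", "uuid.disk0"))]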
6825 feedback_fn("* creating instance disks...")
6827 _CreateDisks(self, iobj)
6828 except errors.OpExecError:
6829 self.LogWarning("Device creation failed, reverting...")
6831 _RemoveDisks(self, iobj)
6833 self.cfg.ReleaseDRBDMinors(instance)
6836 feedback_fn("adding instance %s to cluster config" % instance)
6838 self.cfg.AddInstance(iobj, self.proc.GetECId())
6840 # Declare that we don't want to remove the instance lock anymore, as we've
6841 # added the instance to the config
6842 del self.remove_locks[locking.LEVEL_INSTANCE]
6843 # Unlock all the nodes
6844 if self.op.mode == constants.INSTANCE_IMPORT:
6845 nodes_keep = [self.op.src_node]
6846 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6847 if node != self.op.src_node]
6848 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6849 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6851 self.context.glm.release(locking.LEVEL_NODE)
6852 del self.acquired_locks[locking.LEVEL_NODE]
6854 if self.op.wait_for_sync:
6855 disk_abort = not _WaitForSync(self, iobj)
6856 elif iobj.disk_template in constants.DTS_NET_MIRROR:
6857 # make sure the disks are not degraded (still sync-ing is ok)
6859 feedback_fn("* checking mirrors status")
6860 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6865 _RemoveDisks(self, iobj)
6866 self.cfg.RemoveInstance(iobj.name)
6867 # Make sure the instance lock gets removed
6868 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6869 raise errors.OpExecError("There are some degraded disks for"
6872 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6873 if self.op.mode == constants.INSTANCE_CREATE:
6874 if not self.op.no_install:
6875 feedback_fn("* running the instance OS create scripts...")
6876 # FIXME: pass debug option from opcode to backend
6877 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6878 self.op.debug_level)
6879 result.Raise("Could not add os for instance %s"
6880 " on node %s" % (instance, pnode_name))
6882 elif self.op.mode == constants.INSTANCE_IMPORT:
6883 feedback_fn("* running the instance OS import scripts...")
6887 for idx, image in enumerate(self.src_images):
6891 # FIXME: pass debug option from opcode to backend
6892 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
6893 constants.IEIO_FILE, (image, ),
6894 constants.IEIO_SCRIPT,
6895 (iobj.disks[idx], idx),
6897 transfers.append(dt)
6900 masterd.instance.TransferInstanceData(self, feedback_fn,
6901 self.op.src_node, pnode_name,
6902 self.pnode.secondary_ip,
6904 if not compat.all(import_result):
6905 self.LogWarning("Some disks for instance %s on node %s were not"
6906 " imported successfully" % (instance, pnode_name))
6908 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6909 feedback_fn("* preparing remote import...")
6910 connect_timeout = constants.RIE_CONNECT_TIMEOUT
6911 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
6913 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
6914 self.source_x509_ca,
6915 self._cds, timeouts)
6916 if not compat.all(disk_results):
6917 # TODO: Should the instance still be started, even if some disks
6918 # failed to import (valid for local imports, too)?
6919 self.LogWarning("Some disks for instance %s on node %s were not"
6920 " imported successfully" % (instance, pnode_name))
6922 # Run rename script on newly imported instance
6923 assert iobj.name == instance
6924 feedback_fn("Running rename script for %s" % instance)
6925 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
6926 self.source_instance_name,
6927 self.op.debug_level)
6929 self.LogWarning("Failed to run rename script for %s on node"
6930 " %s: %s" % (instance, pnode_name, result.fail_msg))
6933 # also checked in the prereq part
6934 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6938 iobj.admin_up = True
6939 self.cfg.Update(iobj, feedback_fn)
6940 logging.info("Starting instance %s on node %s", instance, pnode_name)
6941 feedback_fn("* starting instance...")
6942 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6943 result.Raise("Could not start instance")
6945 return list(iobj.all_nodes)
6948 class LUConnectConsole(NoHooksLU):
6949 """Connect to an instance's console.
6951 This is somewhat special in that it returns the command line that
6952 you need to run on the master node in order to connect to the console.
6956 _OP_REQP = ["instance_name"]
6959 def ExpandNames(self):
6960 self._ExpandAndLockInstance()
6962 def CheckPrereq(self):
6963 """Check prerequisites.
6965 This checks that the instance is in the cluster.
6968 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6969 assert self.instance is not None, \
6970 "Cannot retrieve locked instance %s" % self.op.instance_name
6971 _CheckNodeOnline(self, self.instance.primary_node)
6973 def Exec(self, feedback_fn):
6974 """Connect to the console of an instance
6977 instance = self.instance
6978 node = instance.primary_node
6980 node_insts = self.rpc.call_instance_list([node],
6981 [instance.hypervisor])[node]
6982 node_insts.Raise("Can't get node information from %s" % node)
6984 if instance.name not in node_insts.payload:
6985 raise errors.OpExecError("Instance %s is not running." % instance.name)
6987 logging.debug("Connecting to console of %s on %s", instance.name, node)
6989 hyper = hypervisor.GetHypervisor(instance.hypervisor)
6990 cluster = self.cfg.GetClusterInfo()
6991 # beparams and hvparams are passed separately, to avoid editing the
6992 # instance and then saving the defaults in the instance itself.
6993 hvparams = cluster.FillHV(instance)
6994 beparams = cluster.FillBE(instance)
6995 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6998 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7001 class LUReplaceDisks(LogicalUnit):
7002 """Replace the disks of an instance.
7005 HPATH = "mirrors-replace"
7006 HTYPE = constants.HTYPE_INSTANCE
7007 _OP_REQP = ["instance_name", "mode", "disks"]
7010 def CheckArguments(self):
7011 if not hasattr(self.op, "remote_node"):
7012 self.op.remote_node = None
7013 if not hasattr(self.op, "iallocator"):
7014 self.op.iallocator = None
7015 if not hasattr(self.op, "early_release"):
7016 self.op.early_release = False
7018 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7021 def ExpandNames(self):
7022 self._ExpandAndLockInstance()
7024 if self.op.iallocator is not None:
7025 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7027 elif self.op.remote_node is not None:
7028 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7029 self.op.remote_node = remote_node
7031 # Warning: do not remove the locking of the new secondary here
7032 # unless DRBD8.AddChildren is changed to work in parallel;
7033 # currently it doesn't since parallel invocations of
7034 # FindUnusedMinor will conflict
7035 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7036 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7039 self.needed_locks[locking.LEVEL_NODE] = []
7040 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7042 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7043 self.op.iallocator, self.op.remote_node,
7044 self.op.disks, False, self.op.early_release)
7046 self.tasklets = [self.replacer]
7048 def DeclareLocks(self, level):
7049 # If we're not already locking all nodes in the set we have to declare the
7050 # instance's primary/secondary nodes.
7051 if (level == locking.LEVEL_NODE and
7052 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7053 self._LockInstancesNodes()
7055 def BuildHooksEnv(self):
7058 This runs on the master, the primary and all the secondaries.
7061 instance = self.replacer.instance
7063 "MODE": self.op.mode,
7064 "NEW_SECONDARY": self.op.remote_node,
7065 "OLD_SECONDARY": instance.secondary_nodes[0],
7067 env.update(_BuildInstanceHookEnvByObject(self, instance))
7069 self.cfg.GetMasterNode(),
7070 instance.primary_node,
7072 if self.op.remote_node is not None:
7073 nl.append(self.op.remote_node)
7077 class LUEvacuateNode(LogicalUnit):
7078 """Relocate the secondary instances from a node.
7081 HPATH = "node-evacuate"
7082 HTYPE = constants.HTYPE_NODE
7083 _OP_REQP = ["node_name"]
7086 def CheckArguments(self):
7087 if not hasattr(self.op, "remote_node"):
7088 self.op.remote_node = None
7089 if not hasattr(self.op, "iallocator"):
7090 self.op.iallocator = None
7091 if not hasattr(self.op, "early_release"):
7092 self.op.early_release = False
7094 TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
7095 self.op.remote_node,
7098 def ExpandNames(self):
7099 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7101 self.needed_locks = {}
7103 # Declare node locks
7104 if self.op.iallocator is not None:
7105 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7107 elif self.op.remote_node is not None:
7108 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7110 # Warning: do not remove the locking of the new secondary here
7111 # unless DRBD8.AddChildren is changed to work in parallel;
7112 # currently it doesn't since parallel invocations of
7113 # FindUnusedMinor will conflict
7114 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
7115 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7118 raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
7120 # Create tasklets for replacing disks for all secondary instances on this
7125 for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
7126 logging.debug("Replacing disks for instance %s", inst.name)
7127 names.append(inst.name)
7129 replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
7130 self.op.iallocator, self.op.remote_node, [],
7131 True, self.op.early_release)
7132 tasklets.append(replacer)
7134 self.tasklets = tasklets
7135 self.instance_names = names
7137 # Declare instance locks
7138 self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
7140 def DeclareLocks(self, level):
7141 # If we're not already locking all nodes in the set we have to declare the
7142 # instance's primary/secondary nodes.
7143 if (level == locking.LEVEL_NODE and
7144 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7145 self._LockInstancesNodes()
7147 def BuildHooksEnv(self):
7150 This runs on the master, the primary and all the secondaries.
7154 "NODE_NAME": self.op.node_name,
7157 nl = [self.cfg.GetMasterNode()]
7159 if self.op.remote_node is not None:
7160 env["NEW_SECONDARY"] = self.op.remote_node
7161 nl.append(self.op.remote_node)
7163 return (env, nl, nl)
7166 class TLReplaceDisks(Tasklet):
7167 """Replaces disks for an instance.
7169 Note: Locking is not within the scope of this class.
7172 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7173 disks, delay_iallocator, early_release):
7174 """Initializes this class.
7177 Tasklet.__init__(self, lu)
7180 self.instance_name = instance_name
7182 self.iallocator_name = iallocator_name
7183 self.remote_node = remote_node
7185 self.delay_iallocator = delay_iallocator
7186 self.early_release = early_release
7189 self.instance = None
7190 self.new_node = None
7191 self.target_node = None
7192 self.other_node = None
7193 self.remote_node_info = None
7194 self.node_secondary_ip = None
7197 def CheckArguments(mode, remote_node, iallocator):
7198 """Helper function for users of this class.
7201 # check for valid parameter combination
7202 if mode == constants.REPLACE_DISK_CHG:
7203 if remote_node is None and iallocator is None:
7204 raise errors.OpPrereqError("When changing the secondary either an"
7205 " iallocator script must be used or the"
7206 " new node given", errors.ECODE_INVAL)
7208 if remote_node is not None and iallocator is not None:
7209 raise errors.OpPrereqError("Give either the iallocator or the new"
7210 " secondary, not both", errors.ECODE_INVAL)
7212 elif remote_node is not None or iallocator is not None:
7213 # Not replacing the secondary
7214 raise errors.OpPrereqError("The iallocator and new node options can"
7215 " only be used when changing the"
7216 " secondary node", errors.ECODE_INVAL)
7219 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7220 """Compute a new secondary node using an IAllocator.
7223 ial = IAllocator(lu.cfg, lu.rpc,
7224 mode=constants.IALLOCATOR_MODE_RELOC,
7226 relocate_from=relocate_from)
7228 ial.Run(iallocator_name)
7231 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7232 " %s" % (iallocator_name, ial.info),
7235 if len(ial.result) != ial.required_nodes:
7236 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7237 " of nodes (%s), required %s" %
7239 len(ial.result), ial.required_nodes),
7242 remote_node_name = ial.result[0]
7244 lu.LogInfo("Selected new secondary for instance '%s': %s",
7245 instance_name, remote_node_name)
7247 return remote_node_name
7249 def _FindFaultyDisks(self, node_name):
7250 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7253 def CheckPrereq(self):
7254 """Check prerequisites.
7256 This checks that the instance is in the cluster.
7259 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7260 assert instance is not None, \
7261 "Cannot retrieve locked instance %s" % self.instance_name
7263 if instance.disk_template != constants.DT_DRBD8:
7264 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7265 " instances", errors.ECODE_INVAL)
7267 if len(instance.secondary_nodes) != 1:
7268 raise errors.OpPrereqError("The instance has a strange layout,"
7269 " expected one secondary but found %d" %
7270 len(instance.secondary_nodes),
7273 if not self.delay_iallocator:
7274 self._CheckPrereq2()
7276 def _CheckPrereq2(self):
7277 """Check prerequisites, second part.
7279 This function should always be part of CheckPrereq. It was separated and is
7280 now called from Exec because during node evacuation iallocator was only
7281 called with an unmodified cluster model, not taking planned changes into account.
7285 instance = self.instance
7286 secondary_node = instance.secondary_nodes[0]
7288 if self.iallocator_name is None:
7289 remote_node = self.remote_node
7291 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7292 instance.name, instance.secondary_nodes)
7294 if remote_node is not None:
7295 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7296 assert self.remote_node_info is not None, \
7297 "Cannot retrieve locked node %s" % remote_node
7299 self.remote_node_info = None
7301 if remote_node == self.instance.primary_node:
7302 raise errors.OpPrereqError("The specified node is the primary node of"
7303 " the instance.", errors.ECODE_INVAL)
7305 if remote_node == secondary_node:
7306 raise errors.OpPrereqError("The specified node is already the"
7307 " secondary node of the instance.",
7310 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7311 constants.REPLACE_DISK_CHG):
7312 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7315 if self.mode == constants.REPLACE_DISK_AUTO:
7316 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7317 faulty_secondary = self._FindFaultyDisks(secondary_node)
7319 if faulty_primary and faulty_secondary:
7320 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7321 " one node and can not be repaired"
7322 " automatically" % self.instance_name,
7326 self.disks = faulty_primary
7327 self.target_node = instance.primary_node
7328 self.other_node = secondary_node
7329 check_nodes = [self.target_node, self.other_node]
7330 elif faulty_secondary:
7331 self.disks = faulty_secondary
7332 self.target_node = secondary_node
7333 self.other_node = instance.primary_node
7334 check_nodes = [self.target_node, self.other_node]
7340 # Non-automatic modes
7341 if self.mode == constants.REPLACE_DISK_PRI:
7342 self.target_node = instance.primary_node
7343 self.other_node = secondary_node
7344 check_nodes = [self.target_node, self.other_node]
7346 elif self.mode == constants.REPLACE_DISK_SEC:
7347 self.target_node = secondary_node
7348 self.other_node = instance.primary_node
7349 check_nodes = [self.target_node, self.other_node]
7351 elif self.mode == constants.REPLACE_DISK_CHG:
7352 self.new_node = remote_node
7353 self.other_node = instance.primary_node
7354 self.target_node = secondary_node
7355 check_nodes = [self.new_node, self.other_node]
7357 _CheckNodeNotDrained(self.lu, remote_node)
7359 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7360 assert old_node_info is not None
7361 if old_node_info.offline and not self.early_release:
7362 # doesn't make sense to delay the release
7363 self.early_release = True
7364 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7365 " early-release mode", secondary_node)
7368 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7371 # If not specified all disks should be replaced
7373 self.disks = range(len(self.instance.disks))
7375 for node in check_nodes:
7376 _CheckNodeOnline(self.lu, node)
7378 # Check whether disks are valid
7379 for disk_idx in self.disks:
7380 instance.FindDisk(disk_idx)
7382 # Get secondary node IP addresses
7385 for node_name in [self.target_node, self.other_node, self.new_node]:
7386 if node_name is not None:
7387 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7389 self.node_secondary_ip = node_2nd_ip
7391 def Exec(self, feedback_fn):
7392 """Execute disk replacement.
7394 This dispatches the disk replacement to the appropriate handler.
7397 if self.delay_iallocator:
7398 self._CheckPrereq2()
7401 feedback_fn("No disks need replacement")
7404 feedback_fn("Replacing disk(s) %s for %s" %
7405 (utils.CommaJoin(self.disks), self.instance.name))
7407 activate_disks = (not self.instance.admin_up)
7409 # Activate the instance disks if we're replacing them on a down instance
7411 _StartInstanceDisks(self.lu, self.instance, True)
7414 # Should we replace the secondary node?
7415 if self.new_node is not None:
7416 fn = self._ExecDrbd8Secondary
7418 fn = self._ExecDrbd8DiskOnly
7420 return fn(feedback_fn)
7423 # Deactivate the instance disks if we're replacing them on a
7426 _SafeShutdownInstanceDisks(self.lu, self.instance)
7428 def _CheckVolumeGroup(self, nodes):
7429 self.lu.LogInfo("Checking volume groups")
7431 vgname = self.cfg.GetVGName()
7433 # Make sure volume group exists on all involved nodes
7434 results = self.rpc.call_vg_list(nodes)
7436 raise errors.OpExecError("Can't list volume groups on the nodes")
7440 res.Raise("Error checking node %s" % node)
7441 if vgname not in res.payload:
7442 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7445 def _CheckDisksExistence(self, nodes):
7446 # Check disk existence
7447 for idx, dev in enumerate(self.instance.disks):
7448 if idx not in self.disks:
7452 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7453 self.cfg.SetDiskID(dev, node)
7455 result = self.rpc.call_blockdev_find(node, dev)
7457 msg = result.fail_msg
7458 if msg or not result.payload:
7460 msg = "disk not found"
7461 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7464 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7465 for idx, dev in enumerate(self.instance.disks):
7466 if idx not in self.disks:
7469 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7472 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7474 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7475 " replace disks for instance %s" %
7476 (node_name, self.instance.name))
7478 def _CreateNewStorage(self, node_name):
7479 vgname = self.cfg.GetVGName()
7482 for idx, dev in enumerate(self.instance.disks):
7483 if idx not in self.disks:
7486 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7488 self.cfg.SetDiskID(dev, node_name)
7490 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7491 names = _GenerateUniqueNames(self.lu, lv_names)
7493 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7494 logical_id=(vgname, names[0]))
7495 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7496 logical_id=(vgname, names[1]))
7498 new_lvs = [lv_data, lv_meta]
7499 old_lvs = dev.children
7500 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7502 # we pass force_create=True to force the LVM creation
7503 for new_lv in new_lvs:
7504 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7505 _GetInstanceInfoText(self.instance), False)
7509 def _CheckDevices(self, node_name, iv_names):
7510 for name, (dev, _, _) in iv_names.iteritems():
7511 self.cfg.SetDiskID(dev, node_name)
7513 result = self.rpc.call_blockdev_find(node_name, dev)
7515 msg = result.fail_msg
7516 if msg or not result.payload:
7518 msg = "disk not found"
7519 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7522 if result.payload.is_degraded:
7523 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7525 def _RemoveOldStorage(self, node_name, iv_names):
7526 for name, (_, old_lvs, _) in iv_names.iteritems():
7527 self.lu.LogInfo("Remove logical volumes for %s" % name)
7530 self.cfg.SetDiskID(lv, node_name)
7532 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7534 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7535 hint="remove unused LVs manually")
7537 def _ReleaseNodeLock(self, node_name):
7538 """Releases the lock for a given node."""
7539 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7541 def _ExecDrbd8DiskOnly(self, feedback_fn):
7542 """Replace a disk on the primary or secondary for DRBD 8.
7544 The algorithm for replace is quite complicated:
7546 1. for each disk to be replaced:
7548 1. create new LVs on the target node with unique names
7549 1. detach old LVs from the drbd device
7550 1. rename old LVs to name_replaced.<time_t>
7551 1. rename new LVs to old LVs
7552 1. attach the new LVs (with the old names now) to the drbd device
7554 1. wait for sync across all devices
7556 1. for each modified disk:
7558 1. remove old LVs (which have the name name_replaces.<time_t>)
7560 Failures are not very well handled.
7565 # Step: check device activation
7566 self.lu.LogStep(1, steps_total, "Check device existence")
7567 self._CheckDisksExistence([self.other_node, self.target_node])
7568 self._CheckVolumeGroup([self.target_node, self.other_node])
7570 # Step: check other node consistency
7571 self.lu.LogStep(2, steps_total, "Check peer consistency")
7572 self._CheckDisksConsistency(self.other_node,
7573 self.other_node == self.instance.primary_node,
7576 # Step: create new storage
7577 self.lu.LogStep(3, steps_total, "Allocate new storage")
7578 iv_names = self._CreateNewStorage(self.target_node)
7580 # Step: for each lv, detach+rename*2+attach
7581 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7582 for dev, old_lvs, new_lvs in iv_names.itervalues():
7583 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7585 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7587 result.Raise("Can't detach drbd from local storage on node"
7588 " %s for device %s" % (self.target_node, dev.iv_name))
7590 #cfg.Update(instance)
7592 # ok, we created the new LVs, so now we know we have the needed
7593 # storage; as such, we proceed on the target node to rename
7594 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7595 # using the assumption that logical_id == physical_id (which in
7596 # turn is the unique_id on that node)
7598 # FIXME(iustin): use a better name for the replaced LVs
7599 temp_suffix = int(time.time())
7600 ren_fn = lambda d, suff: (d.physical_id[0],
7601 d.physical_id[1] + "_replaced-%s" % suff)
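# Hedged example (names are assumptions, not from the original source): for an
# LV whose physical_id is ("xenvg", "uuid.disk0_data") and temp_suffix
# 1300000000, ren_fn yields ("xenvg", "uuid.disk0_data_replaced-1300000000"),
# i.e. the volume group is kept and only the LV name gets the temporary
# "_replaced-<time_t>" suffix.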
7603 # Build the rename list based on what LVs exist on the node
7604 rename_old_to_new = []
7605 for to_ren in old_lvs:
7606 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7607 if not result.fail_msg and result.payload:
7609 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7611 self.lu.LogInfo("Renaming the old LVs on the target node")
7612 result = self.rpc.call_blockdev_rename(self.target_node,
7613 rename_old_to_new)
7614 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7616 # Now we rename the new LVs to the old LVs
7617 self.lu.LogInfo("Renaming the new LVs on the target node")
7618 rename_new_to_old = [(new, old.physical_id)
7619 for old, new in zip(old_lvs, new_lvs)]
7620 result = self.rpc.call_blockdev_rename(self.target_node,
7621 rename_new_to_old)
7622 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7624 for old, new in zip(old_lvs, new_lvs):
7625 new.logical_id = old.logical_id
7626 self.cfg.SetDiskID(new, self.target_node)
7628 for disk in old_lvs:
7629 disk.logical_id = ren_fn(disk, temp_suffix)
7630 self.cfg.SetDiskID(disk, self.target_node)
7632 # Now that the new lvs have the old name, we can add them to the device
7633 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7634 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7635 new_lvs)
7636 msg = result.fail_msg
7637 if msg:
7638 for new_lv in new_lvs:
7639 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7640 new_lv).fail_msg
7641 if msg2:
7642 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7643 hint=("cleanup manually the unused logical"
7644 " volumes"))
7645 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7647 dev.children = new_lvs
7649 self.cfg.Update(self.instance, feedback_fn)
7652 if self.early_release:
7653 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7655 self._RemoveOldStorage(self.target_node, iv_names)
7656 # WARNING: we release both node locks here, do not do other RPCs
7657 # than WaitForSync to the primary node
7658 self._ReleaseNodeLock([self.target_node, self.other_node])
7661 # This can fail as the old devices are degraded and _WaitForSync
7662 # does a combined result over all disks, so we don't check its return value
7663 self.lu.LogStep(cstep, steps_total, "Sync devices")
7665 _WaitForSync(self.lu, self.instance)
7667 # Check all devices manually
7668 self._CheckDevices(self.instance.primary_node, iv_names)
7670 # Step: remove old storage
7671 if not self.early_release:
7672 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7674 self._RemoveOldStorage(self.target_node, iv_names)
7676 def _ExecDrbd8Secondary(self, feedback_fn):
7677 """Replace the secondary node for DRBD 8.
7679 The algorithm for replace is quite complicated:
7680 - for all disks of the instance:
7681 - create new LVs on the new node with same names
7682 - shutdown the drbd device on the old secondary
7683 - disconnect the drbd network on the primary
7684 - create the drbd device on the new secondary
7685 - network attach the drbd on the primary, using an artifice:
7686 the drbd code for Attach() will connect to the network if it
7687 finds a device which is connected to the good local disks but
7688 not network enabled
7689 - wait for sync across all devices
7690 - remove all disks from the old secondary
7692 Failures are not very well handled.
7697 # Step: check device activation
7698 self.lu.LogStep(1, steps_total, "Check device existence")
7699 self._CheckDisksExistence([self.instance.primary_node])
7700 self._CheckVolumeGroup([self.instance.primary_node])
7702 # Step: check other node consistency
7703 self.lu.LogStep(2, steps_total, "Check peer consistency")
7704 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7706 # Step: create new storage
7707 self.lu.LogStep(3, steps_total, "Allocate new storage")
7708 for idx, dev in enumerate(self.instance.disks):
7709 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7710 (self.new_node, idx))
7711 # we pass force_create=True to force LVM creation
7712 for new_lv in dev.children:
7713 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7714 _GetInstanceInfoText(self.instance), False)
7716 # Step 4: drbd minors and drbd setup changes
7717 # after this, we must manually remove the drbd minors on both the
7718 # error and the success paths
7719 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7720 minors = self.cfg.AllocateDRBDMinor([self.new_node
7721 for dev in self.instance.disks],
7722 self.instance.name)
7723 logging.debug("Allocated minors %r", minors)
7726 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7727 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7728 (self.new_node, idx))
7729 # create new devices on new_node; note that we create two IDs:
7730 # one without port, so the drbd will be activated without
7731 # networking information on the new node at this stage, and one
7732 # with network, for the latter activation in step 4
7733 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7734 if self.instance.primary_node == o_node1:
7735 p_minor = o_minor1
7736 else:
7737 assert self.instance.primary_node == o_node2, "Three-node instance?"
7738 p_minor = o_minor2
7740 new_alone_id = (self.instance.primary_node, self.new_node, None,
7741 p_minor, new_minor, o_secret)
7742 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7743 p_minor, new_minor, o_secret)
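# Sketch of the two IDs built above (example values are assumptions): a DRBD8
# logical_id is (node_A, node_B, port, minor_A, minor_B, secret).  new_alone_id
# carries port=None so the device can be brought up on the new node without any
# networking, while new_net_id keeps the original port for the later re-attach:
#   new_alone_id = ("node1", "node3", None, 0, 11, "secret")
#   new_net_id   = ("node1", "node3", 11000, 0, 11, "secret")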
7745 iv_names[idx] = (dev, dev.children, new_net_id)
7746 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7747 new_net_id)
7748 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7749 logical_id=new_alone_id,
7750 children=dev.children,
7751 size=dev.size)
7752 try:
7753 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7754 _GetInstanceInfoText(self.instance), False)
7755 except errors.GenericError:
7756 self.cfg.ReleaseDRBDMinors(self.instance.name)
7757 raise
7759 # We have new devices, shutdown the drbd on the old secondary
7760 for idx, dev in enumerate(self.instance.disks):
7761 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7762 self.cfg.SetDiskID(dev, self.target_node)
7763 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7764 if msg:
7765 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7766 " node: %s" % (idx, msg),
7767 hint=("Please cleanup this device manually as"
7768 " soon as possible"))
7770 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7771 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7772 self.node_secondary_ip,
7773 self.instance.disks)\
7774 [self.instance.primary_node]
7776 msg = result.fail_msg
7778 # detaches didn't succeed (unlikely)
7779 self.cfg.ReleaseDRBDMinors(self.instance.name)
7780 raise errors.OpExecError("Can't detach the disks from the network on"
7781 " old node: %s" % (msg,))
7783 # if we managed to detach at least one, we update all the disks of
7784 # the instance to point to the new secondary
7785 self.lu.LogInfo("Updating instance configuration")
7786 for dev, _, new_logical_id in iv_names.itervalues():
7787 dev.logical_id = new_logical_id
7788 self.cfg.SetDiskID(dev, self.instance.primary_node)
7790 self.cfg.Update(self.instance, feedback_fn)
7792 # and now perform the drbd attach
7793 self.lu.LogInfo("Attaching primary drbds to new secondary"
7794 " (standalone => connected)")
7795 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7797 self.node_secondary_ip,
7798 self.instance.disks,
7801 for to_node, to_result in result.items():
7802 msg = to_result.fail_msg
7804 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7806 hint=("please do a gnt-instance info to see the"
7807 " status of disks"))
7809 if self.early_release:
7810 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7812 self._RemoveOldStorage(self.target_node, iv_names)
7813 # WARNING: we release all node locks here, do not do other RPCs
7814 # than WaitForSync to the primary node
7815 self._ReleaseNodeLock([self.instance.primary_node,
7820 # This can fail as the old devices are degraded and _WaitForSync
7821 # does a combined result over all disks, so we don't check its return value
7822 self.lu.LogStep(cstep, steps_total, "Sync devices")
7824 _WaitForSync(self.lu, self.instance)
7826 # Check all devices manually
7827 self._CheckDevices(self.instance.primary_node, iv_names)
7829 # Step: remove old storage
7830 if not self.early_release:
7831 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7832 self._RemoveOldStorage(self.target_node, iv_names)
7835 class LURepairNodeStorage(NoHooksLU):
7836 """Repairs the volume group on a node.
7839 _OP_REQP = ["node_name"]
7842 def CheckArguments(self):
7843 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7845 _CheckStorageType(self.op.storage_type)
7847 def ExpandNames(self):
7848 self.needed_locks = {
7849 locking.LEVEL_NODE: [self.op.node_name],
7852 def _CheckFaultyDisks(self, instance, node_name):
7853 """Ensure faulty disks abort the opcode or at least warn."""
7855 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7857 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7858 " node '%s'" % (instance.name, node_name),
7860 except errors.OpPrereqError, err:
7861 if self.op.ignore_consistency:
7862 self.proc.LogWarning(str(err.args[0]))
7866 def CheckPrereq(self):
7867 """Check prerequisites.
7870 storage_type = self.op.storage_type
7872 if (constants.SO_FIX_CONSISTENCY not in
7873 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7874 raise errors.OpPrereqError("Storage units of type '%s' can not be"
7875 " repaired" % storage_type,
7878 # Check whether any instance on this node has faulty disks
7879 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7880 if not inst.admin_up:
7882 check_nodes = set(inst.all_nodes)
7883 check_nodes.discard(self.op.node_name)
7884 for inst_node_name in check_nodes:
7885 self._CheckFaultyDisks(inst, inst_node_name)
7887 def Exec(self, feedback_fn):
7888 feedback_fn("Repairing storage unit '%s' on %s ..." %
7889 (self.op.name, self.op.node_name))
7891 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7892 result = self.rpc.call_storage_execute(self.op.node_name,
7893 self.op.storage_type, st_args,
7895 constants.SO_FIX_CONSISTENCY)
7896 result.Raise("Failed to repair storage unit '%s' on %s" %
7897 (self.op.name, self.op.node_name))
7900 class LUNodeEvacuationStrategy(NoHooksLU):
7901 """Computes the node evacuation strategy.
7904 _OP_REQP = ["nodes"]
7907 def CheckArguments(self):
7908 if not hasattr(self.op, "remote_node"):
7909 self.op.remote_node = None
7910 if not hasattr(self.op, "iallocator"):
7911 self.op.iallocator = None
7912 if self.op.remote_node is not None and self.op.iallocator is not None:
7913 raise errors.OpPrereqError("Give either the iallocator or the new"
7914 " secondary, not both", errors.ECODE_INVAL)
7916 def ExpandNames(self):
7917 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7918 self.needed_locks = locks = {}
7919 if self.op.remote_node is None:
7920 locks[locking.LEVEL_NODE] = locking.ALL_SET
7922 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7923 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7925 def CheckPrereq(self):
7928 def Exec(self, feedback_fn):
7929 if self.op.remote_node is not None:
7931 for node in self.op.nodes:
7932 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7935 if i.primary_node == self.op.remote_node:
7936 raise errors.OpPrereqError("Node %s is the primary node of"
7937 " instance %s, cannot use it as"
7939 (self.op.remote_node, i.name),
7941 result.append([i.name, self.op.remote_node])
7943 ial = IAllocator(self.cfg, self.rpc,
7944 mode=constants.IALLOCATOR_MODE_MEVAC,
7945 evac_nodes=self.op.nodes)
7946 ial.Run(self.op.iallocator, validate=True)
7948 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
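# Added note (hedged): in both branches the LU is expected to return a list of
# [instance_name, new_secondary_node] pairs -- built directly when a remote
# node was given, or taken from the iallocator result in MEVAC mode, e.g.
#   [["instance1.example.com", "node4"], ["instance2.example.com", "node5"]]
# The instance and node names above are assumptions for illustration only.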
7954 class LUGrowDisk(LogicalUnit):
7955 """Grow a disk of an instance.
7959 HTYPE = constants.HTYPE_INSTANCE
7960 _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7963 def ExpandNames(self):
7964 self._ExpandAndLockInstance()
7965 self.needed_locks[locking.LEVEL_NODE] = []
7966 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7968 def DeclareLocks(self, level):
7969 if level == locking.LEVEL_NODE:
7970 self._LockInstancesNodes()
7972 def BuildHooksEnv(self):
7975 This runs on the master, the primary and all the secondaries.
7979 "DISK": self.op.disk,
7980 "AMOUNT": self.op.amount,
7982 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7983 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7986 def CheckPrereq(self):
7987 """Check prerequisites.
7989 This checks that the instance is in the cluster.
7992 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7993 assert instance is not None, \
7994 "Cannot retrieve locked instance %s" % self.op.instance_name
7995 nodenames = list(instance.all_nodes)
7996 for node in nodenames:
7997 _CheckNodeOnline(self, node)
8000 self.instance = instance
8002 if instance.disk_template not in constants.DTS_GROWABLE:
8003 raise errors.OpPrereqError("Instance's disk layout does not support"
8004 " growing.", errors.ECODE_INVAL)
8006 self.disk = instance.FindDisk(self.op.disk)
8008 if instance.disk_template != constants.DT_FILE:
8009 # TODO: check the free disk space for file, when that feature will be
8010 # supported
8011 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8013 def Exec(self, feedback_fn):
8014 """Execute disk grow.
8017 instance = self.instance
8018 disk = self.disk
8020 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8021 if not disks_ok:
8022 raise errors.OpExecError("Cannot activate block device to grow")
8024 for node in instance.all_nodes:
8025 self.cfg.SetDiskID(disk, node)
8026 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8027 result.Raise("Grow request failed to node %s" % node)
8029 # TODO: Rewrite code to work properly
8030 # DRBD goes into sync mode for a short amount of time after executing the
8031 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8032 # calling "resize" in sync mode fails. Sleeping for a short amount of
8033 # time is a work-around.
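# (Hedged note: the work-around referred to above is typically a short
#  time.sleep() call at this point, before recording the new size; the exact
#  delay is an assumption and is not shown here.)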
8036 disk.RecordGrow(self.op.amount)
8037 self.cfg.Update(instance, feedback_fn)
8038 if self.op.wait_for_sync:
8039 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8041 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8042 " status.\nPlease check the instance.")
8043 if not instance.admin_up:
8044 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8045 elif not instance.admin_up:
8046 self.proc.LogWarning("Not shutting down the disk even if the instance is"
8047 " not supposed to be running because no wait for"
8048 " sync mode was requested.")
8051 class LUQueryInstanceData(NoHooksLU):
8052 """Query runtime instance data.
8055 _OP_REQP = ["instances", "static"]
8058 def ExpandNames(self):
8059 self.needed_locks = {}
8060 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8062 if not isinstance(self.op.instances, list):
8063 raise errors.OpPrereqError("Invalid argument type 'instances'",
8066 if self.op.instances:
8067 self.wanted_names = []
8068 for name in self.op.instances:
8069 full_name = _ExpandInstanceName(self.cfg, name)
8070 self.wanted_names.append(full_name)
8071 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8073 self.wanted_names = None
8074 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8076 self.needed_locks[locking.LEVEL_NODE] = []
8077 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8079 def DeclareLocks(self, level):
8080 if level == locking.LEVEL_NODE:
8081 self._LockInstancesNodes()
8083 def CheckPrereq(self):
8084 """Check prerequisites.
8086 This only checks the optional instance list against the existing names.
8089 if self.wanted_names is None:
8090 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8092 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8093 in self.wanted_names]
8096 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8097 """Returns the status of a block device
8100 if self.op.static or not node:
8103 self.cfg.SetDiskID(dev, node)
8105 result = self.rpc.call_blockdev_find(node, dev)
8109 result.Raise("Can't compute disk status for %s" % instance_name)
8111 status = result.payload
8115 return (status.dev_path, status.major, status.minor,
8116 status.sync_percent, status.estimated_time,
8117 status.is_degraded, status.ldisk_status)
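# Illustrative return value (example numbers are assumptions): for a healthy,
# fully synced device this is roughly
#   ("/dev/drbd0", 147, 0, None, None, False, constants.LDS_OKAY)
# i.e. (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
# ldisk_status); callers below store it as the per-disk "pstatus"/"sstatus".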
8119 def _ComputeDiskStatus(self, instance, snode, dev):
8120 """Compute block device status.
8123 if dev.dev_type in constants.LDS_DRBD:
8124 # we change the snode then (otherwise we use the one passed in)
8125 if dev.logical_id[0] == instance.primary_node:
8126 snode = dev.logical_id[1]
8128 snode = dev.logical_id[0]
8130 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8132 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8135 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8136 for child in dev.children]
8141 "iv_name": dev.iv_name,
8142 "dev_type": dev.dev_type,
8143 "logical_id": dev.logical_id,
8144 "physical_id": dev.physical_id,
8145 "pstatus": dev_pstatus,
8146 "sstatus": dev_sstatus,
8147 "children": dev_children,
8154 def Exec(self, feedback_fn):
8155 """Gather and return data"""
8158 cluster = self.cfg.GetClusterInfo()
8160 for instance in self.wanted_instances:
8161 if not self.op.static:
8162 remote_info = self.rpc.call_instance_info(instance.primary_node,
8164 instance.hypervisor)
8165 remote_info.Raise("Error checking node %s" % instance.primary_node)
8166 remote_info = remote_info.payload
8167 if remote_info and "state" in remote_info:
8170 remote_state = "down"
8173 if instance.admin_up:
8176 config_state = "down"
8178 disks = [self._ComputeDiskStatus(instance, None, device)
8179 for device in instance.disks]
8182 "name": instance.name,
8183 "config_state": config_state,
8184 "run_state": remote_state,
8185 "pnode": instance.primary_node,
8186 "snodes": instance.secondary_nodes,
8188 # this happens to be the same format used for hooks
8189 "nics": _NICListToTuple(self, instance.nics),
8190 "disk_template": instance.disk_template,
8192 "hypervisor": instance.hypervisor,
8193 "network_port": instance.network_port,
8194 "hv_instance": instance.hvparams,
8195 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8196 "be_instance": instance.beparams,
8197 "be_actual": cluster.FillBE(instance),
8198 "serial_no": instance.serial_no,
8199 "mtime": instance.mtime,
8200 "ctime": instance.ctime,
8201 "uuid": instance.uuid,
8204 result[instance.name] = idict
8209 class LUSetInstanceParams(LogicalUnit):
8210 """Modifies an instances's parameters.
8213 HPATH = "instance-modify"
8214 HTYPE = constants.HTYPE_INSTANCE
8215 _OP_REQP = ["instance_name"]
8218 def CheckArguments(self):
8219 if not hasattr(self.op, 'nics'):
8220 self.op.nics = []
8221 if not hasattr(self.op, 'disks'):
8222 self.op.disks = []
8223 if not hasattr(self.op, 'beparams'):
8224 self.op.beparams = {}
8225 if not hasattr(self.op, 'hvparams'):
8226 self.op.hvparams = {}
8227 if not hasattr(self.op, "disk_template"):
8228 self.op.disk_template = None
8229 if not hasattr(self.op, "remote_node"):
8230 self.op.remote_node = None
8231 if not hasattr(self.op, "os_name"):
8232 self.op.os_name = None
8233 if not hasattr(self.op, "force_variant"):
8234 self.op.force_variant = False
8235 self.op.force = getattr(self.op, "force", False)
8236 if not (self.op.nics or self.op.disks or self.op.disk_template or
8237 self.op.hvparams or self.op.beparams or self.op.os_name):
8238 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8240 if self.op.hvparams:
8241 _CheckGlobalHvParams(self.op.hvparams)
8245 for disk_op, disk_dict in self.op.disks:
8246 if disk_op == constants.DDM_REMOVE:
8249 elif disk_op == constants.DDM_ADD:
8252 if not isinstance(disk_op, int):
8253 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8254 if not isinstance(disk_dict, dict):
8255 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8256 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8258 if disk_op == constants.DDM_ADD:
8259 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8260 if mode not in constants.DISK_ACCESS_SET:
8261 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8263 size = disk_dict.get('size', None)
8265 raise errors.OpPrereqError("Required disk parameter size missing",
8269 except (TypeError, ValueError), err:
8270 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8271 str(err), errors.ECODE_INVAL)
8272 disk_dict['size'] = size
8274 # modification of disk
8275 if 'size' in disk_dict:
8276 raise errors.OpPrereqError("Disk size change not possible, use"
8277 " grow-disk", errors.ECODE_INVAL)
8279 if disk_addremove > 1:
8280 raise errors.OpPrereqError("Only one disk add or remove operation"
8281 " supported at a time", errors.ECODE_INVAL)
8283 if self.op.disks and self.op.disk_template is not None:
8284 raise errors.OpPrereqError("Disk template conversion and other disk"
8285 " changes not supported at the same time",
8288 if self.op.disk_template:
8289 _CheckDiskTemplate(self.op.disk_template)
8290 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8291 self.op.remote_node is None):
8292 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8293 " one requires specifying a secondary node",
8298 for nic_op, nic_dict in self.op.nics:
8299 if nic_op == constants.DDM_REMOVE:
8302 elif nic_op == constants.DDM_ADD:
8305 if not isinstance(nic_op, int):
8306 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8307 if not isinstance(nic_dict, dict):
8308 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8309 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8311 # nic_dict should be a dict
8312 nic_ip = nic_dict.get('ip', None)
8313 if nic_ip is not None:
8314 if nic_ip.lower() == constants.VALUE_NONE:
8315 nic_dict['ip'] = None
8317 if not utils.IsValidIP(nic_ip):
8318 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8321 nic_bridge = nic_dict.get('bridge', None)
8322 nic_link = nic_dict.get('link', None)
8323 if nic_bridge and nic_link:
8324 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8325 " at the same time", errors.ECODE_INVAL)
8326 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8327 nic_dict['bridge'] = None
8328 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8329 nic_dict['link'] = None
8331 if nic_op == constants.DDM_ADD:
8332 nic_mac = nic_dict.get('mac', None)
8334 nic_dict['mac'] = constants.VALUE_AUTO
8336 if 'mac' in nic_dict:
8337 nic_mac = nic_dict['mac']
8338 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8339 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8341 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8342 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8343 " modifying an existing nic",
8346 if nic_addremove > 1:
8347 raise errors.OpPrereqError("Only one NIC add or remove operation"
8348 " supported at a time", errors.ECODE_INVAL)
8350 def ExpandNames(self):
8351 self._ExpandAndLockInstance()
8352 self.needed_locks[locking.LEVEL_NODE] = []
8353 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8355 def DeclareLocks(self, level):
8356 if level == locking.LEVEL_NODE:
8357 self._LockInstancesNodes()
8358 if self.op.disk_template and self.op.remote_node:
8359 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8360 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8362 def BuildHooksEnv(self):
8365 This runs on the master, primary and secondaries.
8369 if constants.BE_MEMORY in self.be_new:
8370 args['memory'] = self.be_new[constants.BE_MEMORY]
8371 if constants.BE_VCPUS in self.be_new:
8372 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8373 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8374 # information at all.
8377 nic_override = dict(self.op.nics)
8378 c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
8379 for idx, nic in enumerate(self.instance.nics):
8380 if idx in nic_override:
8381 this_nic_override = nic_override[idx]
8383 this_nic_override = {}
8384 if 'ip' in this_nic_override:
8385 ip = this_nic_override['ip']
8388 if 'mac' in this_nic_override:
8389 mac = this_nic_override['mac']
8392 if idx in self.nic_pnew:
8393 nicparams = self.nic_pnew[idx]
8395 nicparams = objects.FillDict(c_nicparams, nic.nicparams)
8396 mode = nicparams[constants.NIC_MODE]
8397 link = nicparams[constants.NIC_LINK]
8398 args['nics'].append((ip, mac, mode, link))
8399 if constants.DDM_ADD in nic_override:
8400 ip = nic_override[constants.DDM_ADD].get('ip', None)
8401 mac = nic_override[constants.DDM_ADD]['mac']
8402 nicparams = self.nic_pnew[constants.DDM_ADD]
8403 mode = nicparams[constants.NIC_MODE]
8404 link = nicparams[constants.NIC_LINK]
8405 args['nics'].append((ip, mac, mode, link))
8406 elif constants.DDM_REMOVE in nic_override:
8407 del args['nics'][-1]
8409 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8410 if self.op.disk_template:
8411 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8412 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8416 def _GetUpdatedParams(old_params, update_dict,
8417 default_values, parameter_types):
8418 """Return the new params dict for the given params.
8420 @type old_params: dict
8421 @param old_params: old parameters
8422 @type update_dict: dict
8423 @param update_dict: dict containing new parameter values,
8424 or constants.VALUE_DEFAULT to reset the
8425 parameter to its default value
8426 @type default_values: dict
8427 @param default_values: default values for the filled parameters
8428 @type parameter_types: dict
8429 @param parameter_types: dict mapping target dict keys to types
8430 in constants.ENFORCEABLE_TYPES
8431 @rtype: (dict, dict)
8432 @return: (new_parameters, filled_parameters)
8435 params_copy = copy.deepcopy(old_params)
8436 for key, val in update_dict.iteritems():
8437 if val == constants.VALUE_DEFAULT:
8439 del params_copy[key]
8443 params_copy[key] = val
8444 utils.ForceDictType(params_copy, parameter_types)
8445 params_filled = objects.FillDict(default_values, params_copy)
8446 return (params_copy, params_filled)
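# Hedged usage sketch (the parameter names and defaults are assumptions, not
# taken from the source):
#   old    = {"memory": 512, "vcpus": 2}
#   update = {"memory": constants.VALUE_DEFAULT, "vcpus": 4}
#   new, filled = _GetUpdatedParams(old, update, {"memory": 128, "vcpus": 1},
#                                   parameter_types)
# would leave new == {"vcpus": 4} (the "default" marker deletes the override)
# and filled == {"memory": 128, "vcpus": 4} after merging the defaults back in.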
8448 def CheckPrereq(self):
8449 """Check prerequisites.
8451 This only checks the instance list against the existing names.
8454 self.force = self.op.force
8456 # checking the new params on the primary/secondary nodes
8458 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8459 cluster = self.cluster = self.cfg.GetClusterInfo()
8460 assert self.instance is not None, \
8461 "Cannot retrieve locked instance %s" % self.op.instance_name
8462 pnode = instance.primary_node
8463 nodelist = list(instance.all_nodes)
8465 if self.op.disk_template:
8466 if instance.disk_template == self.op.disk_template:
8467 raise errors.OpPrereqError("Instance already has disk template %s" %
8468 instance.disk_template, errors.ECODE_INVAL)
8470 if (instance.disk_template,
8471 self.op.disk_template) not in self._DISK_CONVERSIONS:
8472 raise errors.OpPrereqError("Unsupported disk template conversion from"
8473 " %s to %s" % (instance.disk_template,
8474 self.op.disk_template),
8476 if self.op.disk_template in constants.DTS_NET_MIRROR:
8477 _CheckNodeOnline(self, self.op.remote_node)
8478 _CheckNodeNotDrained(self, self.op.remote_node)
8479 disks = [{"size": d.size} for d in instance.disks]
8480 required = _ComputeDiskSize(self.op.disk_template, disks)
8481 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8482 _CheckInstanceDown(self, instance, "cannot change disk template")
8484 # hvparams processing
8485 if self.op.hvparams:
8486 i_hvdict, hv_new = self._GetUpdatedParams(
8487 instance.hvparams, self.op.hvparams,
8488 cluster.hvparams[instance.hypervisor],
8489 constants.HVS_PARAMETER_TYPES)
8491 hypervisor.GetHypervisor(
8492 instance.hypervisor).CheckParameterSyntax(hv_new)
8493 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8494 self.hv_new = hv_new # the new actual values
8495 self.hv_inst = i_hvdict # the new dict (without defaults)
8497 self.hv_new = self.hv_inst = {}
8499 # beparams processing
8500 if self.op.beparams:
8501 i_bedict, be_new = self._GetUpdatedParams(
8502 instance.beparams, self.op.beparams,
8503 cluster.beparams[constants.PP_DEFAULT],
8504 constants.BES_PARAMETER_TYPES)
8505 self.be_new = be_new # the new actual values
8506 self.be_inst = i_bedict # the new dict (without defaults)
8508 self.be_new = self.be_inst = {}
8512 if constants.BE_MEMORY in self.op.beparams and not self.force:
8513 mem_check_list = [pnode]
8514 if be_new[constants.BE_AUTO_BALANCE]:
8515 # either we changed auto_balance to yes or it was from before
8516 mem_check_list.extend(instance.secondary_nodes)
8517 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8518 instance.hypervisor)
8519 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8520 instance.hypervisor)
8521 pninfo = nodeinfo[pnode]
8522 msg = pninfo.fail_msg
8524 # Assume the primary node is unreachable and go ahead
8525 self.warn.append("Can't get info from primary node %s: %s" %
8527 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8528 self.warn.append("Node data from primary node %s doesn't contain"
8529 " free memory information" % pnode)
8530 elif instance_info.fail_msg:
8531 self.warn.append("Can't get instance runtime information: %s" %
8532 instance_info.fail_msg)
8534 if instance_info.payload:
8535 current_mem = int(instance_info.payload['memory'])
8537 # Assume instance not running
8538 # (there is a slight race condition here, but it's not very probable,
8539 # and we have no other way to check)
8541 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8542 pninfo.payload['memory_free'])
8544 raise errors.OpPrereqError("This change will prevent the instance"
8545 " from starting, due to %d MB of memory"
8546 " missing on its primary node" % miss_mem,
8549 if be_new[constants.BE_AUTO_BALANCE]:
8550 for node, nres in nodeinfo.items():
8551 if node not in instance.secondary_nodes:
8555 self.warn.append("Can't get info from secondary node %s: %s" %
8557 elif not isinstance(nres.payload.get('memory_free', None), int):
8558 self.warn.append("Secondary node %s didn't return free"
8559 " memory information" % node)
8560 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8561 self.warn.append("Not enough memory to failover instance to"
8562 " secondary node %s" % node)
8567 for nic_op, nic_dict in self.op.nics:
8568 if nic_op == constants.DDM_REMOVE:
8569 if not instance.nics:
8570 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8573 if nic_op != constants.DDM_ADD:
8575 if not instance.nics:
8576 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8577 " no NICs" % nic_op,
8579 if nic_op < 0 or nic_op >= len(instance.nics):
8580 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8582 (nic_op, len(instance.nics) - 1),
8584 old_nic_params = instance.nics[nic_op].nicparams
8585 old_nic_ip = instance.nics[nic_op].ip
8590 update_params_dict = dict([(key, nic_dict[key])
8591 for key in constants.NICS_PARAMETERS
8592 if key in nic_dict])
8594 if 'bridge' in nic_dict:
8595 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8597 new_nic_params, new_filled_nic_params = \
8598 self._GetUpdatedParams(old_nic_params, update_params_dict,
8599 cluster.nicparams[constants.PP_DEFAULT],
8600 constants.NICS_PARAMETER_TYPES)
8601 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8602 self.nic_pinst[nic_op] = new_nic_params
8603 self.nic_pnew[nic_op] = new_filled_nic_params
8604 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8606 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8607 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8608 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8610 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8612 self.warn.append(msg)
8614 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8615 if new_nic_mode == constants.NIC_MODE_ROUTED:
8616 if 'ip' in nic_dict:
8617 nic_ip = nic_dict['ip']
8621 raise errors.OpPrereqError('Cannot set the nic ip to None'
8622 ' on a routed nic', errors.ECODE_INVAL)
8623 if 'mac' in nic_dict:
8624 nic_mac = nic_dict['mac']
8626 raise errors.OpPrereqError('Cannot set the nic mac to None',
8628 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8629 # otherwise generate the mac
8630 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8632 # or validate/reserve the current one
8634 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8635 except errors.ReservationError:
8636 raise errors.OpPrereqError("MAC address %s already in use"
8637 " in cluster" % nic_mac,
8638 errors.ECODE_NOTUNIQUE)
8641 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8642 raise errors.OpPrereqError("Disk operations not supported for"
8643 " diskless instances",
8645 for disk_op, _ in self.op.disks:
8646 if disk_op == constants.DDM_REMOVE:
8647 if len(instance.disks) == 1:
8648 raise errors.OpPrereqError("Cannot remove the last disk of"
8649 " an instance", errors.ECODE_INVAL)
8650 _CheckInstanceDown(self, instance, "cannot remove disks")
8652 if (disk_op == constants.DDM_ADD and
8653 len(instance.disks) >= constants.MAX_DISKS):
8654 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8655 " add more" % constants.MAX_DISKS,
8657 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8659 if disk_op < 0 or disk_op >= len(instance.disks):
8660 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8662 (disk_op, len(instance.disks)),
8666 if self.op.os_name and not self.op.force:
8667 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8668 self.op.force_variant)
8672 def _ConvertPlainToDrbd(self, feedback_fn):
8673 """Converts an instance from plain to drbd.
8676 feedback_fn("Converting template to drbd")
8677 instance = self.instance
8678 pnode = instance.primary_node
8679 snode = self.op.remote_node
8681 # create a fake disk info for _GenerateDiskTemplate
8682 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8683 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8684 instance.name, pnode, [snode],
8685 disk_info, None, None, 0)
8686 info = _GetInstanceInfoText(instance)
8687 feedback_fn("Creating additional volumes...")
8688 # first, create the missing data and meta devices
8689 for disk in new_disks:
8690 # unfortunately this is... not too nice
8691 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8693 for child in disk.children:
8694 _CreateSingleBlockDev(self, snode, instance, child, info, True)
8695 # at this stage, all new LVs have been created, we can rename the
8696 # old ones
8697 feedback_fn("Renaming original volumes...")
8698 rename_list = [(o, n.children[0].logical_id)
8699 for (o, n) in zip(instance.disks, new_disks)]
8700 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8701 result.Raise("Failed to rename original LVs")
8703 feedback_fn("Initializing DRBD devices...")
8704 # all child devices are in place, we can now create the DRBD devices
8705 for disk in new_disks:
8706 for node in [pnode, snode]:
8707 f_create = node == pnode
8708 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8710 # at this point, the instance has been modified
8711 instance.disk_template = constants.DT_DRBD8
8712 instance.disks = new_disks
8713 self.cfg.Update(instance, feedback_fn)
8715 # disks are created, waiting for sync
8716 disk_abort = not _WaitForSync(self, instance)
8718 raise errors.OpExecError("There are some degraded disks for"
8719 " this instance, please cleanup manually")
8721 def _ConvertDrbdToPlain(self, feedback_fn):
8722 """Converts an instance from drbd to plain.
8725 instance = self.instance
8726 assert len(instance.secondary_nodes) == 1
8727 pnode = instance.primary_node
8728 snode = instance.secondary_nodes[0]
8729 feedback_fn("Converting template to plain")
8731 old_disks = instance.disks
8732 new_disks = [d.children[0] for d in old_disks]
8734 # copy over size and mode
8735 for parent, child in zip(old_disks, new_disks):
8736 child.size = parent.size
8737 child.mode = parent.mode
8739 # update instance structure
8740 instance.disks = new_disks
8741 instance.disk_template = constants.DT_PLAIN
8742 self.cfg.Update(instance, feedback_fn)
8744 feedback_fn("Removing volumes on the secondary node...")
8745 for disk in old_disks:
8746 self.cfg.SetDiskID(disk, snode)
8747 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8749 self.LogWarning("Could not remove block device %s on node %s,"
8750 " continuing anyway: %s", disk.iv_name, snode, msg)
8752 feedback_fn("Removing unneeded volumes on the primary node...")
8753 for idx, disk in enumerate(old_disks):
8754 meta = disk.children[1]
8755 self.cfg.SetDiskID(meta, pnode)
8756 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8758 self.LogWarning("Could not remove metadata for disk %d on node %s,"
8759 " continuing anyway: %s", idx, pnode, msg)
8762 def Exec(self, feedback_fn):
8763 """Modifies an instance.
8765 All parameters take effect only at the next restart of the instance.
8768 # Process here the warnings from CheckPrereq, as we don't have a
8769 # feedback_fn there.
8770 for warn in self.warn:
8771 feedback_fn("WARNING: %s" % warn)
8774 instance = self.instance
8776 for disk_op, disk_dict in self.op.disks:
8777 if disk_op == constants.DDM_REMOVE:
8778 # remove the last disk
8779 device = instance.disks.pop()
8780 device_idx = len(instance.disks)
8781 for node, disk in device.ComputeNodeTree(instance.primary_node):
8782 self.cfg.SetDiskID(disk, node)
8783 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8785 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8786 " continuing anyway", device_idx, node, msg)
8787 result.append(("disk/%d" % device_idx, "remove"))
8788 elif disk_op == constants.DDM_ADD:
8790 if instance.disk_template == constants.DT_FILE:
8791 file_driver, file_path = instance.disks[0].logical_id
8792 file_path = os.path.dirname(file_path)
8794 file_driver = file_path = None
8795 disk_idx_base = len(instance.disks)
8796 new_disk = _GenerateDiskTemplate(self,
8797 instance.disk_template,
8798 instance.name, instance.primary_node,
8799 instance.secondary_nodes,
8804 instance.disks.append(new_disk)
8805 info = _GetInstanceInfoText(instance)
8807 logging.info("Creating volume %s for instance %s",
8808 new_disk.iv_name, instance.name)
8809 # Note: this needs to be kept in sync with _CreateDisks
8811 for node in instance.all_nodes:
8812 f_create = node == instance.primary_node
8814 _CreateBlockDev(self, node, instance, new_disk,
8815 f_create, info, f_create)
8816 except errors.OpExecError, err:
8817 self.LogWarning("Failed to create volume %s (%s) on"
8819 new_disk.iv_name, new_disk, node, err)
8820 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8821 (new_disk.size, new_disk.mode)))
8823 # change a given disk
8824 instance.disks[disk_op].mode = disk_dict['mode']
8825 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8827 if self.op.disk_template:
8828 r_shut = _ShutdownInstanceDisks(self, instance)
8830 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
8831 " proceed with disk template conversion")
8832 mode = (instance.disk_template, self.op.disk_template)
8834 self._DISK_CONVERSIONS[mode](self, feedback_fn)
8836 self.cfg.ReleaseDRBDMinors(instance.name)
8838 result.append(("disk_template", self.op.disk_template))
8841 for nic_op, nic_dict in self.op.nics:
8842 if nic_op == constants.DDM_REMOVE:
8843 # remove the last nic
8844 del instance.nics[-1]
8845 result.append(("nic.%d" % len(instance.nics), "remove"))
8846 elif nic_op == constants.DDM_ADD:
8847 # mac and bridge should be set, by now
8848 mac = nic_dict['mac']
8849 ip = nic_dict.get('ip', None)
8850 nicparams = self.nic_pinst[constants.DDM_ADD]
8851 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8852 instance.nics.append(new_nic)
8853 result.append(("nic.%d" % (len(instance.nics) - 1),
8854 "add:mac=%s,ip=%s,mode=%s,link=%s" %
8855 (new_nic.mac, new_nic.ip,
8856 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8857 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8860 for key in 'mac', 'ip':
8862 setattr(instance.nics[nic_op], key, nic_dict[key])
8863 if nic_op in self.nic_pinst:
8864 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8865 for key, val in nic_dict.iteritems():
8866 result.append(("nic.%s/%d" % (key, nic_op), val))
8869 if self.op.hvparams:
8870 instance.hvparams = self.hv_inst
8871 for key, val in self.op.hvparams.iteritems():
8872 result.append(("hv/%s" % key, val))
8875 if self.op.beparams:
8876 instance.beparams = self.be_inst
8877 for key, val in self.op.beparams.iteritems():
8878 result.append(("be/%s" % key, val))
8882 instance.os = self.op.os_name
8884 self.cfg.Update(instance, feedback_fn)
8888 _DISK_CONVERSIONS = {
8889 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8890 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8894 class LUQueryExports(NoHooksLU):
8895 """Query the exports list
8898 _OP_REQP = ['nodes']
8901 def ExpandNames(self):
8902 self.needed_locks = {}
8903 self.share_locks[locking.LEVEL_NODE] = 1
8904 if not self.op.nodes:
8905 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8907 self.needed_locks[locking.LEVEL_NODE] = \
8908 _GetWantedNodes(self, self.op.nodes)
8910 def CheckPrereq(self):
8911 """Check prerequisites.
8914 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8916 def Exec(self, feedback_fn):
8917 """Compute the list of all the exported system images.
8920 @return: a dictionary with the structure node->(export-list)
8921 where export-list is a list of the instances exported on
8925 rpcresult = self.rpc.call_export_list(self.nodes)
8927 for node in rpcresult:
8928 if rpcresult[node].fail_msg:
8929 result[node] = False
8931 result[node] = rpcresult[node].payload
8936 class LUPrepareExport(NoHooksLU):
8937 """Prepares an instance for an export and returns useful information.
8940 _OP_REQP = ["instance_name", "mode"]
8943 def CheckArguments(self):
8944 """Check the arguments.
8947 if self.op.mode not in constants.EXPORT_MODES:
8948 raise errors.OpPrereqError("Invalid export mode %r" % self.op.mode,
8951 def ExpandNames(self):
8952 self._ExpandAndLockInstance()
8954 def CheckPrereq(self):
8955 """Check prerequisites.
8958 instance_name = self.op.instance_name
8960 self.instance = self.cfg.GetInstanceInfo(instance_name)
8961 assert self.instance is not None, \
8962 "Cannot retrieve locked instance %s" % self.op.instance_name
8963 _CheckNodeOnline(self, self.instance.primary_node)
8965 self._cds = _GetClusterDomainSecret()
8967 def Exec(self, feedback_fn):
8968 """Prepares an instance for an export.
8971 instance = self.instance
8973 if self.op.mode == constants.EXPORT_MODE_REMOTE:
8974 salt = utils.GenerateSecret(8)
8976 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
8977 result = self.rpc.call_x509_cert_create(instance.primary_node,
8978 constants.RIE_CERT_VALIDITY)
8979 result.Raise("Can't create X509 key and certificate on %s" % result.node)
8981 (name, cert_pem) = result.payload
8983 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
8987 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
8988 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
8990 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
8996 class LUExportInstance(LogicalUnit):
8997 """Export an instance to an image in the cluster.
9000 HPATH = "instance-export"
9001 HTYPE = constants.HTYPE_INSTANCE
9002 _OP_REQP = ["instance_name", "target_node", "shutdown"]
9005 def CheckArguments(self):
9006 """Check the arguments.
9009 _CheckBooleanOpField(self.op, "remove_instance")
9010 _CheckBooleanOpField(self.op, "ignore_remove_failures")
9012 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
9013 constants.DEFAULT_SHUTDOWN_TIMEOUT)
9014 self.remove_instance = getattr(self.op, "remove_instance", False)
9015 self.ignore_remove_failures = getattr(self.op, "ignore_remove_failures",
9017 self.export_mode = getattr(self.op, "mode", constants.EXPORT_MODE_LOCAL)
9018 self.x509_key_name = getattr(self.op, "x509_key_name", None)
9019 self.dest_x509_ca_pem = getattr(self.op, "destination_x509_ca", None)
9021 if self.remove_instance and not self.op.shutdown:
9022 raise errors.OpPrereqError("Can not remove instance without shutting it"
9025 if self.export_mode not in constants.EXPORT_MODES:
9026 raise errors.OpPrereqError("Invalid export mode %r" % self.export_mode,
9029 if self.export_mode == constants.EXPORT_MODE_REMOTE:
9030 if not self.x509_key_name:
9031 raise errors.OpPrereqError("Missing X509 key name for encryption",
9034 if not self.dest_x509_ca_pem:
9035 raise errors.OpPrereqError("Missing destination X509 CA",
9038 def ExpandNames(self):
9039 self._ExpandAndLockInstance()
9041 # Lock all nodes for local exports
9042 if self.export_mode == constants.EXPORT_MODE_LOCAL:
9043 # FIXME: lock only instance primary and destination node
9045 # Sad but true, for now we have to lock all nodes, as we don't know where
9046 # the previous export might be, and in this LU we search for it and
9047 # remove it from its current node. In the future we could fix this by:
9048 # - making a tasklet to search (share-lock all), then create the new one,
9049 # then one to remove, after
9050 # - removing the removal operation altogether
9051 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9053 def DeclareLocks(self, level):
9054 """Last minute lock declaration."""
9055 # All nodes are locked anyway, so nothing to do here.
9057 def BuildHooksEnv(self):
9060 This will run on the master, primary node and target node.
9064 "EXPORT_MODE": self.export_mode,
9065 "EXPORT_NODE": self.op.target_node,
9066 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9067 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
9068 # TODO: Generic function for boolean env variables
9069 "REMOVE_INSTANCE": str(bool(self.remove_instance)),
9072 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9074 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9076 if self.export_mode == constants.EXPORT_MODE_LOCAL:
9077 nl.append(self.op.target_node)
9081 def CheckPrereq(self):
9082 """Check prerequisites.
9084 This checks that the instance and node names are valid.
9087 instance_name = self.op.instance_name
9089 self.instance = self.cfg.GetInstanceInfo(instance_name)
9090 assert self.instance is not None, \
9091 "Cannot retrieve locked instance %s" % self.op.instance_name
9092 _CheckNodeOnline(self, self.instance.primary_node)
9094 if self.export_mode == constants.EXPORT_MODE_LOCAL:
9095 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9096 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9097 assert self.dst_node is not None
9099 _CheckNodeOnline(self, self.dst_node.name)
9100 _CheckNodeNotDrained(self, self.dst_node.name)
9103 self.dest_disk_info = None
9104 self.dest_x509_ca = None
9106 elif self.export_mode == constants.EXPORT_MODE_REMOTE:
9107 self.dst_node = None
9109 if len(self.op.target_node) != len(self.instance.disks):
9110 raise errors.OpPrereqError(("Received destination information for %s"
9111 " disks, but instance %s has %s disks") %
9112 (len(self.op.target_node), instance_name,
9113 len(self.instance.disks)),
9116 cds = _GetClusterDomainSecret()
9118 # Check X509 key name
9120 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9121 except (TypeError, ValueError), err:
9122 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9124 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9125 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9128 # Load and verify CA
9130 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9131 except OpenSSL.crypto.Error, err:
9132 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9133 (err, ), errors.ECODE_INVAL)
9135 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9136 if errcode is not None:
9137 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" % (msg, ),
9140 self.dest_x509_ca = cert
9142 # Verify target information
9144 for idx, disk_data in enumerate(self.op.target_node):
9146 (host, port, magic) = \
9147 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9148 except errors.GenericError, err:
9149 raise errors.OpPrereqError("Target info for disk %s: %s" % (idx, err),
9152 disk_info.append((host, port, magic))
9154 assert len(disk_info) == len(self.op.target_node)
9155 self.dest_disk_info = disk_info
9158 raise errors.ProgrammerError("Unhandled export mode %r" %
9161 # instance disk type verification
9162 # TODO: Implement export support for file-based disks
9163 for disk in self.instance.disks:
9164 if disk.dev_type == constants.LD_FILE:
9165 raise errors.OpPrereqError("Export not supported for instances with"
9166 " file-based disks", errors.ECODE_INVAL)
9168 def _CleanupExports(self, feedback_fn):
9169 """Removes exports of current instance from all other nodes.
9171 If an instance in a cluster with nodes A..D was exported to node C, its
9172 exports will be removed from the nodes A, B and D.
9175 assert self.export_mode != constants.EXPORT_MODE_REMOTE
9177 nodelist = self.cfg.GetNodeList()
9178 nodelist.remove(self.dst_node.name)
9180 # on one-node clusters nodelist will be empty after the removal;
9181 # if we proceed, the backup would be removed because OpQueryExports
9182 # substitutes an empty list with the full cluster node list.
9183 iname = self.instance.name
9185 feedback_fn("Removing old exports for instance %s" % iname)
9186 exportlist = self.rpc.call_export_list(nodelist)
9187 for node in exportlist:
9188 if exportlist[node].fail_msg:
9190 if iname in exportlist[node].payload:
9191 msg = self.rpc.call_export_remove(node, iname).fail_msg
9193 self.LogWarning("Could not remove older export for instance %s"
9194 " on node %s: %s", iname, node, msg)
9196 def Exec(self, feedback_fn):
9197 """Export an instance to an image in the cluster.
9200 assert self.export_mode in constants.EXPORT_MODES
9202 instance = self.instance
9203 src_node = instance.primary_node
9205 if self.op.shutdown:
9206 # shutdown the instance, but not the disks
9207 feedback_fn("Shutting down instance %s" % instance.name)
9208 result = self.rpc.call_instance_shutdown(src_node, instance,
9209 self.shutdown_timeout)
9210 # TODO: Maybe ignore failures if ignore_remove_failures is set
9211 result.Raise("Could not shutdown instance %s on"
9212 " node %s" % (instance.name, src_node))
9214 # set the disks ID correctly since call_instance_start needs the
9215 # correct drbd minor to create the symlinks
9216 for disk in instance.disks:
9217 self.cfg.SetDiskID(disk, src_node)
9219 activate_disks = (not instance.admin_up)
9222 # Activate the instance disks if we're exporting a stopped instance
9223 feedback_fn("Activating disks for %s" % instance.name)
9224 _StartInstanceDisks(self, instance, None)
9227 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9230 helper.CreateSnapshots()
9232 if self.export_mode == constants.EXPORT_MODE_LOCAL:
9233 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9234 elif self.export_mode == constants.EXPORT_MODE_REMOTE:
9235 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9236 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9238 (key_name, _, _) = self.x509_key_name
9241 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9244 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9245 key_name, dest_ca_pem,
9250 # Check for backwards compatibility
9251 assert len(dresults) == len(instance.disks)
9252 assert compat.all(isinstance(i, bool) for i in dresults), \
9253 "Not all results are boolean: %r" % dresults
9257 feedback_fn("Deactivating disks for %s" % instance.name)
9258 _ShutdownInstanceDisks(self, instance)
9260 # Remove instance if requested
9261 if self.remove_instance:
9262 if not (compat.all(dresults) and fin_resu):
9263 feedback_fn("Not removing instance %s as parts of the export failed" %
9266 feedback_fn("Removing instance %s" % instance.name)
9267 _RemoveInstance(self, feedback_fn, instance,
9268 self.ignore_remove_failures)
9270 if self.export_mode == constants.EXPORT_MODE_LOCAL:
9271 self._CleanupExports(feedback_fn)
9273 return fin_resu, dresults
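# Added note (hedged): fin_resu is the overall success flag of the transfer and
# dresults holds one boolean per instance disk (see the assertions above), so a
# caller could interpret, e.g., (True, [True, False]) as "export finished but
# disk 1 failed"; the concrete values here are illustrative assumptions.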
9276 class LURemoveExport(NoHooksLU):
9277 """Remove exports related to the named instance.
9280 _OP_REQP = ["instance_name"]
9283 def ExpandNames(self):
9284 self.needed_locks = {}
9285 # We need all nodes to be locked in order for RemoveExport to work, but we
9286 # don't need to lock the instance itself, as nothing will happen to it (and
9287 # we can remove exports also for a removed instance)
9288 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9290 def CheckPrereq(self):
9291 """Check prerequisites.
9295 def Exec(self, feedback_fn):
9296 """Remove any export.
9299 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9300 # If the instance was not found we'll try with the name that was passed in.
9301 # This will only work if it was an FQDN, though.
9303 if not instance_name:
9305 instance_name = self.op.instance_name
9307 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9308 exportlist = self.rpc.call_export_list(locked_nodes)
9310 for node in exportlist:
9311 msg = exportlist[node].fail_msg
9313 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9315 if instance_name in exportlist[node].payload:
9317 result = self.rpc.call_export_remove(node, instance_name)
9318 msg = result.fail_msg
9320 logging.error("Could not remove export for instance %s"
9321 " on node %s: %s", instance_name, node, msg)
9323 if fqdn_warn and not found:
9324 feedback_fn("Export not found. If trying to remove an export belonging"
9325 " to a deleted instance please use its Fully Qualified"
9329 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9332 This is an abstract class which is the parent of all the other tags LUs.
9336 def ExpandNames(self):
9337 self.needed_locks = {}
9338 if self.op.kind == constants.TAG_NODE:
9339 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9340 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9341 elif self.op.kind == constants.TAG_INSTANCE:
9342 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9343 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9345 def CheckPrereq(self):
9346 """Check prerequisites.
9349 if self.op.kind == constants.TAG_CLUSTER:
9350 self.target = self.cfg.GetClusterInfo()
9351 elif self.op.kind == constants.TAG_NODE:
9352 self.target = self.cfg.GetNodeInfo(self.op.name)
9353 elif self.op.kind == constants.TAG_INSTANCE:
9354 self.target = self.cfg.GetInstanceInfo(self.op.name)
9356 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9357 str(self.op.kind), errors.ECODE_INVAL)
9360 class LUGetTags(TagsLU):
9361 """Returns the tags of a given object.
9364 _OP_REQP = ["kind", "name"]
9367 def Exec(self, feedback_fn):
9368 """Returns the tag list.
9371 return list(self.target.GetTags())
9374 class LUSearchTags(NoHooksLU):
9375 """Searches the tags for a given pattern.
9378 _OP_REQP = ["pattern"]
9381 def ExpandNames(self):
9382 self.needed_locks = {}
9384 def CheckPrereq(self):
9385 """Check prerequisites.
9387 This checks the pattern passed for validity by compiling it.
9391 self.re = re.compile(self.op.pattern)
9392 except re.error, err:
9393 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9394 (self.op.pattern, err), errors.ECODE_INVAL)
9396 def Exec(self, feedback_fn):
9397 """Returns the tag list.
9401 tgts = [("/cluster", cfg.GetClusterInfo())]
9402 ilist = cfg.GetAllInstancesInfo().values()
9403 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9404 nlist = cfg.GetAllNodesInfo().values()
9405 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9406 results = []
9407 for path, target in tgts:
9408 for tag in target.GetTags():
9409 if self.re.search(tag):
9410 results.append((path, tag))
9411 return results
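# Illustrative sketch (hypothetical names): with the pattern "^web", the list
# returned above pairs each owner's path with the matching tag, e.g.
#   [("/cluster", "webfarm"), ("/instances/inst1.example.com", "webserver")]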
9414 class LUAddTags(TagsLU):
9415 """Sets a tag on a given object.
9418 _OP_REQP = ["kind", "name", "tags"]
9421 def CheckPrereq(self):
9422 """Check prerequisites.
9424 This checks the type and length of the tag name and value.
9427 TagsLU.CheckPrereq(self)
9428 for tag in self.op.tags:
9429 objects.TaggableObject.ValidateTag(tag)
9431 def Exec(self, feedback_fn):
9432 """Sets the tag.
9434 """
9435 try:
9436 for tag in self.op.tags:
9437 self.target.AddTag(tag)
9438 except errors.TagError, err:
9439 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9440 self.cfg.Update(self.target, feedback_fn)
9443 class LUDelTags(TagsLU):
9444 """Delete a list of tags from a given object.
9447 _OP_REQP = ["kind", "name", "tags"]
9450 def CheckPrereq(self):
9451 """Check prerequisites.
9453 This checks that we have the given tag.
9456 TagsLU.CheckPrereq(self)
9457 for tag in self.op.tags:
9458 objects.TaggableObject.ValidateTag(tag)
9459 del_tags = frozenset(self.op.tags)
9460 cur_tags = self.target.GetTags()
9461 if not del_tags <= cur_tags:
9462 diff_tags = del_tags - cur_tags
9463 diff_names = ["'%s'" % tag for tag in diff_tags]
9465 raise errors.OpPrereqError("Tag(s) %s not found" %
9466 (",".join(diff_names)), errors.ECODE_NOENT)
9468 def Exec(self, feedback_fn):
9469 """Remove the tag from the object.
9472 for tag in self.op.tags:
9473 self.target.RemoveTag(tag)
9474 self.cfg.Update(self.target, feedback_fn)
9477 class LUTestDelay(NoHooksLU):
9478 """Sleep for a specified amount of time.
9480 This LU sleeps on the master and/or nodes for a specified amount of
9481 time.
9484 _OP_REQP = ["duration", "on_master", "on_nodes"]
9487 def ExpandNames(self):
9488 """Expand names and set required locks.
9490 This expands the node list, if any.
9493 self.needed_locks = {}
9494 if self.op.on_nodes:
9495 # _GetWantedNodes can be used here, but is not always appropriate to use
9496 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9497 # more information.
9498 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9499 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9501 def CheckPrereq(self):
9502 """Check prerequisites.
9506 def Exec(self, feedback_fn):
9507 """Do the actual sleep.
9510 if self.op.on_master:
9511 if not utils.TestDelay(self.op.duration):
9512 raise errors.OpExecError("Error during master delay test")
9513 if self.op.on_nodes:
9514 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9515 for node, node_result in result.items():
9516 node_result.Raise("Failure during rpc call to node %s" % node)
9519 class IAllocator(object):
9520 """IAllocator framework.
9522 An IAllocator instance has four sets of attributes:
9523 - cfg that is needed to query the cluster
9524 - input data (all members of the _KEYS class attribute are required)
9525 - four buffer attributes (in|out_data|text), that represent the
9526 input (to the external script) in text and data structure format,
9527 and the output from it, again in two formats
9528 - the result variables from the script (success, info, nodes) for
9529 easy usage
9531 """
9532 # pylint: disable-msg=R0902
9533 # lots of instance attributes
9535 "name", "mem_size", "disks", "disk_template",
9536 "os", "tags", "nics", "vcpus", "hypervisor",
9539 "name", "relocate_from",
9545 def __init__(self, cfg, rpc, mode, **kwargs):
9546 self.cfg = cfg
9547 self.rpc = rpc
9548 # init buffer variables
9549 self.in_text = self.out_text = self.in_data = self.out_data = None
9550 # init all input fields so that pylint is happy
9551 self.mode = mode
9552 self.mem_size = self.disks = self.disk_template = None
9553 self.os = self.tags = self.nics = self.vcpus = None
9554 self.hypervisor = None
9555 self.relocate_from = None
9556 self.name = None
9557 self.evac_nodes = None
9559 self.required_nodes = None
9560 # init result fields
9561 self.success = self.info = self.result = None
9562 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9563 keyset = self._ALLO_KEYS
9564 fn = self._AddNewInstance
9565 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9566 keyset = self._RELO_KEYS
9567 fn = self._AddRelocateInstance
9568 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9569 keyset = self._EVAC_KEYS
9570 fn = self._AddEvacuateNodes
9572 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9573 " IAllocator" % self.mode)
9574 for key in kwargs:
9575 if key not in keyset:
9576 raise errors.ProgrammerError("Invalid input parameter '%s' to"
9577 " IAllocator" % key)
9578 setattr(self, key, kwargs[key])
9580 for key in keyset:
9581 if key not in kwargs:
9582 raise errors.ProgrammerError("Missing input parameter '%s' to"
9583 " IAllocator" % key)
9584 self._BuildInputData(fn)
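# Illustrative sketch (hypothetical values): an allocation-mode IAllocator must
# be passed every key in _ALLO_KEYS and nothing else, e.g.
#   ial = IAllocator(cfg, rpc, mode=constants.IALLOCATOR_MODE_ALLOC,
#                    name="inst1.example.com", mem_size=512, vcpus=1,
#                    disks=[{"size": 1024, "mode": "w"}],
#                    disk_template=constants.DT_DRBD8, os="debian-image",
#                    tags=[], nics=[{"mac": "auto", "ip": None, "bridge": None}],
#                    hypervisor=constants.HT_XEN_PVM)
# A missing or unknown keyword raises ProgrammerError before any input data is
# built.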
9586 def _ComputeClusterData(self):
9587 """Compute the generic allocator input data.
9589 This is the data that is independent of the actual operation.
9592 cfg = self.cfg
9593 cluster_info = cfg.GetClusterInfo()
9596 "version": constants.IALLOCATOR_VERSION,
9597 "cluster_name": cfg.GetClusterName(),
9598 "cluster_tags": list(cluster_info.GetTags()),
9599 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9600 # we don't have job IDs
9601 }
9602 iinfo = cfg.GetAllInstancesInfo().values()
9603 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9606 node_results = {}
9607 node_list = cfg.GetNodeList()
9609 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9610 hypervisor_name = self.hypervisor
9611 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9612 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9613 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9614 hypervisor_name = cluster_info.enabled_hypervisors[0]
9616 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9617 hypervisor_name)
9618 node_iinfo = \
9619 self.rpc.call_all_instances_info(node_list,
9620 cluster_info.enabled_hypervisors)
9621 for nname, nresult in node_data.items():
9622 # first fill in static (config-based) values
9623 ninfo = cfg.GetNodeInfo(nname)
9625 "tags": list(ninfo.GetTags()),
9626 "primary_ip": ninfo.primary_ip,
9627 "secondary_ip": ninfo.secondary_ip,
9628 "offline": ninfo.offline,
9629 "drained": ninfo.drained,
9630 "master_candidate": ninfo.master_candidate,
9633 if not (ninfo.offline or ninfo.drained):
9634 nresult.Raise("Can't get data for node %s" % nname)
9635 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9636 nname)
9637 remote_info = nresult.payload
9639 for attr in ['memory_total', 'memory_free', 'memory_dom0',
9640 'vg_size', 'vg_free', 'cpu_total']:
9641 if attr not in remote_info:
9642 raise errors.OpExecError("Node '%s' didn't return attribute"
9643 " '%s'" % (nname, attr))
9644 if not isinstance(remote_info[attr], int):
9645 raise errors.OpExecError("Node '%s' returned invalid value"
9647 (nname, attr, remote_info[attr]))
9648 # compute memory used by primary instances
9649 i_p_mem = i_p_up_mem = 0
9650 for iinfo, beinfo in i_list:
9651 if iinfo.primary_node == nname:
9652 i_p_mem += beinfo[constants.BE_MEMORY]
9653 if iinfo.name not in node_iinfo[nname].payload:
9654 i_used_mem = 0
9655 else:
9656 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9657 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9658 remote_info['memory_free'] -= max(0, i_mem_diff)
9660 if iinfo.admin_up:
9661 i_p_up_mem += beinfo[constants.BE_MEMORY]
9663 # compute memory used by instances
9665 "total_memory": remote_info['memory_total'],
9666 "reserved_memory": remote_info['memory_dom0'],
9667 "free_memory": remote_info['memory_free'],
9668 "total_disk": remote_info['vg_size'],
9669 "free_disk": remote_info['vg_free'],
9670 "total_cpus": remote_info['cpu_total'],
9671 "i_pri_memory": i_p_mem,
9672 "i_pri_up_memory": i_p_up_mem,
9676 node_results[nname] = pnr
9677 data["nodes"] = node_results
9679 # instance data
9680 instance_data = {}
9681 for iinfo, beinfo in i_list:
9682 nic_data = []
9683 for nic in iinfo.nics:
9684 filled_params = objects.FillDict(
9685 cluster_info.nicparams[constants.PP_DEFAULT],
9686 nic.nicparams)
9687 nic_dict = {"mac": nic.mac,
9688 "ip": nic.ip,
9689 "mode": filled_params[constants.NIC_MODE],
9690 "link": filled_params[constants.NIC_LINK],
9692 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9693 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9694 nic_data.append(nic_dict)
9695 pir = {
9696 "tags": list(iinfo.GetTags()),
9697 "admin_up": iinfo.admin_up,
9698 "vcpus": beinfo[constants.BE_VCPUS],
9699 "memory": beinfo[constants.BE_MEMORY],
9701 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9703 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9704 "disk_template": iinfo.disk_template,
9705 "hypervisor": iinfo.hypervisor,
9707 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9709 instance_data[iinfo.name] = pir
9711 data["instances"] = instance_data
9715 def _AddNewInstance(self):
9716 """Add new instance data to allocator structure.
9718 This in combination with _ComputeClusterData will create the
9719 correct structure needed as input for the allocator.
9721 The checks for the completeness of the opcode must have already been
9722 done.
9725 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9727 if self.disk_template in constants.DTS_NET_MIRROR:
9728 self.required_nodes = 2
9729 else:
9730 self.required_nodes = 1
9733 "disk_template": self.disk_template,
9736 "vcpus": self.vcpus,
9737 "memory": self.mem_size,
9738 "disks": self.disks,
9739 "disk_space_total": disk_space,
9741 "required_nodes": self.required_nodes,
9745 def _AddRelocateInstance(self):
9746 """Add relocate instance data to allocator structure.
9748 This in combination with _ComputeClusterData will create the
9749 correct structure needed as input for the allocator.
9751 The checks for the completeness of the opcode must have already been
9752 done.
9755 instance = self.cfg.GetInstanceInfo(self.name)
9756 if instance is None:
9757 raise errors.ProgrammerError("Unknown instance '%s' passed to"
9758 " IAllocator" % self.name)
9760 if instance.disk_template not in constants.DTS_NET_MIRROR:
9761 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9764 if len(instance.secondary_nodes) != 1:
9765 raise errors.OpPrereqError("Instance has not exactly one secondary node",
9768 self.required_nodes = 1
9769 disk_sizes = [{'size': disk.size} for disk in instance.disks]
9770 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9774 "disk_space_total": disk_space,
9775 "required_nodes": self.required_nodes,
9776 "relocate_from": self.relocate_from,
9780 def _AddEvacuateNodes(self):
9781 """Add evacuate nodes data to allocator structure.
9785 "evac_nodes": self.evac_nodes
9789 def _BuildInputData(self, fn):
9790 """Build input data structures.
9793 self._ComputeClusterData()
9795 request = fn()
9796 request["type"] = self.mode
9797 self.in_data["request"] = request
9799 self.in_text = serializer.Dump(self.in_data)
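# Illustrative sketch (abridged): the serialized text handed to the external
# script is the cluster data from _ComputeClusterData with the mode-specific
# request embedded under "request", e.g.
#   {"version": ..., "cluster_name": ..., "nodes": {...}, "instances": {...},
#    "request": {"type": "allocate", "name": "inst1.example.com", ...}}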
9801 def Run(self, name, validate=True, call_fn=None):
9802 """Run an instance allocator and return the results.
9804 """
9805 if call_fn is None:
9806 call_fn = self.rpc.call_iallocator_runner
9808 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9809 result.Raise("Failure while running the iallocator script")
9811 self.out_text = result.payload
9812 if validate:
9813 self._ValidateResult()
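# Illustrative usage sketch (hypothetical allocator name): a LU would typically
# run the script and then check the parsed outcome, e.g.
#   ial.Run("hail")
#   if not ial.success:
#     raise errors.OpPrereqError("Allocator failed: %s" % ial.info,
#                                errors.ECODE_NORES)
# after which ial.result holds the node names chosen by the script.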
9815 def _ValidateResult(self):
9816 """Process the allocator results.
9818 This will process and if successful save the result in
9819 self.out_data and the other parameters.
9821 """
9822 try:
9823 rdict = serializer.Load(self.out_text)
9824 except Exception, err:
9825 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9827 if not isinstance(rdict, dict):
9828 raise errors.OpExecError("Can't parse iallocator results: not a dict")
9830 # TODO: remove backwards compatibility in later versions
9831 if "nodes" in rdict and "result" not in rdict:
9832 rdict["result"] = rdict["nodes"]
9835 for key in "success", "info", "result":
9836 if key not in rdict:
9837 raise errors.OpExecError("Can't parse iallocator results:"
9838 " missing key '%s'" % key)
9839 setattr(self, key, rdict[key])
9841 if not isinstance(rdict["result"], list):
9842 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9844 self.out_data = rdict
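# Illustrative sketch (hypothetical reply): a well-formed answer from the
# script parses into something like
#   {"success": True, "info": "allocation successful",
#    "result": ["node1.example.com", "node2.example.com"]}
# A reply missing any of "success", "info" or "result", or whose "result" is
# not a list, is rejected above.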
9847 class LUTestAllocator(NoHooksLU):
9848 """Run allocator tests.
9850 This LU runs the allocator tests
9853 _OP_REQP = ["direction", "mode", "name"]
9855 def CheckPrereq(self):
9856 """Check prerequisites.
9858 This checks the opcode parameters depending on the direction and mode test.
9861 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9862 for attr in ["name", "mem_size", "disks", "disk_template",
9863 "os", "tags", "nics", "vcpus"]:
9864 if not hasattr(self.op, attr):
9865 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
9866 attr, errors.ECODE_INVAL)
9867 iname = self.cfg.ExpandInstanceName(self.op.name)
9868 if iname is not None:
9869 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
9870 iname, errors.ECODE_EXISTS)
9871 if not isinstance(self.op.nics, list):
9872 raise errors.OpPrereqError("Invalid parameter 'nics'",
9874 for row in self.op.nics:
9875 if (not isinstance(row, dict) or
9876 "mac" not in row or
9877 "ip" not in row or
9878 "bridge" not in row):
9879 raise errors.OpPrereqError("Invalid contents of the 'nics'"
9880 " parameter", errors.ECODE_INVAL)
9881 if not isinstance(self.op.disks, list):
9882 raise errors.OpPrereqError("Invalid parameter 'disks'",
9884 for row in self.op.disks:
9885 if (not isinstance(row, dict) or
9886 "size" not in row or
9887 not isinstance(row["size"], int) or
9888 "mode" not in row or
9889 row["mode"] not in ['r', 'w']):
9890 raise errors.OpPrereqError("Invalid contents of the 'disks'"
9891 " parameter", errors.ECODE_INVAL)
9892 if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
9893 self.op.hypervisor = self.cfg.GetHypervisorType()
9894 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9895 if not hasattr(self.op, "name"):
9896 raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
9898 fname = _ExpandInstanceName(self.cfg, self.op.name)
9899 self.op.name = fname
9900 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
9901 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9902 if not hasattr(self.op, "evac_nodes"):
9903 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
9904 " opcode input", errors.ECODE_INVAL)
9906 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
9907 self.op.mode, errors.ECODE_INVAL)
9909 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
9910 if not hasattr(self.op, "allocator") or self.op.allocator is None:
9911 raise errors.OpPrereqError("Missing allocator name",
9913 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
9914 raise errors.OpPrereqError("Wrong allocator test '%s'" %
9915 self.op.direction, errors.ECODE_INVAL)
9917 def Exec(self, feedback_fn):
9918 """Run the allocator test.
9921 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9922 ial = IAllocator(self.cfg, self.rpc,
9923 mode=self.op.mode,
9924 name=self.op.name,
9925 mem_size=self.op.mem_size,
9926 disks=self.op.disks,
9927 disk_template=self.op.disk_template,
9928 os=self.op.os,
9929 tags=self.op.tags,
9930 nics=self.op.nics,
9931 vcpus=self.op.vcpus,
9932 hypervisor=self.op.hypervisor,
9933 )
9934 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9935 ial = IAllocator(self.cfg, self.rpc,
9936 mode=self.op.mode,
9937 name=self.op.name,
9938 relocate_from=list(self.relocate_from),
9939 )
9940 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9941 ial = IAllocator(self.cfg, self.rpc,
9942 mode=self.op.mode,
9943 evac_nodes=self.op.evac_nodes)
9945 raise errors.ProgrammerError("Uncatched mode %s in"
9946 " LUTestAllocator.Exec", self.op.mode)
9948 if self.op.direction == constants.IALLOCATOR_DIR_IN:
9949 result = ial.in_text
9950 else:
9951 ial.Run(self.op.allocator, validate=False)
9952 result = ial.out_text
9953 return result