4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
38 from ganeti import ssh
39 from ganeti import utils
40 from ganeti import errors
41 from ganeti import hypervisor
42 from ganeti import locking
43 from ganeti import constants
44 from ganeti import objects
45 from ganeti import serializer
46 from ganeti import ssconf
47 from ganeti import uidpool
48 from ganeti import compat
49 from ganeti import masterd
51 import ganeti.masterd.instance # pylint: disable-msg=W0611
54 class LogicalUnit(object):
55 """Logical Unit base class.
57 Subclasses must follow these rules:
58 - implement ExpandNames
59 - implement CheckPrereq (except when tasklets are used)
60 - implement Exec (except when tasklets are used)
61 - implement BuildHooksEnv
62 - redefine HPATH and HTYPE
63 - optionally redefine their run requirements:
64 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
66 Note that all commands require root permissions.
68 @ivar dry_run_result: the value (if any) that will be returned to the caller
69 in dry-run mode (signalled by opcode dry_run parameter)
77 def __init__(self, processor, op, context, rpc):
78 """Constructor for LogicalUnit.
80 This needs to be overridden in derived classes in order to check op validity.
86 self.cfg = context.cfg
87 self.context = context
89 # Dicts used to declare locking needs to mcpu
90 self.needed_locks = None
91 self.acquired_locks = {}
92 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
94 self.remove_locks = {}
95 # Used to force good behavior when calling helper functions
96 self.recalculate_locks = {}
99 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
100 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
101 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
102 # support for dry-run
103 self.dry_run_result = None
104 # support for generic debug attribute
105 if (not hasattr(self.op, "debug_level") or
106 not isinstance(self.op.debug_level, int)):
107 self.op.debug_level = 0
112 for attr_name in self._OP_REQP:
113 attr_val = getattr(op, attr_name, None)
115 raise errors.OpPrereqError("Required parameter '%s' missing" %
116 attr_name, errors.ECODE_INVAL)
118 self.CheckArguments()
121 """Returns the SshRunner object
125 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
128 ssh = property(fget=__GetSSH)
130 def CheckArguments(self):
131 """Check syntactic validity for the opcode arguments.
133 This method is for doing a simple syntactic check and ensuring
134 validity of opcode parameters, without any cluster-related
135 checks. While the same can be accomplished in ExpandNames and/or
136 CheckPrereq, doing these separately is better because:
138 - ExpandNames is left purely as a lock-related function
139 - CheckPrereq is run after we have acquired locks (and possible
142 The function is allowed to change the self.op attribute so that
143 later methods need no longer worry about missing parameters.
148 def ExpandNames(self):
149 """Expand names for this LU.
151 This method is called before starting to execute the opcode, and it should
152 update all the parameters of the opcode to their canonical form (e.g. a
153 short node name must be fully expanded after this method has successfully
154 completed). This way locking, hooks, logging, etc. can work correctly.
156 LUs which implement this method must also populate the self.needed_locks
157 member, as a dict with lock levels as keys, and a list of needed lock names
160 - use an empty dict if you don't need any lock
161 - if you don't need any lock at a particular level omit that level
162 - don't put anything for the BGL level
163 - if you want all locks at a level use locking.ALL_SET as a value
165 If you need to share locks (rather than acquire them exclusively) at one
166 level you can modify self.share_locks, setting a true value (usually 1) for
167 that level. By default locks are not shared.
169 This function can also define a list of tasklets, which then will be
170 executed in order instead of the usual LU-level CheckPrereq and Exec
171 functions, if those are not defined by the LU.
175 # Acquire all nodes and one instance
176 self.needed_locks = {
177 locking.LEVEL_NODE: locking.ALL_SET,
178 locking.LEVEL_INSTANCE: ['instance1.example.tld'],
180 # Acquire just two nodes
181 self.needed_locks = {
182 locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
185 self.needed_locks = {} # No, you can't leave it to the default value None
188 # The implementation of this method is mandatory only if the new LU is
189 # concurrent, so that old LUs don't need to be changed all at the same time.
192 self.needed_locks = {} # Exclusive LUs don't need locks.
194 raise NotImplementedError
196 def DeclareLocks(self, level):
197 """Declare LU locking needs for a level
199 While most LUs can just declare their locking needs at ExpandNames time,
200 sometimes there's the need to calculate some locks after having acquired
201 the ones before. This function is called just before acquiring locks at a
202 particular level, but after acquiring the ones at lower levels, and permits
203 such calculations. It can be used to modify self.needed_locks, and by
204 default it does nothing.
206 This function is only called if you have something already set in
207 self.needed_locks for the level.
209 @param level: Locking level which is going to be locked
210 @type level: member of ganeti.locking.LEVELS
214 def CheckPrereq(self):
215 """Check prerequisites for this LU.
217 This method should check that the prerequisites for the execution
218 of this LU are fulfilled. It can do internode communication, but
219 it should be idempotent - no cluster or system changes are allowed.
222 The method should raise errors.OpPrereqError in case something is
223 not fulfilled. Its return value is ignored.
225 This method should also update all the parameters of the opcode to
226 their canonical form if it hasn't been done by ExpandNames before.
229 if self.tasklets is not None:
230 for (idx, tl) in enumerate(self.tasklets):
231 logging.debug("Checking prerequisites for tasklet %s/%s",
232 idx + 1, len(self.tasklets))
235 raise NotImplementedError
237 def Exec(self, feedback_fn):
240 This method should implement the actual work. It should raise
241 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
245 if self.tasklets is not None:
246 for (idx, tl) in enumerate(self.tasklets):
247 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
250 raise NotImplementedError
252 def BuildHooksEnv(self):
253 """Build hooks environment for this LU.
255 This method should return a three-element tuple consisting of: a dict
256 containing the environment that will be used for running the
257 specific hook for this LU, a list of node names on which the hook
258 should run before the execution, and a list of node names on which
259 the hook should run after the execution.
261 The keys of the dict must not be prefixed with 'GANETI_', as this will
262 be handled by the hooks runner. Also note that additional keys will be
263 added by the hooks runner. If the LU doesn't define any
264 environment, an empty dict (and not None) should be returned.
266 An empty set of nodes should be returned as an empty list (and not None).
268 Note that if the HPATH for a LU class is None, this function will not be called.
272 raise NotImplementedError
274 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
275 """Notify the LU about the results of its hooks.
277 This method is called every time a hooks phase is executed, and notifies
278 the Logical Unit about the hooks' result. The LU can then use it to alter
279 its result based on the hooks. By default the method does nothing and the
280 previous result is passed back unchanged but any LU can define it if it
281 wants to use the local cluster hook-scripts somehow.
283 @param phase: one of L{constants.HOOKS_PHASE_POST} or
284 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
285 @param hook_results: the results of the multi-node hooks rpc call
286 @param feedback_fn: function used to send feedback back to the caller
287 @param lu_result: the previous Exec result this LU had, or None
289 @return: the new Exec result, based on the previous result
293 # API must be kept, thus we ignore the "unused argument" and
294 # "could be a function" warnings
295 # pylint: disable-msg=W0613,R0201
298 def _ExpandAndLockInstance(self):
299 """Helper function to expand and lock an instance.
301 Many LUs that work on an instance take its name in self.op.instance_name
302 and need to expand it and then declare the expanded name for locking. This
303 function does it, and then updates self.op.instance_name to the expanded
304 name. It also initializes needed_locks as a dict, if this hasn't been done before.
308 if self.needed_locks is None:
309 self.needed_locks = {}
311 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
312 "_ExpandAndLockInstance called with instance-level locks set"
313 self.op.instance_name = _ExpandInstanceName(self.cfg,
314 self.op.instance_name)
315 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
317 def _LockInstancesNodes(self, primary_only=False):
318 """Helper function to declare instances' nodes for locking.
320 This function should be called after locking one or more instances to lock
321 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
322 with all primary or secondary nodes for instances already locked and
323 present in self.needed_locks[locking.LEVEL_INSTANCE].
325 It should be called from DeclareLocks, and for safety only works if
326 self.recalculate_locks[locking.LEVEL_NODE] is set.
328 In the future it may grow parameters to just lock some instances' nodes, or
329 to just lock primary or secondary nodes, if needed.
331 It should be called from DeclareLocks in a way similar to::
333 if level == locking.LEVEL_NODE:
334 self._LockInstancesNodes()
336 @type primary_only: boolean
337 @param primary_only: only lock primary nodes of locked instances
340 assert locking.LEVEL_NODE in self.recalculate_locks, \
341 "_LockInstancesNodes helper function called with no nodes to recalculate"
343 # TODO: check if we've really been called with the instance locks held
345 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
346 # future we might want to have different behaviors depending on the value
347 # of self.recalculate_locks[locking.LEVEL_NODE]
349 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
350 instance = self.context.cfg.GetInstanceInfo(instance_name)
351 wanted_nodes.append(instance.primary_node)
353 wanted_nodes.extend(instance.secondary_nodes)
355 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
356 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
357 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
358 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
360 del self.recalculate_locks[locking.LEVEL_NODE]
363 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
364 """Simple LU which runs no hooks.
366 This LU is intended as a parent for other LogicalUnits which will
367 run no hooks, in order to reduce duplicate code.
373 def BuildHooksEnv(self):
374 """Empty BuildHooksEnv for NoHooksLu.
376 This just raises an error.
379 assert False, "BuildHooksEnv called for NoHooksLUs"
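# A minimal, hypothetical sketch (not part of the original module) of an LU
# following the contract documented above; the class name, the opcode and its
# "instance_name" parameter are assumptions made purely for illustration.
class _LUExampleNoop(NoHooksLU):
  """Example LU: locks one instance and its nodes, then does nothing.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    # expand the instance name to its canonical form and declare its lock
    self._ExpandAndLockInstance()
    # node locks are computed later in DeclareLocks, replacing this value
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    # checks only; no cluster or system changes are allowed here
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

  def Exec(self, feedback_fn):
    feedback_fn("Nothing to do for instance %s" % self.instance.name)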
383 """Tasklet base class.
385 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
386 they can mix legacy code with tasklets. Locking needs to be done in the LU,
387 tasklets know nothing about locks.
389 Subclasses must follow these rules:
390 - Implement CheckPrereq
394 def __init__(self, lu):
401 def CheckPrereq(self):
402 """Check prerequisites for this tasklet.
404 This method should check whether the prerequisites for the execution of
405 this tasklet are fulfilled. It can do internode communication, but it
406 should be idempotent - no cluster or system changes are allowed.
408 The method should raise errors.OpPrereqError in case something is not
409 fulfilled. Its return value is ignored.
411 This method should also update all parameters to their canonical form if it
412 hasn't been done before.
415 raise NotImplementedError
417 def Exec(self, feedback_fn):
418 """Execute the tasklet.
420 This method should implement the actual work. It should raise
421 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
425 raise NotImplementedError
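# A second illustrative sketch (hypothetical, not part of the original
# module): a minimal tasklet honouring the CheckPrereq/Exec contract above.
# Locking stays with the owning LU; the tasklet itself never touches locks.
class _ExampleTasklet(Tasklet):
  def __init__(self, lu, instance_name):
    Tasklet.__init__(self, lu)
    self._lu = lu
    self.instance_name = instance_name

  def CheckPrereq(self):
    # checks only; signal unmet prerequisites via OpPrereqError
    instance = self._lu.cfg.GetInstanceInfo(self.instance_name)
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.instance_name, errors.ECODE_NOENT)
    self.instance = instance

  def Exec(self, feedback_fn):
    feedback_fn("Nothing to do for instance %s" % self.instance.name)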
428 def _GetWantedNodes(lu, nodes):
429 """Returns list of checked and expanded node names.
431 @type lu: L{LogicalUnit}
432 @param lu: the logical unit on whose behalf we execute
434 @param nodes: list of node names or None for all nodes
436 @return: the list of nodes, sorted
437 @raise errors.ProgrammerError: if the nodes parameter is wrong type
440 if not isinstance(nodes, list):
441 raise errors.OpPrereqError("Invalid argument type 'nodes'",
445 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
446 " non-empty list of nodes whose name is to be expanded.")
448 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
449 return utils.NiceSort(wanted)
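# Illustrative call (hypothetical names): _GetWantedNodes(lu, ["node2", "node1"])
# expands each name to its canonical form via the configuration and returns
# the nicely sorted result, e.g. ["node1.example.com", "node2.example.com"].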
452 def _GetWantedInstances(lu, instances):
453 """Returns list of checked and expanded instance names.
455 @type lu: L{LogicalUnit}
456 @param lu: the logical unit on whose behalf we execute
457 @type instances: list
458 @param instances: list of instance names or None for all instances
460 @return: the list of instances, sorted
461 @raise errors.OpPrereqError: if the instances parameter is wrong type
462 @raise errors.OpPrereqError: if any of the passed instances is not found
465 if not isinstance(instances, list):
466 raise errors.OpPrereqError("Invalid argument type 'instances'",
470 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
472 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
476 def _CheckOutputFields(static, dynamic, selected):
477 """Checks whether all selected fields are valid.
479 @type static: L{utils.FieldSet}
480 @param static: static fields set
481 @type dynamic: L{utils.FieldSet}
482 @param dynamic: dynamic fields set
489 delta = f.NonMatching(selected)
491 raise errors.OpPrereqError("Unknown output fields selected: %s"
492 % ",".join(delta), errors.ECODE_INVAL)
495 def _CheckBooleanOpField(op, name):
496 """Validates boolean opcode parameters.
498 This will ensure that an opcode parameter is either a boolean value,
499 or None (but that it always exists).
502 val = getattr(op, name, None)
503 if not (val is None or isinstance(val, bool)):
504 raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
505 (name, str(val)), errors.ECODE_INVAL)
506 setattr(op, name, val)
509 def _CheckGlobalHvParams(params):
510 """Validates that given hypervisor params are not global ones.
512 This will ensure that instances don't get customised versions of global parameters.
516 used_globals = constants.HVC_GLOBALS.intersection(params)
518 msg = ("The following hypervisor parameters are global and cannot"
519 " be customized at instance level, please modify them at"
520 " cluster level: %s" % utils.CommaJoin(used_globals))
521 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
524 def _CheckNodeOnline(lu, node):
525 """Ensure that a given node is online.
527 @param lu: the LU on behalf of which we make the check
528 @param node: the node to check
529 @raise errors.OpPrereqError: if the node is offline
532 if lu.cfg.GetNodeInfo(node).offline:
533 raise errors.OpPrereqError("Can't use offline node %s" % node,
537 def _CheckNodeNotDrained(lu, node):
538 """Ensure that a given node is not drained.
540 @param lu: the LU on behalf of which we make the check
541 @param node: the node to check
542 @raise errors.OpPrereqError: if the node is drained
545 if lu.cfg.GetNodeInfo(node).drained:
546 raise errors.OpPrereqError("Can't use drained node %s" % node,
550 def _CheckNodeHasOS(lu, node, os_name, force_variant):
551 """Ensure that a node supports a given OS.
553 @param lu: the LU on behalf of which we make the check
554 @param node: the node to check
555 @param os_name: the OS to query about
556 @param force_variant: whether to ignore variant errors
557 @raise errors.OpPrereqError: if the node does not support the OS
560 result = lu.rpc.call_os_get(node, os_name)
561 result.Raise("OS '%s' not in supported OS list for node %s" %
563 prereq=True, ecode=errors.ECODE_INVAL)
564 if not force_variant:
565 _CheckOSVariant(result.payload, os_name)
568 def _RequireFileStorage():
569 """Checks that file storage is enabled.
571 @raise errors.OpPrereqError: when file storage is disabled
574 if not constants.ENABLE_FILE_STORAGE:
575 raise errors.OpPrereqError("File storage disabled at configure time",
579 def _CheckDiskTemplate(template):
580 """Ensure a given disk template is valid.
583 if template not in constants.DISK_TEMPLATES:
584 msg = ("Invalid disk template name '%s', valid templates are: %s" %
585 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
586 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
587 if template == constants.DT_FILE:
588 _RequireFileStorage()
591 def _CheckStorageType(storage_type):
592 """Ensure a given storage type is valid.
595 if storage_type not in constants.VALID_STORAGE_TYPES:
596 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
598 if storage_type == constants.ST_FILE:
599 _RequireFileStorage()
602 def _GetClusterDomainSecret():
603 """Reads the cluster domain secret.
606 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
610 def _CheckInstanceDown(lu, instance, reason):
611 """Ensure that an instance is not running."""
612 if instance.admin_up:
613 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
614 (instance.name, reason), errors.ECODE_STATE)
616 pnode = instance.primary_node
617 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
618 ins_l.Raise("Can't contact node %s for instance information" % pnode,
619 prereq=True, ecode=errors.ECODE_ENVIRON)
621 if instance.name in ins_l.payload:
622 raise errors.OpPrereqError("Instance %s is running, %s" %
623 (instance.name, reason), errors.ECODE_STATE)
626 def _ExpandItemName(fn, name, kind):
627 """Expand an item name.
629 @param fn: the function to use for expansion
630 @param name: requested item name
631 @param kind: text description ('Node' or 'Instance')
632 @return: the resolved (full) name
633 @raise errors.OpPrereqError: if the item is not found
637 if full_name is None:
638 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
643 def _ExpandNodeName(cfg, name):
644 """Wrapper over L{_ExpandItemName} for nodes."""
645 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
648 def _ExpandInstanceName(cfg, name):
649 """Wrapper over L{_ExpandItemName} for instance."""
650 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
653 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
654 memory, vcpus, nics, disk_template, disks,
655 bep, hvp, hypervisor_name):
656 """Builds instance related env variables for hooks
658 This builds the hook environment from individual variables.
661 @param name: the name of the instance
662 @type primary_node: string
663 @param primary_node: the name of the instance's primary node
664 @type secondary_nodes: list
665 @param secondary_nodes: list of secondary nodes as strings
666 @type os_type: string
667 @param os_type: the name of the instance's OS
668 @type status: boolean
669 @param status: the should_run status of the instance
671 @param memory: the memory size of the instance
673 @param vcpus: the count of VCPUs the instance has
675 @param nics: list of tuples (ip, mac, mode, link) representing
676 the NICs the instance has
677 @type disk_template: string
678 @param disk_template: the disk template of the instance
680 @param disks: the list of (size, mode) pairs
682 @param bep: the backend parameters for the instance
684 @param hvp: the hypervisor parameters for the instance
685 @type hypervisor_name: string
686 @param hypervisor_name: the hypervisor for the instance
688 @return: the hook environment for this instance
697 "INSTANCE_NAME": name,
698 "INSTANCE_PRIMARY": primary_node,
699 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
700 "INSTANCE_OS_TYPE": os_type,
701 "INSTANCE_STATUS": str_status,
702 "INSTANCE_MEMORY": memory,
703 "INSTANCE_VCPUS": vcpus,
704 "INSTANCE_DISK_TEMPLATE": disk_template,
705 "INSTANCE_HYPERVISOR": hypervisor_name,
709 nic_count = len(nics)
710 for idx, (ip, mac, mode, link) in enumerate(nics):
713 env["INSTANCE_NIC%d_IP" % idx] = ip
714 env["INSTANCE_NIC%d_MAC" % idx] = mac
715 env["INSTANCE_NIC%d_MODE" % idx] = mode
716 env["INSTANCE_NIC%d_LINK" % idx] = link
717 if mode == constants.NIC_MODE_BRIDGED:
718 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
722 env["INSTANCE_NIC_COUNT"] = nic_count
725 disk_count = len(disks)
726 for idx, (size, mode) in enumerate(disks):
727 env["INSTANCE_DISK%d_SIZE" % idx] = size
728 env["INSTANCE_DISK%d_MODE" % idx] = mode
732 env["INSTANCE_DISK_COUNT"] = disk_count
734 for source, kind in [(bep, "BE"), (hvp, "HV")]:
735 for key, value in source.items():
736 env["INSTANCE_%s_%s" % (kind, key)] = value
741 def _NICListToTuple(lu, nics):
742 """Build a list of nic information tuples.
744 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
745 value in LUQueryInstanceData.
747 @type lu: L{LogicalUnit}
748 @param lu: the logical unit on whose behalf we execute
749 @type nics: list of L{objects.NIC}
750 @param nics: list of nics to convert to hooks tuples
754 c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
758 filled_params = objects.FillDict(c_nicparams, nic.nicparams)
759 mode = filled_params[constants.NIC_MODE]
760 link = filled_params[constants.NIC_LINK]
761 hooks_nics.append((ip, mac, mode, link))
765 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
766 """Builds instance related env variables for hooks from an object.
768 @type lu: L{LogicalUnit}
769 @param lu: the logical unit on whose behalf we execute
770 @type instance: L{objects.Instance}
771 @param instance: the instance for which we should build the
774 @param override: dictionary with key/values that will override
777 @return: the hook environment dictionary
780 cluster = lu.cfg.GetClusterInfo()
781 bep = cluster.FillBE(instance)
782 hvp = cluster.FillHV(instance)
784 'name': instance.name,
785 'primary_node': instance.primary_node,
786 'secondary_nodes': instance.secondary_nodes,
787 'os_type': instance.os,
788 'status': instance.admin_up,
789 'memory': bep[constants.BE_MEMORY],
790 'vcpus': bep[constants.BE_VCPUS],
791 'nics': _NICListToTuple(lu, instance.nics),
792 'disk_template': instance.disk_template,
793 'disks': [(disk.size, disk.mode) for disk in instance.disks],
796 'hypervisor_name': instance.hypervisor,
799 args.update(override)
800 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
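# For orientation only, the helpers above produce a flat hook environment of
# the form (values here are purely illustrative, not from a real cluster):
#   INSTANCE_NAME=inst1.example.com   INSTANCE_PRIMARY=node1.example.com
#   INSTANCE_MEMORY=128  INSTANCE_VCPUS=1  INSTANCE_DISK_COUNT=1
#   INSTANCE_NIC_COUNT=1  INSTANCE_NIC0_MODE=bridged  INSTANCE_NIC0_LINK=br0
# The hooks runner later prefixes every key with "GANETI_" before exporting
# it to the hook scripts (see LogicalUnit.BuildHooksEnv above).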
803 def _AdjustCandidatePool(lu, exceptions):
804 """Adjust the candidate pool after node operations.
807 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
809 lu.LogInfo("Promoted nodes to master candidate role: %s",
810 utils.CommaJoin(node.name for node in mod_list))
811 for name in mod_list:
812 lu.context.ReaddNode(name)
813 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
815 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
819 def _DecideSelfPromotion(lu, exceptions=None):
820 """Decide whether I should promote myself as a master candidate.
823 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
824 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
825 # the new node will increase mc_max by one, so:
826 mc_should = min(mc_should + 1, cp_size)
827 return mc_now < mc_should
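# Worked example for _DecideSelfPromotion (hypothetical numbers): with
# candidate_pool_size = 10, mc_now = 7 and mc_should = 7, the joining node
# computes mc_should = min(7 + 1, 10) = 8, and since 7 < 8 it promotes
# itself. With the pool already full (mc_now == cp_size) the min() clamps the
# target to cp_size, the comparison is false and the node stays regular.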
830 def _CheckNicsBridgesExist(lu, target_nics, target_node,
831 profile=constants.PP_DEFAULT):
832 """Check that the bridges needed by a list of nics exist.
835 c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
836 paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
837 for nic in target_nics]
838 brlist = [params[constants.NIC_LINK] for params in paramslist
839 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
841 result = lu.rpc.call_bridges_exist(target_node, brlist)
842 result.Raise("Error checking bridges on destination node '%s'" %
843 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
846 def _CheckInstanceBridgesExist(lu, instance, node=None):
847 """Check that the bridges needed by an instance exist.
851 node = instance.primary_node
852 _CheckNicsBridgesExist(lu, instance.nics, node)
855 def _CheckOSVariant(os_obj, name):
856 """Check whether an OS name conforms to the os variants specification.
858 @type os_obj: L{objects.OS}
859 @param os_obj: OS object to check
861 @param name: OS name passed by the user, to check for validity
864 if not os_obj.supported_variants:
867 variant = name.split("+", 1)[1]
869 raise errors.OpPrereqError("OS name must include a variant",
872 if variant not in os_obj.supported_variants:
873 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
876 def _GetNodeInstancesInner(cfg, fn):
877 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
880 def _GetNodeInstances(cfg, node_name):
881 """Returns a list of all primary and secondary instances on a node.
885 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
888 def _GetNodePrimaryInstances(cfg, node_name):
889 """Returns primary instances on a node.
892 return _GetNodeInstancesInner(cfg,
893 lambda inst: node_name == inst.primary_node)
896 def _GetNodeSecondaryInstances(cfg, node_name):
897 """Returns secondary instances on a node.
900 return _GetNodeInstancesInner(cfg,
901 lambda inst: node_name in inst.secondary_nodes)
904 def _GetStorageTypeArgs(cfg, storage_type):
905 """Returns the arguments for a storage type.
908 # Special case for file storage
909 if storage_type == constants.ST_FILE:
910 # storage.FileStorage wants a list of storage directories
911 return [[cfg.GetFileStorageDir()]]
916 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
919 for dev in instance.disks:
920 cfg.SetDiskID(dev, node_name)
922 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
923 result.Raise("Failed to get disk status from node %s" % node_name,
924 prereq=prereq, ecode=errors.ECODE_ENVIRON)
926 for idx, bdev_status in enumerate(result.payload):
927 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
933 class LUPostInitCluster(LogicalUnit):
934 """Logical unit for running hooks after cluster initialization.
937 HPATH = "cluster-init"
938 HTYPE = constants.HTYPE_CLUSTER
941 def BuildHooksEnv(self):
945 env = {"OP_TARGET": self.cfg.GetClusterName()}
946 mn = self.cfg.GetMasterNode()
949 def CheckPrereq(self):
950 """No prerequisites to check.
955 def Exec(self, feedback_fn):
962 class LUDestroyCluster(LogicalUnit):
963 """Logical unit for destroying the cluster.
966 HPATH = "cluster-destroy"
967 HTYPE = constants.HTYPE_CLUSTER
970 def BuildHooksEnv(self):
974 env = {"OP_TARGET": self.cfg.GetClusterName()}
977 def CheckPrereq(self):
978 """Check prerequisites.
980 This checks whether the cluster is empty.
982 Any errors are signaled by raising errors.OpPrereqError.
985 master = self.cfg.GetMasterNode()
987 nodelist = self.cfg.GetNodeList()
988 if len(nodelist) != 1 or nodelist[0] != master:
989 raise errors.OpPrereqError("There are still %d node(s) in"
990 " this cluster." % (len(nodelist) - 1),
992 instancelist = self.cfg.GetInstanceList()
994 raise errors.OpPrereqError("There are still %d instance(s) in"
995 " this cluster." % len(instancelist),
998 def Exec(self, feedback_fn):
999 """Destroys the cluster.
1002 master = self.cfg.GetMasterNode()
1003 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1005 # Run post hooks on master node before it's removed
1006 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1008 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1010 # pylint: disable-msg=W0702
1011 self.LogWarning("Errors occurred running hooks on %s" % master)
1013 result = self.rpc.call_node_stop_master(master, False)
1014 result.Raise("Could not disable the master role")
1016 if modify_ssh_setup:
1017 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1018 utils.CreateBackup(priv_key)
1019 utils.CreateBackup(pub_key)
1024 def _VerifyCertificate(filename):
1025 """Verifies a certificate for LUVerifyCluster.
1027 @type filename: string
1028 @param filename: Path to PEM file
1032 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1033 utils.ReadFile(filename))
1034 except Exception, err: # pylint: disable-msg=W0703
1035 return (LUVerifyCluster.ETYPE_ERROR,
1036 "Failed to load X509 certificate %s: %s" % (filename, err))
1039 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1040 constants.SSL_CERT_EXPIRATION_ERROR)
1043 fnamemsg = "While verifying %s: %s" % (filename, msg)
1048 return (None, fnamemsg)
1049 elif errcode == utils.CERT_WARNING:
1050 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1051 elif errcode == utils.CERT_ERROR:
1052 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1054 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1057 class LUVerifyCluster(LogicalUnit):
1058 """Verifies the cluster status.
1061 HPATH = "cluster-verify"
1062 HTYPE = constants.HTYPE_CLUSTER
1063 _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1066 TCLUSTER = "cluster"
1067 TNODE = "node"
1068 TINSTANCE = "instance"
1070 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1071 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1072 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1073 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1074 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1075 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1077 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1078 ENODEDRBD = (TNODE, "ENODEDRBD")
1079 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1080 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1081 ENODEHV = (TNODE, "ENODEHV")
1082 ENODELVM = (TNODE, "ENODELVM")
1083 ENODEN1 = (TNODE, "ENODEN1")
1084 ENODENET = (TNODE, "ENODENET")
1085 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1086 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1087 ENODERPC = (TNODE, "ENODERPC")
1088 ENODESSH = (TNODE, "ENODESSH")
1089 ENODEVERSION = (TNODE, "ENODEVERSION")
1090 ENODESETUP = (TNODE, "ENODESETUP")
1091 ENODETIME = (TNODE, "ENODETIME")
1093 ETYPE_FIELD = "code"
1094 ETYPE_ERROR = "ERROR"
1095 ETYPE_WARNING = "WARNING"
1097 class NodeImage(object):
1098 """A class representing the logical and physical status of a node.
1100 @ivar volumes: a structure as returned from
1101 L{ganeti.backend.GetVolumeList} (runtime)
1102 @ivar instances: a list of running instances (runtime)
1103 @ivar pinst: list of configured primary instances (config)
1104 @ivar sinst: list of configured secondary instances (config)
1105 @ivar sbp: dict of {primary-node: list of instances} of all peers
1106 of this node (config)
1107 @ivar mfree: free memory, as reported by hypervisor (runtime)
1108 @ivar dfree: free disk, as reported by the node (runtime)
1109 @ivar offline: the offline status (config)
1110 @type rpc_fail: boolean
1111 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1112 not whether the individual keys were correct) (runtime)
1113 @type lvm_fail: boolean
1114 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1115 @type hyp_fail: boolean
1116 @ivar hyp_fail: whether the RPC call didn't return the instance list
1117 @type ghost: boolean
1118 @ivar ghost: whether this is a known node or not (config)
1121 def __init__(self, offline=False):
1129 self.offline = offline
1130 self.rpc_fail = False
1131 self.lvm_fail = False
1132 self.hyp_fail = False
1135 def ExpandNames(self):
1136 self.needed_locks = {
1137 locking.LEVEL_NODE: locking.ALL_SET,
1138 locking.LEVEL_INSTANCE: locking.ALL_SET,
1140 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1142 def _Error(self, ecode, item, msg, *args, **kwargs):
1143 """Format an error message.
1145 Based on the opcode's error_codes parameter, either format a
1146 parseable error code, or a simpler error string.
1148 This must be called only from Exec and functions called from Exec.
1151 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1153 # first complete the msg
1156 # then format the whole message
1157 if self.op.error_codes:
1158 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1164 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1165 # and finally report it via the feedback_fn
1166 self._feedback_fn(" - %s" % msg)
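  # Illustrative output of the two formats above (node name made up): with
  # the opcode's error_codes enabled, a problem is reported as
  #   - ERROR:ENODELVM:node:node1.example.com:unable to check volume groups
  # while the plain format renders the same condition as
  #   - ERROR: node node1.example.com: unable to check volume groups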
1168 def _ErrorIf(self, cond, *args, **kwargs):
1169 """Log an error message if the passed condition is True.
1172 cond = bool(cond) or self.op.debug_simulate_errors
1174 self._Error(*args, **kwargs)
1175 # do not mark the operation as failed for WARN cases only
1176 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1177 self.bad = self.bad or cond
1179 def _VerifyNode(self, ninfo, nresult):
1180 """Run multiple tests against a node.
1184 - compares ganeti version
1185 - checks vg existence and size > 20G
1186 - checks config file checksum
1187 - checks ssh to other nodes
1189 @type ninfo: L{objects.Node}
1190 @param ninfo: the node to check
1191 @param nresult: the results from the node
1193 @return: whether overall this call was successful (and we can expect
1194 reasonable values in the response)
1198 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1200 # main result, nresult should be a non-empty dict
1201 test = not nresult or not isinstance(nresult, dict)
1202 _ErrorIf(test, self.ENODERPC, node,
1203 "unable to verify node: no data returned")
1207 # compares ganeti version
1208 local_version = constants.PROTOCOL_VERSION
1209 remote_version = nresult.get("version", None)
1210 test = not (remote_version and
1211 isinstance(remote_version, (list, tuple)) and
1212 len(remote_version) == 2)
1213 _ErrorIf(test, self.ENODERPC, node,
1214 "connection to node returned invalid data")
1218 test = local_version != remote_version[0]
1219 _ErrorIf(test, self.ENODEVERSION, node,
1220 "incompatible protocol versions: master %s,"
1221 " node %s", local_version, remote_version[0])
1225 # node seems compatible, we can actually try to look into its results
1227 # full package version
1228 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1229 self.ENODEVERSION, node,
1230 "software version mismatch: master %s, node %s",
1231 constants.RELEASE_VERSION, remote_version[1],
1232 code=self.ETYPE_WARNING)
1234 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1235 if isinstance(hyp_result, dict):
1236 for hv_name, hv_result in hyp_result.iteritems():
1237 test = hv_result is not None
1238 _ErrorIf(test, self.ENODEHV, node,
1239 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1242 test = nresult.get(constants.NV_NODESETUP,
1243 ["Missing NODESETUP results"])
1244 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1249 def _VerifyNodeTime(self, ninfo, nresult,
1250 nvinfo_starttime, nvinfo_endtime):
1251 """Check the node time.
1253 @type ninfo: L{objects.Node}
1254 @param ninfo: the node to check
1255 @param nresult: the remote results for the node
1256 @param nvinfo_starttime: the start time of the RPC call
1257 @param nvinfo_endtime: the end time of the RPC call
1261 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1263 ntime = nresult.get(constants.NV_TIME, None)
1265 ntime_merged = utils.MergeTime(ntime)
1266 except (ValueError, TypeError):
1267 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1270 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1271 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1272 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1273 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1277 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1278 "Node time diverges by at least %s from master node time",
1281 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1282 """Check the node LVM data.
1284 @type ninfo: L{objects.Node}
1285 @param ninfo: the node to check
1286 @param nresult: the remote results for the node
1287 @param vg_name: the configured VG name
1294 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1296 # checks vg existence and size > 20G
1297 vglist = nresult.get(constants.NV_VGLIST, None)
1299 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1301 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1302 constants.MIN_VG_SIZE)
1303 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1306 pvlist = nresult.get(constants.NV_PVLIST, None)
1307 test = pvlist is None
1308 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1310 # check that ':' is not present in PV names, since it's a
1311 # special character for lvcreate (denotes the range of PEs to use on the PV)
1313 for _, pvname, owner_vg in pvlist:
1314 test = ":" in pvname
1315 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1316 " '%s' of VG '%s'", pvname, owner_vg)
1318 def _VerifyNodeNetwork(self, ninfo, nresult):
1319 """Check the node network connectivity.
1321 @type ninfo: L{objects.Node}
1322 @param ninfo: the node to check
1323 @param nresult: the remote results for the node
1327 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1329 test = constants.NV_NODELIST not in nresult
1330 _ErrorIf(test, self.ENODESSH, node,
1331 "node hasn't returned node ssh connectivity data")
1333 if nresult[constants.NV_NODELIST]:
1334 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1335 _ErrorIf(True, self.ENODESSH, node,
1336 "ssh communication with node '%s': %s", a_node, a_msg)
1338 test = constants.NV_NODENETTEST not in nresult
1339 _ErrorIf(test, self.ENODENET, node,
1340 "node hasn't returned node tcp connectivity data")
1342 if nresult[constants.NV_NODENETTEST]:
1343 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1345 _ErrorIf(True, self.ENODENET, node,
1346 "tcp communication with node '%s': %s",
1347 anode, nresult[constants.NV_NODENETTEST][anode])
1349 test = constants.NV_MASTERIP not in nresult
1350 _ErrorIf(test, self.ENODENET, node,
1351 "node hasn't returned node master IP reachability data")
1353 if not nresult[constants.NV_MASTERIP]:
1354 if node == self.master_node:
1355 msg = "the master node cannot reach the master IP (not configured?)"
1357 msg = "cannot reach the master IP"
1358 _ErrorIf(True, self.ENODENET, node, msg)
1361 def _VerifyInstance(self, instance, instanceconfig, node_image):
1362 """Verify an instance.
1364 This function checks to see if the required block devices are
1365 available on the instance's node.
1368 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1369 node_current = instanceconfig.primary_node
1371 node_vol_should = {}
1372 instanceconfig.MapLVsByNode(node_vol_should)
1374 for node in node_vol_should:
1375 n_img = node_image[node]
1376 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1377 # ignore missing volumes on offline or broken nodes
1379 for volume in node_vol_should[node]:
1380 test = volume not in n_img.volumes
1381 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1382 "volume %s missing on node %s", volume, node)
1384 if instanceconfig.admin_up:
1385 pri_img = node_image[node_current]
1386 test = instance not in pri_img.instances and not pri_img.offline
1387 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1388 "instance not running on its primary node %s",
1391 for node, n_img in node_image.items():
1392 if (not node == node_current):
1393 test = instance in n_img.instances
1394 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1395 "instance should not run on node %s", node)
1397 def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1398 """Verify if there are any unknown volumes in the cluster.
1400 The .os, .swap and backup volumes are ignored. All other volumes are
1401 reported as unknown.
1404 for node, n_img in node_image.items():
1405 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1406 # skip non-healthy nodes
1408 for volume in n_img.volumes:
1409 test = (node not in node_vol_should or
1410 volume not in node_vol_should[node])
1411 self._ErrorIf(test, self.ENODEORPHANLV, node,
1412 "volume %s is unknown", volume)
1414 def _VerifyOrphanInstances(self, instancelist, node_image):
1415 """Verify the list of running instances.
1417 This checks what instances are running but unknown to the cluster.
1420 for node, n_img in node_image.items():
1421 for o_inst in n_img.instances:
1422 test = o_inst not in instancelist
1423 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1424 "instance %s on node %s should not exist", o_inst, node)
1426 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1427 """Verify N+1 Memory Resilience.
1429 Check that if one single node dies we can still start all the
1430 instances it was primary for.
1433 for node, n_img in node_image.items():
1434 # This code checks that every node which is now listed as
1435 # secondary has enough memory to host all instances it is
1436 # supposed to, should a single other node in the cluster fail.
1437 # FIXME: not ready for failover to an arbitrary node
1438 # FIXME: does not support file-backed instances
1439 # WARNING: we currently take into account down instances as well
1440 # as up ones, considering that even if they're down someone
1441 # might want to start them even in the event of a node failure.
1442 for prinode, instances in n_img.sbp.items():
1444 for instance in instances:
1445 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1446 if bep[constants.BE_AUTO_BALANCE]:
1447 needed_mem += bep[constants.BE_MEMORY]
1448 test = n_img.mfree < needed_mem
1449 self._ErrorIf(test, self.ENODEN1, node,
1450 "not enough memory to accommodate"
1451 " failovers should peer node %s fail", prinode)
1453 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1455 """Verifies and computes the node required file checksums.
1457 @type ninfo: L{objects.Node}
1458 @param ninfo: the node to check
1459 @param nresult: the remote results for the node
1460 @param file_list: required list of files
1461 @param local_cksum: dictionary of local files and their checksums
1462 @param master_files: list of files that only masters should have
1466 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1468 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1469 test = not isinstance(remote_cksum, dict)
1470 _ErrorIf(test, self.ENODEFILECHECK, node,
1471 "node hasn't returned file checksum data")
1475 for file_name in file_list:
1476 node_is_mc = ninfo.master_candidate
1477 must_have = (file_name not in master_files) or node_is_mc
1479 test1 = file_name not in remote_cksum
1481 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1483 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1484 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1485 "file '%s' missing", file_name)
1486 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1487 "file '%s' has wrong checksum", file_name)
1488 # not candidate and this is not a must-have file
1489 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1490 "file '%s' should not exist on non master"
1491 " candidates (and the file is outdated)", file_name)
1492 # all good, except non-master/non-must have combination
1493 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1494 "file '%s' should not exist"
1495 " on non master candidates", file_name)
1497 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1498 """Verifies the node DRBD status.
1500 @type ninfo: L{objects.Node}
1501 @param ninfo: the node to check
1502 @param nresult: the remote results for the node
1503 @param instanceinfo: the dict of instances
1504 @param drbd_map: the DRBD map as returned by
1505 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1509 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1511 # compute the DRBD minors
1513 for minor, instance in drbd_map[node].items():
1514 test = instance not in instanceinfo
1515 _ErrorIf(test, self.ECLUSTERCFG, None,
1516 "ghost instance '%s' in temporary DRBD map", instance)
1517 # ghost instance should not be running, but otherwise we
1518 # don't give double warnings (both ghost instance and
1519 # unallocated minor in use)
1521 node_drbd[minor] = (instance, False)
1523 instance = instanceinfo[instance]
1524 node_drbd[minor] = (instance.name, instance.admin_up)
1526 # and now check them
1527 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1528 test = not isinstance(used_minors, (tuple, list))
1529 _ErrorIf(test, self.ENODEDRBD, node,
1530 "cannot parse drbd status file: %s", str(used_minors))
1532 # we cannot check drbd status
1535 for minor, (iname, must_exist) in node_drbd.items():
1536 test = minor not in used_minors and must_exist
1537 _ErrorIf(test, self.ENODEDRBD, node,
1538 "drbd minor %d of instance %s is not active", minor, iname)
1539 for minor in used_minors:
1540 test = minor not in node_drbd
1541 _ErrorIf(test, self.ENODEDRBD, node,
1542 "unallocated drbd minor %d is in use", minor)
1544 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1545 """Verifies and updates the node volume data.
1547 This function will update a L{NodeImage}'s internal structures
1548 with data from the remote call.
1550 @type ninfo: L{objects.Node}
1551 @param ninfo: the node to check
1552 @param nresult: the remote results for the node
1553 @param nimg: the node image object
1554 @param vg_name: the configured VG name
1558 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1560 nimg.lvm_fail = True
1561 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1564 elif isinstance(lvdata, basestring):
1565 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1566 utils.SafeEncode(lvdata))
1567 elif not isinstance(lvdata, dict):
1568 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1570 nimg.volumes = lvdata
1571 nimg.lvm_fail = False
1573 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1574 """Verifies and updates the node instance list.
1576 If the listing was successful, then updates this node's instance
1577 list. Otherwise, it marks the RPC call as failed for the instance list.
1580 @type ninfo: L{objects.Node}
1581 @param ninfo: the node to check
1582 @param nresult: the remote results for the node
1583 @param nimg: the node image object
1586 idata = nresult.get(constants.NV_INSTANCELIST, None)
1587 test = not isinstance(idata, list)
1588 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1589 " (instancelist): %s", utils.SafeEncode(str(idata)))
1591 nimg.hyp_fail = True
1593 nimg.instances = idata
1595 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1596 """Verifies and computes a node information map
1598 @type ninfo: L{objects.Node}
1599 @param ninfo: the node to check
1600 @param nresult: the remote results for the node
1601 @param nimg: the node image object
1602 @param vg_name: the configured VG name
1606 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1608 # try to read free memory (from the hypervisor)
1609 hv_info = nresult.get(constants.NV_HVINFO, None)
1610 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1611 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1614 nimg.mfree = int(hv_info["memory_free"])
1615 except (ValueError, TypeError):
1616 _ErrorIf(True, self.ENODERPC, node,
1617 "node returned invalid nodeinfo, check hypervisor")
1619 # FIXME: devise a free space model for file based instances as well
1620 if vg_name is not None:
1621 test = (constants.NV_VGLIST not in nresult or
1622 vg_name not in nresult[constants.NV_VGLIST])
1623 _ErrorIf(test, self.ENODELVM, node,
1624 "node didn't return data for the volume group '%s'"
1625 " - it is either missing or broken", vg_name)
1628 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1629 except (ValueError, TypeError):
1630 _ErrorIf(True, self.ENODERPC, node,
1631 "node returned invalid LVM info, check LVM status")
1633 def CheckPrereq(self):
1634 """Check prerequisites.
1636 Transform the list of checks we're going to skip into a set and check that
1637 all its members are valid.
1640 self.skip_set = frozenset(self.op.skip_checks)
1641 if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1642 raise errors.OpPrereqError("Invalid checks to be skipped specified",
1645 def BuildHooksEnv(self):
1648 Cluster-Verify hooks are run in the post phase only; their failure is
1649 logged in the verify output and makes the verification fail.
1652 all_nodes = self.cfg.GetNodeList()
1654 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1656 for node in self.cfg.GetAllNodesInfo().values():
1657 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1659 return env, [], all_nodes
1661 def Exec(self, feedback_fn):
1662 """Verify integrity of cluster, performing various tests on nodes.
1666 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1667 verbose = self.op.verbose
1668 self._feedback_fn = feedback_fn
1669 feedback_fn("* Verifying global settings")
1670 for msg in self.cfg.VerifyConfig():
1671 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1673 # Check the cluster certificates
1674 for cert_filename in constants.ALL_CERT_FILES:
1675 (errcode, msg) = _VerifyCertificate(cert_filename)
1676 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1678 vg_name = self.cfg.GetVGName()
1679 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1680 cluster = self.cfg.GetClusterInfo()
1681 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1682 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1683 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1684 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1685 for iname in instancelist)
1686 i_non_redundant = [] # Non redundant instances
1687 i_non_a_balanced = [] # Non auto-balanced instances
1688 n_offline = 0 # Count of offline nodes
1689 n_drained = 0 # Count of nodes being drained
1690 node_vol_should = {}
1692 # FIXME: verify OS list
1693 # do local checksums
1694 master_files = [constants.CLUSTER_CONF_FILE]
1695 master_node = self.master_node = self.cfg.GetMasterNode()
1696 master_ip = self.cfg.GetMasterIP()
1698 file_names = ssconf.SimpleStore().GetFileList()
1699 file_names.extend(constants.ALL_CERT_FILES)
1700 file_names.extend(master_files)
1701 if cluster.modify_etc_hosts:
1702 file_names.append(constants.ETC_HOSTS)
1704 local_checksums = utils.FingerprintFiles(file_names)
1706 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1707 node_verify_param = {
1708 constants.NV_FILELIST: file_names,
1709 constants.NV_NODELIST: [node.name for node in nodeinfo
1710 if not node.offline],
1711 constants.NV_HYPERVISOR: hypervisors,
1712 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1713 node.secondary_ip) for node in nodeinfo
1714 if not node.offline],
1715 constants.NV_INSTANCELIST: hypervisors,
1716 constants.NV_VERSION: None,
1717 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1718 constants.NV_NODESETUP: None,
1719 constants.NV_TIME: None,
1720 constants.NV_MASTERIP: (master_node, master_ip),
1723 if vg_name is not None:
1724 node_verify_param[constants.NV_VGLIST] = None
1725 node_verify_param[constants.NV_LVLIST] = vg_name
1726 node_verify_param[constants.NV_PVLIST] = [vg_name]
1727 node_verify_param[constants.NV_DRBDLIST] = None
1729 # Build our expected cluster state
1730 node_image = dict((node.name, self.NodeImage(offline=node.offline))
1731 for node in nodeinfo)
1733 for instance in instancelist:
1734 inst_config = instanceinfo[instance]
1736 for nname in inst_config.all_nodes:
1737 if nname not in node_image:
1739 gnode = self.NodeImage()
1741 node_image[nname] = gnode
1743 inst_config.MapLVsByNode(node_vol_should)
1745 pnode = inst_config.primary_node
1746 node_image[pnode].pinst.append(instance)
1748 for snode in inst_config.secondary_nodes:
1749 nimg = node_image[snode]
1750 nimg.sinst.append(instance)
1751 if pnode not in nimg.sbp:
1752 nimg.sbp[pnode] = []
1753 nimg.sbp[pnode].append(instance)
1755 # At this point, we have the in-memory data structures complete,
1756 # except for the runtime information, which we'll gather next
1758 # Due to the way our RPC system works, exact response times cannot be
1759 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1760 # time before and after executing the request, we can at least have a time window.
1762 nvinfo_starttime = time.time()
1763 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1764 self.cfg.GetClusterName())
1765 nvinfo_endtime = time.time()
1767 all_drbd_map = self.cfg.ComputeDRBDMap()
1769 feedback_fn("* Verifying node status")
1770 for node_i in nodeinfo:
1772 nimg = node_image[node]
1776 feedback_fn("* Skipping offline node %s" % (node,))
1780 if node == master_node:
1782 elif node_i.master_candidate:
1783 ntype = "master candidate"
1784 elif node_i.drained:
1790 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1792 msg = all_nvinfo[node].fail_msg
1793 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1795 nimg.rpc_fail = True
1798 nresult = all_nvinfo[node].payload
1800 nimg.call_ok = self._VerifyNode(node_i, nresult)
1801 self._VerifyNodeNetwork(node_i, nresult)
1802 self._VerifyNodeLVM(node_i, nresult, vg_name)
1803 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1805 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1806 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1808 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1809 self._UpdateNodeInstances(node_i, nresult, nimg)
1810 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1812 feedback_fn("* Verifying instance status")
1813 for instance in instancelist:
1815 feedback_fn("* Verifying instance %s" % instance)
1816 inst_config = instanceinfo[instance]
1817 self._VerifyInstance(instance, inst_config, node_image)
1818 inst_nodes_offline = []
1820 pnode = inst_config.primary_node
1821 pnode_img = node_image[pnode]
1822 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1823 self.ENODERPC, pnode, "instance %s, connection to"
1824 " primary node failed", instance)
1826 if pnode_img.offline:
1827 inst_nodes_offline.append(pnode)
1829 # If the instance is non-redundant we cannot survive losing its primary
1830 # node, so we are not N+1 compliant. On the other hand we have no disk
1831 # templates with more than one secondary so that situation is not well
1833 # FIXME: does not support file-backed instances
1834 if not inst_config.secondary_nodes:
1835 i_non_redundant.append(instance)
1836 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
1837 instance, "instance has multiple secondary nodes: %s",
1838 utils.CommaJoin(inst_config.secondary_nodes),
1839 code=self.ETYPE_WARNING)
1841 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1842 i_non_a_balanced.append(instance)
1844 for snode in inst_config.secondary_nodes:
1845 s_img = node_image[snode]
1846 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
1847 "instance %s, connection to secondary node failed", instance)
1850 inst_nodes_offline.append(snode)
1852 # warn that the instance lives on offline nodes
1853 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1854 "instance lives on offline node(s) %s",
1855 utils.CommaJoin(inst_nodes_offline))
1856 # ... or ghost nodes
1857 for node in inst_config.all_nodes:
1858 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
1859 "instance lives on ghost node %s", node)
1861 feedback_fn("* Verifying orphan volumes")
1862 self._VerifyOrphanVolumes(node_vol_should, node_image)
1864 feedback_fn("* Verifying orphan instances")
1865 self._VerifyOrphanInstances(instancelist, node_image)
1867 if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1868 feedback_fn("* Verifying N+1 Memory redundancy")
1869 self._VerifyNPlusOneMemory(node_image, instanceinfo)
1871 feedback_fn("* Other Notes")
1873 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
1874 % len(i_non_redundant))
1876 if i_non_a_balanced:
1877 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
1878 % len(i_non_a_balanced))
1881 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
1884 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
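# Summary of the verification order implemented above: per-node checks first,
# then per-instance placement checks, then orphan volume/instance detection,
# then the optional N+1 memory check, followed by these notices.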
1888 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1889 """Analyze the post-hooks' result
1891 This method analyses the hook result, handles it, and sends some
1892 nicely-formatted feedback back to the user.
1894 @param phase: one of L{constants.HOOKS_PHASE_POST} or
1895 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1896 @param hooks_results: the results of the multi-node hooks rpc call
1897 @param feedback_fn: function used to send feedback back to the caller
1898 @param lu_result: previous Exec result
1899 @return: the new Exec result, based on the previous result
1903 # We only really run POST phase hooks, and are only interested in
1905 if phase == constants.HOOKS_PHASE_POST:
1906 # Used to change hooks' output to proper indentation
1907 indent_re = re.compile('^', re.M)
1908 feedback_fn("* Hooks Results")
1909 assert hooks_results, "invalid result from hooks"
1911 for node_name in hooks_results:
1912 res = hooks_results[node_name]
1914 test = msg and not res.offline
1915 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1916 "Communication failure in hooks execution: %s", msg)
1917 if res.offline or msg:
1918 # No need to investigate payload if node is offline or gave an error.
1919 # override manually lu_result here as _ErrorIf only
1920 # overrides self.bad
1923 for script, hkr, output in res.payload:
1924 test = hkr == constants.HKR_FAIL
1925 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1926 "Script %s failed, output:", script)
1928 output = indent_re.sub(' ', output)
1929 feedback_fn("%s" % output)
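# Illustrative sketch of the hooks payload handled above (names are made up):
# each res.payload is assumed to be a list of (script, result, output) tuples,
# e.g. [("10-check-foo", constants.HKR_SUCCESS, ""),
#       ("20-check-bar", constants.HKR_FAIL, "disk full")]
# and every HKR_FAIL entry is reported as an ENODEHOOKS error with its output
# re-indented and fed back to the caller.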
1935 class LUVerifyDisks(NoHooksLU):
1936 """Verifies the cluster disks status.
1942 def ExpandNames(self):
1943 self.needed_locks = {
1944 locking.LEVEL_NODE: locking.ALL_SET,
1945 locking.LEVEL_INSTANCE: locking.ALL_SET,
1947 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1949 def CheckPrereq(self):
1950 """Check prerequisites.
1952 This has no prerequisites.
1957 def Exec(self, feedback_fn):
1958 """Verify integrity of cluster disks.
1960 @rtype: tuple of three items
1961 @return: a tuple of (dict of node-to-node_error, list of instances
1962 which need activate-disks, dict of instance: (node, volume) for missing volumes)
1966 result = res_nodes, res_instances, res_missing = {}, [], {}
1968 vg_name = self.cfg.GetVGName()
1969 nodes = utils.NiceSort(self.cfg.GetNodeList())
1970 instances = [self.cfg.GetInstanceInfo(name)
1971 for name in self.cfg.GetInstanceList()]
1974 for inst in instances:
1976 if (not inst.admin_up or
1977 inst.disk_template not in constants.DTS_NET_MIRROR):
1979 inst.MapLVsByNode(inst_lvs)
1980 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1981 for node, vol_list in inst_lvs.iteritems():
1982 for vol in vol_list:
1983 nv_dict[(node, vol)] = inst
1988 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
1992 node_res = node_lvs[node]
1993 if node_res.offline:
1995 msg = node_res.fail_msg
1997 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
1998 res_nodes[node] = msg
2001 lvs = node_res.payload
2002 for lv_name, (_, _, lv_online) in lvs.items():
2003 inst = nv_dict.pop((node, lv_name), None)
2004 if (not lv_online and inst is not None
2005 and inst.name not in res_instances):
2006 res_instances.append(inst.name)
2008 # any leftover items in nv_dict are missing LVs, let's arrange the data better
2010 for key, inst in nv_dict.iteritems():
2011 if inst.name not in res_missing:
2012 res_missing[inst.name] = []
2013 res_missing[inst.name].append(key)
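# Hypothetical example of the Exec result above (values are made up):
#   ({"node3": "rpc timeout"}, ["instance7"],
#    {"instance9": [("node2", "xenvg/disk0")]})
# i.e. per-node enumeration errors, instances needing activate-disks, and the
# (node, volume) pairs whose LVs were not found.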
2018 class LURepairDiskSizes(NoHooksLU):
2019 """Verifies the cluster disks sizes.
2022 _OP_REQP = ["instances"]
2025 def ExpandNames(self):
2026 if not isinstance(self.op.instances, list):
2027 raise errors.OpPrereqError("Invalid argument type 'instances'",
2030 if self.op.instances:
2031 self.wanted_names = []
2032 for name in self.op.instances:
2033 full_name = _ExpandInstanceName(self.cfg, name)
2034 self.wanted_names.append(full_name)
2035 self.needed_locks = {
2036 locking.LEVEL_NODE: [],
2037 locking.LEVEL_INSTANCE: self.wanted_names,
2039 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2041 self.wanted_names = None
2042 self.needed_locks = {
2043 locking.LEVEL_NODE: locking.ALL_SET,
2044 locking.LEVEL_INSTANCE: locking.ALL_SET,
2046 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2048 def DeclareLocks(self, level):
2049 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2050 self._LockInstancesNodes(primary_only=True)
2052 def CheckPrereq(self):
2053 """Check prerequisites.
2055 This only checks the optional instance list against the existing names.
2058 if self.wanted_names is None:
2059 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2061 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2062 in self.wanted_names]
2064 def _EnsureChildSizes(self, disk):
2065 """Ensure children of the disk have the needed disk size.
2067 This is valid mainly for DRBD8 and fixes an issue where the
2068 children have a smaller disk size than the parent.
2070 @param disk: an L{ganeti.objects.Disk} object
2073 if disk.dev_type == constants.LD_DRBD8:
2074 assert disk.children, "Empty children for DRBD8?"
2075 fchild = disk.children[0]
2076 mismatch = fchild.size < disk.size
2078 self.LogInfo("Child disk has size %d, parent %d, fixing",
2079 fchild.size, disk.size)
2080 fchild.size = disk.size
2082 # and we recurse on this child only, not on the metadev
2083 return self._EnsureChildSizes(fchild) or mismatch
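# Note: only the first child (the DRBD8 data device) is grown to the parent
# size; the second child is assumed to be the metadata device and is left
# untouched on purpose, as the comment above indicates.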
2087 def Exec(self, feedback_fn):
2088 """Verify the size of cluster disks.
2091 # TODO: check child disks too
2092 # TODO: check differences in size between primary/secondary nodes
2094 for instance in self.wanted_instances:
2095 pnode = instance.primary_node
2096 if pnode not in per_node_disks:
2097 per_node_disks[pnode] = []
2098 for idx, disk in enumerate(instance.disks):
2099 per_node_disks[pnode].append((instance, idx, disk))
2102 for node, dskl in per_node_disks.items():
2103 newl = [v[2].Copy() for v in dskl]
2104 for dsk in newl:
2105 self.cfg.SetDiskID(dsk, node)
2106 result = self.rpc.call_blockdev_getsizes(node, newl)
2108 self.LogWarning("Failure in blockdev_getsizes call to node"
2109 " %s, ignoring", node)
2111 if len(result.data) != len(dskl):
2112 self.LogWarning("Invalid result from node %s, ignoring node results",
2115 for ((instance, idx, disk), size) in zip(dskl, result.data):
2117 self.LogWarning("Disk %d of instance %s did not return size"
2118 " information, ignoring", idx, instance.name)
2120 if not isinstance(size, (int, long)):
2121 self.LogWarning("Disk %d of instance %s did not return valid"
2122 " size information, ignoring", idx, instance.name)
2125 if size != disk.size:
2126 self.LogInfo("Disk %d of instance %s has mismatched size,"
2127 " correcting: recorded %d, actual %d", idx,
2128 instance.name, disk.size, size)
2130 self.cfg.Update(instance, feedback_fn)
2131 changed.append((instance.name, idx, size))
2132 if self._EnsureChildSizes(disk):
2133 self.cfg.Update(instance, feedback_fn)
2134 changed.append((instance.name, idx, disk.size))
2138 class LURenameCluster(LogicalUnit):
2139 """Rename the cluster.
2142 HPATH = "cluster-rename"
2143 HTYPE = constants.HTYPE_CLUSTER
2146 def BuildHooksEnv(self):
2151 "OP_TARGET": self.cfg.GetClusterName(),
2152 "NEW_NAME": self.op.name,
2154 mn = self.cfg.GetMasterNode()
2155 all_nodes = self.cfg.GetNodeList()
2156 return env, [mn], all_nodes
2158 def CheckPrereq(self):
2159 """Verify that the passed name is a valid one.
2162 hostname = utils.GetHostInfo(self.op.name)
2164 new_name = hostname.name
2165 self.ip = new_ip = hostname.ip
2166 old_name = self.cfg.GetClusterName()
2167 old_ip = self.cfg.GetMasterIP()
2168 if new_name == old_name and new_ip == old_ip:
2169 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2170 " cluster has changed",
2172 if new_ip != old_ip:
2173 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2174 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2175 " reachable on the network. Aborting." %
2176 new_ip, errors.ECODE_NOTUNIQUE)
2178 self.op.name = new_name
2180 def Exec(self, feedback_fn):
2181 """Rename the cluster.
2184 clustername = self.op.name
2187 # shutdown the master IP
2188 master = self.cfg.GetMasterNode()
2189 result = self.rpc.call_node_stop_master(master, False)
2190 result.Raise("Could not disable the master role")
2193 cluster = self.cfg.GetClusterInfo()
2194 cluster.cluster_name = clustername
2195 cluster.master_ip = ip
2196 self.cfg.Update(cluster, feedback_fn)
2198 # update the known hosts file
2199 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2200 node_list = self.cfg.GetNodeList()
2202 node_list.remove(master)
2205 result = self.rpc.call_upload_file(node_list,
2206 constants.SSH_KNOWN_HOSTS_FILE)
2207 for to_node, to_result in result.iteritems():
2208 msg = to_result.fail_msg
2210 msg = ("Copy of file %s to node %s failed: %s" %
2211 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2212 self.proc.LogWarning(msg)
2215 result = self.rpc.call_node_start_master(master, False, False)
2216 msg = result.fail_msg
2218 self.LogWarning("Could not re-enable the master role on"
2219 " the master, please restart manually: %s", msg)
2222 def _RecursiveCheckIfLVMBased(disk):
2223 """Check if the given disk or its children are lvm-based.
2225 @type disk: L{objects.Disk}
2226 @param disk: the disk to check
2228 @return: boolean indicating whether a LD_LV dev_type was found or not
2232 for chdisk in disk.children:
2233 if _RecursiveCheckIfLVMBased(chdisk):
2234 return True
2235 return disk.dev_type == constants.LD_LV
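# Minimal usage sketch (hypothetical objects): for a DRBD8 disk whose children
# are two LD_LV volumes the recursion finds an LD_LV leaf and the check
# succeeds, whereas a purely file-based disk yields False; this is how
# LUSetClusterParams below refuses to disable LVM while such disks exist.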
2238 class LUSetClusterParams(LogicalUnit):
2239 """Change the parameters of the cluster.
2242 HPATH = "cluster-modify"
2243 HTYPE = constants.HTYPE_CLUSTER
2247 def CheckArguments(self):
2251 for attr in ["candidate_pool_size",
2252 "uid_pool", "add_uids", "remove_uids"]:
2253 if not hasattr(self.op, attr):
2254 setattr(self.op, attr, None)
2256 if self.op.candidate_pool_size is not None:
2258 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2259 except (ValueError, TypeError), err:
2260 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2261 str(err), errors.ECODE_INVAL)
2262 if self.op.candidate_pool_size < 1:
2263 raise errors.OpPrereqError("At least one master candidate needed",
2266 _CheckBooleanOpField(self.op, "maintain_node_health")
2268 if self.op.uid_pool:
2269 uidpool.CheckUidPool(self.op.uid_pool)
2271 if self.op.add_uids:
2272 uidpool.CheckUidPool(self.op.add_uids)
2274 if self.op.remove_uids:
2275 uidpool.CheckUidPool(self.op.remove_uids)
2277 def ExpandNames(self):
2278 # FIXME: in the future maybe other cluster params won't require checking on
2279 # all nodes to be modified.
2280 self.needed_locks = {
2281 locking.LEVEL_NODE: locking.ALL_SET,
2283 self.share_locks[locking.LEVEL_NODE] = 1
2285 def BuildHooksEnv(self):
2290 "OP_TARGET": self.cfg.GetClusterName(),
2291 "NEW_VG_NAME": self.op.vg_name,
2293 mn = self.cfg.GetMasterNode()
2294 return env, [mn], [mn]
2296 def CheckPrereq(self):
2297 """Check prerequisites.
2299 This checks whether the given params don't conflict and
2300 if the given volume group is valid.
2303 if self.op.vg_name is not None and not self.op.vg_name:
2304 instances = self.cfg.GetAllInstancesInfo().values()
2305 for inst in instances:
2306 for disk in inst.disks:
2307 if _RecursiveCheckIfLVMBased(disk):
2308 raise errors.OpPrereqError("Cannot disable lvm storage while"
2309 " lvm-based instances exist",
2312 node_list = self.acquired_locks[locking.LEVEL_NODE]
2314 # if vg_name not None, checks given volume group on all nodes
2316 vglist = self.rpc.call_vg_list(node_list)
2317 for node in node_list:
2318 msg = vglist[node].fail_msg
2320 # ignoring down node
2321 self.LogWarning("Error while gathering data on node %s"
2322 " (ignoring node): %s", node, msg)
2324 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2326 constants.MIN_VG_SIZE)
2328 raise errors.OpPrereqError("Error on node '%s': %s" %
2329 (node, vgstatus), errors.ECODE_ENVIRON)
2331 self.cluster = cluster = self.cfg.GetClusterInfo()
2332 # validate params changes
2333 if self.op.beparams:
2334 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2335 self.new_beparams = objects.FillDict(
2336 cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2338 if self.op.nicparams:
2339 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2340 self.new_nicparams = objects.FillDict(
2341 cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2342 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2345 # check all instances for consistency
2346 for instance in self.cfg.GetAllInstancesInfo().values():
2347 for nic_idx, nic in enumerate(instance.nics):
2348 params_copy = copy.deepcopy(nic.nicparams)
2349 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2351 # check parameter syntax
2353 objects.NIC.CheckParameterSyntax(params_filled)
2354 except errors.ConfigurationError, err:
2355 nic_errors.append("Instance %s, nic/%d: %s" %
2356 (instance.name, nic_idx, err))
2358 # if we're moving instances to routed, check that they have an ip
2359 target_mode = params_filled[constants.NIC_MODE]
2360 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2361 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2362 (instance.name, nic_idx))
2364 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2365 "\n".join(nic_errors))
2367 # hypervisor list/parameters
2368 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2369 if self.op.hvparams:
2370 if not isinstance(self.op.hvparams, dict):
2371 raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2373 for hv_name, hv_dict in self.op.hvparams.items():
2374 if hv_name not in self.new_hvparams:
2375 self.new_hvparams[hv_name] = hv_dict
2377 self.new_hvparams[hv_name].update(hv_dict)
2379 # os hypervisor parameters
2380 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2382 if not isinstance(self.op.os_hvp, dict):
2383 raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2385 for os_name, hvs in self.op.os_hvp.items():
2386 if not isinstance(hvs, dict):
2387 raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2388 " input"), errors.ECODE_INVAL)
2389 if os_name not in self.new_os_hvp:
2390 self.new_os_hvp[os_name] = hvs
2392 for hv_name, hv_dict in hvs.items():
2393 if hv_name not in self.new_os_hvp[os_name]:
2394 self.new_os_hvp[os_name][hv_name] = hv_dict
2396 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2398 # changes to the hypervisor list
2399 if self.op.enabled_hypervisors is not None:
2400 self.hv_list = self.op.enabled_hypervisors
2401 if not self.hv_list:
2402 raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2403 " least one member",
2405 invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2407 raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2409 utils.CommaJoin(invalid_hvs),
2411 for hv in self.hv_list:
2412 # if the hypervisor doesn't already exist in the cluster
2413 # hvparams, we initialize it to empty, and then (in both
2414 # cases) we make sure to fill the defaults, as we might not
2415 # have a complete defaults list if the hypervisor wasn't enabled before
2417 if hv not in new_hvp:
2419 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2420 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2422 self.hv_list = cluster.enabled_hypervisors
2424 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2425 # either the enabled list has changed, or the parameters have, validate
2426 for hv_name, hv_params in self.new_hvparams.items():
2427 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2428 (self.op.enabled_hypervisors and
2429 hv_name in self.op.enabled_hypervisors)):
2430 # either this is a new hypervisor, or its parameters have changed
2431 hv_class = hypervisor.GetHypervisor(hv_name)
2432 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2433 hv_class.CheckParameterSyntax(hv_params)
2434 _CheckHVParams(self, node_list, hv_name, hv_params)
2437 # no need to check any newly-enabled hypervisors, since the
2438 # defaults have already been checked in the above code-block
2439 for os_name, os_hvp in self.new_os_hvp.items():
2440 for hv_name, hv_params in os_hvp.items():
2441 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2442 # we need to fill in the new os_hvp on top of the actual hv_p
2443 cluster_defaults = self.new_hvparams.get(hv_name, {})
2444 new_osp = objects.FillDict(cluster_defaults, hv_params)
2445 hv_class = hypervisor.GetHypervisor(hv_name)
2446 hv_class.CheckParameterSyntax(new_osp)
2447 _CheckHVParams(self, node_list, hv_name, new_osp)
2450 def Exec(self, feedback_fn):
2451 """Change the parameters of the cluster.
2454 if self.op.vg_name is not None:
2455 new_volume = self.op.vg_name
2458 if new_volume != self.cfg.GetVGName():
2459 self.cfg.SetVGName(new_volume)
2461 feedback_fn("Cluster LVM configuration already in desired"
2462 " state, not changing")
2463 if self.op.hvparams:
2464 self.cluster.hvparams = self.new_hvparams
2466 self.cluster.os_hvp = self.new_os_hvp
2467 if self.op.enabled_hypervisors is not None:
2468 self.cluster.hvparams = self.new_hvparams
2469 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2470 if self.op.beparams:
2471 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2472 if self.op.nicparams:
2473 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2475 if self.op.candidate_pool_size is not None:
2476 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2477 # we need to update the pool size here, otherwise the save will fail
2478 _AdjustCandidatePool(self, [])
2480 if self.op.maintain_node_health is not None:
2481 self.cluster.maintain_node_health = self.op.maintain_node_health
2483 if self.op.add_uids is not None:
2484 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2486 if self.op.remove_uids is not None:
2487 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2489 if self.op.uid_pool is not None:
2490 self.cluster.uid_pool = self.op.uid_pool
2492 self.cfg.Update(self.cluster, feedback_fn)
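# self.cfg.Update() persists the modified cluster object; distribution of the
# config and ssconf files to the other nodes is assumed to be handled by
# ConfigWriter, as described for _RedistributeAncillaryFiles below.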
2495 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2496 """Distribute additional files which are part of the cluster configuration.
2498 ConfigWriter takes care of distributing the config and ssconf files, but
2499 there are more files which should be distributed to all nodes. This function
2500 makes sure those are copied.
2502 @param lu: calling logical unit
2503 @param additional_nodes: list of nodes not in the config to distribute to
2506 # 1. Gather target nodes
2507 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2508 dist_nodes = lu.cfg.GetOnlineNodeList()
2509 if additional_nodes is not None:
2510 dist_nodes.extend(additional_nodes)
2511 if myself.name in dist_nodes:
2512 dist_nodes.remove(myself.name)
2514 # 2. Gather files to distribute
2515 dist_files = set([constants.ETC_HOSTS,
2516 constants.SSH_KNOWN_HOSTS_FILE,
2517 constants.RAPI_CERT_FILE,
2518 constants.RAPI_USERS_FILE,
2519 constants.CONFD_HMAC_KEY,
2520 constants.CLUSTER_DOMAIN_SECRET_FILE,
2523 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2524 for hv_name in enabled_hypervisors:
2525 hv_class = hypervisor.GetHypervisor(hv_name)
2526 dist_files.update(hv_class.GetAncillaryFiles())
2528 # 3. Perform the files upload
2529 for fname in dist_files:
2530 if os.path.exists(fname):
2531 result = lu.rpc.call_upload_file(dist_nodes, fname)
2532 for to_node, to_result in result.items():
2533 msg = to_result.fail_msg
2535 msg = ("Copy of file %s to node %s failed: %s" %
2536 (fname, to_node, msg))
2537 lu.proc.LogWarning(msg)
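# Files missing on the master are skipped silently by the os.path.exists()
# check above (e.g. an absent RAPI users file is not an error); only per-node
# upload failures are logged as warnings.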
2540 class LURedistributeConfig(NoHooksLU):
2541 """Force the redistribution of cluster configuration.
2543 This is a very simple LU.
2549 def ExpandNames(self):
2550 self.needed_locks = {
2551 locking.LEVEL_NODE: locking.ALL_SET,
2553 self.share_locks[locking.LEVEL_NODE] = 1
2555 def CheckPrereq(self):
2556 """Check prerequisites.
2560 def Exec(self, feedback_fn):
2561 """Redistribute the configuration.
2564 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2565 _RedistributeAncillaryFiles(self)
2568 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2569 """Sleep and poll for an instance's disk to sync.
2572 if not instance.disks or disks is not None and not disks:
2575 disks = _ExpandCheckDisks(instance, disks)
2578 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2580 node = instance.primary_node
2583 lu.cfg.SetDiskID(dev, node)
2585 # TODO: Convert to utils.Retry
2588 degr_retries = 10 # in seconds, as we sleep 1 second each time
2592 cumul_degraded = False
2593 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2594 msg = rstats.fail_msg
2596 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2599 raise errors.RemoteError("Can't contact node %s for mirror data,"
2600 " aborting." % node)
2603 rstats = rstats.payload
2605 for i, mstat in enumerate(rstats):
2607 lu.LogWarning("Can't compute data for node %s/%s",
2608 node, disks[i].iv_name)
2611 cumul_degraded = (cumul_degraded or
2612 (mstat.is_degraded and mstat.sync_percent is None))
2613 if mstat.sync_percent is not None:
2615 if mstat.estimated_time is not None:
2616 rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2617 max_time = mstat.estimated_time
2619 rem_time = "no time estimate"
2620 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2621 (disks[i].iv_name, mstat.sync_percent, rem_time))
2623 # if we're done but degraded, let's do a few small retries, to
2624 # make sure we see a stable and not transient situation; therefore
2625 # we force restart of the loop
2626 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2627 logging.info("Degraded disks found, %d retries left", degr_retries)
2635 time.sleep(min(60, max_time))
2638 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2639 return not cumul_degraded
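# Behaviour sketch: the loop polls blockdev_getmirrorstatus on the primary
# node, reports per-disk sync percentage and time estimates, and returns True
# only if no disk is left degraded; degr_retries grants a transiently degraded
# mirror a few extra one-second polls before giving up.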
2642 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2643 """Check that mirrors are not degraded.
2645 The ldisk parameter, if True, will change the test from the
2646 is_degraded attribute (which represents overall non-ok status for
2647 the device(s)) to the ldisk (representing the local storage status).
2650 lu.cfg.SetDiskID(dev, node)
2654 if on_primary or dev.AssembleOnSecondary():
2655 rstats = lu.rpc.call_blockdev_find(node, dev)
2656 msg = rstats.fail_msg
2658 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2660 elif not rstats.payload:
2661 lu.LogWarning("Can't find disk on node %s", node)
2665 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2667 result = result and not rstats.payload.is_degraded
2670 for child in dev.children:
2671 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2676 class LUDiagnoseOS(NoHooksLU):
2677 """Logical unit for OS diagnose/query.
2680 _OP_REQP = ["output_fields", "names"]
2682 _FIELDS_STATIC = utils.FieldSet()
2683 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2684 # Fields that need calculation of global os validity
2685 _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2687 def ExpandNames(self):
2689 raise errors.OpPrereqError("Selective OS query not supported",
2692 _CheckOutputFields(static=self._FIELDS_STATIC,
2693 dynamic=self._FIELDS_DYNAMIC,
2694 selected=self.op.output_fields)
2696 # Lock all nodes, in shared mode
2697 # Temporary removal of locks, should be reverted later
2698 # TODO: reintroduce locks when they are lighter-weight
2699 self.needed_locks = {}
2700 #self.share_locks[locking.LEVEL_NODE] = 1
2701 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2703 def CheckPrereq(self):
2704 """Check prerequisites.
2709 def _DiagnoseByOS(rlist):
2710 """Remaps a per-node return list into an a per-os per-node dictionary
2712 @param rlist: a map with node names as keys and OS objects as values
2715 @return: a dictionary with osnames as keys and as value another map, with
2716 nodes as keys and tuples of (path, status, diagnose, variants) as values, e.g.::
2718 {"debian-etch": {"node1": [(/usr/lib/..., True, "", []),
2719 (/srv/..., False, "invalid api", [])],
2720 "node2": [(/srv/..., True, "", [])]}
2725 # we build here the list of nodes that didn't fail the RPC (at RPC
2726 # level), so that nodes with a non-responding node daemon don't
2727 # make all OSes invalid
2728 good_nodes = [node_name for node_name in rlist
2729 if not rlist[node_name].fail_msg]
2730 for node_name, nr in rlist.items():
2731 if nr.fail_msg or not nr.payload:
2733 for name, path, status, diagnose, variants in nr.payload:
2734 if name not in all_os:
2735 # build a list of nodes for this os containing empty lists
2736 # for each node in node_list
2738 for nname in good_nodes:
2739 all_os[name][nname] = []
2740 all_os[name][node_name].append((path, status, diagnose, variants))
2743 def Exec(self, feedback_fn):
2744 """Compute the list of OSes.
2747 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2748 node_data = self.rpc.call_os_diagnose(valid_nodes)
2749 pol = self._DiagnoseByOS(node_data)
2751 calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2752 calc_variants = "variants" in self.op.output_fields
2754 for os_name, os_data in pol.items():
2759 for osl in os_data.values():
2760 valid = valid and osl and osl[0][1]
2765 node_variants = osl[0][3]
2766 if variants is None:
2767 variants = node_variants
2769 variants = [v for v in variants if v in node_variants]
2771 for field in self.op.output_fields:
2774 elif field == "valid":
2776 elif field == "node_status":
2777 # this is just a copy of the dict
2779 for node_name, nos_list in os_data.items():
2780 val[node_name] = nos_list
2781 elif field == "variants":
2784 raise errors.ParameterError(field)
2791 class LURemoveNode(LogicalUnit):
2792 """Logical unit for removing a node.
2795 HPATH = "node-remove"
2796 HTYPE = constants.HTYPE_NODE
2797 _OP_REQP = ["node_name"]
2799 def BuildHooksEnv(self):
2802 This doesn't run on the target node in the pre phase as a failed
2803 node would then be impossible to remove.
2807 "OP_TARGET": self.op.node_name,
2808 "NODE_NAME": self.op.node_name,
2810 all_nodes = self.cfg.GetNodeList()
2812 all_nodes.remove(self.op.node_name)
2814 logging.warning("Node %s which is about to be removed not found"
2815 " in the all nodes list", self.op.node_name)
2816 return env, all_nodes, all_nodes
2818 def CheckPrereq(self):
2819 """Check prerequisites.
2822 - the node exists in the configuration
2823 - it does not have primary or secondary instances
2824 - it's not the master
2826 Any errors are signaled by raising errors.OpPrereqError.
2829 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2830 node = self.cfg.GetNodeInfo(self.op.node_name)
2831 assert node is not None
2833 instance_list = self.cfg.GetInstanceList()
2835 masternode = self.cfg.GetMasterNode()
2836 if node.name == masternode:
2837 raise errors.OpPrereqError("Node is the master node,"
2838 " you need to failover first.",
2841 for instance_name in instance_list:
2842 instance = self.cfg.GetInstanceInfo(instance_name)
2843 if node.name in instance.all_nodes:
2844 raise errors.OpPrereqError("Instance %s is still running on the node,"
2845 " please remove first." % instance_name,
2847 self.op.node_name = node.name
2850 def Exec(self, feedback_fn):
2851 """Removes the node from the cluster.
2855 logging.info("Stopping the node daemon and removing configs from node %s",
2858 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2860 # Promote nodes to master candidate as needed
2861 _AdjustCandidatePool(self, exceptions=[node.name])
2862 self.context.RemoveNode(node.name)
2864 # Run post hooks on the node before it's removed
2865 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2867 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2869 # pylint: disable-msg=W0702
2870 self.LogWarning("Errors occurred running hooks on %s" % node.name)
2872 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2873 msg = result.fail_msg
2875 self.LogWarning("Errors encountered on the remote node while leaving"
2876 " the cluster: %s", msg)
2878 # Remove node from our /etc/hosts
2879 if self.cfg.GetClusterInfo().modify_etc_hosts:
2880 # FIXME: this should be done via an rpc call to node daemon
2881 utils.RemoveHostFromEtcHosts(node.name)
2882 _RedistributeAncillaryFiles(self)
2885 class LUQueryNodes(NoHooksLU):
2886 """Logical unit for querying nodes.
2889 # pylint: disable-msg=W0142
2890 _OP_REQP = ["output_fields", "names", "use_locking"]
2893 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2894 "master_candidate", "offline", "drained"]
2896 _FIELDS_DYNAMIC = utils.FieldSet(
2898 "mtotal", "mnode", "mfree",
2900 "ctotal", "cnodes", "csockets",
2903 _FIELDS_STATIC = utils.FieldSet(*[
2904 "pinst_cnt", "sinst_cnt",
2905 "pinst_list", "sinst_list",
2906 "pip", "sip", "tags",
2908 "role"] + _SIMPLE_FIELDS
2911 def ExpandNames(self):
2912 _CheckOutputFields(static=self._FIELDS_STATIC,
2913 dynamic=self._FIELDS_DYNAMIC,
2914 selected=self.op.output_fields)
2916 self.needed_locks = {}
2917 self.share_locks[locking.LEVEL_NODE] = 1
2920 self.wanted = _GetWantedNodes(self, self.op.names)
2922 self.wanted = locking.ALL_SET
2924 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2925 self.do_locking = self.do_node_query and self.op.use_locking
2927 # if we don't request only static fields, we need to lock the nodes
2928 self.needed_locks[locking.LEVEL_NODE] = self.wanted
2930 def CheckPrereq(self):
2931 """Check prerequisites.
2934 # The validation of the node list is done in _GetWantedNodes, if
2935 # non-empty; if empty, there is no validation to do
2938 def Exec(self, feedback_fn):
2939 """Computes the list of nodes and their attributes.
2942 all_info = self.cfg.GetAllNodesInfo()
2944 nodenames = self.acquired_locks[locking.LEVEL_NODE]
2945 elif self.wanted != locking.ALL_SET:
2946 nodenames = self.wanted
2947 missing = set(nodenames).difference(all_info.keys())
2949 raise errors.OpExecError(
2950 "Some nodes were removed before retrieving their data: %s" % missing)
2952 nodenames = all_info.keys()
2954 nodenames = utils.NiceSort(nodenames)
2955 nodelist = [all_info[name] for name in nodenames]
2957 # begin data gathering
2959 if self.do_node_query:
2961 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2962 self.cfg.GetHypervisorType())
2963 for name in nodenames:
2964 nodeinfo = node_data[name]
2965 if not nodeinfo.fail_msg and nodeinfo.payload:
2966 nodeinfo = nodeinfo.payload
2967 fn = utils.TryConvert
2969 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2970 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2971 "mfree": fn(int, nodeinfo.get('memory_free', None)),
2972 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2973 "dfree": fn(int, nodeinfo.get('vg_free', None)),
2974 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2975 "bootid": nodeinfo.get('bootid', None),
2976 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2977 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2980 live_data[name] = {}
2982 live_data = dict.fromkeys(nodenames, {})
2984 node_to_primary = dict([(name, set()) for name in nodenames])
2985 node_to_secondary = dict([(name, set()) for name in nodenames])
2987 inst_fields = frozenset(("pinst_cnt", "pinst_list",
2988 "sinst_cnt", "sinst_list"))
2989 if inst_fields & frozenset(self.op.output_fields):
2990 inst_data = self.cfg.GetAllInstancesInfo()
2992 for inst in inst_data.values():
2993 if inst.primary_node in node_to_primary:
2994 node_to_primary[inst.primary_node].add(inst.name)
2995 for secnode in inst.secondary_nodes:
2996 if secnode in node_to_secondary:
2997 node_to_secondary[secnode].add(inst.name)
2999 master_node = self.cfg.GetMasterNode()
3001 # end data gathering
3004 for node in nodelist:
3006 for field in self.op.output_fields:
3007 if field in self._SIMPLE_FIELDS:
3008 val = getattr(node, field)
3009 elif field == "pinst_list":
3010 val = list(node_to_primary[node.name])
3011 elif field == "sinst_list":
3012 val = list(node_to_secondary[node.name])
3013 elif field == "pinst_cnt":
3014 val = len(node_to_primary[node.name])
3015 elif field == "sinst_cnt":
3016 val = len(node_to_secondary[node.name])
3017 elif field == "pip":
3018 val = node.primary_ip
3019 elif field == "sip":
3020 val = node.secondary_ip
3021 elif field == "tags":
3022 val = list(node.GetTags())
3023 elif field == "master":
3024 val = node.name == master_node
3025 elif self._FIELDS_DYNAMIC.Matches(field):
3026 val = live_data[node.name].get(field, None)
3027 elif field == "role":
3028 if node.name == master_node:
3030 elif node.master_candidate:
3039 raise errors.ParameterError(field)
3040 node_output.append(val)
3041 output.append(node_output)
3046 class LUQueryNodeVolumes(NoHooksLU):
3047 """Logical unit for getting volumes on node(s).
3050 _OP_REQP = ["nodes", "output_fields"]
3052 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3053 _FIELDS_STATIC = utils.FieldSet("node")
3055 def ExpandNames(self):
3056 _CheckOutputFields(static=self._FIELDS_STATIC,
3057 dynamic=self._FIELDS_DYNAMIC,
3058 selected=self.op.output_fields)
3060 self.needed_locks = {}
3061 self.share_locks[locking.LEVEL_NODE] = 1
3062 if not self.op.nodes:
3063 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3065 self.needed_locks[locking.LEVEL_NODE] = \
3066 _GetWantedNodes(self, self.op.nodes)
3068 def CheckPrereq(self):
3069 """Check prerequisites.
3071 This checks that the fields required are valid output fields.
3074 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3076 def Exec(self, feedback_fn):
3077 """Computes the list of nodes and their attributes.
3080 nodenames = self.nodes
3081 volumes = self.rpc.call_node_volumes(nodenames)
3083 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3084 in self.cfg.GetInstanceList()]
3086 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3089 for node in nodenames:
3090 nresult = volumes[node]
3093 msg = nresult.fail_msg
3095 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3098 node_vols = nresult.payload[:]
3099 node_vols.sort(key=lambda vol: vol['dev'])
3101 for vol in node_vols:
3103 for field in self.op.output_fields:
3106 elif field == "phys":
3110 elif field == "name":
3112 elif field == "size":
3113 val = int(float(vol['size']))
3114 elif field == "instance":
3116 if node not in lv_by_node[inst]:
3118 if vol['name'] in lv_by_node[inst][node]:
3124 raise errors.ParameterError(field)
3125 node_output.append(str(val))
3127 output.append(node_output)
3132 class LUQueryNodeStorage(NoHooksLU):
3133 """Logical unit for getting information on storage units on node(s).
3136 _OP_REQP = ["nodes", "storage_type", "output_fields"]
3138 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3140 def CheckArguments(self):
3141 _CheckStorageType(self.op.storage_type)
3143 _CheckOutputFields(static=self._FIELDS_STATIC,
3144 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3145 selected=self.op.output_fields)
3147 def ExpandNames(self):
3148 self.needed_locks = {}
3149 self.share_locks[locking.LEVEL_NODE] = 1
3152 self.needed_locks[locking.LEVEL_NODE] = \
3153 _GetWantedNodes(self, self.op.nodes)
3155 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3157 def CheckPrereq(self):
3158 """Check prerequisites.
3160 This checks that the fields required are valid output fields.
3163 self.op.name = getattr(self.op, "name", None)
3165 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3167 def Exec(self, feedback_fn):
3168 """Computes the list of nodes and their attributes.
3171 # Always get name to sort by
3172 if constants.SF_NAME in self.op.output_fields:
3173 fields = self.op.output_fields[:]
3175 fields = [constants.SF_NAME] + self.op.output_fields
3177 # Never ask for node or type as it's only known to the LU
3178 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3179 while extra in fields:
3180 fields.remove(extra)
3182 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3183 name_idx = field_idx[constants.SF_NAME]
3185 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3186 data = self.rpc.call_storage_list(self.nodes,
3187 self.op.storage_type, st_args,
3188 self.op.name, fields)
3192 for node in utils.NiceSort(self.nodes):
3193 nresult = data[node]
3197 msg = nresult.fail_msg
3199 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3202 rows = dict([(row[name_idx], row) for row in nresult.payload])
3204 for name in utils.NiceSort(rows.keys()):
3209 for field in self.op.output_fields:
3210 if field == constants.SF_NODE:
3212 elif field == constants.SF_TYPE:
3213 val = self.op.storage_type
3214 elif field in field_idx:
3215 val = row[field_idx[field]]
3217 raise errors.ParameterError(field)
3226 class LUModifyNodeStorage(NoHooksLU):
3227 """Logical unit for modifying a storage volume on a node.
3230 _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3233 def CheckArguments(self):
3234 self.opnode_name = _ExpandNodeName(self.cfg, self.op.node_name)
3236 _CheckStorageType(self.op.storage_type)
3238 def ExpandNames(self):
3239 self.needed_locks = {
3240 locking.LEVEL_NODE: self.op.node_name,
3243 def CheckPrereq(self):
3244 """Check prerequisites.
3247 storage_type = self.op.storage_type
3250 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3252 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3253 " modified" % storage_type,
3256 diff = set(self.op.changes.keys()) - modifiable
3258 raise errors.OpPrereqError("The following fields can not be modified for"
3259 " storage units of type '%s': %r" %
3260 (storage_type, list(diff)),
3263 def Exec(self, feedback_fn):
3264 """Computes the list of nodes and their attributes.
3267 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3268 result = self.rpc.call_storage_modify(self.op.node_name,
3269 self.op.storage_type, st_args,
3270 self.op.name, self.op.changes)
3271 result.Raise("Failed to modify storage unit '%s' on %s" %
3272 (self.op.name, self.op.node_name))
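# Hypothetical opcode payload for this LU (values are only illustrative):
#   storage_type=constants.ST_LVM_PV, name="/dev/sdb1",
#   changes={constants.SF_ALLOCATABLE: True}
# The changes keys are validated against constants.MODIFIABLE_STORAGE_FIELDS
# in CheckPrereq above; ST_LVM_PV/SF_ALLOCATABLE are assumed constant names.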
3275 class LUAddNode(LogicalUnit):
3276 """Logical unit for adding node to the cluster.
3280 HTYPE = constants.HTYPE_NODE
3281 _OP_REQP = ["node_name"]
3283 def CheckArguments(self):
3284 # validate/normalize the node name
3285 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3287 def BuildHooksEnv(self):
3290 This will run on all nodes before, and on all nodes + the new node after.
3294 "OP_TARGET": self.op.node_name,
3295 "NODE_NAME": self.op.node_name,
3296 "NODE_PIP": self.op.primary_ip,
3297 "NODE_SIP": self.op.secondary_ip,
3299 nodes_0 = self.cfg.GetNodeList()
3300 nodes_1 = nodes_0 + [self.op.node_name, ]
3301 return env, nodes_0, nodes_1
3303 def CheckPrereq(self):
3304 """Check prerequisites.
3307 - the new node is not already in the config
3309 - its parameters (single/dual homed) match the cluster
3311 Any errors are signaled by raising errors.OpPrereqError.
3314 node_name = self.op.node_name
3317 dns_data = utils.GetHostInfo(node_name)
3319 node = dns_data.name
3320 primary_ip = self.op.primary_ip = dns_data.ip
3321 secondary_ip = getattr(self.op, "secondary_ip", None)
3322 if secondary_ip is None:
3323 secondary_ip = primary_ip
3324 if not utils.IsValidIP(secondary_ip):
3325 raise errors.OpPrereqError("Invalid secondary IP given",
3327 self.op.secondary_ip = secondary_ip
3329 node_list = cfg.GetNodeList()
3330 if not self.op.readd and node in node_list:
3331 raise errors.OpPrereqError("Node %s is already in the configuration" %
3332 node, errors.ECODE_EXISTS)
3333 elif self.op.readd and node not in node_list:
3334 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3337 self.changed_primary_ip = False
3339 for existing_node_name in node_list:
3340 existing_node = cfg.GetNodeInfo(existing_node_name)
3342 if self.op.readd and node == existing_node_name:
3343 if existing_node.secondary_ip != secondary_ip:
3344 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3345 " address configuration as before",
3347 if existing_node.primary_ip != primary_ip:
3348 self.changed_primary_ip = True
3352 if (existing_node.primary_ip == primary_ip or
3353 existing_node.secondary_ip == primary_ip or
3354 existing_node.primary_ip == secondary_ip or
3355 existing_node.secondary_ip == secondary_ip):
3356 raise errors.OpPrereqError("New node ip address(es) conflict with"
3357 " existing node %s" % existing_node.name,
3358 errors.ECODE_NOTUNIQUE)
3360 # check that the type of the node (single versus dual homed) is the
3361 # same as for the master
3362 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3363 master_singlehomed = myself.secondary_ip == myself.primary_ip
3364 newbie_singlehomed = secondary_ip == primary_ip
3365 if master_singlehomed != newbie_singlehomed:
3366 if master_singlehomed:
3367 raise errors.OpPrereqError("The master has no private ip but the"
3368 " new node has one",
3371 raise errors.OpPrereqError("The master has a private ip but the"
3372 " new node doesn't have one",
3375 # checks reachability
3376 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3377 raise errors.OpPrereqError("Node not reachable by ping",
3378 errors.ECODE_ENVIRON)
3380 if not newbie_singlehomed:
3381 # check reachability from my secondary ip to newbie's secondary ip
3382 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3383 source=myself.secondary_ip):
3384 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3385 " based ping to noded port",
3386 errors.ECODE_ENVIRON)
3393 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3396 self.new_node = self.cfg.GetNodeInfo(node)
3397 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3399 self.new_node = objects.Node(name=node,
3400 primary_ip=primary_ip,
3401 secondary_ip=secondary_ip,
3402 master_candidate=self.master_candidate,
3403 offline=False, drained=False)
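# "Single homed" here means secondary_ip == primary_ip; the checks above
# require the new node to match the master in this respect so the cluster
# does not mix single- and dual-homed addressing models.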
3405 def Exec(self, feedback_fn):
3406 """Adds the new node to the cluster.
3409 new_node = self.new_node
3410 node = new_node.name
3412 # for re-adds, reset the offline/drained/master-candidate flags;
3413 # we need to reset here, otherwise offline would prevent RPC calls
3414 # later in the procedure; this also means that if the re-add
3415 # fails, we are left with a non-offlined, broken node
3417 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3418 self.LogInfo("Readding a node, the offline/drained flags were reset")
3419 # if we demote the node, we do cleanup later in the procedure
3420 new_node.master_candidate = self.master_candidate
3421 if self.changed_primary_ip:
3422 new_node.primary_ip = self.op.primary_ip
3424 # notify the user about any possible mc promotion
3425 if new_node.master_candidate:
3426 self.LogInfo("Node will be a master candidate")
3428 # check connectivity
3429 result = self.rpc.call_version([node])[node]
3430 result.Raise("Can't get version information from node %s" % node)
3431 if constants.PROTOCOL_VERSION == result.payload:
3432 logging.info("Communication to node %s fine, sw version %s match",
3433 node, result.payload)
3435 raise errors.OpExecError("Version mismatch master version %s,"
3436 " node version %s" %
3437 (constants.PROTOCOL_VERSION, result.payload))
3440 if self.cfg.GetClusterInfo().modify_ssh_setup:
3441 logging.info("Copy ssh key to node %s", node)
3442 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3444 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3445 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3449 keyarray.append(utils.ReadFile(i))
3451 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3452 keyarray[2], keyarray[3], keyarray[4],
3454 result.Raise("Cannot transfer ssh keys to the new node")
3456 # Add node to our /etc/hosts, and add key to known_hosts
3457 if self.cfg.GetClusterInfo().modify_etc_hosts:
3458 # FIXME: this should be done via an rpc call to node daemon
3459 utils.AddHostToEtcHosts(new_node.name)
3461 if new_node.secondary_ip != new_node.primary_ip:
3462 result = self.rpc.call_node_has_ip_address(new_node.name,
3463 new_node.secondary_ip)
3464 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3465 prereq=True, ecode=errors.ECODE_ENVIRON)
3466 if not result.payload:
3467 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3468 " you gave (%s). Please fix and re-run this"
3469 " command." % new_node.secondary_ip)
3471 node_verify_list = [self.cfg.GetMasterNode()]
3472 node_verify_param = {
3473 constants.NV_NODELIST: [node],
3474 # TODO: do a node-net-test as well?
3477 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3478 self.cfg.GetClusterName())
3479 for verifier in node_verify_list:
3480 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3481 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3483 for failed in nl_payload:
3484 feedback_fn("ssh/hostname verification failed"
3485 " (checking from %s): %s" %
3486 (verifier, nl_payload[failed]))
3487 raise errors.OpExecError("ssh/hostname verification failed.")
3490 _RedistributeAncillaryFiles(self)
3491 self.context.ReaddNode(new_node)
3492 # make sure we redistribute the config
3493 self.cfg.Update(new_node, feedback_fn)
3494 # and make sure the new node will not have old files around
3495 if not new_node.master_candidate:
3496 result = self.rpc.call_node_demote_from_mc(new_node.name)
3497 msg = result.fail_msg
3499 self.LogWarning("Node failed to demote itself from master"
3500 " candidate status: %s" % msg)
3502 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3503 self.context.AddNode(new_node, self.proc.GetECId())
3506 class LUSetNodeParams(LogicalUnit):
3507 """Modifies the parameters of a node.
3510 HPATH = "node-modify"
3511 HTYPE = constants.HTYPE_NODE
3512 _OP_REQP = ["node_name"]
3515 def CheckArguments(self):
3516 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3517 _CheckBooleanOpField(self.op, 'master_candidate')
3518 _CheckBooleanOpField(self.op, 'offline')
3519 _CheckBooleanOpField(self.op, 'drained')
3520 _CheckBooleanOpField(self.op, 'auto_promote')
3521 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3522 if all_mods.count(None) == 3:
3523 raise errors.OpPrereqError("Please pass at least one modification",
3525 if all_mods.count(True) > 1:
3526 raise errors.OpPrereqError("Can't set the node into more than one"
3527 " state at the same time",
3530 # Boolean value that tells us whether we're offlining or draining the node
3531 self.offline_or_drain = (self.op.offline == True or
3532 self.op.drained == True)
3533 self.deoffline_or_drain = (self.op.offline == False or
3534 self.op.drained == False)
3535 self.might_demote = (self.op.master_candidate == False or
3536 self.offline_or_drain)
3538 self.lock_all = self.op.auto_promote and self.might_demote
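# In short: offlining/draining (or clearing master_candidate) may demote the
# node (might_demote), clearing offline/drained may later auto-promote it in
# CheckPrereq, and lock_all is only needed when a demotion with auto_promote
# could require promoting some other node.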
3541 def ExpandNames(self):
3543 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3545 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3547 def BuildHooksEnv(self):
3550 This runs on the master node.
3554 "OP_TARGET": self.op.node_name,
3555 "MASTER_CANDIDATE": str(self.op.master_candidate),
3556 "OFFLINE": str(self.op.offline),
3557 "DRAINED": str(self.op.drained),
3559 nl = [self.cfg.GetMasterNode(),
3563 def CheckPrereq(self):
3564 """Check prerequisites.
3566 This checks the node's state against the requested flag changes.
3569 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3571 if (self.op.master_candidate is not None or
3572 self.op.drained is not None or
3573 self.op.offline is not None):
3574 # we can't change the master's node flags
3575 if self.op.node_name == self.cfg.GetMasterNode():
3576 raise errors.OpPrereqError("The master role can be changed"
3577 " only via masterfailover",
3581 if node.master_candidate and self.might_demote and not self.lock_all:
3582 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3583 # check if after removing the current node, we're missing master candidates
3585 (mc_remaining, mc_should, _) = \
3586 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3587 if mc_remaining < mc_should:
3588 raise errors.OpPrereqError("Not enough master candidates, please"
3589 " pass auto_promote to allow promotion",
3592 if (self.op.master_candidate == True and
3593 ((node.offline and not self.op.offline == False) or
3594 (node.drained and not self.op.drained == False))):
3595 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3596 " to master_candidate" % node.name,
3599 # If we're being deofflined/drained, we'll MC ourself if needed
3600 if (self.deoffline_or_drain and not self.offline_or_drain and not
3601 self.op.master_candidate == True and not node.master_candidate):
3602 self.op.master_candidate = _DecideSelfPromotion(self)
3603 if self.op.master_candidate:
3604 self.LogInfo("Autopromoting node to master candidate")
3608 def Exec(self, feedback_fn):
3617 if self.op.offline is not None:
3618 node.offline = self.op.offline
3619 result.append(("offline", str(self.op.offline)))
3620 if self.op.offline == True:
3621 if node.master_candidate:
3622 node.master_candidate = False
3624 result.append(("master_candidate", "auto-demotion due to offline"))
3626 node.drained = False
3627 result.append(("drained", "clear drained status due to offline"))
3629 if self.op.master_candidate is not None:
3630 node.master_candidate = self.op.master_candidate
3632 result.append(("master_candidate", str(self.op.master_candidate)))
3633 if self.op.master_candidate == False:
3634 rrc = self.rpc.call_node_demote_from_mc(node.name)
3637 self.LogWarning("Node failed to demote itself: %s" % msg)
3639 if self.op.drained is not None:
3640 node.drained = self.op.drained
3641 result.append(("drained", str(self.op.drained)))
3642 if self.op.drained == True:
3643 if node.master_candidate:
3644 node.master_candidate = False
3646 result.append(("master_candidate", "auto-demotion due to drain"))
3647 rrc = self.rpc.call_node_demote_from_mc(node.name)
3650 self.LogWarning("Node failed to demote itself: %s" % msg)
3652 node.offline = False
3653 result.append(("offline", "clear offline status due to drain"))
3655 # we locked all nodes, we adjust the CP before updating this node
3657 _AdjustCandidatePool(self, [node.name])
3659 # this will trigger configuration file update, if needed
3660 self.cfg.Update(node, feedback_fn)
3662 # this will trigger job queue propagation or cleanup
3664 self.context.ReaddNode(node)
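# The "result" list assembled above holds (parameter, new value) pairs, e.g.
# [("offline", "True"), ("master_candidate", "auto-demotion due to offline")],
# and is presumably what this Exec ultimately returns to describe the changes.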
3669 class LUPowercycleNode(NoHooksLU):
3670 """Powercycles a node.
3673 _OP_REQP = ["node_name", "force"]
3676 def CheckArguments(self):
3677 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3678 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3679 raise errors.OpPrereqError("The node is the master and the force"
3680 " parameter was not set",
3683 def ExpandNames(self):
3684 """Locking for PowercycleNode.
3686 This is a last-resort option and shouldn't block on other
3687 jobs. Therefore, we grab no locks.
3690 self.needed_locks = {}
3692 def CheckPrereq(self):
3693 """Check prerequisites.
3695 This LU has no prereqs.
3700 def Exec(self, feedback_fn):
3704 result = self.rpc.call_node_powercycle(self.op.node_name,
3705 self.cfg.GetHypervisorType())
3706 result.Raise("Failed to schedule the reboot")
3707 return result.payload
3710 class LUQueryClusterInfo(NoHooksLU):
3711 """Query cluster configuration.
3717 def ExpandNames(self):
3718 self.needed_locks = {}
3720 def CheckPrereq(self):
3721 """No prerequsites needed for this LU.
3726 def Exec(self, feedback_fn):
3727 """Return cluster config.
3730 cluster = self.cfg.GetClusterInfo()
3733 # Filter just for enabled hypervisors
3734 for os_name, hv_dict in cluster.os_hvp.items():
3735 os_hvp[os_name] = {}
3736 for hv_name, hv_params in hv_dict.items():
3737 if hv_name in cluster.enabled_hypervisors:
3738 os_hvp[os_name][hv_name] = hv_params
3741 "software_version": constants.RELEASE_VERSION,
3742 "protocol_version": constants.PROTOCOL_VERSION,
3743 "config_version": constants.CONFIG_VERSION,
3744 "os_api_version": max(constants.OS_API_VERSIONS),
3745 "export_version": constants.EXPORT_VERSION,
3746 "architecture": (platform.architecture()[0], platform.machine()),
3747 "name": cluster.cluster_name,
3748 "master": cluster.master_node,
3749 "default_hypervisor": cluster.enabled_hypervisors[0],
3750 "enabled_hypervisors": cluster.enabled_hypervisors,
3751 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3752 for hypervisor_name in cluster.enabled_hypervisors]),
3754 "beparams": cluster.beparams,
3755 "nicparams": cluster.nicparams,
3756 "candidate_pool_size": cluster.candidate_pool_size,
3757 "master_netdev": cluster.master_netdev,
3758 "volume_group_name": cluster.volume_group_name,
3759 "file_storage_dir": cluster.file_storage_dir,
3760 "maintain_node_health": cluster.maintain_node_health,
3761 "ctime": cluster.ctime,
3762 "mtime": cluster.mtime,
3763 "uuid": cluster.uuid,
3764 "tags": list(cluster.GetTags()),
3765 "uid_pool": cluster.uid_pool,
3771 class LUQueryConfigValues(NoHooksLU):
3772 """Return configuration values.
3777 _FIELDS_DYNAMIC = utils.FieldSet()
3778 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3781 def ExpandNames(self):
3782 self.needed_locks = {}
3784 _CheckOutputFields(static=self._FIELDS_STATIC,
3785 dynamic=self._FIELDS_DYNAMIC,
3786 selected=self.op.output_fields)
3788 def CheckPrereq(self):
3789 """No prerequisites.
3794 def Exec(self, feedback_fn):
3795 """Dump a representation of the cluster config to the standard output.
3799 for field in self.op.output_fields:
3800 if field == "cluster_name":
3801 entry = self.cfg.GetClusterName()
3802 elif field == "master_node":
3803 entry = self.cfg.GetMasterNode()
3804 elif field == "drain_flag":
3805 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3806 elif field == "watcher_pause":
3807 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3809 raise errors.ParameterError(field)
3810 values.append(entry)
3814 class LUActivateInstanceDisks(NoHooksLU):
3815 """Bring up an instance's disks.
3818 _OP_REQP = ["instance_name"]
3821 def ExpandNames(self):
3822 self._ExpandAndLockInstance()
3823 self.needed_locks[locking.LEVEL_NODE] = []
3824 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3826 def DeclareLocks(self, level):
3827 if level == locking.LEVEL_NODE:
3828 self._LockInstancesNodes()
3830 def CheckPrereq(self):
3831 """Check prerequisites.
3833 This checks that the instance is in the cluster.
3836 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3837 assert self.instance is not None, \
3838 "Cannot retrieve locked instance %s" % self.op.instance_name
3839 _CheckNodeOnline(self, self.instance.primary_node)
3840 if not hasattr(self.op, "ignore_size"):
3841 self.op.ignore_size = False
3843 def Exec(self, feedback_fn):
3844 """Activate the disks.
3847 disks_ok, disks_info = \
3848 _AssembleInstanceDisks(self, self.instance,
3849 ignore_size=self.op.ignore_size)
3851 raise errors.OpExecError("Cannot activate block devices")
3856 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
3858 """Prepare the block devices for an instance.
3860 This sets up the block devices on all nodes.
3862 @type lu: L{LogicalUnit}
3863 @param lu: the logical unit on whose behalf we execute
3864 @type instance: L{objects.Instance}
3865 @param instance: the instance for whose disks we assemble
3866 @type disks: list of L{objects.Disk} or None
3867 @param disks: which disks to assemble (or all, if None)
3868 @type ignore_secondaries: boolean
3869 @param ignore_secondaries: if true, errors on secondary nodes
3870 won't result in an error return from the function
3871 @type ignore_size: boolean
3872 @param ignore_size: if true, the current known size of the disk
3873 will not be used during the disk activation, useful for cases
3874 when the size is wrong
3875 @return: False if the operation failed, otherwise a list of
3876 (host, instance_visible_name, node_visible_name)
3877 with the mapping from node devices to instance devices
3882 iname = instance.name
3883 disks = _ExpandCheckDisks(instance, disks)
3885 # With the two-pass mechanism we try to reduce the window of
3886 # opportunity for the race condition of switching DRBD to primary
3887 # before the handshake has occurred, but we do not eliminate it
3889 # The proper fix would be to wait (with some limits) until the
3890 # connection has been made and drbd transitions from WFConnection
3891 # into any other network-connected state (Connected, SyncTarget,
3894 # 1st pass, assemble on all nodes in secondary mode
3895 for inst_disk in disks:
3896 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3898 node_disk = node_disk.Copy()
3899 node_disk.UnsetSize()
3900 lu.cfg.SetDiskID(node_disk, node)
3901 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3902 msg = result.fail_msg
3904 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3905 " (is_primary=False, pass=1): %s",
3906 inst_disk.iv_name, node, msg)
3907 if not ignore_secondaries:
3910 # FIXME: race condition on drbd migration to primary
3912 # 2nd pass, do only the primary node
3913 for inst_disk in disks:
3916 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3917 if node != instance.primary_node:
3920 node_disk = node_disk.Copy()
3921 node_disk.UnsetSize()
3922 lu.cfg.SetDiskID(node_disk, node)
3923 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3924 msg = result.fail_msg
3926 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3927 " (is_primary=True, pass=2): %s",
3928 inst_disk.iv_name, node, msg)
3931 dev_path = result.payload
3933 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3935 # leave the disks configured for the primary node
3936 # this is a workaround that would be better fixed by
3937 # improving the logical/physical id handling
3939 lu.cfg.SetDiskID(disk, instance.primary_node)
3941 return disks_ok, device_info
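# Usage sketch for _AssembleInstanceDisks (illustrative; mirrors the callers
# below, values are hypothetical):
#
#   disks_ok, dev_info = _AssembleInstanceDisks(lu, instance)
#   # dev_info entries are (primary_node, iv_name, dev_path) tuples, e.g.
#   #   [("node1.example.com", "disk/0", "/dev/drbd0")]
#
# The two passes above first assemble every device in secondary mode on all
# nodes and only then re-assemble it as primary on the instance's primary
# node, which narrows (but does not close) the DRBD handshake race described
# in the comments.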
3944 def _StartInstanceDisks(lu, instance, force):
3945 """Start the disks of an instance.
3948 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3949 ignore_secondaries=force)
3951 _ShutdownInstanceDisks(lu, instance)
3952 if force is not None and not force:
3953 lu.proc.LogWarning("", hint="If the message above refers to a"
3955 " you can retry the operation using '--force'.")
3956 raise errors.OpExecError("Disk consistency error")
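# Behaviour of the 'force' argument above (illustrative summary):
#   force=True  -> assembly errors on secondary nodes are ignored
#                  (ignore_secondaries=True)
#   force=False -> on failure the disks are shut down again and the error is
#                  accompanied by the "--force" hint
#   force=None  -> same failure handling, but without the hint (used by
#                  callers such as LUReinstallInstance below)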
3959 class LUDeactivateInstanceDisks(NoHooksLU):
3960 """Shutdown an instance's disks.
3963 _OP_REQP = ["instance_name"]
3966 def ExpandNames(self):
3967 self._ExpandAndLockInstance()
3968 self.needed_locks[locking.LEVEL_NODE] = []
3969 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3971 def DeclareLocks(self, level):
3972 if level == locking.LEVEL_NODE:
3973 self._LockInstancesNodes()
3975 def CheckPrereq(self):
3976 """Check prerequisites.
3978 This checks that the instance is in the cluster.
3981 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3982 assert self.instance is not None, \
3983 "Cannot retrieve locked instance %s" % self.op.instance_name
3985 def Exec(self, feedback_fn):
3986 """Deactivate the disks
3989 instance = self.instance
3990 _SafeShutdownInstanceDisks(self, instance)
3993 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
3994 """Shutdown block devices of an instance.
3996 This function checks if an instance is running, before calling
3997 _ShutdownInstanceDisks.
4000 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4001 _ShutdownInstanceDisks(lu, instance, disks=disks)
4004 def _ExpandCheckDisks(instance, disks):
4005 """Return the instance disks selected by the disks list
4007 @type disks: list of L{objects.Disk} or None
4008 @param disks: selected disks
4009 @rtype: list of L{objects.Disk}
4010 @return: selected instance disks to act on
4014 return instance.disks
4016 if not set(disks).issubset(instance.disks):
4017 raise errors.ProgrammerError("Can only act on disks belonging to the"
4022 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4023 """Shutdown block devices of an instance.
4025 This does the shutdown on all nodes of the instance.
4027 If ignore_primary is false, errors on the primary node are
4032 disks = _ExpandCheckDisks(instance, disks)
4035 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4036 lu.cfg.SetDiskID(top_disk, node)
4037 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4038 msg = result.fail_msg
4040 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4041 disk.iv_name, node, msg)
4042 if not ignore_primary or node != instance.primary_node:
4047 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4048 """Checks if a node has enough free memory.
4050 This function checks whether a given node has the needed amount of free
4051 memory. If the node has less memory, or if we cannot get the
4052 information from the node, this function raises an OpPrereqError
4055 @type lu: C{LogicalUnit}
4056 @param lu: a logical unit from which we get configuration data
4058 @param node: the node to check
4059 @type reason: C{str}
4060 @param reason: string to use in the error message
4061 @type requested: C{int}
4062 @param requested: the amount of memory in MiB to check for
4063 @type hypervisor_name: C{str}
4064 @param hypervisor_name: the hypervisor to ask for memory stats
4065 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4066 we cannot check the node
4069 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4070 nodeinfo[node].Raise("Can't get data from node %s" % node,
4071 prereq=True, ecode=errors.ECODE_ENVIRON)
4072 free_mem = nodeinfo[node].payload.get('memory_free', None)
4073 if not isinstance(free_mem, int):
4074 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4075 " was '%s'" % (node, free_mem),
4076 errors.ECODE_ENVIRON)
4077 if requested > free_mem:
4078 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4079 " needed %s MiB, available %s MiB" %
4080 (node, reason, requested, free_mem),
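# Typical call pattern (illustrative; this is how LUStartupInstance.CheckPrereq
# below uses this helper):
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)
#
# The requested amount (in MiB) is compared against the 'memory_free' value
# reported by the node_info RPC for the given hypervisor.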
4084 def _CheckNodesFreeDisk(lu, nodenames, requested):
4085 """Checks if nodes have enough free disk space in the default VG.
4087 This function checks whether all given nodes have the needed amount of
4088 free disk space. If any node has less disk, or if we cannot get the
4089 information from the node, this function raises an OpPrereqError
4092 @type lu: C{LogicalUnit}
4093 @param lu: a logical unit from which we get configuration data
4094 @type nodenames: C{list}
4095 @param nodenames: the list of node names to check
4096 @type requested: C{int}
4097 @param requested: the amount of disk in MiB to check for
4098 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4099 we cannot check the node
4102 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4103 lu.cfg.GetHypervisorType())
4104 for node in nodenames:
4105 info = nodeinfo[node]
4106 info.Raise("Cannot get current information from node %s" % node,
4107 prereq=True, ecode=errors.ECODE_ENVIRON)
4108 vg_free = info.payload.get("vg_free", None)
4109 if not isinstance(vg_free, int):
4110 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4111 " result was '%s'" % (node, vg_free),
4112 errors.ECODE_ENVIRON)
4113 if requested > vg_free:
4114 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4115 " required %d MiB, available %d MiB" %
4116 (node, requested, vg_free),
4120 class LUStartupInstance(LogicalUnit):
4121 """Starts an instance.
4124 HPATH = "instance-start"
4125 HTYPE = constants.HTYPE_INSTANCE
4126 _OP_REQP = ["instance_name", "force"]
4129 def ExpandNames(self):
4130 self._ExpandAndLockInstance()
4132 def BuildHooksEnv(self):
4135 This runs on master, primary and secondary nodes of the instance.
4139 "FORCE": self.op.force,
4141 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4142 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4145 def CheckPrereq(self):
4146 """Check prerequisites.
4148 This checks that the instance is in the cluster.
4151 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4152 assert self.instance is not None, \
4153 "Cannot retrieve locked instance %s" % self.op.instance_name
4156 self.beparams = getattr(self.op, "beparams", {})
4158 if not isinstance(self.beparams, dict):
4159 raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4160 " dict" % (type(self.beparams), ),
4162 # fill the beparams dict
4163 utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
4164 self.op.beparams = self.beparams
4167 self.hvparams = getattr(self.op, "hvparams", {})
4169 if not isinstance(self.hvparams, dict):
4170 raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4171 " dict" % (type(self.hvparams), ),
4174 # check hypervisor parameter syntax (locally)
4175 cluster = self.cfg.GetClusterInfo()
4176 utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
4177 filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
4179 filled_hvp.update(self.hvparams)
4180 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4181 hv_type.CheckParameterSyntax(filled_hvp)
4182 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4183 self.op.hvparams = self.hvparams
4185 _CheckNodeOnline(self, instance.primary_node)
4187 bep = self.cfg.GetClusterInfo().FillBE(instance)
4188 # check bridges existence
4189 _CheckInstanceBridgesExist(self, instance)
4191 remote_info = self.rpc.call_instance_info(instance.primary_node,
4193 instance.hypervisor)
4194 remote_info.Raise("Error checking node %s" % instance.primary_node,
4195 prereq=True, ecode=errors.ECODE_ENVIRON)
4196 if not remote_info.payload: # not running already
4197 _CheckNodeFreeMemory(self, instance.primary_node,
4198 "starting instance %s" % instance.name,
4199 bep[constants.BE_MEMORY], instance.hypervisor)
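# Sketch of the hvparams handling above (illustrative summary): the per-start
# overrides from the opcode are merged on top of the filled cluster defaults,
#
#   filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor], ...)
#   filled_hvp.update(self.hvparams)
#
# and the merged dict is then syntax-checked locally (CheckParameterSyntax)
# and on the instance's nodes (_CheckHVParams) before being stored back on
# the opcode.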
4201 def Exec(self, feedback_fn):
4202 """Start the instance.
4205 instance = self.instance
4206 force = self.op.force
4208 self.cfg.MarkInstanceUp(instance.name)
4210 node_current = instance.primary_node
4212 _StartInstanceDisks(self, instance, force)
4214 result = self.rpc.call_instance_start(node_current, instance,
4215 self.hvparams, self.beparams)
4216 msg = result.fail_msg
4218 _ShutdownInstanceDisks(self, instance)
4219 raise errors.OpExecError("Could not start instance: %s" % msg)
4222 class LURebootInstance(LogicalUnit):
4223 """Reboot an instance.
4226 HPATH = "instance-reboot"
4227 HTYPE = constants.HTYPE_INSTANCE
4228 _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
4231 def CheckArguments(self):
4232 """Check the arguments.
4235 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4236 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4238 def ExpandNames(self):
4239 if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
4240 constants.INSTANCE_REBOOT_HARD,
4241 constants.INSTANCE_REBOOT_FULL]:
4242 raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
4243 (constants.INSTANCE_REBOOT_SOFT,
4244 constants.INSTANCE_REBOOT_HARD,
4245 constants.INSTANCE_REBOOT_FULL))
4246 self._ExpandAndLockInstance()
4248 def BuildHooksEnv(self):
4251 This runs on master, primary and secondary nodes of the instance.
4255 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4256 "REBOOT_TYPE": self.op.reboot_type,
4257 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4259 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4260 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4263 def CheckPrereq(self):
4264 """Check prerequisites.
4266 This checks that the instance is in the cluster.
4269 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4270 assert self.instance is not None, \
4271 "Cannot retrieve locked instance %s" % self.op.instance_name
4273 _CheckNodeOnline(self, instance.primary_node)
4275 # check bridges existence
4276 _CheckInstanceBridgesExist(self, instance)
4278 def Exec(self, feedback_fn):
4279 """Reboot the instance.
4282 instance = self.instance
4283 ignore_secondaries = self.op.ignore_secondaries
4284 reboot_type = self.op.reboot_type
4286 node_current = instance.primary_node
4288 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4289 constants.INSTANCE_REBOOT_HARD]:
4290 for disk in instance.disks:
4291 self.cfg.SetDiskID(disk, node_current)
4292 result = self.rpc.call_instance_reboot(node_current, instance,
4294 self.shutdown_timeout)
4295 result.Raise("Could not reboot instance")
4297 result = self.rpc.call_instance_shutdown(node_current, instance,
4298 self.shutdown_timeout)
4299 result.Raise("Could not shutdown instance for full reboot")
4300 _ShutdownInstanceDisks(self, instance)
4301 _StartInstanceDisks(self, instance, ignore_secondaries)
4302 result = self.rpc.call_instance_start(node_current, instance, None, None)
4303 msg = result.fail_msg
4305 _ShutdownInstanceDisks(self, instance)
4306 raise errors.OpExecError("Could not start instance for"
4307 " full reboot: %s" % msg)
4309 self.cfg.MarkInstanceUp(instance.name)
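# Summary of the reboot handling above (illustrative): soft and hard reboots
# are delegated to the node daemon in a single call_instance_reboot RPC,
# after re-setting the disk IDs for the primary node.  A "full" reboot is
# emulated from the master instead:
#
#   call_instance_shutdown -> _ShutdownInstanceDisks -> _StartInstanceDisks
#     -> call_instance_start
#
# In both cases the instance is marked up in the configuration afterwards.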
4312 class LUShutdownInstance(LogicalUnit):
4313 """Shutdown an instance.
4316 HPATH = "instance-stop"
4317 HTYPE = constants.HTYPE_INSTANCE
4318 _OP_REQP = ["instance_name"]
4321 def CheckArguments(self):
4322 """Check the arguments.
4325 self.timeout = getattr(self.op, "timeout",
4326 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4328 def ExpandNames(self):
4329 self._ExpandAndLockInstance()
4331 def BuildHooksEnv(self):
4334 This runs on master, primary and secondary nodes of the instance.
4337 env = _BuildInstanceHookEnvByObject(self, self.instance)
4338 env["TIMEOUT"] = self.timeout
4339 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4342 def CheckPrereq(self):
4343 """Check prerequisites.
4345 This checks that the instance is in the cluster.
4348 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4349 assert self.instance is not None, \
4350 "Cannot retrieve locked instance %s" % self.op.instance_name
4351 _CheckNodeOnline(self, self.instance.primary_node)
4353 def Exec(self, feedback_fn):
4354 """Shutdown the instance.
4357 instance = self.instance
4358 node_current = instance.primary_node
4359 timeout = self.timeout
4360 self.cfg.MarkInstanceDown(instance.name)
4361 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4362 msg = result.fail_msg
4364 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4366 _ShutdownInstanceDisks(self, instance)
4369 class LUReinstallInstance(LogicalUnit):
4370 """Reinstall an instance.
4373 HPATH = "instance-reinstall"
4374 HTYPE = constants.HTYPE_INSTANCE
4375 _OP_REQP = ["instance_name"]
4378 def ExpandNames(self):
4379 self._ExpandAndLockInstance()
4381 def BuildHooksEnv(self):
4384 This runs on master, primary and secondary nodes of the instance.
4387 env = _BuildInstanceHookEnvByObject(self, self.instance)
4388 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4391 def CheckPrereq(self):
4392 """Check prerequisites.
4394 This checks that the instance is in the cluster and is not running.
4397 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4398 assert instance is not None, \
4399 "Cannot retrieve locked instance %s" % self.op.instance_name
4400 _CheckNodeOnline(self, instance.primary_node)
4402 if instance.disk_template == constants.DT_DISKLESS:
4403 raise errors.OpPrereqError("Instance '%s' has no disks" %
4404 self.op.instance_name,
4406 _CheckInstanceDown(self, instance, "cannot reinstall")
4408 self.op.os_type = getattr(self.op, "os_type", None)
4409 self.op.force_variant = getattr(self.op, "force_variant", False)
4410 if self.op.os_type is not None:
4412 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4413 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4415 self.instance = instance
4417 def Exec(self, feedback_fn):
4418 """Reinstall the instance.
4421 inst = self.instance
4423 if self.op.os_type is not None:
4424 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4425 inst.os = self.op.os_type
4426 self.cfg.Update(inst, feedback_fn)
4428 _StartInstanceDisks(self, inst, None)
4430 feedback_fn("Running the instance OS create scripts...")
4431 # FIXME: pass debug option from opcode to backend
4432 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4433 self.op.debug_level)
4434 result.Raise("Could not install OS for instance %s on node %s" %
4435 (inst.name, inst.primary_node))
4437 _ShutdownInstanceDisks(self, inst)
4440 class LURecreateInstanceDisks(LogicalUnit):
4441 """Recreate an instance's missing disks.
4444 HPATH = "instance-recreate-disks"
4445 HTYPE = constants.HTYPE_INSTANCE
4446 _OP_REQP = ["instance_name", "disks"]
4449 def CheckArguments(self):
4450 """Check the arguments.
4453 if not isinstance(self.op.disks, list):
4454 raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4455 for item in self.op.disks:
4456 if (not isinstance(item, int) or
4458 raise errors.OpPrereqError("Invalid disk specification '%s'" %
4459 str(item), errors.ECODE_INVAL)
4461 def ExpandNames(self):
4462 self._ExpandAndLockInstance()
4464 def BuildHooksEnv(self):
4467 This runs on master, primary and secondary nodes of the instance.
4470 env = _BuildInstanceHookEnvByObject(self, self.instance)
4471 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4474 def CheckPrereq(self):
4475 """Check prerequisites.
4477 This checks that the instance is in the cluster and is not running.
4480 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4481 assert instance is not None, \
4482 "Cannot retrieve locked instance %s" % self.op.instance_name
4483 _CheckNodeOnline(self, instance.primary_node)
4485 if instance.disk_template == constants.DT_DISKLESS:
4486 raise errors.OpPrereqError("Instance '%s' has no disks" %
4487 self.op.instance_name, errors.ECODE_INVAL)
4488 _CheckInstanceDown(self, instance, "cannot recreate disks")
4490 if not self.op.disks:
4491 self.op.disks = range(len(instance.disks))
4493 for idx in self.op.disks:
4494 if idx >= len(instance.disks):
4495 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4498 self.instance = instance
4500 def Exec(self, feedback_fn):
4501 """Recreate the disks.
4505 for idx, _ in enumerate(self.instance.disks):
4506 if idx not in self.op.disks: # disk idx has not been passed in
4510 _CreateDisks(self, self.instance, to_skip=to_skip)
4513 class LURenameInstance(LogicalUnit):
4514 """Rename an instance.
4517 HPATH = "instance-rename"
4518 HTYPE = constants.HTYPE_INSTANCE
4519 _OP_REQP = ["instance_name", "new_name"]
4521 def BuildHooksEnv(self):
4524 This runs on master, primary and secondary nodes of the instance.
4527 env = _BuildInstanceHookEnvByObject(self, self.instance)
4528 env["INSTANCE_NEW_NAME"] = self.op.new_name
4529 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4532 def CheckPrereq(self):
4533 """Check prerequisites.
4535 This checks that the instance is in the cluster and is not running.
4538 self.op.instance_name = _ExpandInstanceName(self.cfg,
4539 self.op.instance_name)
4540 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4541 assert instance is not None
4542 _CheckNodeOnline(self, instance.primary_node)
4543 _CheckInstanceDown(self, instance, "cannot rename")
4544 self.instance = instance
4546 # new name verification
4547 name_info = utils.GetHostInfo(self.op.new_name)
4549 self.op.new_name = new_name = name_info.name
4550 instance_list = self.cfg.GetInstanceList()
4551 if new_name in instance_list:
4552 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4553 new_name, errors.ECODE_EXISTS)
4555 if not getattr(self.op, "ignore_ip", False):
4556 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4557 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4558 (name_info.ip, new_name),
4559 errors.ECODE_NOTUNIQUE)
4562 def Exec(self, feedback_fn):
4563 """Reinstall the instance.
4566 inst = self.instance
4567 old_name = inst.name
4569 if inst.disk_template == constants.DT_FILE:
4570 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4572 self.cfg.RenameInstance(inst.name, self.op.new_name)
4573 # Change the instance lock. This is definitely safe while we hold the BGL
4574 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4575 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4577 # re-read the instance from the configuration after rename
4578 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4580 if inst.disk_template == constants.DT_FILE:
4581 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4582 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4583 old_file_storage_dir,
4584 new_file_storage_dir)
4585 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4586 " (but the instance has been renamed in Ganeti)" %
4587 (inst.primary_node, old_file_storage_dir,
4588 new_file_storage_dir))
4590 _StartInstanceDisks(self, inst, None)
4592 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4593 old_name, self.op.debug_level)
4594 msg = result.fail_msg
4596 msg = ("Could not run OS rename script for instance %s on node %s"
4597 " (but the instance has been renamed in Ganeti): %s" %
4598 (inst.name, inst.primary_node, msg))
4599 self.proc.LogWarning(msg)
4601 _ShutdownInstanceDisks(self, inst)
4604 class LURemoveInstance(LogicalUnit):
4605 """Remove an instance.
4608 HPATH = "instance-remove"
4609 HTYPE = constants.HTYPE_INSTANCE
4610 _OP_REQP = ["instance_name", "ignore_failures"]
4613 def CheckArguments(self):
4614 """Check the arguments.
4617 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4618 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4620 def ExpandNames(self):
4621 self._ExpandAndLockInstance()
4622 self.needed_locks[locking.LEVEL_NODE] = []
4623 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4625 def DeclareLocks(self, level):
4626 if level == locking.LEVEL_NODE:
4627 self._LockInstancesNodes()
4629 def BuildHooksEnv(self):
4632 This runs on master, primary and secondary nodes of the instance.
4635 env = _BuildInstanceHookEnvByObject(self, self.instance)
4636 env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4637 nl = [self.cfg.GetMasterNode()]
4638 nl_post = list(self.instance.all_nodes) + nl
4639 return env, nl, nl_post
4641 def CheckPrereq(self):
4642 """Check prerequisites.
4644 This checks that the instance is in the cluster.
4647 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4648 assert self.instance is not None, \
4649 "Cannot retrieve locked instance %s" % self.op.instance_name
4651 def Exec(self, feedback_fn):
4652 """Remove the instance.
4655 instance = self.instance
4656 logging.info("Shutting down instance %s on node %s",
4657 instance.name, instance.primary_node)
4659 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4660 self.shutdown_timeout)
4661 msg = result.fail_msg
4663 if self.op.ignore_failures:
4664 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4666 raise errors.OpExecError("Could not shutdown instance %s on"
4668 (instance.name, instance.primary_node, msg))
4670 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4673 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4674 """Utility function to remove an instance.
4677 logging.info("Removing block devices for instance %s", instance.name)
4679 if not _RemoveDisks(lu, instance):
4680 if not ignore_failures:
4681 raise errors.OpExecError("Can't remove instance's disks")
4682 feedback_fn("Warning: can't remove instance's disks")
4684 logging.info("Removing instance %s out of cluster config", instance.name)
4686 lu.cfg.RemoveInstance(instance.name)
4688 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4689 "Instance lock removal conflict"
4691 # Remove lock for the instance
4692 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4695 class LUQueryInstances(NoHooksLU):
4696 """Logical unit for querying instances.
4699 # pylint: disable-msg=W0142
4700 _OP_REQP = ["output_fields", "names", "use_locking"]
4702 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4703 "serial_no", "ctime", "mtime", "uuid"]
4704 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4706 "disk_template", "ip", "mac", "bridge",
4707 "nic_mode", "nic_link",
4708 "sda_size", "sdb_size", "vcpus", "tags",
4709 "network_port", "beparams",
4710 r"(disk)\.(size)/([0-9]+)",
4711 r"(disk)\.(sizes)", "disk_usage",
4712 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4713 r"(nic)\.(bridge)/([0-9]+)",
4714 r"(nic)\.(macs|ips|modes|links|bridges)",
4715 r"(disk|nic)\.(count)",
4717 ] + _SIMPLE_FIELDS +
4719 for name in constants.HVS_PARAMETERS
4720 if name not in constants.HVC_GLOBALS] +
4722 for name in constants.BES_PARAMETERS])
4723 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4726 def ExpandNames(self):
4727 _CheckOutputFields(static=self._FIELDS_STATIC,
4728 dynamic=self._FIELDS_DYNAMIC,
4729 selected=self.op.output_fields)
4731 self.needed_locks = {}
4732 self.share_locks[locking.LEVEL_INSTANCE] = 1
4733 self.share_locks[locking.LEVEL_NODE] = 1
4736 self.wanted = _GetWantedInstances(self, self.op.names)
4738 self.wanted = locking.ALL_SET
4740 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4741 self.do_locking = self.do_node_query and self.op.use_locking
4743 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4744 self.needed_locks[locking.LEVEL_NODE] = []
4745 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4747 def DeclareLocks(self, level):
4748 if level == locking.LEVEL_NODE and self.do_locking:
4749 self._LockInstancesNodes()
4751 def CheckPrereq(self):
4752 """Check prerequisites.
4757 def Exec(self, feedback_fn):
4758 """Computes the list of nodes and their attributes.
4761 # pylint: disable-msg=R0912
4762 # way too many branches here
4763 all_info = self.cfg.GetAllInstancesInfo()
4764 if self.wanted == locking.ALL_SET:
4765 # caller didn't specify instance names, so ordering is not important
4767 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4769 instance_names = all_info.keys()
4770 instance_names = utils.NiceSort(instance_names)
4772 # caller did specify names, so we must keep the ordering
4774 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4776 tgt_set = all_info.keys()
4777 missing = set(self.wanted).difference(tgt_set)
4779 raise errors.OpExecError("Some instances were removed before"
4780 " retrieving their data: %s" % missing)
4781 instance_names = self.wanted
4783 instance_list = [all_info[iname] for iname in instance_names]
4785 # begin data gathering
4787 nodes = frozenset([inst.primary_node for inst in instance_list])
4788 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4792 if self.do_node_query:
4794 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4796 result = node_data[name]
4798 # offline nodes will be in both lists
4799 off_nodes.append(name)
4801 bad_nodes.append(name)
4804 live_data.update(result.payload)
4805 # else no instance is alive
4807 live_data = dict([(name, {}) for name in instance_names])
4809 # end data gathering
4814 cluster = self.cfg.GetClusterInfo()
4815 for instance in instance_list:
4817 i_hv = cluster.FillHV(instance, skip_globals=True)
4818 i_be = cluster.FillBE(instance)
4819 i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4820 nic.nicparams) for nic in instance.nics]
4821 for field in self.op.output_fields:
4822 st_match = self._FIELDS_STATIC.Matches(field)
4823 if field in self._SIMPLE_FIELDS:
4824 val = getattr(instance, field)
4825 elif field == "pnode":
4826 val = instance.primary_node
4827 elif field == "snodes":
4828 val = list(instance.secondary_nodes)
4829 elif field == "admin_state":
4830 val = instance.admin_up
4831 elif field == "oper_state":
4832 if instance.primary_node in bad_nodes:
4835 val = bool(live_data.get(instance.name))
4836 elif field == "status":
4837 if instance.primary_node in off_nodes:
4838 val = "ERROR_nodeoffline"
4839 elif instance.primary_node in bad_nodes:
4840 val = "ERROR_nodedown"
4842 running = bool(live_data.get(instance.name))
4844 if instance.admin_up:
4849 if instance.admin_up:
4853 elif field == "oper_ram":
4854 if instance.primary_node in bad_nodes:
4856 elif instance.name in live_data:
4857 val = live_data[instance.name].get("memory", "?")
4860 elif field == "vcpus":
4861 val = i_be[constants.BE_VCPUS]
4862 elif field == "disk_template":
4863 val = instance.disk_template
4866 val = instance.nics[0].ip
4869 elif field == "nic_mode":
4871 val = i_nicp[0][constants.NIC_MODE]
4874 elif field == "nic_link":
4876 val = i_nicp[0][constants.NIC_LINK]
4879 elif field == "bridge":
4880 if (instance.nics and
4881 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4882 val = i_nicp[0][constants.NIC_LINK]
4885 elif field == "mac":
4887 val = instance.nics[0].mac
4890 elif field == "sda_size" or field == "sdb_size":
4891 idx = ord(field[2]) - ord('a')
4893 val = instance.FindDisk(idx).size
4894 except errors.OpPrereqError:
4896 elif field == "disk_usage": # total disk usage per node
4897 disk_sizes = [{'size': disk.size} for disk in instance.disks]
4898 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4899 elif field == "tags":
4900 val = list(instance.GetTags())
4901 elif field == "hvparams":
4903 elif (field.startswith(HVPREFIX) and
4904 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4905 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4906 val = i_hv.get(field[len(HVPREFIX):], None)
4907 elif field == "beparams":
4909 elif (field.startswith(BEPREFIX) and
4910 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4911 val = i_be.get(field[len(BEPREFIX):], None)
4912 elif st_match and st_match.groups():
4913 # matches a variable list
4914 st_groups = st_match.groups()
4915 if st_groups and st_groups[0] == "disk":
4916 if st_groups[1] == "count":
4917 val = len(instance.disks)
4918 elif st_groups[1] == "sizes":
4919 val = [disk.size for disk in instance.disks]
4920 elif st_groups[1] == "size":
4922 val = instance.FindDisk(st_groups[2]).size
4923 except errors.OpPrereqError:
4926 assert False, "Unhandled disk parameter"
4927 elif st_groups[0] == "nic":
4928 if st_groups[1] == "count":
4929 val = len(instance.nics)
4930 elif st_groups[1] == "macs":
4931 val = [nic.mac for nic in instance.nics]
4932 elif st_groups[1] == "ips":
4933 val = [nic.ip for nic in instance.nics]
4934 elif st_groups[1] == "modes":
4935 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4936 elif st_groups[1] == "links":
4937 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4938 elif st_groups[1] == "bridges":
4941 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4942 val.append(nicp[constants.NIC_LINK])
4947 nic_idx = int(st_groups[2])
4948 if nic_idx >= len(instance.nics):
4951 if st_groups[1] == "mac":
4952 val = instance.nics[nic_idx].mac
4953 elif st_groups[1] == "ip":
4954 val = instance.nics[nic_idx].ip
4955 elif st_groups[1] == "mode":
4956 val = i_nicp[nic_idx][constants.NIC_MODE]
4957 elif st_groups[1] == "link":
4958 val = i_nicp[nic_idx][constants.NIC_LINK]
4959 elif st_groups[1] == "bridge":
4960 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4961 if nic_mode == constants.NIC_MODE_BRIDGED:
4962 val = i_nicp[nic_idx][constants.NIC_LINK]
4966 assert False, "Unhandled NIC parameter"
4968 assert False, ("Declared but unhandled variable parameter '%s'" %
4971 assert False, "Declared but unhandled parameter '%s'" % field
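# Field-matching sketch for the query above (illustrative; the field names
# are examples): simple attributes come straight from the Instance object,
# while parameterised fields are matched against the regular expressions in
# _FIELDS_STATIC, e.g.
#
#   "disk.size/1" -> groups ("disk", "size", "1") -> instance.FindDisk("1").size
#   "nic.mac/0"   -> groups ("nic", "mac", "0")   -> instance.nics[0].mac
#
# Dynamic fields such as "oper_state" and "oper_ram" additionally require the
# live data gathered via call_all_instances_info.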
4978 class LUFailoverInstance(LogicalUnit):
4979 """Failover an instance.
4982 HPATH = "instance-failover"
4983 HTYPE = constants.HTYPE_INSTANCE
4984 _OP_REQP = ["instance_name", "ignore_consistency"]
4987 def CheckArguments(self):
4988 """Check the arguments.
4991 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4992 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4994 def ExpandNames(self):
4995 self._ExpandAndLockInstance()
4996 self.needed_locks[locking.LEVEL_NODE] = []
4997 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4999 def DeclareLocks(self, level):
5000 if level == locking.LEVEL_NODE:
5001 self._LockInstancesNodes()
5003 def BuildHooksEnv(self):
5006 This runs on master, primary and secondary nodes of the instance.
5009 instance = self.instance
5010 source_node = instance.primary_node
5011 target_node = instance.secondary_nodes[0]
5013 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5014 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5015 "OLD_PRIMARY": source_node,
5016 "OLD_SECONDARY": target_node,
5017 "NEW_PRIMARY": target_node,
5018 "NEW_SECONDARY": source_node,
5020 env.update(_BuildInstanceHookEnvByObject(self, instance))
5021 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5023 nl_post.append(source_node)
5024 return env, nl, nl_post
5026 def CheckPrereq(self):
5027 """Check prerequisites.
5029 This checks that the instance is in the cluster.
5032 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5033 assert self.instance is not None, \
5034 "Cannot retrieve locked instance %s" % self.op.instance_name
5036 bep = self.cfg.GetClusterInfo().FillBE(instance)
5037 if instance.disk_template not in constants.DTS_NET_MIRROR:
5038 raise errors.OpPrereqError("Instance's disk layout is not"
5039 " network mirrored, cannot failover.",
5042 secondary_nodes = instance.secondary_nodes
5043 if not secondary_nodes:
5044 raise errors.ProgrammerError("no secondary node but using "
5045 "a mirrored disk template")
5047 target_node = secondary_nodes[0]
5048 _CheckNodeOnline(self, target_node)
5049 _CheckNodeNotDrained(self, target_node)
5050 if instance.admin_up:
5051 # check memory requirements on the secondary node
5052 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5053 instance.name, bep[constants.BE_MEMORY],
5054 instance.hypervisor)
5056 self.LogInfo("Not checking memory on the secondary node as"
5057 " instance will not be started")
5059 # check bridge existence
5060 _CheckInstanceBridgesExist(self, instance, node=target_node)
5062 def Exec(self, feedback_fn):
5063 """Failover an instance.
5065 The failover is done by shutting it down on its present node and
5066 starting it on the secondary.
5069 instance = self.instance
5071 source_node = instance.primary_node
5072 target_node = instance.secondary_nodes[0]
5074 if instance.admin_up:
5075 feedback_fn("* checking disk consistency between source and target")
5076 for dev in instance.disks:
5077 # for drbd, these are drbd over lvm
5078 if not _CheckDiskConsistency(self, dev, target_node, False):
5079 if not self.op.ignore_consistency:
5080 raise errors.OpExecError("Disk %s is degraded on target node,"
5081 " aborting failover." % dev.iv_name)
5083 feedback_fn("* not checking disk consistency as instance is not running")
5085 feedback_fn("* shutting down instance on source node")
5086 logging.info("Shutting down instance %s on node %s",
5087 instance.name, source_node)
5089 result = self.rpc.call_instance_shutdown(source_node, instance,
5090 self.shutdown_timeout)
5091 msg = result.fail_msg
5093 if self.op.ignore_consistency:
5094 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5095 " Proceeding anyway. Please make sure node"
5096 " %s is down. Error details: %s",
5097 instance.name, source_node, source_node, msg)
5099 raise errors.OpExecError("Could not shutdown instance %s on"
5101 (instance.name, source_node, msg))
5103 feedback_fn("* deactivating the instance's disks on source node")
5104 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5105 raise errors.OpExecError("Can't shut down the instance's disks.")
5107 instance.primary_node = target_node
5108 # distribute new instance config to the other nodes
5109 self.cfg.Update(instance, feedback_fn)
5111 # Only start the instance if it's marked as up
5112 if instance.admin_up:
5113 feedback_fn("* activating the instance's disks on target node")
5114 logging.info("Starting instance %s on node %s",
5115 instance.name, target_node)
5117 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5118 ignore_secondaries=True)
5120 _ShutdownInstanceDisks(self, instance)
5121 raise errors.OpExecError("Can't activate the instance's disks")
5123 feedback_fn("* starting the instance on the target node")
5124 result = self.rpc.call_instance_start(target_node, instance, None, None)
5125 msg = result.fail_msg
5127 _ShutdownInstanceDisks(self, instance)
5128 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5129 (instance.name, target_node, msg))
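# Failover sequence (illustrative summary of the Exec above):
#   1. if the instance is marked up, check disk consistency on the target
#      node (bypassable via ignore_consistency)
#   2. shut the instance down on the source node; with ignore_consistency a
#      shutdown failure is only a warning
#   3. shut down the instance's disks (ignoring primary-node errors), flip
#      instance.primary_node to the old secondary and distribute the new
#      configuration
#   4. if the instance was marked up, re-assemble the disks on the new
#      primary node and start it there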
5132 class LUMigrateInstance(LogicalUnit):
5133 """Migrate an instance.
5135 This is migration without shutting the instance down, in contrast to
5136 failover, which is done with a shutdown.
5139 HPATH = "instance-migrate"
5140 HTYPE = constants.HTYPE_INSTANCE
5141 _OP_REQP = ["instance_name", "live", "cleanup"]
5145 def ExpandNames(self):
5146 self._ExpandAndLockInstance()
5148 self.needed_locks[locking.LEVEL_NODE] = []
5149 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5151 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5152 self.op.live, self.op.cleanup)
5153 self.tasklets = [self._migrater]
5155 def DeclareLocks(self, level):
5156 if level == locking.LEVEL_NODE:
5157 self._LockInstancesNodes()
5159 def BuildHooksEnv(self):
5162 This runs on master, primary and secondary nodes of the instance.
5165 instance = self._migrater.instance
5166 source_node = instance.primary_node
5167 target_node = instance.secondary_nodes[0]
5168 env = _BuildInstanceHookEnvByObject(self, instance)
5169 env["MIGRATE_LIVE"] = self.op.live
5170 env["MIGRATE_CLEANUP"] = self.op.cleanup
5172 "OLD_PRIMARY": source_node,
5173 "OLD_SECONDARY": target_node,
5174 "NEW_PRIMARY": target_node,
5175 "NEW_SECONDARY": source_node,
5177 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5179 nl_post.append(source_node)
5180 return env, nl, nl_post
5183 class LUMoveInstance(LogicalUnit):
5184 """Move an instance by data-copying.
5187 HPATH = "instance-move"
5188 HTYPE = constants.HTYPE_INSTANCE
5189 _OP_REQP = ["instance_name", "target_node"]
5192 def CheckArguments(self):
5193 """Check the arguments.
5196 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5197 constants.DEFAULT_SHUTDOWN_TIMEOUT)
5199 def ExpandNames(self):
5200 self._ExpandAndLockInstance()
5201 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5202 self.op.target_node = target_node
5203 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5204 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5206 def DeclareLocks(self, level):
5207 if level == locking.LEVEL_NODE:
5208 self._LockInstancesNodes(primary_only=True)
5210 def BuildHooksEnv(self):
5213 This runs on master, primary and secondary nodes of the instance.
5217 "TARGET_NODE": self.op.target_node,
5218 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5220 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5221 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5222 self.op.target_node]
5225 def CheckPrereq(self):
5226 """Check prerequisites.
5228 This checks that the instance is in the cluster.
5231 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5232 assert self.instance is not None, \
5233 "Cannot retrieve locked instance %s" % self.op.instance_name
5235 node = self.cfg.GetNodeInfo(self.op.target_node)
5236 assert node is not None, \
5237 "Cannot retrieve locked node %s" % self.op.target_node
5239 self.target_node = target_node = node.name
5241 if target_node == instance.primary_node:
5242 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5243 (instance.name, target_node),
5246 bep = self.cfg.GetClusterInfo().FillBE(instance)
5248 for idx, dsk in enumerate(instance.disks):
5249 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5250 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5251 " cannot copy" % idx, errors.ECODE_STATE)
5253 _CheckNodeOnline(self, target_node)
5254 _CheckNodeNotDrained(self, target_node)
5256 if instance.admin_up:
5257 # check memory requirements on the secondary node
5258 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5259 instance.name, bep[constants.BE_MEMORY],
5260 instance.hypervisor)
5262 self.LogInfo("Not checking memory on the secondary node as"
5263 " instance will not be started")
5265 # check bridge existence
5266 _CheckInstanceBridgesExist(self, instance, node=target_node)
5268 def Exec(self, feedback_fn):
5269 """Move an instance.
5271 The move is done by shutting it down on its present node, copying
5272 the data over (slow) and starting it on the new node.
5275 instance = self.instance
5277 source_node = instance.primary_node
5278 target_node = self.target_node
5280 self.LogInfo("Shutting down instance %s on source node %s",
5281 instance.name, source_node)
5283 result = self.rpc.call_instance_shutdown(source_node, instance,
5284 self.shutdown_timeout)
5285 msg = result.fail_msg
5287 if self.op.ignore_consistency:
5288 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5289 " Proceeding anyway. Please make sure node"
5290 " %s is down. Error details: %s",
5291 instance.name, source_node, source_node, msg)
5293 raise errors.OpExecError("Could not shutdown instance %s on"
5295 (instance.name, source_node, msg))
5297 # create the target disks
5299 _CreateDisks(self, instance, target_node=target_node)
5300 except errors.OpExecError:
5301 self.LogWarning("Device creation failed, reverting...")
5303 _RemoveDisks(self, instance, target_node=target_node)
5305 self.cfg.ReleaseDRBDMinors(instance.name)
5308 cluster_name = self.cfg.GetClusterInfo().cluster_name
5311 # activate, get path, copy the data over
5312 for idx, disk in enumerate(instance.disks):
5313 self.LogInfo("Copying data for disk %d", idx)
5314 result = self.rpc.call_blockdev_assemble(target_node, disk,
5315 instance.name, True)
5317 self.LogWarning("Can't assemble newly created disk %d: %s",
5318 idx, result.fail_msg)
5319 errs.append(result.fail_msg)
5321 dev_path = result.payload
5322 result = self.rpc.call_blockdev_export(source_node, disk,
5323 target_node, dev_path,
5326 self.LogWarning("Can't copy data over for disk %d: %s",
5327 idx, result.fail_msg)
5328 errs.append(result.fail_msg)
5332 self.LogWarning("Some disks failed to copy, aborting")
5334 _RemoveDisks(self, instance, target_node=target_node)
5336 self.cfg.ReleaseDRBDMinors(instance.name)
5337 raise errors.OpExecError("Errors during disk copy: %s" %
5340 instance.primary_node = target_node
5341 self.cfg.Update(instance, feedback_fn)
5343 self.LogInfo("Removing the disks on the original node")
5344 _RemoveDisks(self, instance, target_node=source_node)
5346 # Only start the instance if it's marked as up
5347 if instance.admin_up:
5348 self.LogInfo("Starting instance %s on node %s",
5349 instance.name, target_node)
5351 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5352 ignore_secondaries=True)
5354 _ShutdownInstanceDisks(self, instance)
5355 raise errors.OpExecError("Can't activate the instance's disks")
5357 result = self.rpc.call_instance_start(target_node, instance, None, None)
5358 msg = result.fail_msg
5360 _ShutdownInstanceDisks(self, instance)
5361 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5362 (instance.name, target_node, msg))
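# Data-copy sketch for the move above (illustrative): for each disk the new
# device is first assembled on the target to obtain its device path, and the
# data is then pushed from the source via the blockdev export RPC, roughly
#
#   result = self.rpc.call_blockdev_assemble(target_node, disk,
#                                            instance.name, True)
#   dev_path = result.payload
#   self.rpc.call_blockdev_export(source_node, disk, target_node, dev_path, ...)
#
# Per-disk failures are collected in 'errs'; if any occurred, the freshly
# created disks are removed again and any reserved DRBD minors are released
# (ReleaseDRBDMinors).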
5365 class LUMigrateNode(LogicalUnit):
5366 """Migrate all instances from a node.
5369 HPATH = "node-migrate"
5370 HTYPE = constants.HTYPE_NODE
5371 _OP_REQP = ["node_name", "live"]
5374 def ExpandNames(self):
5375 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5377 self.needed_locks = {
5378 locking.LEVEL_NODE: [self.op.node_name],
5381 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5383 # Create tasklets for migrating instances for all instances on this node
5387 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5388 logging.debug("Migrating instance %s", inst.name)
5389 names.append(inst.name)
5391 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5393 self.tasklets = tasklets
5395 # Declare instance locks
5396 self.needed_locks[locking.LEVEL_INSTANCE] = names
5398 def DeclareLocks(self, level):
5399 if level == locking.LEVEL_NODE:
5400 self._LockInstancesNodes()
5402 def BuildHooksEnv(self):
5405 This runs on the master, the primary and all the secondaries.
5409 "NODE_NAME": self.op.node_name,
5412 nl = [self.cfg.GetMasterNode()]
5414 return (env, nl, nl)
5417 class TLMigrateInstance(Tasklet):
5418 def __init__(self, lu, instance_name, live, cleanup):
5419 """Initializes this class.
5422 Tasklet.__init__(self, lu)
5425 self.instance_name = instance_name
5427 self.cleanup = cleanup
5429 def CheckPrereq(self):
5430 """Check prerequisites.
5432 This checks that the instance is in the cluster.
5435 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5436 instance = self.cfg.GetInstanceInfo(instance_name)
5437 assert instance is not None
5439 if instance.disk_template != constants.DT_DRBD8:
5440 raise errors.OpPrereqError("Instance's disk layout is not"
5441 " drbd8, cannot migrate.", errors.ECODE_STATE)
5443 secondary_nodes = instance.secondary_nodes
5444 if not secondary_nodes:
5445 raise errors.ConfigurationError("No secondary node but using"
5446 " drbd8 disk template")
5448 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5450 target_node = secondary_nodes[0]
5451 # check memory requirements on the secondary node
5452 _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5453 instance.name, i_be[constants.BE_MEMORY],
5454 instance.hypervisor)
5456 # check bridge existence
5457 _CheckInstanceBridgesExist(self, instance, node=target_node)
5459 if not self.cleanup:
5460 _CheckNodeNotDrained(self, target_node)
5461 result = self.rpc.call_instance_migratable(instance.primary_node,
5463 result.Raise("Can't migrate, please use failover",
5464 prereq=True, ecode=errors.ECODE_STATE)
5466 self.instance = instance
5468 def _WaitUntilSync(self):
5469 """Poll with custom rpc for disk sync.
5471 This uses our own step-based rpc call.
5474 self.feedback_fn("* wait until resync is done")
5478 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5480 self.instance.disks)
5482 for node, nres in result.items():
5483 nres.Raise("Cannot resync disks on node %s" % node)
5484 node_done, node_percent = nres.payload
5485 all_done = all_done and node_done
5486 if node_percent is not None:
5487 min_percent = min(min_percent, node_percent)
5489 if min_percent < 100:
5490 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5493 def _EnsureSecondary(self, node):
5494 """Demote a node to secondary.
5497 self.feedback_fn("* switching node %s to secondary mode" % node)
5499 for dev in self.instance.disks:
5500 self.cfg.SetDiskID(dev, node)
5502 result = self.rpc.call_blockdev_close(node, self.instance.name,
5503 self.instance.disks)
5504 result.Raise("Cannot change disk to secondary on node %s" % node)
5506 def _GoStandalone(self):
5507 """Disconnect from the network.
5510 self.feedback_fn("* changing into standalone mode")
5511 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5512 self.instance.disks)
5513 for node, nres in result.items():
5514 nres.Raise("Cannot disconnect disks node %s" % node)
5516 def _GoReconnect(self, multimaster):
5517 """Reconnect to the network.
5523 msg = "single-master"
5524 self.feedback_fn("* changing disks into %s mode" % msg)
5525 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5526 self.instance.disks,
5527 self.instance.name, multimaster)
5528 for node, nres in result.items():
5529 nres.Raise("Cannot change disks config on node %s" % node)
5531 def _ExecCleanup(self):
5532 """Try to cleanup after a failed migration.
5534 The cleanup is done by:
5535 - check that the instance is running only on one node
5536 (and update the config if needed)
5537 - change disks on its secondary node to secondary
5538 - wait until disks are fully synchronized
5539 - disconnect from the network
5540 - change disks into single-master mode
5541 - wait again until disks are fully synchronized
5544 instance = self.instance
5545 target_node = self.target_node
5546 source_node = self.source_node
5548 # check running on only one node
5549 self.feedback_fn("* checking where the instance actually runs"
5550 " (if this hangs, the hypervisor might be in"
5552 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5553 for node, result in ins_l.items():
5554 result.Raise("Can't contact node %s" % node)
5556 runningon_source = instance.name in ins_l[source_node].payload
5557 runningon_target = instance.name in ins_l[target_node].payload
5559 if runningon_source and runningon_target:
5560 raise errors.OpExecError("Instance seems to be running on two nodes,"
5561 " or the hypervisor is confused. You will have"
5562 " to ensure manually that it runs only on one"
5563 " and restart this operation.")
5565 if not (runningon_source or runningon_target):
5566 raise errors.OpExecError("Instance does not seem to be running at all."
5567 " In this case, it's safer to repair by"
5568 " running 'gnt-instance stop' to ensure disk"
5569 " shutdown, and then restarting it.")
5571 if runningon_target:
5572 # the migration has actually succeeded, we need to update the config
5573 self.feedback_fn("* instance running on secondary node (%s),"
5574 " updating config" % target_node)
5575 instance.primary_node = target_node
5576 self.cfg.Update(instance, self.feedback_fn)
5577 demoted_node = source_node
5579 self.feedback_fn("* instance confirmed to be running on its"
5580 " primary node (%s)" % source_node)
5581 demoted_node = target_node
5583 self._EnsureSecondary(demoted_node)
5585 self._WaitUntilSync()
5586 except errors.OpExecError:
5587 # we ignore errors here, since if the device is standalone, it
5588 # won't be able to sync
5590 self._GoStandalone()
5591 self._GoReconnect(False)
5592 self._WaitUntilSync()
5594 self.feedback_fn("* done")
5596 def _RevertDiskStatus(self):
5597 """Try to revert the disk status after a failed migration.
5600 target_node = self.target_node
5602 self._EnsureSecondary(target_node)
5603 self._GoStandalone()
5604 self._GoReconnect(False)
5605 self._WaitUntilSync()
5606 except errors.OpExecError, err:
5607 self.lu.LogWarning("Migration failed and I can't reconnect the"
5608 " drives: error '%s'\n"
5609 "Please look and recover the instance status" %
5612 def _AbortMigration(self):
5613 """Call the hypervisor code to abort a started migration.
5616 instance = self.instance
5617 target_node = self.target_node
5618 migration_info = self.migration_info
5620 abort_result = self.rpc.call_finalize_migration(target_node,
5624 abort_msg = abort_result.fail_msg
5626 logging.error("Aborting migration failed on target node %s: %s",
5627 target_node, abort_msg)
5628 # Don't raise an exception here, as we still have to try to revert the
5629 # disk status, even if this step failed.
5631 def _ExecMigration(self):
5632 """Migrate an instance.
5634 The migrate is done by:
5635 - change the disks into dual-master mode
5636 - wait until disks are fully synchronized again
5637 - migrate the instance
5638 - change disks on the new secondary node (the old primary) to secondary
5639 - wait until disks are fully synchronized
5640 - change disks into single-master mode
5643 instance = self.instance
5644 target_node = self.target_node
5645 source_node = self.source_node
5647 self.feedback_fn("* checking disk consistency between source and target")
5648 for dev in instance.disks:
5649 if not _CheckDiskConsistency(self, dev, target_node, False):
5650 raise errors.OpExecError("Disk %s is degraded or not fully"
5651 " synchronized on target node,"
5652 " aborting migrate." % dev.iv_name)
5654 # First get the migration information from the remote node
5655 result = self.rpc.call_migration_info(source_node, instance)
5656 msg = result.fail_msg
5658 log_err = ("Failed fetching source migration information from %s: %s" %
5660 logging.error(log_err)
5661 raise errors.OpExecError(log_err)
5663 self.migration_info = migration_info = result.payload
5665 # Then switch the disks to master/master mode
5666 self._EnsureSecondary(target_node)
5667 self._GoStandalone()
5668 self._GoReconnect(True)
5669 self._WaitUntilSync()
5671 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5672 result = self.rpc.call_accept_instance(target_node,
5675 self.nodes_ip[target_node])
5677 msg = result.fail_msg
5679 logging.error("Instance pre-migration failed, trying to revert"
5680 " disk status: %s", msg)
5681 self.feedback_fn("Pre-migration failed, aborting")
5682 self._AbortMigration()
5683 self._RevertDiskStatus()
5684 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5685 (instance.name, msg))
5687 self.feedback_fn("* migrating instance to %s" % target_node)
5689 result = self.rpc.call_instance_migrate(source_node, instance,
5690 self.nodes_ip[target_node],
5692 msg = result.fail_msg
5694 logging.error("Instance migration failed, trying to revert"
5695 " disk status: %s", msg)
5696 self.feedback_fn("Migration failed, aborting")
5697 self._AbortMigration()
5698 self._RevertDiskStatus()
5699 raise errors.OpExecError("Could not migrate instance %s: %s" %
5700 (instance.name, msg))
5703 instance.primary_node = target_node
5704 # distribute new instance config to the other nodes
5705 self.cfg.Update(instance, self.feedback_fn)
5707 result = self.rpc.call_finalize_migration(target_node,
5711 msg = result.fail_msg
5713 logging.error("Instance migration succeeded, but finalization failed:"
5715 raise errors.OpExecError("Could not finalize instance migration: %s" %
5718 self._EnsureSecondary(source_node)
5719 self._WaitUntilSync()
5720 self._GoStandalone()
5721 self._GoReconnect(False)
5722 self._WaitUntilSync()
5724 self.feedback_fn("* done")
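# DRBD choreography of the migration above (illustrative summary):
#
#   _EnsureSecondary(target) -> _GoStandalone() -> _GoReconnect(True)
#     -> _WaitUntilSync() -> call_instance_migrate -> finalize on the target
#     -> _EnsureSecondary(source) -> _WaitUntilSync() -> _GoStandalone()
#     -> _GoReconnect(False) -> _WaitUntilSync()
#
# so the disks are in multimaster mode only around the actual memory
# transfer and end up single-master on the new primary node.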
5726 def Exec(self, feedback_fn):
5727 """Perform the migration.
5730 feedback_fn("Migrating instance %s" % self.instance.name)
5732 self.feedback_fn = feedback_fn
5734 self.source_node = self.instance.primary_node
5735 self.target_node = self.instance.secondary_nodes[0]
5736 self.all_nodes = [self.source_node, self.target_node]
5738 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5739 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5743 return self._ExecCleanup()
5745 return self._ExecMigration()
5748 def _CreateBlockDev(lu, node, instance, device, force_create,
5750 """Create a tree of block devices on a given node.
5752 If this device type has to be created on secondaries, create it and all its children.
5755 If not, just recurse to children keeping the same 'force' value.
5757 @param lu: the lu on whose behalf we execute
5758 @param node: the node on which to create the device
5759 @type instance: L{objects.Instance}
5760 @param instance: the instance which owns the device
5761 @type device: L{objects.Disk}
5762 @param device: the device to create
5763 @type force_create: boolean
5764 @param force_create: whether to force creation of this device; this
5765 will be changed to True whenever we find a device which has the
5766 CreateOnSecondary() attribute
5767 @param info: the extra 'metadata' we should attach to the device
5768 (this will be represented as a LVM tag)
5769 @type force_open: boolean
5770 @param force_open: this parameter will be passed to the
5771 L{backend.BlockdevCreate} function where it specifies
5772 whether we run on primary or not, and it affects both
5773 the child assembly and the device's own Open() execution
5776 if device.CreateOnSecondary():
5780 for child in device.children:
5781 _CreateBlockDev(lu, node, instance, child, force_create,
5784 if not force_create:
5787 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5790 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5791 """Create a single block device on a given node.
5793 This will not recurse over children of the device, so they must be
5796 @param lu: the lu on whose behalf we execute
5797 @param node: the node on which to create the device
5798 @type instance: L{objects.Instance}
5799 @param instance: the instance which owns the device
5800 @type device: L{objects.Disk}
5801 @param device: the device to create
5802 @param info: the extra 'metadata' we should attach to the device
5803 (this will be represented as a LVM tag)
5804 @type force_open: boolean
5805 @param force_open: this parameter will be passed to the
5806 L{backend.BlockdevCreate} function where it specifies
5807 whether we run on primary or not, and it affects both
5808 the child assembly and the device's own Open() execution
5811 lu.cfg.SetDiskID(device, node)
5812 result = lu.rpc.call_blockdev_create(node, device, device.size,
5813 instance.name, force_open, info)
5814 result.Raise("Can't create block device %s on"
5815 " node %s for instance %s" % (device, node, instance.name))
5816 if device.physical_id is None:
5817 device.physical_id = result.payload
5820 def _GenerateUniqueNames(lu, exts):
5821 """Generate suitable LV names.
5823 This will generate logical volume names for the given instance, one per requested extension.
5828 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5829 results.append("%s%s" % (new_id, val))
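# Example of the naming scheme above (the unique id is illustrative):
#   _GenerateUniqueNames(lu, [".disk0_data", ".disk0_meta"])
#   -> ["<unique-id>.disk0_data", "<unique-id>.disk0_meta"]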
5833 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5835 """Generate a drbd8 device complete with its children.
5838 port = lu.cfg.AllocatePort()
5839 vgname = lu.cfg.GetVGName()
5840 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5841 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5842 logical_id=(vgname, names[0]))
5843 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5844 logical_id=(vgname, names[1]))
5845 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5846 logical_id=(primary, secondary, port,
5849 children=[dev_data, dev_meta],
5854 def _GenerateDiskTemplate(lu, template_name,
5855 instance_name, primary_node,
5856 secondary_nodes, disk_info,
5857 file_storage_dir, file_driver,
5859 """Generate the entire disk layout for a given template type.
5862 # TODO: compute space requirements
5864 vgname = lu.cfg.GetVGName()
5865 disk_count = len(disk_info)
5867 if template_name == constants.DT_DISKLESS:
5869 elif template_name == constants.DT_PLAIN:
5870 if len(secondary_nodes) != 0:
5871 raise errors.ProgrammerError("Wrong template configuration")
5873 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5874 for i in range(disk_count)])
5875 for idx, disk in enumerate(disk_info):
5876 disk_index = idx + base_index
5877 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5878 logical_id=(vgname, names[idx]),
5879 iv_name="disk/%d" % disk_index,
5881 disks.append(disk_dev)
5882 elif template_name == constants.DT_DRBD8:
5883 if len(secondary_nodes) != 1:
5884 raise errors.ProgrammerError("Wrong template configuration")
5885 remote_node = secondary_nodes[0]
5886 minors = lu.cfg.AllocateDRBDMinor(
5887 [primary_node, remote_node] * len(disk_info), instance_name)
5890 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5891 for i in range(disk_count)]):
5892 names.append(lv_prefix + "_data")
5893 names.append(lv_prefix + "_meta")
5894 for idx, disk in enumerate(disk_info):
5895 disk_index = idx + base_index
5896 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5897 disk["size"], names[idx*2:idx*2+2],
5898 "disk/%d" % disk_index,
5899 minors[idx*2], minors[idx*2+1])
5900 disk_dev.mode = disk["mode"]
5901 disks.append(disk_dev)
5902 elif template_name == constants.DT_FILE:
5903 if len(secondary_nodes) != 0:
5904 raise errors.ProgrammerError("Wrong template configuration")
5906 _RequireFileStorage()
5908 for idx, disk in enumerate(disk_info):
5909 disk_index = idx + base_index
5910 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5911 iv_name="disk/%d" % disk_index,
5912 logical_id=(file_driver,
5913 "%s/disk%d" % (file_storage_dir,
5916 disks.append(disk_dev)
5918 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
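# Illustrative example for the DRBD8 branch above: a two-disk request
# allocates two DRBD minors per disk ([primary, remote] * len(disk_info)) and
# derives LV names such as
#   "<unique-id>.disk0_data" / "<unique-id>.disk0_meta"
#   "<unique-id>.disk1_data" / "<unique-id>.disk1_meta"
# each pair being handed to _GenerateDRBD8Branch together with minors[idx*2]
# and minors[idx*2+1].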
5922 def _GetInstanceInfoText(instance):
5923 """Compute the text that should be added to the disk's metadata.
5926 return "originstname+%s" % instance.name
5929 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5930 """Create all disks for an instance.
5932 This abstracts away some work from AddInstance.
5934 @type lu: L{LogicalUnit}
5935 @param lu: the logical unit on whose behalf we execute
5936 @type instance: L{objects.Instance}
5937 @param instance: the instance whose disks we should create
5939 @param to_skip: list of indices to skip
5940 @type target_node: string
5941 @param target_node: if passed, overrides the target node for creation
5943 @return: the success of the creation
5946 info = _GetInstanceInfoText(instance)
5947 if target_node is None:
5948 pnode = instance.primary_node
5949 all_nodes = instance.all_nodes
5954 if instance.disk_template == constants.DT_FILE:
5955 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5956 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5958 result.Raise("Failed to create directory '%s' on"
5959 " node %s" % (file_storage_dir, pnode))
5961 # Note: this needs to be kept in sync with adding of disks in
5962 # LUSetInstanceParams
5963 for idx, device in enumerate(instance.disks):
5964 if to_skip and idx in to_skip:
5966 logging.info("Creating volume %s for instance %s",
5967 device.iv_name, instance.name)
5969 for node in all_nodes:
5970 f_create = node == pnode
5971 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
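# Note on the loop above (descriptive only): f_create is True only on the
# primary node and is passed both as force_create and as force_open, so
# secondary nodes only get the parts of the device tree that declare
# CreateOnSecondary() and never open a device the way the primary does.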
5974 def _RemoveDisks(lu, instance, target_node=None):
5975 """Remove all disks for an instance.
5977 This abstracts away some work from `AddInstance()` and
5978 `RemoveInstance()`. Note that in case some of the devices couldn't
5979 be removed, the removal will continue with the other ones (compare
5980 with `_CreateDisks()`).
5982 @type lu: L{LogicalUnit}
5983 @param lu: the logical unit on whose behalf we execute
5984 @type instance: L{objects.Instance}
5985 @param instance: the instance whose disks we should remove
5986 @type target_node: string
5987 @param target_node: used to override the node on which to remove the disks
5989 @return: the success of the removal
5992 logging.info("Removing block devices for instance %s", instance.name)
5995 for device in instance.disks:
5997 edata = [(target_node, device)]
5999 edata = device.ComputeNodeTree(instance.primary_node)
6000 for node, disk in edata:
6001 lu.cfg.SetDiskID(disk, node)
6002 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6004 lu.LogWarning("Could not remove block device %s on node %s,"
6005 " continuing anyway: %s", device.iv_name, node, msg)
6008 if instance.disk_template == constants.DT_FILE:
6009 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6013 tgt = instance.primary_node
6014 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6016 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6017 file_storage_dir, instance.primary_node, result.fail_msg)
6023 def _ComputeDiskSize(disk_template, disks):
6024 """Compute disk size requirements in the volume group
6027 # Required free disk space as a function of disk and swap space
6029 constants.DT_DISKLESS: None,
6030 constants.DT_PLAIN: sum(d["size"] for d in disks),
6031 # 128 MB are added for drbd metadata for each disk
6032 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6033 constants.DT_FILE: None,
6036 if disk_template not in req_size_dict:
6037 raise errors.ProgrammerError("Disk template '%s' size requirement"
6038 " is unknown" % disk_template)
6040 return req_size_dict[disk_template]
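# Worked example for the table above (sizes in MiB, values made up):
#   _ComputeDiskSize(constants.DT_PLAIN, [{"size": 1024}, {"size": 2048}]) == 3072
#   _ComputeDiskSize(constants.DT_DRBD8, [{"size": 1024}, {"size": 2048}]) == 3328
#     (each disk carries an extra 128 MiB of DRBD metadata)
#   _ComputeDiskSize(constants.DT_DISKLESS, []) and the DT_FILE case are None.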
6043 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6044 """Hypervisor parameter validation.
6046 This function abstracts the hypervisor parameter validation to be
6047 used in both instance create and instance modify.
6049 @type lu: L{LogicalUnit}
6050 @param lu: the logical unit for which we check
6051 @type nodenames: list
6052 @param nodenames: the list of nodes on which we should check
6053 @type hvname: string
6054 @param hvname: the name of the hypervisor we should use
6055 @type hvparams: dict
6056 @param hvparams: the parameters which we need to check
6057 @raise errors.OpPrereqError: if the parameters are not valid
6060 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6063 for node in nodenames:
6067 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6070 class LUCreateInstance(LogicalUnit):
6071 """Create an instance.
6074 HPATH = "instance-add"
6075 HTYPE = constants.HTYPE_INSTANCE
6076 _OP_REQP = ["instance_name", "disks",
6078 "wait_for_sync", "ip_check", "nics",
6079 "hvparams", "beparams"]
6082 def CheckArguments(self):
6086 # set optional parameters to None if they don't exist
6087 for attr in ["pnode", "snode", "iallocator", "hypervisor",
6088 "disk_template", "identify_defaults"]:
6089 if not hasattr(self.op, attr):
6090 setattr(self.op, attr, None)
6092 # do not require name_check to ease forward/backward compatibility
6094 if not hasattr(self.op, "name_check"):
6095 self.op.name_check = True
6096 if not hasattr(self.op, "no_install"):
6097 self.op.no_install = False
6098 if self.op.no_install and self.op.start:
6099 self.LogInfo("No-installation mode selected, disabling startup")
6100 self.op.start = False
6101 # validate/normalize the instance name
6102 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6103 if self.op.ip_check and not self.op.name_check:
6104 # TODO: make the ip check more flexible and not depend on the name check
6105 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6107 # check disk information: either all adopt, or no adopt
6108 has_adopt = has_no_adopt = False
6109 for disk in self.op.disks:
6114 if has_adopt and has_no_adopt:
6115 raise errors.OpPrereqError("Either all disks are adopted or none is",
6118 if self.op.disk_template != constants.DT_PLAIN:
6119 raise errors.OpPrereqError("Disk adoption is only supported for the"
6120 " 'plain' disk template",
6122 if self.op.iallocator is not None:
6123 raise errors.OpPrereqError("Disk adoption not allowed with an"
6124 " iallocator script", errors.ECODE_INVAL)
6125 if self.op.mode == constants.INSTANCE_IMPORT:
6126 raise errors.OpPrereqError("Disk adoption not allowed for"
6127 " instance import", errors.ECODE_INVAL)
6129 self.adopt_disks = has_adopt
6131 # verify creation mode
6132 if self.op.mode not in constants.INSTANCE_CREATE_MODES:
6133 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6134 self.op.mode, errors.ECODE_INVAL)
6136 # instance name verification
6137 if self.op.name_check:
6138 self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6139 self.op.instance_name = self.hostname1.name
6140 # used in CheckPrereq for ip ping check
6141 self.check_ip = self.hostname1.ip
6142 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6143 raise errors.OpPrereqError("Remote imports require names to be checked",
6146 self.check_ip = None
6148 # file storage checks
6149 if (self.op.file_driver and
6150 not self.op.file_driver in constants.FILE_DRIVER):
6151 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6152 self.op.file_driver, errors.ECODE_INVAL)
6154 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6155 raise errors.OpPrereqError("File storage directory path not absolute",
6158 ### Node/iallocator related checks
6159 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6160 raise errors.OpPrereqError("One and only one of iallocator and primary"
6161 " node must be given",
6164 self._cds = _GetClusterDomainSecret()
6166 if self.op.mode == constants.INSTANCE_IMPORT:
6167 # On import force_variant must be True, because if we forced it at
6168 # initial install, our only chance when importing it back is that it
6170 self.op.force_variant = True
6172 if self.op.no_install:
6173 self.LogInfo("No-installation mode has no effect during import")
6175 elif self.op.mode == constants.INSTANCE_CREATE:
6176 if getattr(self.op, "os_type", None) is None:
6177 raise errors.OpPrereqError("No guest OS specified",
6179 self.op.force_variant = getattr(self.op, "force_variant", False)
6180 if self.op.disk_template is None:
6181 raise errors.OpPrereqError("No disk template specified",
6184 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6185 # Check handshake to ensure both clusters have the same domain secret
6186 src_handshake = getattr(self.op, "source_handshake", None)
6187 if not src_handshake:
6188 raise errors.OpPrereqError("Missing source handshake",
6191 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6194 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6197 # Load and check source CA
6198 self.source_x509_ca_pem = getattr(self.op, "source_x509_ca", None)
6199 if not self.source_x509_ca_pem:
6200 raise errors.OpPrereqError("Missing source X509 CA",
6204 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6206 except OpenSSL.crypto.Error, err:
6207 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6208 (err, ), errors.ECODE_INVAL)
6210 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6211 if errcode is not None:
6212 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6215 self.source_x509_ca = cert
6217 src_instance_name = getattr(self.op, "source_instance_name", None)
6218 if not src_instance_name:
6219 raise errors.OpPrereqError("Missing source instance name",
6222 self.source_instance_name = \
6223 utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name
6226 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6227 self.op.mode, errors.ECODE_INVAL)
6229 def ExpandNames(self):
6230 """ExpandNames for CreateInstance.
6232 Figure out the right locks for instance creation.
6235 self.needed_locks = {}
6237 instance_name = self.op.instance_name
6238 # this is just a preventive check, but someone might still add this
6239 # instance in the meantime, and creation will fail at lock-add time
6240 if instance_name in self.cfg.GetInstanceList():
6241 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6242 instance_name, errors.ECODE_EXISTS)
6244 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6246 if self.op.iallocator:
6247 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6249 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6250 nodelist = [self.op.pnode]
6251 if self.op.snode is not None:
6252 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6253 nodelist.append(self.op.snode)
6254 self.needed_locks[locking.LEVEL_NODE] = nodelist
6256 # in case of import lock the source node too
6257 if self.op.mode == constants.INSTANCE_IMPORT:
6258 src_node = getattr(self.op, "src_node", None)
6259 src_path = getattr(self.op, "src_path", None)
6261 if src_path is None:
6262 self.op.src_path = src_path = self.op.instance_name
6264 if src_node is None:
6265 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6266 self.op.src_node = None
6267 if os.path.isabs(src_path):
6268 raise errors.OpPrereqError("Importing an instance from an absolute"
6269 " path requires a source node option.",
6272 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6273 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6274 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6275 if not os.path.isabs(src_path):
6276 self.op.src_path = src_path = \
6277 utils.PathJoin(constants.EXPORT_DIR, src_path)
6279 def _RunAllocator(self):
6280 """Run the allocator based on input opcode.
6283 nics = [n.ToDict() for n in self.nics]
6284 ial = IAllocator(self.cfg, self.rpc,
6285 mode=constants.IALLOCATOR_MODE_ALLOC,
6286 name=self.op.instance_name,
6287 disk_template=self.op.disk_template,
6290 vcpus=self.be_full[constants.BE_VCPUS],
6291 mem_size=self.be_full[constants.BE_MEMORY],
6294 hypervisor=self.op.hypervisor,
6297 ial.Run(self.op.iallocator)
6300 raise errors.OpPrereqError("Can't compute nodes using"
6301 " iallocator '%s': %s" %
6302 (self.op.iallocator, ial.info),
6304 if len(ial.result) != ial.required_nodes:
6305 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6306 " of nodes (%s), required %s" %
6307 (self.op.iallocator, len(ial.result),
6308 ial.required_nodes), errors.ECODE_FAULT)
6309 self.op.pnode = ial.result[0]
6310 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6311 self.op.instance_name, self.op.iallocator,
6312 utils.CommaJoin(ial.result))
6313 if ial.required_nodes == 2:
6314 self.op.snode = ial.result[1]
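# Sketch of the allocator output handled above (node names are hypothetical):
# for a DRBD8 request the run is expected to yield
#   ial.required_nodes == 2
#   ial.result == ["node1.example.com", "node2.example.com"]
# from which pnode and snode are taken; non-mirrored templates need one node.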
6316 def BuildHooksEnv(self):
6319 This runs on master, primary and secondary nodes of the instance.
6323 "ADD_MODE": self.op.mode,
6325 if self.op.mode == constants.INSTANCE_IMPORT:
6326 env["SRC_NODE"] = self.op.src_node
6327 env["SRC_PATH"] = self.op.src_path
6328 env["SRC_IMAGES"] = self.src_images
6330 env.update(_BuildInstanceHookEnv(
6331 name=self.op.instance_name,
6332 primary_node=self.op.pnode,
6333 secondary_nodes=self.secondaries,
6334 status=self.op.start,
6335 os_type=self.op.os_type,
6336 memory=self.be_full[constants.BE_MEMORY],
6337 vcpus=self.be_full[constants.BE_VCPUS],
6338 nics=_NICListToTuple(self, self.nics),
6339 disk_template=self.op.disk_template,
6340 disks=[(d["size"], d["mode"]) for d in self.disks],
6343 hypervisor_name=self.op.hypervisor,
6346 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6350 def _ReadExportInfo(self):
6351 """Reads the export information from disk.
6353 It will override the opcode source node and path with the actual
6354 information, if these two were not specified before.
6356 @return: the export information
6359 assert self.op.mode == constants.INSTANCE_IMPORT
6361 src_node = self.op.src_node
6362 src_path = self.op.src_path
6364 if src_node is None:
6365 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6366 exp_list = self.rpc.call_export_list(locked_nodes)
6368 for node in exp_list:
6369 if exp_list[node].fail_msg:
6371 if src_path in exp_list[node].payload:
6373 self.op.src_node = src_node = node
6374 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6378 raise errors.OpPrereqError("No export found for relative path %s" %
6379 src_path, errors.ECODE_INVAL)
6381 _CheckNodeOnline(self, src_node)
6382 result = self.rpc.call_export_info(src_node, src_path)
6383 result.Raise("No export or invalid export found in dir %s" % src_path)
6385 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6386 if not export_info.has_section(constants.INISECT_EXP):
6387 raise errors.ProgrammerError("Corrupted export config",
6388 errors.ECODE_ENVIRON)
6390 ei_version = export_info.get(constants.INISECT_EXP, "version")
6391 if (int(ei_version) != constants.EXPORT_VERSION):
6392 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6393 (ei_version, constants.EXPORT_VERSION),
6394 errors.ECODE_ENVIRON)
6397 def _ReadExportParams(self, einfo):
6398 """Use export parameters as defaults.
6400 In case the opcode doesn't specify (as in override) some instance
6401 parameters, then try to use them from the export information, if
6405 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6407 if self.op.disk_template is None:
6408 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6409 self.op.disk_template = einfo.get(constants.INISECT_INS,
6412 raise errors.OpPrereqError("No disk template specified and the export"
6413 " is missing the disk_template information",
6416 if not self.op.disks:
6417 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6419 # TODO: import the disk iv_name too
6420 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6421 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6422 disks.append({"size": disk_sz})
6423 self.op.disks = disks
6425 raise errors.OpPrereqError("No disk info specified and the export"
6426 " is missing the disk information",
6429 if (not self.op.nics and
6430 einfo.has_option(constants.INISECT_INS, "nic_count")):
6432 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6434 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6435 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6440 if (self.op.hypervisor is None and
6441 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6442 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6443 if einfo.has_section(constants.INISECT_HYP):
6444 # use the export parameters but do not override the ones
6445 # specified by the user
6446 for name, value in einfo.items(constants.INISECT_HYP):
6447 if name not in self.op.hvparams:
6448 self.op.hvparams[name] = value
6450 if einfo.has_section(constants.INISECT_BEP):
6451 # use the parameters, without overriding
6452 for name, value in einfo.items(constants.INISECT_BEP):
6453 if name not in self.op.beparams:
6454 self.op.beparams[name] = value
6456 # try to read the parameters old style, from the main section
6457 for name in constants.BES_PARAMETERS:
6458 if (name not in self.op.beparams and
6459 einfo.has_option(constants.INISECT_INS, name)):
6460 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
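# Sketch of the export data consumed above (option names follow the code, the
# concrete values and layout are assumptions):
#   INISECT_EXP: version
#   INISECT_INS: disk_template, disk_count, disk0_size, nic_count, nic0_mac,
#                nic0_ip, hypervisor, plus the old-style backend options
#   INISECT_HYP: hypervisor parameters merged into op.hvparams
#   INISECT_BEP: backend parameters merged into op.beparams
# Values already supplied in the opcode always win over the export values.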
6462 def _RevertToDefaults(self, cluster):
6463 """Revert the instance parameters to the default values.
6467 hv_defs = cluster.GetHVDefaults(self.op.hypervisor, self.op.os_type)
6468 for name in self.op.hvparams.keys():
6469 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6470 del self.op.hvparams[name]
6472 be_defs = cluster.beparams.get(constants.PP_DEFAULT, {})
6473 for name in self.op.beparams.keys():
6474 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6475 del self.op.beparams[name]
6477 nic_defs = cluster.nicparams.get(constants.PP_DEFAULT, {})
6478 for nic in self.op.nics:
6479 for name in constants.NICS_PARAMETERS:
6480 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6483 def CheckPrereq(self):
6484 """Check prerequisites.
6487 if self.op.mode == constants.INSTANCE_IMPORT:
6488 export_info = self._ReadExportInfo()
6489 self._ReadExportParams(export_info)
6491 _CheckDiskTemplate(self.op.disk_template)
6493 if (not self.cfg.GetVGName() and
6494 self.op.disk_template not in constants.DTS_NOT_LVM):
6495 raise errors.OpPrereqError("Cluster does not support lvm-based"
6496 " instances", errors.ECODE_STATE)
6498 if self.op.hypervisor is None:
6499 self.op.hypervisor = self.cfg.GetHypervisorType()
6501 cluster = self.cfg.GetClusterInfo()
6502 enabled_hvs = cluster.enabled_hypervisors
6503 if self.op.hypervisor not in enabled_hvs:
6504 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6505 " cluster (%s)" % (self.op.hypervisor,
6506 ",".join(enabled_hvs)),
6509 # check hypervisor parameter syntax (locally)
6510 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6511 filled_hvp = objects.FillDict(cluster.GetHVDefaults(self.op.hypervisor,
6514 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6515 hv_type.CheckParameterSyntax(filled_hvp)
6516 self.hv_full = filled_hvp
6517 # check that we don't specify global parameters on an instance
6518 _CheckGlobalHvParams(self.op.hvparams)
6520 # fill and remember the beparams dict
6521 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6522 self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6525 # now that hvp/bep are in final format, let's reset to defaults,
6527 if self.op.identify_defaults:
6528 self._RevertToDefaults(cluster)
6532 for idx, nic in enumerate(self.op.nics):
6533 nic_mode_req = nic.get("mode", None)
6534 nic_mode = nic_mode_req
6535 if nic_mode is None:
6536 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6538 # in routed mode, for the first nic, the default ip is 'auto'
6539 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6540 default_ip_mode = constants.VALUE_AUTO
6542 default_ip_mode = constants.VALUE_NONE
6544 # ip validity checks
6545 ip = nic.get("ip", default_ip_mode)
6546 if ip is None or ip.lower() == constants.VALUE_NONE:
6548 elif ip.lower() == constants.VALUE_AUTO:
6549 if not self.op.name_check:
6550 raise errors.OpPrereqError("IP address set to auto but name checks"
6551 " have been skipped. Aborting.",
6553 nic_ip = self.hostname1.ip
6555 if not utils.IsValidIP(ip):
6556 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6557 " like a valid IP" % ip,
6561 # TODO: check the ip address for uniqueness
6562 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6563 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6566 # MAC address verification
6567 mac = nic.get("mac", constants.VALUE_AUTO)
6568 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6569 mac = utils.NormalizeAndValidateMac(mac)
6572 self.cfg.ReserveMAC(mac, self.proc.GetECId())
6573 except errors.ReservationError:
6574 raise errors.OpPrereqError("MAC address %s already in use"
6575 " in cluster" % mac,
6576 errors.ECODE_NOTUNIQUE)
6578 # bridge verification
6579 bridge = nic.get("bridge", None)
6580 link = nic.get("link", None)
6582 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6583 " at the same time", errors.ECODE_INVAL)
6584 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6585 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6592 nicparams[constants.NIC_MODE] = nic_mode_req
6594 nicparams[constants.NIC_LINK] = link
6596 check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6598 objects.NIC.CheckParameterSyntax(check_params)
6599 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
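# Example of a nic specification as validated above (an illustrative input):
#   {"mode": "bridged", "link": "xen-br0", "mac": "auto", "ip": "none"}
# Unspecified values fall back to the cluster nicparams defaults, the ip
# defaults to 'auto' (the resolved instance address) only for the first nic in
# routed mode, and a fixed MAC is reserved in the config so that duplicates
# are rejected early.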
6601 # disk checks/pre-build
6603 for disk in self.op.disks:
6604 mode = disk.get("mode", constants.DISK_RDWR)
6605 if mode not in constants.DISK_ACCESS_SET:
6606 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6607 mode, errors.ECODE_INVAL)
6608 size = disk.get("size", None)
6610 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6613 except (TypeError, ValueError):
6614 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6616 new_disk = {"size": size, "mode": mode}
6618 new_disk["adopt"] = disk["adopt"]
6619 self.disks.append(new_disk)
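# Correspondingly, each entry of self.op.disks is normalized above into a dict
# of the form (values illustrative)
#   {"size": 1024, "mode": "rw"}                           # regular creation
#   {"size": 1024, "mode": "rw", "adopt": "existing-lv"}   # LV adoption
# with the size coerced to an integer and the mode checked against
# constants.DISK_ACCESS_SET.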
6621 if self.op.mode == constants.INSTANCE_IMPORT:
6623 # Check that the new instance doesn't have less disks than the export
6624 instance_disks = len(self.disks)
6625 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6626 if instance_disks < export_disks:
6627 raise errors.OpPrereqError("Not enough disks to import."
6628 " (instance: %d, export: %d)" %
6629 (instance_disks, export_disks),
6633 for idx in range(export_disks):
6634 option = 'disk%d_dump' % idx
6635 if export_info.has_option(constants.INISECT_INS, option):
6636 # FIXME: are the old os-es, disk sizes, etc. useful?
6637 export_name = export_info.get(constants.INISECT_INS, option)
6638 image = utils.PathJoin(self.op.src_path, export_name)
6639 disk_images.append(image)
6641 disk_images.append(False)
6643 self.src_images = disk_images
6645 old_name = export_info.get(constants.INISECT_INS, 'name')
6647 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6648 except (TypeError, ValueError), err:
6649 raise errors.OpPrereqError("Invalid export file, nic_count is not"
6650 " an integer: %s" % str(err),
6652 if self.op.instance_name == old_name:
6653 for idx, nic in enumerate(self.nics):
6654 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6655 nic_mac_ini = 'nic%d_mac' % idx
6656 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6658 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6660 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6661 if self.op.ip_check:
6662 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6663 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6664 (self.check_ip, self.op.instance_name),
6665 errors.ECODE_NOTUNIQUE)
6667 #### mac address generation
6668 # By generating here the mac address both the allocator and the hooks get
6669 # the real final mac address rather than the 'auto' or 'generate' value.
6670 # There is a race condition between the generation and the instance object
6671 # creation, which means that we know the mac is valid now, but we're not
6672 # sure it will be when we actually add the instance. If things go bad
6673 # adding the instance will abort because of a duplicate mac, and the
6674 # creation job will fail.
6675 for nic in self.nics:
6676 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6677 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6681 if self.op.iallocator is not None:
6682 self._RunAllocator()
6684 #### node related checks
6686 # check primary node
6687 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6688 assert self.pnode is not None, \
6689 "Cannot retrieve locked node %s" % self.op.pnode
6691 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6692 pnode.name, errors.ECODE_STATE)
6694 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6695 pnode.name, errors.ECODE_STATE)
6697 self.secondaries = []
6699 # mirror node verification
6700 if self.op.disk_template in constants.DTS_NET_MIRROR:
6701 if self.op.snode is None:
6702 raise errors.OpPrereqError("The networked disk templates need"
6703 " a mirror node", errors.ECODE_INVAL)
6704 if self.op.snode == pnode.name:
6705 raise errors.OpPrereqError("The secondary node cannot be the"
6706 " primary node.", errors.ECODE_INVAL)
6707 _CheckNodeOnline(self, self.op.snode)
6708 _CheckNodeNotDrained(self, self.op.snode)
6709 self.secondaries.append(self.op.snode)
6711 nodenames = [pnode.name] + self.secondaries
6713 req_size = _ComputeDiskSize(self.op.disk_template,
6716 # Check lv size requirements, if not adopting
6717 if req_size is not None and not self.adopt_disks:
6718 _CheckNodesFreeDisk(self, nodenames, req_size)
6720 if self.adopt_disks: # instead, we must check the adoption data
6721 all_lvs = set([i["adopt"] for i in self.disks])
6722 if len(all_lvs) != len(self.disks):
6723 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6725 for lv_name in all_lvs:
6727 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6728 except errors.ReservationError:
6729 raise errors.OpPrereqError("LV named %s used by another instance" %
6730 lv_name, errors.ECODE_NOTUNIQUE)
6732 node_lvs = self.rpc.call_lv_list([pnode.name],
6733 self.cfg.GetVGName())[pnode.name]
6734 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6735 node_lvs = node_lvs.payload
6736 delta = all_lvs.difference(node_lvs.keys())
6738 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6739 utils.CommaJoin(delta),
6741 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6743 raise errors.OpPrereqError("Online logical volumes found, cannot"
6744 " adopt: %s" % utils.CommaJoin(online_lvs),
6746 # update the size of disk based on what is found
6747 for dsk in self.disks:
6748 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
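# Descriptive note on the adoption checks above: the payload of call_lv_list
# maps each LV name to a tuple whose first element is its size (used to
# overwrite the requested size) and whose third element marks an LV that is
# currently online/in use, which is refused for adoption.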
6750 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6752 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6754 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6756 # memory check on primary node
6758 _CheckNodeFreeMemory(self, self.pnode.name,
6759 "creating instance %s" % self.op.instance_name,
6760 self.be_full[constants.BE_MEMORY],
6763 self.dry_run_result = list(nodenames)
6765 def Exec(self, feedback_fn):
6766 """Create and add the instance to the cluster.
6769 instance = self.op.instance_name
6770 pnode_name = self.pnode.name
6772 ht_kind = self.op.hypervisor
6773 if ht_kind in constants.HTS_REQ_PORT:
6774 network_port = self.cfg.AllocatePort()
6778 if constants.ENABLE_FILE_STORAGE:
6779 # this is needed because os.path.join does not accept None arguments
6780 if self.op.file_storage_dir is None:
6781 string_file_storage_dir = ""
6783 string_file_storage_dir = self.op.file_storage_dir
6785 # build the full file storage dir path
6786 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6787 string_file_storage_dir, instance)
6789 file_storage_dir = ""
6791 disks = _GenerateDiskTemplate(self,
6792 self.op.disk_template,
6793 instance, pnode_name,
6797 self.op.file_driver,
6800 iobj = objects.Instance(name=instance, os=self.op.os_type,
6801 primary_node=pnode_name,
6802 nics=self.nics, disks=disks,
6803 disk_template=self.op.disk_template,
6805 network_port=network_port,
6806 beparams=self.op.beparams,
6807 hvparams=self.op.hvparams,
6808 hypervisor=self.op.hypervisor,
6811 if self.adopt_disks:
6812 # rename LVs to the newly-generated names; we need to construct
6813 # 'fake' LV disks with the old data, plus the new unique_id
6814 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6816 for t_dsk, a_dsk in zip (tmp_disks, self.disks):
6817 rename_to.append(t_dsk.logical_id)
6818 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6819 self.cfg.SetDiskID(t_dsk, pnode_name)
6820 result = self.rpc.call_blockdev_rename(pnode_name,
6821 zip(tmp_disks, rename_to))
6822 result.Raise("Failed to rename adopted LVs")
6824 feedback_fn("* creating instance disks...")
6826 _CreateDisks(self, iobj)
6827 except errors.OpExecError:
6828 self.LogWarning("Device creation failed, reverting...")
6830 _RemoveDisks(self, iobj)
6832 self.cfg.ReleaseDRBDMinors(instance)
6835 feedback_fn("adding instance %s to cluster config" % instance)
6837 self.cfg.AddInstance(iobj, self.proc.GetECId())
6839 # Declare that we don't want to remove the instance lock anymore, as we've
6840 # added the instance to the config
6841 del self.remove_locks[locking.LEVEL_INSTANCE]
6842 # Unlock all the nodes
6843 if self.op.mode == constants.INSTANCE_IMPORT:
6844 nodes_keep = [self.op.src_node]
6845 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6846 if node != self.op.src_node]
6847 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6848 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6850 self.context.glm.release(locking.LEVEL_NODE)
6851 del self.acquired_locks[locking.LEVEL_NODE]
6853 if self.op.wait_for_sync:
6854 disk_abort = not _WaitForSync(self, iobj)
6855 elif iobj.disk_template in constants.DTS_NET_MIRROR:
6856 # make sure the disks are not degraded (still sync-ing is ok)
6858 feedback_fn("* checking mirrors status")
6859 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6864 _RemoveDisks(self, iobj)
6865 self.cfg.RemoveInstance(iobj.name)
6866 # Make sure the instance lock gets removed
6867 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6868 raise errors.OpExecError("There are some degraded disks for"
6871 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6872 if self.op.mode == constants.INSTANCE_CREATE:
6873 if not self.op.no_install:
6874 feedback_fn("* running the instance OS create scripts...")
6875 # FIXME: pass debug option from opcode to backend
6876 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6877 self.op.debug_level)
6878 result.Raise("Could not add os for instance %s"
6879 " on node %s" % (instance, pnode_name))
6881 elif self.op.mode == constants.INSTANCE_IMPORT:
6882 feedback_fn("* running the instance OS import scripts...")
6886 for idx, image in enumerate(self.src_images):
6890 # FIXME: pass debug option from opcode to backend
6891 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
6892 constants.IEIO_FILE, (image, ),
6893 constants.IEIO_SCRIPT,
6894 (iobj.disks[idx], idx),
6896 transfers.append(dt)
6899 masterd.instance.TransferInstanceData(self, feedback_fn,
6900 self.op.src_node, pnode_name,
6901 self.pnode.secondary_ip,
6903 if not compat.all(import_result):
6904 self.LogWarning("Some disks for instance %s on node %s were not"
6905 " imported successfully" % (instance, pnode_name))
6907 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6908 feedback_fn("* preparing remote import...")
6909 connect_timeout = constants.RIE_CONNECT_TIMEOUT
6910 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
6912 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
6913 self.source_x509_ca,
6914 self._cds, timeouts)
6915 if not compat.all(disk_results):
6916 # TODO: Should the instance still be started, even if some disks
6917 # failed to import (valid for local imports, too)?
6918 self.LogWarning("Some disks for instance %s on node %s were not"
6919 " imported successfully" % (instance, pnode_name))
6921 # Run rename script on newly imported instance
6922 assert iobj.name == instance
6923 feedback_fn("Running rename script for %s" % instance)
6924 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
6925 self.source_instance_name,
6926 self.op.debug_level)
6928 self.LogWarning("Failed to run rename script for %s on node"
6929 " %s: %s" % (instance, pnode_name, result.fail_msg))
6932 # also checked in the prereq part
6933 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6937 iobj.admin_up = True
6938 self.cfg.Update(iobj, feedback_fn)
6939 logging.info("Starting instance %s on node %s", instance, pnode_name)
6940 feedback_fn("* starting instance...")
6941 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6942 result.Raise("Could not start instance")
6944 return list(iobj.all_nodes)
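# Recap of Exec above (descriptive only): adopted LVs are renamed to the
# generated names, the disks are created (and rolled back on failure), the
# instance is added to the configuration, node locks are released, disk sync
# is awaited, the OS is installed or imported depending on self.op.mode and,
# when requested, the instance is finally started.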
6947 class LUConnectConsole(NoHooksLU):
6948 """Connect to an instance's console.
6950 This is somewhat special in that it returns the command line that
6951 you need to run on the master node in order to connect to the
6955 _OP_REQP = ["instance_name"]
6958 def ExpandNames(self):
6959 self._ExpandAndLockInstance()
6961 def CheckPrereq(self):
6962 """Check prerequisites.
6964 This checks that the instance is in the cluster.
6967 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6968 assert self.instance is not None, \
6969 "Cannot retrieve locked instance %s" % self.op.instance_name
6970 _CheckNodeOnline(self, self.instance.primary_node)
6972 def Exec(self, feedback_fn):
6973 """Connect to the console of an instance
6976 instance = self.instance
6977 node = instance.primary_node
6979 node_insts = self.rpc.call_instance_list([node],
6980 [instance.hypervisor])[node]
6981 node_insts.Raise("Can't get node information from %s" % node)
6983 if instance.name not in node_insts.payload:
6984 raise errors.OpExecError("Instance %s is not running." % instance.name)
6986 logging.debug("Connecting to console of %s on %s", instance.name, node)
6988 hyper = hypervisor.GetHypervisor(instance.hypervisor)
6989 cluster = self.cfg.GetClusterInfo()
6990 # beparams and hvparams are passed separately, to avoid editing the
6991 # instance and then saving the defaults in the instance itself.
6992 hvparams = cluster.FillHV(instance)
6993 beparams = cluster.FillBE(instance)
6994 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6997 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7000 class LUReplaceDisks(LogicalUnit):
7001 """Replace the disks of an instance.
7004 HPATH = "mirrors-replace"
7005 HTYPE = constants.HTYPE_INSTANCE
7006 _OP_REQP = ["instance_name", "mode", "disks"]
7009 def CheckArguments(self):
7010 if not hasattr(self.op, "remote_node"):
7011 self.op.remote_node = None
7012 if not hasattr(self.op, "iallocator"):
7013 self.op.iallocator = None
7014 if not hasattr(self.op, "early_release"):
7015 self.op.early_release = False
7017 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7020 def ExpandNames(self):
7021 self._ExpandAndLockInstance()
7023 if self.op.iallocator is not None:
7024 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7026 elif self.op.remote_node is not None:
7027 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7028 self.op.remote_node = remote_node
7030 # Warning: do not remove the locking of the new secondary here
7031 # unless DRBD8.AddChildren is changed to work in parallel;
7032 # currently it doesn't since parallel invocations of
7033 # FindUnusedMinor will conflict
7034 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7035 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7038 self.needed_locks[locking.LEVEL_NODE] = []
7039 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7041 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7042 self.op.iallocator, self.op.remote_node,
7043 self.op.disks, False, self.op.early_release)
7045 self.tasklets = [self.replacer]
7047 def DeclareLocks(self, level):
7048 # If we're not already locking all nodes in the set we have to declare the
7049 # instance's primary/secondary nodes.
7050 if (level == locking.LEVEL_NODE and
7051 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7052 self._LockInstancesNodes()
7054 def BuildHooksEnv(self):
7057 This runs on the master, the primary and all the secondaries.
7060 instance = self.replacer.instance
7062 "MODE": self.op.mode,
7063 "NEW_SECONDARY": self.op.remote_node,
7064 "OLD_SECONDARY": instance.secondary_nodes[0],
7066 env.update(_BuildInstanceHookEnvByObject(self, instance))
7068 self.cfg.GetMasterNode(),
7069 instance.primary_node,
7071 if self.op.remote_node is not None:
7072 nl.append(self.op.remote_node)
7076 class LUEvacuateNode(LogicalUnit):
7077 """Relocate the secondary instances from a node.
7080 HPATH = "node-evacuate"
7081 HTYPE = constants.HTYPE_NODE
7082 _OP_REQP = ["node_name"]
7085 def CheckArguments(self):
7086 if not hasattr(self.op, "remote_node"):
7087 self.op.remote_node = None
7088 if not hasattr(self.op, "iallocator"):
7089 self.op.iallocator = None
7090 if not hasattr(self.op, "early_release"):
7091 self.op.early_release = False
7093 TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
7094 self.op.remote_node,
7097 def ExpandNames(self):
7098 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7100 self.needed_locks = {}
7102 # Declare node locks
7103 if self.op.iallocator is not None:
7104 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7106 elif self.op.remote_node is not None:
7107 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7109 # Warning: do not remove the locking of the new secondary here
7110 # unless DRBD8.AddChildren is changed to work in parallel;
7111 # currently it doesn't since parallel invocations of
7112 # FindUnusedMinor will conflict
7113 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
7114 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7117 raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
7119 # Create tasklets for replacing disks for all secondary instances on this
7124 for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
7125 logging.debug("Replacing disks for instance %s", inst.name)
7126 names.append(inst.name)
7128 replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
7129 self.op.iallocator, self.op.remote_node, [],
7130 True, self.op.early_release)
7131 tasklets.append(replacer)
7133 self.tasklets = tasklets
7134 self.instance_names = names
7136 # Declare instance locks
7137 self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
7139 def DeclareLocks(self, level):
7140 # If we're not already locking all nodes in the set we have to declare the
7141 # instance's primary/secondary nodes.
7142 if (level == locking.LEVEL_NODE and
7143 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7144 self._LockInstancesNodes()
7146 def BuildHooksEnv(self):
7149 This runs on the master, the primary and all the secondaries.
7153 "NODE_NAME": self.op.node_name,
7156 nl = [self.cfg.GetMasterNode()]
7158 if self.op.remote_node is not None:
7159 env["NEW_SECONDARY"] = self.op.remote_node
7160 nl.append(self.op.remote_node)
7162 return (env, nl, nl)
7165 class TLReplaceDisks(Tasklet):
7166 """Replaces disks for an instance.
7168 Note: Locking is not within the scope of this class.
7171 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7172 disks, delay_iallocator, early_release):
7173 """Initializes this class.
7176 Tasklet.__init__(self, lu)
7179 self.instance_name = instance_name
7181 self.iallocator_name = iallocator_name
7182 self.remote_node = remote_node
7184 self.delay_iallocator = delay_iallocator
7185 self.early_release = early_release
7188 self.instance = None
7189 self.new_node = None
7190 self.target_node = None
7191 self.other_node = None
7192 self.remote_node_info = None
7193 self.node_secondary_ip = None
7196 def CheckArguments(mode, remote_node, iallocator):
7197 """Helper function for users of this class.
7200 # check for valid parameter combination
7201 if mode == constants.REPLACE_DISK_CHG:
7202 if remote_node is None and iallocator is None:
7203 raise errors.OpPrereqError("When changing the secondary either an"
7204 " iallocator script must be used or the"
7205 " new node given", errors.ECODE_INVAL)
7207 if remote_node is not None and iallocator is not None:
7208 raise errors.OpPrereqError("Give either the iallocator or the new"
7209 " secondary, not both", errors.ECODE_INVAL)
7211 elif remote_node is not None or iallocator is not None:
7212 # Not replacing the secondary
7213 raise errors.OpPrereqError("The iallocator and new node options can"
7214 " only be used when changing the"
7215 " secondary node", errors.ECODE_INVAL)
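# Summary of the combinations enforced above:
#   mode == REPLACE_DISK_CHG: exactly one of remote_node / iallocator is given
#   any other mode:           neither remote_node nor iallocator may be given
# e.g. (node name hypothetical)
#   TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
#                                 "node3.example.com", None)
# passes, while giving both a node and an allocator raises OpPrereqError.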
7218 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7219 """Compute a new secondary node using an IAllocator.
7222 ial = IAllocator(lu.cfg, lu.rpc,
7223 mode=constants.IALLOCATOR_MODE_RELOC,
7225 relocate_from=relocate_from)
7227 ial.Run(iallocator_name)
7230 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7231 " %s" % (iallocator_name, ial.info),
7234 if len(ial.result) != ial.required_nodes:
7235 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7236 " of nodes (%s), required %s" %
7238 len(ial.result), ial.required_nodes),
7241 remote_node_name = ial.result[0]
7243 lu.LogInfo("Selected new secondary for instance '%s': %s",
7244 instance_name, remote_node_name)
7246 return remote_node_name
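# For this relocation case the allocator is expected to return exactly one
# node name (ial.required_nodes == 1), which becomes the new secondary, e.g.
# "Selected new secondary for instance 'inst1': node4" (names hypothetical).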
7248 def _FindFaultyDisks(self, node_name):
7249 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7252 def CheckPrereq(self):
7253 """Check prerequisites.
7255 This checks that the instance is in the cluster.
7258 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7259 assert instance is not None, \
7260 "Cannot retrieve locked instance %s" % self.instance_name
7262 if instance.disk_template != constants.DT_DRBD8:
7263 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7264 " instances", errors.ECODE_INVAL)
7266 if len(instance.secondary_nodes) != 1:
7267 raise errors.OpPrereqError("The instance has a strange layout,"
7268 " expected one secondary but found %d" %
7269 len(instance.secondary_nodes),
7272 if not self.delay_iallocator:
7273 self._CheckPrereq2()
7275 def _CheckPrereq2(self):
7276 """Check prerequisites, second part.
7278 This function should always be part of CheckPrereq. It was separated and is
7279 now called from Exec because during node evacuation iallocator was only
7280 called with an unmodified cluster model, not taking planned changes into
7284 instance = self.instance
7285 secondary_node = instance.secondary_nodes[0]
7287 if self.iallocator_name is None:
7288 remote_node = self.remote_node
7290 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7291 instance.name, instance.secondary_nodes)
7293 if remote_node is not None:
7294 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7295 assert self.remote_node_info is not None, \
7296 "Cannot retrieve locked node %s" % remote_node
7298 self.remote_node_info = None
7300 if remote_node == self.instance.primary_node:
7301 raise errors.OpPrereqError("The specified node is the primary node of"
7302 " the instance.", errors.ECODE_INVAL)
7304 if remote_node == secondary_node:
7305 raise errors.OpPrereqError("The specified node is already the"
7306 " secondary node of the instance.",
7309 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7310 constants.REPLACE_DISK_CHG):
7311 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7314 if self.mode == constants.REPLACE_DISK_AUTO:
7315 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7316 faulty_secondary = self._FindFaultyDisks(secondary_node)
7318 if faulty_primary and faulty_secondary:
7319 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7320 " one node and can not be repaired"
7321 " automatically" % self.instance_name,
7325 self.disks = faulty_primary
7326 self.target_node = instance.primary_node
7327 self.other_node = secondary_node
7328 check_nodes = [self.target_node, self.other_node]
7329 elif faulty_secondary:
7330 self.disks = faulty_secondary
7331 self.target_node = secondary_node
7332 self.other_node = instance.primary_node
7333 check_nodes = [self.target_node, self.other_node]
7339 # Non-automatic modes
7340 if self.mode == constants.REPLACE_DISK_PRI:
7341 self.target_node = instance.primary_node
7342 self.other_node = secondary_node
7343 check_nodes = [self.target_node, self.other_node]
7345 elif self.mode == constants.REPLACE_DISK_SEC:
7346 self.target_node = secondary_node
7347 self.other_node = instance.primary_node
7348 check_nodes = [self.target_node, self.other_node]
7350 elif self.mode == constants.REPLACE_DISK_CHG:
7351 self.new_node = remote_node
7352 self.other_node = instance.primary_node
7353 self.target_node = secondary_node
7354 check_nodes = [self.new_node, self.other_node]
7356 _CheckNodeNotDrained(self.lu, remote_node)
7358 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7359 assert old_node_info is not None
7360 if old_node_info.offline and not self.early_release:
7361 # doesn't make sense to delay the release
7362 self.early_release = True
7363 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7364 " early-release mode", secondary_node)
7367 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7370 # If not specified all disks should be replaced
7372 self.disks = range(len(self.instance.disks))
7374 for node in check_nodes:
7375 _CheckNodeOnline(self.lu, node)
7377 # Check whether disks are valid
7378 for disk_idx in self.disks:
7379 instance.FindDisk(disk_idx)
7381 # Get secondary node IP addresses
7384 for node_name in [self.target_node, self.other_node, self.new_node]:
7385 if node_name is not None:
7386 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7388 self.node_secondary_ip = node_2nd_ip
7390 def Exec(self, feedback_fn):
7391 """Execute disk replacement.
7393 This dispatches the disk replacement to the appropriate handler.
7396 if self.delay_iallocator:
7397 self._CheckPrereq2()
7400 feedback_fn("No disks need replacement")
7403 feedback_fn("Replacing disk(s) %s for %s" %
7404 (utils.CommaJoin(self.disks), self.instance.name))
7406 activate_disks = (not self.instance.admin_up)
7408 # Activate the instance disks if we're replacing them on a down instance
7410 _StartInstanceDisks(self.lu, self.instance, True)
7413 # Should we replace the secondary node?
7414 if self.new_node is not None:
7415 fn = self._ExecDrbd8Secondary
7417 fn = self._ExecDrbd8DiskOnly
7419 return fn(feedback_fn)
7422 # Deactivate the instance disks if we're replacing them on a
7425 _SafeShutdownInstanceDisks(self.lu, self.instance)
7427 def _CheckVolumeGroup(self, nodes):
7428 self.lu.LogInfo("Checking volume groups")
7430 vgname = self.cfg.GetVGName()
7432 # Make sure volume group exists on all involved nodes
7433 results = self.rpc.call_vg_list(nodes)
7435 raise errors.OpExecError("Can't list volume groups on the nodes")
7439 res.Raise("Error checking node %s" % node)
7440 if vgname not in res.payload:
7441 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7444 def _CheckDisksExistence(self, nodes):
7445 # Check disk existence
7446 for idx, dev in enumerate(self.instance.disks):
7447 if idx not in self.disks:
7451 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7452 self.cfg.SetDiskID(dev, node)
7454 result = self.rpc.call_blockdev_find(node, dev)
7456 msg = result.fail_msg
7457 if msg or not result.payload:
7459 msg = "disk not found"
7460 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7463 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7464 for idx, dev in enumerate(self.instance.disks):
7465 if idx not in self.disks:
7468 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7471 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7473 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7474 " replace disks for instance %s" %
7475 (node_name, self.instance.name))
7477 def _CreateNewStorage(self, node_name):
7478 vgname = self.cfg.GetVGName()
7481 for idx, dev in enumerate(self.instance.disks):
7482 if idx not in self.disks:
7485 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7487 self.cfg.SetDiskID(dev, node_name)
7489 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7490 names = _GenerateUniqueNames(self.lu, lv_names)
7492 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7493 logical_id=(vgname, names[0]))
7494 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7495 logical_id=(vgname, names[1]))
7497 new_lvs = [lv_data, lv_meta]
7498 old_lvs = dev.children
7499 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7501 # we pass force_create=True to force the LVM creation
7502 for new_lv in new_lvs:
7503 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7504 _GetInstanceInfoText(self.instance), False)
7508 def _CheckDevices(self, node_name, iv_names):
7509 for name, (dev, _, _) in iv_names.iteritems():
7510 self.cfg.SetDiskID(dev, node_name)
7512 result = self.rpc.call_blockdev_find(node_name, dev)
7514 msg = result.fail_msg
7515 if msg or not result.payload:
7517 msg = "disk not found"
7518 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7521 if result.payload.is_degraded:
7522 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7524 def _RemoveOldStorage(self, node_name, iv_names):
7525 for name, (_, old_lvs, _) in iv_names.iteritems():
7526 self.lu.LogInfo("Remove logical volumes for %s" % name)
7529 self.cfg.SetDiskID(lv, node_name)
7531 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7533 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7534 hint="remove unused LVs manually")
7536 def _ReleaseNodeLock(self, node_name):
7537 """Releases the lock for a given node."""
7538 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7540 def _ExecDrbd8DiskOnly(self, feedback_fn):
7541 """Replace a disk on the primary or secondary for DRBD 8.
7543 The algorithm for replace is quite complicated:
7545 1. for each disk to be replaced:
7547 1. create new LVs on the target node with unique names
7548 1. detach old LVs from the drbd device
7549 1. rename old LVs to name_replaced.<time_t>
7550 1. rename new LVs to old LVs
7551 1. attach the new LVs (with the old names now) to the drbd device
7553 1. wait for sync across all devices
7555 1. for each modified disk:
7557 1. remove old LVs (which have the name name_replaced.<time_t>)
7559 Failures are not very well handled.
7564 # Step: check device activation
7565 self.lu.LogStep(1, steps_total, "Check device existence")
7566 self._CheckDisksExistence([self.other_node, self.target_node])
7567 self._CheckVolumeGroup([self.target_node, self.other_node])
7569 # Step: check other node consistency
7570 self.lu.LogStep(2, steps_total, "Check peer consistency")
7571 self._CheckDisksConsistency(self.other_node,
7572 self.other_node == self.instance.primary_node,
7575 # Step: create new storage
7576 self.lu.LogStep(3, steps_total, "Allocate new storage")
7577 iv_names = self._CreateNewStorage(self.target_node)
7579 # Step: for each lv, detach+rename*2+attach
7580 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7581 for dev, old_lvs, new_lvs in iv_names.itervalues():
7582 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7584 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7586 result.Raise("Can't detach drbd from local storage on node"
7587 " %s for device %s" % (self.target_node, dev.iv_name))
7589 #cfg.Update(instance)
7591 # ok, we created the new LVs, so now we know we have the needed
7592 # storage; as such, we proceed on the target node to rename
7593 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7594 # using the assumption that logical_id == physical_id (which in
7595 # turn is the unique_id on that node)
7597 # FIXME(iustin): use a better name for the replaced LVs
7598 temp_suffix = int(time.time())
7599 ren_fn = lambda d, suff: (d.physical_id[0],
7600 d.physical_id[1] + "_replaced-%s" % suff)
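# ren_fn maps an LV's (vg, name) physical id to its temporary name, e.g.
# ("xenvg", "xyz.disk0_data") -> ("xenvg", "xyz.disk0_data_replaced-1357000000")
# (the vg/LV names here are only illustrative)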
7602 # Build the rename list based on what LVs exist on the node
7603 rename_old_to_new = []
7604 for to_ren in old_lvs:
7605 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7606 if not result.fail_msg and result.payload:
7608 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7610 self.lu.LogInfo("Renaming the old LVs on the target node")
7611 result = self.rpc.call_blockdev_rename(self.target_node,
7613 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7615 # Now we rename the new LVs to the old LVs
7616 self.lu.LogInfo("Renaming the new LVs on the target node")
7617 rename_new_to_old = [(new, old.physical_id)
7618 for old, new in zip(old_lvs, new_lvs)]
7619 result = self.rpc.call_blockdev_rename(self.target_node,
7621 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7623 for old, new in zip(old_lvs, new_lvs):
7624 new.logical_id = old.logical_id
7625 self.cfg.SetDiskID(new, self.target_node)
7627 for disk in old_lvs:
7628 disk.logical_id = ren_fn(disk, temp_suffix)
7629 self.cfg.SetDiskID(disk, self.target_node)
7631 # Now that the new lvs have the old name, we can add them to the device
7632 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7633 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7635 msg = result.fail_msg
7637 for new_lv in new_lvs:
7638 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7641 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7642 hint=("cleanup manually the unused logical"
7644 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7646 dev.children = new_lvs
7648 self.cfg.Update(self.instance, feedback_fn)
7651 if self.early_release:
7652 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7654 self._RemoveOldStorage(self.target_node, iv_names)
7655 # WARNING: we release both node locks here, do not do other RPCs
7656 # than WaitForSync to the primary node
7657 self._ReleaseNodeLock([self.target_node, self.other_node])
7660 # This can fail as the old devices are degraded and _WaitForSync
7661 # does a combined result over all disks, so we don't check its return value
7662 self.lu.LogStep(cstep, steps_total, "Sync devices")
7664 _WaitForSync(self.lu, self.instance)
7666 # Check all devices manually
7667 self._CheckDevices(self.instance.primary_node, iv_names)
7669 # Step: remove old storage
7670 if not self.early_release:
7671 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7673 self._RemoveOldStorage(self.target_node, iv_names)
7675 def _ExecDrbd8Secondary(self, feedback_fn):
7676 """Replace the secondary node for DRBD 8.
7678 The algorithm for replace is quite complicated:
7679 - for all disks of the instance:
7680 - create new LVs on the new node with same names
7681 - shutdown the drbd device on the old secondary
7682 - disconnect the drbd network on the primary
7683 - create the drbd device on the new secondary
7684 - network attach the drbd on the primary, using an artifice:
7685 the drbd code for Attach() will connect to the network if it
7686 finds a device which is connected to the good local disks but
7688 - wait for sync across all devices
7689 - remove all disks from the old secondary
7691 Failures are not very well handled.
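Note that the new DRBD devices are first created without any network
information (standalone) and only afterwards the primary is told to connect
to the new secondary; this is why both an "alone" and a "net" logical id are
computed below.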
7696 # Step: check device activation
7697 self.lu.LogStep(1, steps_total, "Check device existence")
7698 self._CheckDisksExistence([self.instance.primary_node])
7699 self._CheckVolumeGroup([self.instance.primary_node])
7701 # Step: check other node consistency
7702 self.lu.LogStep(2, steps_total, "Check peer consistency")
7703 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7705 # Step: create new storage
7706 self.lu.LogStep(3, steps_total, "Allocate new storage")
7707 for idx, dev in enumerate(self.instance.disks):
7708 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7709 (self.new_node, idx))
7710 # we pass force_create=True to force LVM creation
7711 for new_lv in dev.children:
7712 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7713 _GetInstanceInfoText(self.instance), False)
7715 # Step 4: drbd minors and drbd setup changes
7716 # after this, we must manually remove the drbd minors on both the
7717 # error and the success paths
7718 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7719 minors = self.cfg.AllocateDRBDMinor([self.new_node
7720 for dev in self.instance.disks],
7722 logging.debug("Allocated minors %r", minors)
7725 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7726 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7727 (self.new_node, idx))
7728 # create new devices on new_node; note that we create two IDs:
7729 # one without port, so the drbd will be activated without
7730 # networking information on the new node at this stage, and one
7731 # with network, for the latter activation in step 4
7732 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7733 if self.instance.primary_node == o_node1:
7736 assert self.instance.primary_node == o_node2, "Three-node instance?"
7739 new_alone_id = (self.instance.primary_node, self.new_node, None,
7740 p_minor, new_minor, o_secret)
7741 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7742 p_minor, new_minor, o_secret)
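# new_alone_id has no port, so the device is created unconnected on the new
# node; new_net_id (with the port) is what ends up in the configuration and
# is used for the later network attach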
7744 iv_names[idx] = (dev, dev.children, new_net_id)
7745 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7747 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7748 logical_id=new_alone_id,
7749 children=dev.children,
7752 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7753 _GetInstanceInfoText(self.instance), False)
7754 except errors.GenericError:
7755 self.cfg.ReleaseDRBDMinors(self.instance.name)
7758 # We have new devices, shutdown the drbd on the old secondary
7759 for idx, dev in enumerate(self.instance.disks):
7760 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7761 self.cfg.SetDiskID(dev, self.target_node)
7762 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7764 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7765 "node: %s" % (idx, msg),
7766 hint=("Please cleanup this device manually as"
7767 " soon as possible"))
7769 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7770 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7771 self.node_secondary_ip,
7772 self.instance.disks)\
7773 [self.instance.primary_node]
7775 msg = result.fail_msg
7777 # detaches didn't succeed (unlikely)
7778 self.cfg.ReleaseDRBDMinors(self.instance.name)
7779 raise errors.OpExecError("Can't detach the disks from the network on"
7780 " old node: %s" % (msg,))
7782 # if we managed to detach at least one, we update all the disks of
7783 # the instance to point to the new secondary
7784 self.lu.LogInfo("Updating instance configuration")
7785 for dev, _, new_logical_id in iv_names.itervalues():
7786 dev.logical_id = new_logical_id
7787 self.cfg.SetDiskID(dev, self.instance.primary_node)
7789 self.cfg.Update(self.instance, feedback_fn)
7791 # and now perform the drbd attach
7792 self.lu.LogInfo("Attaching primary drbds to new secondary"
7793 " (standalone => connected)")
7794 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7796 self.node_secondary_ip,
7797 self.instance.disks,
7800 for to_node, to_result in result.items():
7801 msg = to_result.fail_msg
7803 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7805 hint=("please do a gnt-instance info to see the"
7806 " status of disks"))
7808 if self.early_release:
7809 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7811 self._RemoveOldStorage(self.target_node, iv_names)
7812 # WARNING: we release all node locks here, do not do other RPCs
7813 # than WaitForSync to the primary node
7814 self._ReleaseNodeLock([self.instance.primary_node,
7819 # This can fail as the old devices are degraded and _WaitForSync
7820 # does a combined result over all disks, so we don't check its return value
7821 self.lu.LogStep(cstep, steps_total, "Sync devices")
7823 _WaitForSync(self.lu, self.instance)
7825 # Check all devices manually
7826 self._CheckDevices(self.instance.primary_node, iv_names)
7828 # Step: remove old storage
7829 if not self.early_release:
7830 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7831 self._RemoveOldStorage(self.target_node, iv_names)
7834 class LURepairNodeStorage(NoHooksLU):
7835 """Repairs the volume group on a node.
7838 _OP_REQP = ["node_name"]
7841 def CheckArguments(self):
7842 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7844 _CheckStorageType(self.op.storage_type)
7846 def ExpandNames(self):
7847 self.needed_locks = {
7848 locking.LEVEL_NODE: [self.op.node_name],
7851 def _CheckFaultyDisks(self, instance, node_name):
7852 """Ensure faulty disks abort the opcode or at least warn."""
7854 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7856 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7857 " node '%s'" % (instance.name, node_name),
7859 except errors.OpPrereqError, err:
7860 if self.op.ignore_consistency:
7861 self.proc.LogWarning(str(err.args[0]))
7865 def CheckPrereq(self):
7866 """Check prerequisites.
7869 storage_type = self.op.storage_type
7871 if (constants.SO_FIX_CONSISTENCY not in
7872 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7873 raise errors.OpPrereqError("Storage units of type '%s' can not be"
7874 " repaired" % storage_type,
7877 # Check whether any instance on this node has faulty disks
7878 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7879 if not inst.admin_up:
7881 check_nodes = set(inst.all_nodes)
7882 check_nodes.discard(self.op.node_name)
7883 for inst_node_name in check_nodes:
7884 self._CheckFaultyDisks(inst, inst_node_name)
7886 def Exec(self, feedback_fn):
7887 feedback_fn("Repairing storage unit '%s' on %s ..." %
7888 (self.op.name, self.op.node_name))
7890 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7891 result = self.rpc.call_storage_execute(self.op.node_name,
7892 self.op.storage_type, st_args,
7894 constants.SO_FIX_CONSISTENCY)
7895 result.Raise("Failed to repair storage unit '%s' on %s" %
7896 (self.op.name, self.op.node_name))
7899 class LUNodeEvacuationStrategy(NoHooksLU):
7900 """Computes the node evacuation strategy.
7903 _OP_REQP = ["nodes"]
7906 def CheckArguments(self):
7907 if not hasattr(self.op, "remote_node"):
7908 self.op.remote_node = None
7909 if not hasattr(self.op, "iallocator"):
7910 self.op.iallocator = None
7911 if self.op.remote_node is not None and self.op.iallocator is not None:
7912 raise errors.OpPrereqError("Give either the iallocator or the new"
7913 " secondary, not both", errors.ECODE_INVAL)
7915 def ExpandNames(self):
7916 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7917 self.needed_locks = locks = {}
7918 if self.op.remote_node is None:
7919 locks[locking.LEVEL_NODE] = locking.ALL_SET
7921 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7922 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7924 def CheckPrereq(self):
7927 def Exec(self, feedback_fn):
7928 if self.op.remote_node is not None:
7930 for node in self.op.nodes:
7931 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7934 if i.primary_node == self.op.remote_node:
7935 raise errors.OpPrereqError("Node %s is the primary node of"
7936 " instance %s, cannot use it as"
7938 (self.op.remote_node, i.name),
7940 result.append([i.name, self.op.remote_node])
7942 ial = IAllocator(self.cfg, self.rpc,
7943 mode=constants.IALLOCATOR_MODE_MEVAC,
7944 evac_nodes=self.op.nodes)
7945 ial.Run(self.op.iallocator, validate=True)
7947 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
7953 class LUGrowDisk(LogicalUnit):
7954 """Grow a disk of an instance.
7958 HTYPE = constants.HTYPE_INSTANCE
7959 _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7962 def ExpandNames(self):
7963 self._ExpandAndLockInstance()
7964 self.needed_locks[locking.LEVEL_NODE] = []
7965 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7967 def DeclareLocks(self, level):
7968 if level == locking.LEVEL_NODE:
7969 self._LockInstancesNodes()
7971 def BuildHooksEnv(self):
7974 This runs on the master, the primary and all the secondaries.
7978 "DISK": self.op.disk,
7979 "AMOUNT": self.op.amount,
7981 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7982 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7985 def CheckPrereq(self):
7986 """Check prerequisites.
7988 This checks that the instance is in the cluster.
7991 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7992 assert instance is not None, \
7993 "Cannot retrieve locked instance %s" % self.op.instance_name
7994 nodenames = list(instance.all_nodes)
7995 for node in nodenames:
7996 _CheckNodeOnline(self, node)
7999 self.instance = instance
8001 if instance.disk_template not in constants.DTS_GROWABLE:
8002 raise errors.OpPrereqError("Instance's disk layout does not support"
8003 " growing.", errors.ECODE_INVAL)
8005 self.disk = instance.FindDisk(self.op.disk)
8007 if instance.disk_template != constants.DT_FILE:
8008 # TODO: check the free disk space for file, when that feature will be
8010 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8012 def Exec(self, feedback_fn):
8013 """Execute disk grow.
8016 instance = self.instance
8019 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8021 raise errors.OpExecError("Cannot activate block device to grow")
8023 for node in instance.all_nodes:
8024 self.cfg.SetDiskID(disk, node)
8025 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8026 result.Raise("Grow request failed to node %s" % node)
8028 # TODO: Rewrite code to work properly
8029 # DRBD goes into sync mode for a short amount of time after executing the
8030 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8031 # calling "resize" in sync mode fails. Sleeping for a short amount of
8032 # time is a work-around.
8035 disk.RecordGrow(self.op.amount)
8036 self.cfg.Update(instance, feedback_fn)
8037 if self.op.wait_for_sync:
8038 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8040 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8041 " status.\nPlease check the instance.")
8042 if not instance.admin_up:
8043 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8044 elif not instance.admin_up:
8045 self.proc.LogWarning("Not shutting down the disk even if the instance is"
8046 " not supposed to be running because no wait for"
8047 " sync mode was requested.")
8050 class LUQueryInstanceData(NoHooksLU):
8051 """Query runtime instance data.
8054 _OP_REQP = ["instances", "static"]
8057 def ExpandNames(self):
8058 self.needed_locks = {}
8059 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8061 if not isinstance(self.op.instances, list):
8062 raise errors.OpPrereqError("Invalid argument type 'instances'",
8065 if self.op.instances:
8066 self.wanted_names = []
8067 for name in self.op.instances:
8068 full_name = _ExpandInstanceName(self.cfg, name)
8069 self.wanted_names.append(full_name)
8070 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8072 self.wanted_names = None
8073 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8075 self.needed_locks[locking.LEVEL_NODE] = []
8076 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8078 def DeclareLocks(self, level):
8079 if level == locking.LEVEL_NODE:
8080 self._LockInstancesNodes()
8082 def CheckPrereq(self):
8083 """Check prerequisites.
8085 This only checks the optional instance list against the existing names.
8088 if self.wanted_names is None:
8089 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8091 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8092 in self.wanted_names]
8095 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8096 """Returns the status of a block device
8099 if self.op.static or not node:
8102 self.cfg.SetDiskID(dev, node)
8104 result = self.rpc.call_blockdev_find(node, dev)
8108 result.Raise("Can't compute disk status for %s" % instance_name)
8110 status = result.payload
8114 return (status.dev_path, status.major, status.minor,
8115 status.sync_percent, status.estimated_time,
8116 status.is_degraded, status.ldisk_status)
8118 def _ComputeDiskStatus(self, instance, snode, dev):
8119 """Compute block device status.
8122 if dev.dev_type in constants.LDS_DRBD:
8123 # we change the snode then (otherwise we use the one passed in)
8124 if dev.logical_id[0] == instance.primary_node:
8125 snode = dev.logical_id[1]
8127 snode = dev.logical_id[0]
8129 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8131 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8134 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8135 for child in dev.children]
8140 "iv_name": dev.iv_name,
8141 "dev_type": dev.dev_type,
8142 "logical_id": dev.logical_id,
8143 "physical_id": dev.physical_id,
8144 "pstatus": dev_pstatus,
8145 "sstatus": dev_sstatus,
8146 "children": dev_children,
8153 def Exec(self, feedback_fn):
8154 """Gather and return data"""
8157 cluster = self.cfg.GetClusterInfo()
8159 for instance in self.wanted_instances:
8160 if not self.op.static:
8161 remote_info = self.rpc.call_instance_info(instance.primary_node,
8163 instance.hypervisor)
8164 remote_info.Raise("Error checking node %s" % instance.primary_node)
8165 remote_info = remote_info.payload
8166 if remote_info and "state" in remote_info:
8169 remote_state = "down"
8172 if instance.admin_up:
8175 config_state = "down"
8177 disks = [self._ComputeDiskStatus(instance, None, device)
8178 for device in instance.disks]
8181 "name": instance.name,
8182 "config_state": config_state,
8183 "run_state": remote_state,
8184 "pnode": instance.primary_node,
8185 "snodes": instance.secondary_nodes,
8187 # this happens to be the same format used for hooks
8188 "nics": _NICListToTuple(self, instance.nics),
8189 "disk_template": instance.disk_template,
8191 "hypervisor": instance.hypervisor,
8192 "network_port": instance.network_port,
8193 "hv_instance": instance.hvparams,
8194 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8195 "be_instance": instance.beparams,
8196 "be_actual": cluster.FillBE(instance),
8197 "serial_no": instance.serial_no,
8198 "mtime": instance.mtime,
8199 "ctime": instance.ctime,
8200 "uuid": instance.uuid,
8203 result[instance.name] = idict
8208 class LUSetInstanceParams(LogicalUnit):
8209 """Modifies an instances's parameters.
8212 HPATH = "instance-modify"
8213 HTYPE = constants.HTYPE_INSTANCE
8214 _OP_REQP = ["instance_name"]
8217 def CheckArguments(self):
8218 if not hasattr(self.op, 'nics'):
8220 if not hasattr(self.op, 'disks'):
8222 if not hasattr(self.op, 'beparams'):
8223 self.op.beparams = {}
8224 if not hasattr(self.op, 'hvparams'):
8225 self.op.hvparams = {}
8226 if not hasattr(self.op, "disk_template"):
8227 self.op.disk_template = None
8228 if not hasattr(self.op, "remote_node"):
8229 self.op.remote_node = None
8230 if not hasattr(self.op, "os_name"):
8231 self.op.os_name = None
8232 if not hasattr(self.op, "force_variant"):
8233 self.op.force_variant = False
8234 self.op.force = getattr(self.op, "force", False)
8235 if not (self.op.nics or self.op.disks or self.op.disk_template or
8236 self.op.hvparams or self.op.beparams or self.op.os_name):
8237 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8239 if self.op.hvparams:
8240 _CheckGlobalHvParams(self.op.hvparams)
8244 for disk_op, disk_dict in self.op.disks:
8245 if disk_op == constants.DDM_REMOVE:
8248 elif disk_op == constants.DDM_ADD:
8251 if not isinstance(disk_op, int):
8252 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8253 if not isinstance(disk_dict, dict):
8254 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8255 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8257 if disk_op == constants.DDM_ADD:
8258 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8259 if mode not in constants.DISK_ACCESS_SET:
8260 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8262 size = disk_dict.get('size', None)
8264 raise errors.OpPrereqError("Required disk parameter size missing",
8268 except (TypeError, ValueError), err:
8269 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8270 str(err), errors.ECODE_INVAL)
8271 disk_dict['size'] = size
8273 # modification of disk
8274 if 'size' in disk_dict:
8275 raise errors.OpPrereqError("Disk size change not possible, use"
8276 " grow-disk", errors.ECODE_INVAL)
8278 if disk_addremove > 1:
8279 raise errors.OpPrereqError("Only one disk add or remove operation"
8280 " supported at a time", errors.ECODE_INVAL)
8282 if self.op.disks and self.op.disk_template is not None:
8283 raise errors.OpPrereqError("Disk template conversion and other disk"
8284 " changes not supported at the same time",
8287 if self.op.disk_template:
8288 _CheckDiskTemplate(self.op.disk_template)
8289 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8290 self.op.remote_node is None):
8291 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8292 " one requires specifying a secondary node",
8297 for nic_op, nic_dict in self.op.nics:
8298 if nic_op == constants.DDM_REMOVE:
8301 elif nic_op == constants.DDM_ADD:
8304 if not isinstance(nic_op, int):
8305 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8306 if not isinstance(nic_dict, dict):
8307 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8308 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8310 # nic_dict should be a dict
8311 nic_ip = nic_dict.get('ip', None)
8312 if nic_ip is not None:
8313 if nic_ip.lower() == constants.VALUE_NONE:
8314 nic_dict['ip'] = None
8316 if not utils.IsValidIP(nic_ip):
8317 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8320 nic_bridge = nic_dict.get('bridge', None)
8321 nic_link = nic_dict.get('link', None)
8322 if nic_bridge and nic_link:
8323 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8324 " at the same time", errors.ECODE_INVAL)
8325 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8326 nic_dict['bridge'] = None
8327 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8328 nic_dict['link'] = None
8330 if nic_op == constants.DDM_ADD:
8331 nic_mac = nic_dict.get('mac', None)
8333 nic_dict['mac'] = constants.VALUE_AUTO
8335 if 'mac' in nic_dict:
8336 nic_mac = nic_dict['mac']
8337 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8338 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8340 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8341 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8342 " modifying an existing nic",
8345 if nic_addremove > 1:
8346 raise errors.OpPrereqError("Only one NIC add or remove operation"
8347 " supported at a time", errors.ECODE_INVAL)
8349 def ExpandNames(self):
8350 self._ExpandAndLockInstance()
8351 self.needed_locks[locking.LEVEL_NODE] = []
8352 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8354 def DeclareLocks(self, level):
8355 if level == locking.LEVEL_NODE:
8356 self._LockInstancesNodes()
8357 if self.op.disk_template and self.op.remote_node:
8358 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8359 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8361 def BuildHooksEnv(self):
8364 This runs on the master, primary and secondaries.
8368 if constants.BE_MEMORY in self.be_new:
8369 args['memory'] = self.be_new[constants.BE_MEMORY]
8370 if constants.BE_VCPUS in self.be_new:
8371 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8372 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8373 # information at all.
8376 nic_override = dict(self.op.nics)
8377 c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
8378 for idx, nic in enumerate(self.instance.nics):
8379 if idx in nic_override:
8380 this_nic_override = nic_override[idx]
8382 this_nic_override = {}
8383 if 'ip' in this_nic_override:
8384 ip = this_nic_override['ip']
8387 if 'mac' in this_nic_override:
8388 mac = this_nic_override['mac']
8391 if idx in self.nic_pnew:
8392 nicparams = self.nic_pnew[idx]
8394 nicparams = objects.FillDict(c_nicparams, nic.nicparams)
8395 mode = nicparams[constants.NIC_MODE]
8396 link = nicparams[constants.NIC_LINK]
8397 args['nics'].append((ip, mac, mode, link))
8398 if constants.DDM_ADD in nic_override:
8399 ip = nic_override[constants.DDM_ADD].get('ip', None)
8400 mac = nic_override[constants.DDM_ADD]['mac']
8401 nicparams = self.nic_pnew[constants.DDM_ADD]
8402 mode = nicparams[constants.NIC_MODE]
8403 link = nicparams[constants.NIC_LINK]
8404 args['nics'].append((ip, mac, mode, link))
8405 elif constants.DDM_REMOVE in nic_override:
8406 del args['nics'][-1]
8408 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8409 if self.op.disk_template:
8410 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8411 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8415 def _GetUpdatedParams(old_params, update_dict,
8416 default_values, parameter_types):
8417 """Return the new params dict for the given params.
8419 @type old_params: dict
8420 @param old_params: old parameters
8421 @type update_dict: dict
8422 @param update_dict: dict containing new parameter values,
8423 or constants.VALUE_DEFAULT to reset the
8424 parameter to its default value
8425 @type default_values: dict
8426 @param default_values: default values for the filled parameters
8427 @type parameter_types: dict
8428 @param parameter_types: dict mapping target dict keys to types
8429 in constants.ENFORCEABLE_TYPES
8430 @rtype: (dict, dict)
8431 @return: (new_parameters, filled_parameters)
8434 params_copy = copy.deepcopy(old_params)
8435 for key, val in update_dict.iteritems():
8436 if val == constants.VALUE_DEFAULT:
8438 del params_copy[key]
8442 params_copy[key] = val
8443 utils.ForceDictType(params_copy, parameter_types)
8444 params_filled = objects.FillDict(default_values, params_copy)
8445 return (params_copy, params_filled)
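# Illustrative example (hypothetical keys/values): with
#   old_params={"memory": 256}
#   update_dict={"memory": constants.VALUE_DEFAULT, "vcpus": 2}
#   default_values={"memory": 128, "vcpus": 1}
# the method returns new_parameters={"vcpus": 2} and
# filled_parameters={"memory": 128, "vcpus": 2}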
8447 def CheckPrereq(self):
8448 """Check prerequisites.
8450 This only checks the instance list against the existing names.
8453 self.force = self.op.force
8455 # checking the new params on the primary/secondary nodes
8457 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8458 cluster = self.cluster = self.cfg.GetClusterInfo()
8459 assert self.instance is not None, \
8460 "Cannot retrieve locked instance %s" % self.op.instance_name
8461 pnode = instance.primary_node
8462 nodelist = list(instance.all_nodes)
8464 if self.op.disk_template:
8465 if instance.disk_template == self.op.disk_template:
8466 raise errors.OpPrereqError("Instance already has disk template %s" %
8467 instance.disk_template, errors.ECODE_INVAL)
8469 if (instance.disk_template,
8470 self.op.disk_template) not in self._DISK_CONVERSIONS:
8471 raise errors.OpPrereqError("Unsupported disk template conversion from"
8472 " %s to %s" % (instance.disk_template,
8473 self.op.disk_template),
8475 if self.op.disk_template in constants.DTS_NET_MIRROR:
8476 _CheckNodeOnline(self, self.op.remote_node)
8477 _CheckNodeNotDrained(self, self.op.remote_node)
8478 disks = [{"size": d.size} for d in instance.disks]
8479 required = _ComputeDiskSize(self.op.disk_template, disks)
8480 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8481 _CheckInstanceDown(self, instance, "cannot change disk template")
8483 # hvparams processing
8484 if self.op.hvparams:
8485 i_hvdict, hv_new = self._GetUpdatedParams(
8486 instance.hvparams, self.op.hvparams,
8487 cluster.hvparams[instance.hypervisor],
8488 constants.HVS_PARAMETER_TYPES)
8490 hypervisor.GetHypervisor(
8491 instance.hypervisor).CheckParameterSyntax(hv_new)
8492 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8493 self.hv_new = hv_new # the new actual values
8494 self.hv_inst = i_hvdict # the new dict (without defaults)
8496 self.hv_new = self.hv_inst = {}
8498 # beparams processing
8499 if self.op.beparams:
8500 i_bedict, be_new = self._GetUpdatedParams(
8501 instance.beparams, self.op.beparams,
8502 cluster.beparams[constants.PP_DEFAULT],
8503 constants.BES_PARAMETER_TYPES)
8504 self.be_new = be_new # the new actual values
8505 self.be_inst = i_bedict # the new dict (without defaults)
8507 self.be_new = self.be_inst = {}
8511 if constants.BE_MEMORY in self.op.beparams and not self.force:
8512 mem_check_list = [pnode]
8513 if be_new[constants.BE_AUTO_BALANCE]:
8514 # either we changed auto_balance to yes or it was from before
8515 mem_check_list.extend(instance.secondary_nodes)
8516 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8517 instance.hypervisor)
8518 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8519 instance.hypervisor)
8520 pninfo = nodeinfo[pnode]
8521 msg = pninfo.fail_msg
8523 # Assume the primary node is unreachable and go ahead
8524 self.warn.append("Can't get info from primary node %s: %s" %
8526 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8527 self.warn.append("Node data from primary node %s doesn't contain"
8528 " free memory information" % pnode)
8529 elif instance_info.fail_msg:
8530 self.warn.append("Can't get instance runtime information: %s" %
8531 instance_info.fail_msg)
8533 if instance_info.payload:
8534 current_mem = int(instance_info.payload['memory'])
8536 # Assume instance not running
8537 # (there is a slight race condition here, but it's not very probable,
8538 # and we have no other way to check)
8540 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8541 pninfo.payload['memory_free'])
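# i.e. the extra memory requested beyond what the instance already uses
# must fit into the primary node's currently free memory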
8543 raise errors.OpPrereqError("This change will prevent the instance"
8544 " from starting, due to %d MB of memory"
8545 " missing on its primary node" % miss_mem,
8548 if be_new[constants.BE_AUTO_BALANCE]:
8549 for node, nres in nodeinfo.items():
8550 if node not in instance.secondary_nodes:
8554 self.warn.append("Can't get info from secondary node %s: %s" %
8556 elif not isinstance(nres.payload.get('memory_free', None), int):
8557 self.warn.append("Secondary node %s didn't return free"
8558 " memory information" % node)
8559 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8560 self.warn.append("Not enough memory to failover instance to"
8561 " secondary node %s" % node)
8566 for nic_op, nic_dict in self.op.nics:
8567 if nic_op == constants.DDM_REMOVE:
8568 if not instance.nics:
8569 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8572 if nic_op != constants.DDM_ADD:
8574 if not instance.nics:
8575 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8576 " no NICs" % nic_op,
8578 if nic_op < 0 or nic_op >= len(instance.nics):
8579 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8581 (nic_op, len(instance.nics) - 1),
8583 old_nic_params = instance.nics[nic_op].nicparams
8584 old_nic_ip = instance.nics[nic_op].ip
8589 update_params_dict = dict([(key, nic_dict[key])
8590 for key in constants.NICS_PARAMETERS
8591 if key in nic_dict])
8593 if 'bridge' in nic_dict:
8594 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8596 new_nic_params, new_filled_nic_params = \
8597 self._GetUpdatedParams(old_nic_params, update_params_dict,
8598 cluster.nicparams[constants.PP_DEFAULT],
8599 constants.NICS_PARAMETER_TYPES)
8600 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8601 self.nic_pinst[nic_op] = new_nic_params
8602 self.nic_pnew[nic_op] = new_filled_nic_params
8603 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8605 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8606 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8607 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8609 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8611 self.warn.append(msg)
8613 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8614 if new_nic_mode == constants.NIC_MODE_ROUTED:
8615 if 'ip' in nic_dict:
8616 nic_ip = nic_dict['ip']
8620 raise errors.OpPrereqError('Cannot set the nic ip to None'
8621 ' on a routed nic', errors.ECODE_INVAL)
8622 if 'mac' in nic_dict:
8623 nic_mac = nic_dict['mac']
8625 raise errors.OpPrereqError('Cannot set the nic mac to None',
8627 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8628 # otherwise generate the mac
8629 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8631 # or validate/reserve the current one
8633 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8634 except errors.ReservationError:
8635 raise errors.OpPrereqError("MAC address %s already in use"
8636 " in cluster" % nic_mac,
8637 errors.ECODE_NOTUNIQUE)
8640 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8641 raise errors.OpPrereqError("Disk operations not supported for"
8642 " diskless instances",
8644 for disk_op, _ in self.op.disks:
8645 if disk_op == constants.DDM_REMOVE:
8646 if len(instance.disks) == 1:
8647 raise errors.OpPrereqError("Cannot remove the last disk of"
8648 " an instance", errors.ECODE_INVAL)
8649 _CheckInstanceDown(self, instance, "cannot remove disks")
8651 if (disk_op == constants.DDM_ADD and
8652 len(instance.disks) >= constants.MAX_DISKS):
8653 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8654 " add more" % constants.MAX_DISKS,
8656 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8658 if disk_op < 0 or disk_op >= len(instance.disks):
8659 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8661 (disk_op, len(instance.disks)),
8665 if self.op.os_name and not self.op.force:
8666 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8667 self.op.force_variant)
8671 def _ConvertPlainToDrbd(self, feedback_fn):
8672 """Converts an instance from plain to drbd.
8675 feedback_fn("Converting template to drbd")
8676 instance = self.instance
8677 pnode = instance.primary_node
8678 snode = self.op.remote_node
8680 # create a fake disk info for _GenerateDiskTemplate
8681 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8682 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8683 instance.name, pnode, [snode],
8684 disk_info, None, None, 0)
8685 info = _GetInstanceInfoText(instance)
8686 feedback_fn("Creating aditional volumes...")
8687 # first, create the missing data and meta devices
8688 for disk in new_disks:
8689 # unfortunately this is... not too nice
8690 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8692 for child in disk.children:
8693 _CreateSingleBlockDev(self, snode, instance, child, info, True)
8694 # at this stage, all new LVs have been created, we can rename the
8696 feedback_fn("Renaming original volumes...")
8697 rename_list = [(o, n.children[0].logical_id)
8698 for (o, n) in zip(instance.disks, new_disks)]
8699 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8700 result.Raise("Failed to rename original LVs")
8702 feedback_fn("Initializing DRBD devices...")
8703 # all child devices are in place, we can now create the DRBD devices
8704 for disk in new_disks:
8705 for node in [pnode, snode]:
8706 f_create = node == pnode
8707 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8709 # at this point, the instance has been modified
8710 instance.disk_template = constants.DT_DRBD8
8711 instance.disks = new_disks
8712 self.cfg.Update(instance, feedback_fn)
8714 # disks are created, waiting for sync
8715 disk_abort = not _WaitForSync(self, instance)
8717 raise errors.OpExecError("There are some degraded disks for"
8718 " this instance, please cleanup manually")
8720 def _ConvertDrbdToPlain(self, feedback_fn):
8721 """Converts an instance from drbd to plain.
8724 instance = self.instance
8725 assert len(instance.secondary_nodes) == 1
8726 pnode = instance.primary_node
8727 snode = instance.secondary_nodes[0]
8728 feedback_fn("Converting template to plain")
8730 old_disks = instance.disks
8731 new_disks = [d.children[0] for d in old_disks]
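# for DRBD8 disks children[0] is the data LV and children[1] the metadata LV;
# only the data LV is kept when converting to plain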
8733 # copy over size and mode
8734 for parent, child in zip(old_disks, new_disks):
8735 child.size = parent.size
8736 child.mode = parent.mode
8738 # update instance structure
8739 instance.disks = new_disks
8740 instance.disk_template = constants.DT_PLAIN
8741 self.cfg.Update(instance, feedback_fn)
8743 feedback_fn("Removing volumes on the secondary node...")
8744 for disk in old_disks:
8745 self.cfg.SetDiskID(disk, snode)
8746 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8748 self.LogWarning("Could not remove block device %s on node %s,"
8749 " continuing anyway: %s", disk.iv_name, snode, msg)
8751 feedback_fn("Removing unneeded volumes on the primary node...")
8752 for idx, disk in enumerate(old_disks):
8753 meta = disk.children[1]
8754 self.cfg.SetDiskID(meta, pnode)
8755 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8757 self.LogWarning("Could not remove metadata for disk %d on node %s,"
8758 " continuing anyway: %s", idx, pnode, msg)
8761 def Exec(self, feedback_fn):
8762 """Modifies an instance.
8764 All parameters take effect only at the next restart of the instance.
8767 # Process here the warnings from CheckPrereq, as we don't have a
8768 # feedback_fn there.
8769 for warn in self.warn:
8770 feedback_fn("WARNING: %s" % warn)
8773 instance = self.instance
8775 for disk_op, disk_dict in self.op.disks:
8776 if disk_op == constants.DDM_REMOVE:
8777 # remove the last disk
8778 device = instance.disks.pop()
8779 device_idx = len(instance.disks)
8780 for node, disk in device.ComputeNodeTree(instance.primary_node):
8781 self.cfg.SetDiskID(disk, node)
8782 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8784 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8785 " continuing anyway", device_idx, node, msg)
8786 result.append(("disk/%d" % device_idx, "remove"))
8787 elif disk_op == constants.DDM_ADD:
8789 if instance.disk_template == constants.DT_FILE:
8790 file_driver, file_path = instance.disks[0].logical_id
8791 file_path = os.path.dirname(file_path)
8793 file_driver = file_path = None
8794 disk_idx_base = len(instance.disks)
8795 new_disk = _GenerateDiskTemplate(self,
8796 instance.disk_template,
8797 instance.name, instance.primary_node,
8798 instance.secondary_nodes,
8803 instance.disks.append(new_disk)
8804 info = _GetInstanceInfoText(instance)
8806 logging.info("Creating volume %s for instance %s",
8807 new_disk.iv_name, instance.name)
8808 # Note: this needs to be kept in sync with _CreateDisks
8810 for node in instance.all_nodes:
8811 f_create = node == instance.primary_node
8813 _CreateBlockDev(self, node, instance, new_disk,
8814 f_create, info, f_create)
8815 except errors.OpExecError, err:
8816 self.LogWarning("Failed to create volume %s (%s) on"
8818 new_disk.iv_name, new_disk, node, err)
8819 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8820 (new_disk.size, new_disk.mode)))
8822 # change a given disk
8823 instance.disks[disk_op].mode = disk_dict['mode']
8824 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8826 if self.op.disk_template:
8827 r_shut = _ShutdownInstanceDisks(self, instance)
8829 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8830 " proceed with disk template conversion")
8831 mode = (instance.disk_template, self.op.disk_template)
8833 self._DISK_CONVERSIONS[mode](self, feedback_fn)
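# the conversion helpers are stored as plain (unbound) functions in
# _DISK_CONVERSIONS, hence the explicit "self" argument in the call above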
8835 self.cfg.ReleaseDRBDMinors(instance.name)
8837 result.append(("disk_template", self.op.disk_template))
8840 for nic_op, nic_dict in self.op.nics:
8841 if nic_op == constants.DDM_REMOVE:
8842 # remove the last nic
8843 del instance.nics[-1]
8844 result.append(("nic.%d" % len(instance.nics), "remove"))
8845 elif nic_op == constants.DDM_ADD:
8846 # mac and bridge should be set by now
8847 mac = nic_dict['mac']
8848 ip = nic_dict.get('ip', None)
8849 nicparams = self.nic_pinst[constants.DDM_ADD]
8850 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8851 instance.nics.append(new_nic)
8852 result.append(("nic.%d" % (len(instance.nics) - 1),
8853 "add:mac=%s,ip=%s,mode=%s,link=%s" %
8854 (new_nic.mac, new_nic.ip,
8855 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8856 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8859 for key in 'mac', 'ip':
8861 setattr(instance.nics[nic_op], key, nic_dict[key])
8862 if nic_op in self.nic_pinst:
8863 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8864 for key, val in nic_dict.iteritems():
8865 result.append(("nic.%s/%d" % (key, nic_op), val))
8868 if self.op.hvparams:
8869 instance.hvparams = self.hv_inst
8870 for key, val in self.op.hvparams.iteritems():
8871 result.append(("hv/%s" % key, val))
8874 if self.op.beparams:
8875 instance.beparams = self.be_inst
8876 for key, val in self.op.beparams.iteritems():
8877 result.append(("be/%s" % key, val))
8881 instance.os = self.op.os_name
8883 self.cfg.Update(instance, feedback_fn)
8887 _DISK_CONVERSIONS = {
8888 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8889 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8893 class LUQueryExports(NoHooksLU):
8894 """Query the exports list
8897 _OP_REQP = ['nodes']
8900 def ExpandNames(self):
8901 self.needed_locks = {}
8902 self.share_locks[locking.LEVEL_NODE] = 1
8903 if not self.op.nodes:
8904 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8906 self.needed_locks[locking.LEVEL_NODE] = \
8907 _GetWantedNodes(self, self.op.nodes)
8909 def CheckPrereq(self):
8910 """Check prerequisites.
8913 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8915 def Exec(self, feedback_fn):
8916 """Compute the list of all the exported system images.
8919 @return: a dictionary with the structure node->(export-list)
8920 where export-list is a list of the instances exported on
8924 rpcresult = self.rpc.call_export_list(self.nodes)
8926 for node in rpcresult:
8927 if rpcresult[node].fail_msg:
8928 result[node] = False
8930 result[node] = rpcresult[node].payload
8935 class LUPrepareExport(NoHooksLU):
8936 """Prepares an instance for an export and returns useful information.
8939 _OP_REQP = ["instance_name", "mode"]
8942 def CheckArguments(self):
8943 """Check the arguments.
8946 if self.op.mode not in constants.EXPORT_MODES:
8947 raise errors.OpPrereqError("Invalid export mode %r" % self.op.mode,
8950 def ExpandNames(self):
8951 self._ExpandAndLockInstance()
8953 def CheckPrereq(self):
8954 """Check prerequisites.
8957 instance_name = self.op.instance_name
8959 self.instance = self.cfg.GetInstanceInfo(instance_name)
8960 assert self.instance is not None, \
8961 "Cannot retrieve locked instance %s" % self.op.instance_name
8962 _CheckNodeOnline(self, self.instance.primary_node)
8964 self._cds = _GetClusterDomainSecret()
8966 def Exec(self, feedback_fn):
8967 """Prepares an instance for an export.
8970 instance = self.instance
8972 if self.op.mode == constants.EXPORT_MODE_REMOTE:
8973 salt = utils.GenerateSecret(8)
8975 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
8976 result = self.rpc.call_x509_cert_create(instance.primary_node,
8977 constants.RIE_CERT_VALIDITY)
8978 result.Raise("Can't create X509 key and certificate on %s" % result.node)
8980 (name, cert_pem) = result.payload
8982 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
8986 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
8987 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
8989 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
8995 class LUExportInstance(LogicalUnit):
8996 """Export an instance to an image in the cluster.
8999 HPATH = "instance-export"
9000 HTYPE = constants.HTYPE_INSTANCE
9001 _OP_REQP = ["instance_name", "target_node", "shutdown"]
9004 def CheckArguments(self):
9005 """Check the arguments.
9008 _CheckBooleanOpField(self.op, "remove_instance")
9009 _CheckBooleanOpField(self.op, "ignore_remove_failures")
9011 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
9012 constants.DEFAULT_SHUTDOWN_TIMEOUT)
9013 self.remove_instance = getattr(self.op, "remove_instance", False)
9014 self.ignore_remove_failures = getattr(self.op, "ignore_remove_failures",
9016 self.export_mode = getattr(self.op, "mode", constants.EXPORT_MODE_LOCAL)
9017 self.x509_key_name = getattr(self.op, "x509_key_name", None)
9018 self.dest_x509_ca_pem = getattr(self.op, "destination_x509_ca", None)
9020 if self.remove_instance and not self.op.shutdown:
9021 raise errors.OpPrereqError("Can not remove instance without shutting it"
9024 if self.export_mode not in constants.EXPORT_MODES:
9025 raise errors.OpPrereqError("Invalid export mode %r" % self.export_mode,
9028 if self.export_mode == constants.EXPORT_MODE_REMOTE:
9029 if not self.x509_key_name:
9030 raise errors.OpPrereqError("Missing X509 key name for encryption",
9033 if not self.dest_x509_ca_pem:
9034 raise errors.OpPrereqError("Missing destination X509 CA",
9037 def ExpandNames(self):
9038 self._ExpandAndLockInstance()
9040 # Lock all nodes for local exports
9041 if self.export_mode == constants.EXPORT_MODE_LOCAL:
9042 # FIXME: lock only instance primary and destination node
9044 # Sad but true, for now we have to lock all nodes, as we don't know where
9045 # the previous export might be, and in this LU we search for it and
9046 # remove it from its current node. In the future we could fix this by:
9047 # - making a tasklet to search (share-lock all), then create the new one,
9048 # then one to remove, after
9049 # - removing the removal operation altogether
9050 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9052 def DeclareLocks(self, level):
9053 """Last minute lock declaration."""
9054 # All nodes are locked anyway, so nothing to do here.
9056 def BuildHooksEnv(self):
9059 This will run on the master, primary node and target node.
9063 "EXPORT_MODE": self.export_mode,
9064 "EXPORT_NODE": self.op.target_node,
9065 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9066 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
9067 # TODO: Generic function for boolean env variables
9068 "REMOVE_INSTANCE": str(bool(self.remove_instance)),
9071 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9073 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9075 if self.export_mode == constants.EXPORT_MODE_LOCAL:
9076 nl.append(self.op.target_node)
9080 def CheckPrereq(self):
9081 """Check prerequisites.
9083 This checks that the instance and node names are valid.
9086 instance_name = self.op.instance_name
9088 self.instance = self.cfg.GetInstanceInfo(instance_name)
9089 assert self.instance is not None, \
9090 "Cannot retrieve locked instance %s" % self.op.instance_name
9091 _CheckNodeOnline(self, self.instance.primary_node)
9093 if self.export_mode == constants.EXPORT_MODE_LOCAL:
9094 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9095 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9096 assert self.dst_node is not None
9098 _CheckNodeOnline(self, self.dst_node.name)
9099 _CheckNodeNotDrained(self, self.dst_node.name)
9102 self.dest_x509_ca = None
9104 elif self.export_mode == constants.EXPORT_MODE_REMOTE:
9105 self.dst_node = None
9107 if len(self.op.target_node) != len(self.instance.disks):
9108 raise errors.OpPrereqError(("Received destination information for %s"
9109 " disks, but instance %s has %s disks") %
9110 (len(self.op.target_node), instance_name,
9111 len(self.instance.disks)),
9114 cds = _GetClusterDomainSecret()
9116 # Check X509 key name
9118 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9119 except (TypeError, ValueError), err:
9120 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9122 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9123 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9126 # Load and verify CA
9128 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9129 except OpenSSL.crypto.Error, err:
9130 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9131 (err, ), errors.ECODE_INVAL)
9133 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9134 if errcode is not None:
9135 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" % (msg, ),
9138 self.dest_x509_ca = cert
9140 # Verify target information
9141 for idx, disk_data in enumerate(self.op.target_node):
9143 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9144 except errors.GenericError, err:
9145 raise errors.OpPrereqError("Target info for disk %s: %s" % (idx, err),
9149 raise errors.ProgrammerError("Unhandled export mode %r" %
9152 # instance disk type verification
9153 # TODO: Implement export support for file-based disks
9154 for disk in self.instance.disks:
9155 if disk.dev_type == constants.LD_FILE:
9156 raise errors.OpPrereqError("Export not supported for instances with"
9157 " file-based disks", errors.ECODE_INVAL)
9159 def _CleanupExports(self, feedback_fn):
9160 """Removes exports of current instance from all other nodes.
9162 If an instance in a cluster with nodes A..D was exported to node C, its
9163 exports will be removed from the nodes A, B and D.
9166 assert self.export_mode != constants.EXPORT_MODE_REMOTE
9168 nodelist = self.cfg.GetNodeList()
9169 nodelist.remove(self.dst_node.name)
9171 # on one-node clusters nodelist will be empty after the removal
9172 # if we proceed the backup would be removed because OpQueryExports
9173 # substitutes an empty list with the full cluster node list.
9174 iname = self.instance.name
9176 feedback_fn("Removing old exports for instance %s" % iname)
9177 exportlist = self.rpc.call_export_list(nodelist)
9178 for node in exportlist:
9179 if exportlist[node].fail_msg:
9181 if iname in exportlist[node].payload:
9182 msg = self.rpc.call_export_remove(node, iname).fail_msg
9184 self.LogWarning("Could not remove older export for instance %s"
9185 " on node %s: %s", iname, node, msg)
9187 def Exec(self, feedback_fn):
9188 """Export an instance to an image in the cluster.
9191 assert self.export_mode in constants.EXPORT_MODES
9193 instance = self.instance
9194 src_node = instance.primary_node
9196 if self.op.shutdown:
9197 # shutdown the instance, but not the disks
9198 feedback_fn("Shutting down instance %s" % instance.name)
9199 result = self.rpc.call_instance_shutdown(src_node, instance,
9200 self.shutdown_timeout)
9201 # TODO: Maybe ignore failures if ignore_remove_failures is set
9202 result.Raise("Could not shutdown instance %s on"
9203 " node %s" % (instance.name, src_node))
9205 # set the disks ID correctly since call_instance_start needs the
9206 # correct drbd minor to create the symlinks
9207 for disk in instance.disks:
9208 self.cfg.SetDiskID(disk, src_node)
9210 activate_disks = (not instance.admin_up)
9213 # Activate the instance disks if we're exporting a stopped instance
9214 feedback_fn("Activating disks for %s" % instance.name)
9215 _StartInstanceDisks(self, instance, None)
9218 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9221 helper.CreateSnapshots()
9223 if self.export_mode == constants.EXPORT_MODE_LOCAL:
9224 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9225 elif self.export_mode == constants.EXPORT_MODE_REMOTE:
9226 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9227 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9229 (key_name, _, _) = self.x509_key_name
9232 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9235 opts = objects.ImportExportOptions(key_name=key_name,
9238 (fin_resu, dresults) = helper.RemoteExport(opts, self.op.target_node,
9243 # Check for backwards compatibility
9244 assert len(dresults) == len(instance.disks)
9245 assert compat.all(isinstance(i, bool) for i in dresults), \
9246 "Not all results are boolean: %r" % dresults
9250 feedback_fn("Deactivating disks for %s" % instance.name)
9251 _ShutdownInstanceDisks(self, instance)
9253 # Remove instance if requested
9254 if self.remove_instance:
9255 if not (compat.all(dresults) and fin_resu):
9256 feedback_fn("Not removing instance %s as parts of the export failed" %
9259 feedback_fn("Removing instance %s" % instance.name)
9260 _RemoveInstance(self, feedback_fn, instance,
9261 self.ignore_remove_failures)
9263 if self.export_mode == constants.EXPORT_MODE_LOCAL:
9264 self._CleanupExports(feedback_fn)
9266 return fin_resu, dresults
9269 class LURemoveExport(NoHooksLU):
9270 """Remove exports related to the named instance.
9273 _OP_REQP = ["instance_name"]
9276 def ExpandNames(self):
9277 self.needed_locks = {}
9278 # We need all nodes to be locked in order for RemoveExport to work, but we
9279 # don't need to lock the instance itself, as nothing will happen to it (and
9280 # we can remove exports also for a removed instance)
9281 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9283 def CheckPrereq(self):
9284 """Check prerequisites.
9288 def Exec(self, feedback_fn):
9289 """Remove any export.
9292 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9293 # If the instance was not found we'll try with the name that was passed in.
9294 # This will only work if it was an FQDN, though.
9296 if not instance_name:
9298 instance_name = self.op.instance_name
9300 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9301 exportlist = self.rpc.call_export_list(locked_nodes)
9303 for node in exportlist:
9304 msg = exportlist[node].fail_msg
9306 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9308 if instance_name in exportlist[node].payload:
9310 result = self.rpc.call_export_remove(node, instance_name)
9311 msg = result.fail_msg
9313 logging.error("Could not remove export for instance %s"
9314 " on node %s: %s", instance_name, node, msg)
9316 if fqdn_warn and not found:
9317 feedback_fn("Export not found. If trying to remove an export belonging"
9318 " to a deleted instance please use its Fully Qualified"
9322 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9325 This is an abstract class which is the parent of all the other tags LUs.
9329 def ExpandNames(self):
9330 self.needed_locks = {}
9331 if self.op.kind == constants.TAG_NODE:
9332 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9333 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9334 elif self.op.kind == constants.TAG_INSTANCE:
9335 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9336 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9338 def CheckPrereq(self):
9339 """Check prerequisites.
9342 if self.op.kind == constants.TAG_CLUSTER:
9343 self.target = self.cfg.GetClusterInfo()
9344 elif self.op.kind == constants.TAG_NODE:
9345 self.target = self.cfg.GetNodeInfo(self.op.name)
9346 elif self.op.kind == constants.TAG_INSTANCE:
9347 self.target = self.cfg.GetInstanceInfo(self.op.name)
9349 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9350 str(self.op.kind), errors.ECODE_INVAL)
9353 class LUGetTags(TagsLU):
9354 """Returns the tags of a given object.
9357 _OP_REQP = ["kind", "name"]
9360 def Exec(self, feedback_fn):
9361 """Returns the tag list.
9364 return list(self.target.GetTags())
9367 class LUSearchTags(NoHooksLU):
9368 """Searches the tags for a given pattern.
9371 _OP_REQP = ["pattern"]
9374 def ExpandNames(self):
9375 self.needed_locks = {}
9377 def CheckPrereq(self):
9378 """Check prerequisites.
9380 This checks the pattern passed for validity by compiling it.
9384 self.re = re.compile(self.op.pattern)
9385 except re.error, err:
9386 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9387 (self.op.pattern, err), errors.ECODE_INVAL)
9389 def Exec(self, feedback_fn):
9390 """Returns the tag list.
9394 tgts = [("/cluster", cfg.GetClusterInfo())]
9395 ilist = cfg.GetAllInstancesInfo().values()
9396 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9397 nlist = cfg.GetAllNodesInfo().values()
9398 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9400 for path, target in tgts:
9401 for tag in target.GetTags():
9402 if self.re.search(tag):
9403 results.append((path, tag))
9407 class LUAddTags(TagsLU):
9408 """Sets a tag on a given object.
9411 _OP_REQP = ["kind", "name", "tags"]
9414 def CheckPrereq(self):
9415 """Check prerequisites.
9417 This checks the type and length of the tag name and value.
9420 TagsLU.CheckPrereq(self)
9421 for tag in self.op.tags:
9422 objects.TaggableObject.ValidateTag(tag)
9424 def Exec(self, feedback_fn):
9425 """Sets the tag.
9427 """
9428 try:
9429 for tag in self.op.tags:
9430 self.target.AddTag(tag)
9431 except errors.TagError, err:
9432 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9433 self.cfg.Update(self.target, feedback_fn)
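# Editor's sketch (illustrative, not part of the original module): the pattern
# that LUAddTags above and LUDelTags below implement on top of the
# objects.TaggableObject interface (ValidateTag, AddTag, RemoveTag, GetTags).
# The helper name is made up; "target" stands for any cluster, node or
# instance configuration object.
def _ExampleTagRoundTrip(target, tags):
  """Validate, add and then remove a set of tags (illustrative)."""
  for tag in tags:
    objects.TaggableObject.ValidateTag(tag)
    target.AddTag(tag)
  assert frozenset(tags) <= target.GetTags()
  for tag in tags:
    target.RemoveTag(tag)
  return target.GetTags()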
9436 class LUDelTags(TagsLU):
9437 """Delete a list of tags from a given object.
9440 _OP_REQP = ["kind", "name", "tags"]
9443 def CheckPrereq(self):
9444 """Check prerequisites.
9446 This checks that we have the given tag.
9449 TagsLU.CheckPrereq(self)
9450 for tag in self.op.tags:
9451 objects.TaggableObject.ValidateTag(tag)
9452 del_tags = frozenset(self.op.tags)
9453 cur_tags = self.target.GetTags()
9454 if not del_tags <= cur_tags:
9455 diff_tags = del_tags - cur_tags
9456 diff_names = ["'%s'" % tag for tag in diff_tags]
9458 raise errors.OpPrereqError("Tag(s) %s not found" %
9459 (",".join(diff_names)), errors.ECODE_NOENT)
9461 def Exec(self, feedback_fn):
9462 """Remove the tag from the object.
9465 for tag in self.op.tags:
9466 self.target.RemoveTag(tag)
9467 self.cfg.Update(self.target, feedback_fn)
9470 class LUTestDelay(NoHooksLU):
9471 """Sleep for a specified amount of time.
9473 This LU sleeps on the master and/or nodes for a specified amount of
9474 time.
9477 _OP_REQP = ["duration", "on_master", "on_nodes"]
9480 def ExpandNames(self):
9481 """Expand names and set required locks.
9483 This expands the node list, if any.
9486 self.needed_locks = {}
9487 if self.op.on_nodes:
9488 # _GetWantedNodes can be used here, but is not always appropriate to use
9489 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9490 # more information.
9491 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9492 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9494 def CheckPrereq(self):
9495 """Check prerequisites.
9499 def Exec(self, feedback_fn):
9500 """Do the actual sleep.
9503 if self.op.on_master:
9504 if not utils.TestDelay(self.op.duration):
9505 raise errors.OpExecError("Error during master delay test")
9506 if self.op.on_nodes:
9507 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9508 for node, node_result in result.items():
9509 node_result.Raise("Failure during rpc call to node %s" % node)
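# Editor's note (illustrative, values made up): the parameters LUTestDelay
# expects, matching _OP_REQP and the code above.
_EXAMPLE_TEST_DELAY_PARAMS = {
  "duration": 5.0,   # seconds, passed to utils.TestDelay and the RPC call
  "on_master": True, # whether to sleep on the master node itself
  "on_nodes": [],    # optional list of node names to sleep on via RPC
  }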
9512 class IAllocator(object):
9513 """IAllocator framework.
9515 An IAllocator instance has the following sets of attributes:
9516 - cfg that is needed to query the cluster
9517 - input data (all members of the _KEYS class attribute are required)
9518 - four buffer attributes (in_data, in_text, out_data, out_text), that
9519 represent the input (to the external script) in data structure and text
9520 format, and the output from it, again in both formats
9521 - the result variables from the script (success, info, result) for
9522 easy usage
9524 """
9525 # pylint: disable-msg=R0902
9526 # lots of instance attributes
9527 _ALLO_KEYS = [
9528 "name", "mem_size", "disks", "disk_template",
9529 "os", "tags", "nics", "vcpus", "hypervisor",
9530 ]
9531 _RELO_KEYS = [
9532 "name", "relocate_from",
9533 ]
9534 _EVAC_KEYS = ["evac_nodes"]
9538 def __init__(self, cfg, rpc, mode, **kwargs):
9539 self.cfg = cfg
9540 self.rpc = rpc
9541 # init buffer variables
9542 self.in_text = self.out_text = self.in_data = self.out_data = None
9543 # init all input fields so that pylint is happy
9544 self.mode = mode
9545 self.mem_size = self.disks = self.disk_template = None
9546 self.os = self.tags = self.nics = self.vcpus = None
9547 self.hypervisor = None
9548 self.relocate_from = None
9549 self.name = None
9550 self.evac_nodes = None
9552 self.required_nodes = None
9553 # init result fields
9554 self.success = self.info = self.result = None
9555 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9556 keyset = self._ALLO_KEYS
9557 fn = self._AddNewInstance
9558 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9559 keyset = self._RELO_KEYS
9560 fn = self._AddRelocateInstance
9561 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9562 keyset = self._EVAC_KEYS
9563 fn = self._AddEvacuateNodes
9564 else:
9565 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9566 " IAllocator" % self.mode)
9567 for key in kwargs:
9568 if key not in keyset:
9569 raise errors.ProgrammerError("Invalid input parameter '%s' to"
9570 " IAllocator" % key)
9571 setattr(self, key, kwargs[key])
9573 for key in keyset:
9574 if key not in kwargs:
9575 raise errors.ProgrammerError("Missing input parameter '%s' to"
9576 " IAllocator" % key)
9577 self._BuildInputData(fn)
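# Editor's illustration (not part of the original class, values made up): the
# keyword arguments __init__ expects for two of the modes, mirroring
# _RELO_KEYS/_EVAC_KEYS above; LUTestAllocator.Exec at the end of this module
# builds IAllocator instances exactly this way.
_EXAMPLE_KWARGS_BY_MODE = {
  constants.IALLOCATOR_MODE_RELOC: {
    "name": "instance1.example.com",
    "relocate_from": ["node2.example.com"],
    },
  constants.IALLOCATOR_MODE_MEVAC: {
    "evac_nodes": ["node3.example.com"],
    },
  }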
9579 def _ComputeClusterData(self):
9580 """Compute the generic allocator input data.
9582 This is the data that is independent of the actual operation.
9584 """
9585 cfg = self.cfg
9586 cluster_info = cfg.GetClusterInfo()
9587 # cluster data
9588 data = {
9589 "version": constants.IALLOCATOR_VERSION,
9590 "cluster_name": cfg.GetClusterName(),
9591 "cluster_tags": list(cluster_info.GetTags()),
9592 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9593 # we don't have job IDs
9594 }
9595 iinfo = cfg.GetAllInstancesInfo().values()
9596 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9598 # node data
9599 node_results = {}
9600 node_list = cfg.GetNodeList()
9602 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9603 hypervisor_name = self.hypervisor
9604 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9605 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9606 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9607 hypervisor_name = cluster_info.enabled_hypervisors[0]
9609 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9610 hypervisor_name)
9611 node_iinfo = \
9612 self.rpc.call_all_instances_info(node_list,
9613 cluster_info.enabled_hypervisors)
9614 for nname, nresult in node_data.items():
9615 # first fill in static (config-based) values
9616 ninfo = cfg.GetNodeInfo(nname)
9617 pnr = {
9618 "tags": list(ninfo.GetTags()),
9619 "primary_ip": ninfo.primary_ip,
9620 "secondary_ip": ninfo.secondary_ip,
9621 "offline": ninfo.offline,
9622 "drained": ninfo.drained,
9623 "master_candidate": ninfo.master_candidate,
9626 if not (ninfo.offline or ninfo.drained):
9627 nresult.Raise("Can't get data for node %s" % nname)
9628 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9629 nname)
9630 remote_info = nresult.payload
9632 for attr in ['memory_total', 'memory_free', 'memory_dom0',
9633 'vg_size', 'vg_free', 'cpu_total']:
9634 if attr not in remote_info:
9635 raise errors.OpExecError("Node '%s' didn't return attribute"
9636 " '%s'" % (nname, attr))
9637 if not isinstance(remote_info[attr], int):
9638 raise errors.OpExecError("Node '%s' returned invalid value"
9639 " for '%s': %s" %
9640 (nname, attr, remote_info[attr]))
9641 # compute memory used by primary instances
9642 i_p_mem = i_p_up_mem = 0
9643 for iinfo, beinfo in i_list:
9644 if iinfo.primary_node == nname:
9645 i_p_mem += beinfo[constants.BE_MEMORY]
9646 if iinfo.name not in node_iinfo[nname].payload:
9647 i_used_mem = 0
9648 else:
9649 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9650 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9651 remote_info['memory_free'] -= max(0, i_mem_diff)
9653 if iinfo.admin_up:
9654 i_p_up_mem += beinfo[constants.BE_MEMORY]
9656 # compute memory used by instances
9657 pnr_dyn = {
9658 "total_memory": remote_info['memory_total'],
9659 "reserved_memory": remote_info['memory_dom0'],
9660 "free_memory": remote_info['memory_free'],
9661 "total_disk": remote_info['vg_size'],
9662 "free_disk": remote_info['vg_free'],
9663 "total_cpus": remote_info['cpu_total'],
9664 "i_pri_memory": i_p_mem,
9665 "i_pri_up_memory": i_p_up_mem,
9669 node_results[nname] = pnr
9670 data["nodes"] = node_results
9674 for iinfo, beinfo in i_list:
9675 nic_data = []
9676 for nic in iinfo.nics:
9677 filled_params = objects.FillDict(
9678 cluster_info.nicparams[constants.PP_DEFAULT],
9679 nic.nicparams)
9680 nic_dict = {"mac": nic.mac,
9681 "ip": nic.ip,
9682 "mode": filled_params[constants.NIC_MODE],
9683 "link": filled_params[constants.NIC_LINK],
9684 }
9685 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9686 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9687 nic_data.append(nic_dict)
9689 "tags": list(iinfo.GetTags()),
9690 "admin_up": iinfo.admin_up,
9691 "vcpus": beinfo[constants.BE_VCPUS],
9692 "memory": beinfo[constants.BE_MEMORY],
9694 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9696 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9697 "disk_template": iinfo.disk_template,
9698 "hypervisor": iinfo.hypervisor,
9700 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9702 instance_data[iinfo.name] = pir
9704 data["instances"] = instance_data
9708 def _AddNewInstance(self):
9709 """Add new instance data to allocator structure.
9711 This in combination with _ComputeClusterData will create the
9712 correct structure needed as input for the allocator.
9714 The checks for the completeness of the opcode must have already been
9715 done.
9717 """
9718 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9720 if self.disk_template in constants.DTS_NET_MIRROR:
9721 self.required_nodes = 2
9722 else:
9723 self.required_nodes = 1
9724 request = {
9725 "name": self.name,
9726 "disk_template": self.disk_template,
9727 "tags": self.tags,
9728 "os": self.os,
9729 "vcpus": self.vcpus,
9730 "memory": self.mem_size,
9731 "disks": self.disks,
9732 "disk_space_total": disk_space,
9733 "nics": self.nics,
9734 "required_nodes": self.required_nodes,
9735 }
9736 return request
9738 def _AddRelocateInstance(self):
9739 """Add relocate instance data to allocator structure.
9741 This in combination with _ComputeClusterData will create the
9742 correct structure needed as input for the allocator.
9744 The checks for the completeness of the opcode must have already been
9745 done.
9747 """
9748 instance = self.cfg.GetInstanceInfo(self.name)
9749 if instance is None:
9750 raise errors.ProgrammerError("Unknown instance '%s' passed to"
9751 " IAllocator" % self.name)
9753 if instance.disk_template not in constants.DTS_NET_MIRROR:
9754 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9755 errors.ECODE_INVAL)
9757 if len(instance.secondary_nodes) != 1:
9758 raise errors.OpPrereqError("Instance does not have exactly one"
9759 " secondary node", errors.ECODE_STATE)
9761 self.required_nodes = 1
9762 disk_sizes = [{'size': disk.size} for disk in instance.disks]
9763 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9765 request = {
9766 "name": self.name,
9767 "disk_space_total": disk_space,
9768 "required_nodes": self.required_nodes,
9769 "relocate_from": self.relocate_from,
9770 }
9771 return request
9773 def _AddEvacuateNodes(self):
9774 """Add evacuate nodes data to allocator structure.
9776 """
9777 request = {
9778 "evac_nodes": self.evac_nodes
9779 }
9780 return request
9782 def _BuildInputData(self, fn):
9783 """Build input data structures.
9786 self._ComputeClusterData()
9788 request = fn()
9789 request["type"] = self.mode
9790 self.in_data["request"] = request
9792 self.in_text = serializer.Dump(self.in_data)
9794 def Run(self, name, validate=True, call_fn=None):
9795 """Run an instance allocator and return the results.
9798 if call_fn is None:
9799 call_fn = self.rpc.call_iallocator_runner
9801 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9802 result.Raise("Failure while running the iallocator script")
9804 self.out_text = result.payload
9805 if validate:
9806 self._ValidateResult()
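# Editor's sketch (illustrative): once the input has been built, callers in
# this module drive the allocator roughly as follows ("hail" is only an
# example allocator name):
#
#   ial.Run("hail")
#   if not ial.success:
#     raise errors.OpPrereqError("Can't compute solution using iallocator"
#                                " 'hail': %s" % ial.info, errors.ECODE_NORES)
#   use(ial.result)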
9808 def _ValidateResult(self):
9809 """Process the allocator results.
9811 This will process and, if successful, save the result in
9812 self.out_data and the other parameters.
9814 """
9815 try:
9816 rdict = serializer.Load(self.out_text)
9817 except Exception, err:
9818 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9820 if not isinstance(rdict, dict):
9821 raise errors.OpExecError("Can't parse iallocator results: not a dict")
9823 # TODO: remove backwards compatibility in later versions
9824 if "nodes" in rdict and "result" not in rdict:
9825 rdict["result"] = rdict["nodes"]
9826 del rdict["nodes"]
9828 for key in "success", "info", "result":
9829 if key not in rdict:
9830 raise errors.OpExecError("Can't parse iallocator results:"
9831 " missing key '%s'" % key)
9832 setattr(self, key, rdict[key])
9834 if not isinstance(rdict["result"], list):
9835 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9836 " is not a list")
9837 self.out_data = rdict
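# Editor's illustration (values made up): a well-formed iallocator reply as
# accepted by _ValidateResult above; for an allocation request "result" holds
# the chosen node names.
_EXAMPLE_IALLOCATOR_REPLY = {
  "success": True,
  "info": "allocation successful",
  "result": ["node1.example.com", "node2.example.com"],
  }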
9840 class LUTestAllocator(NoHooksLU):
9841 """Run allocator tests.
9843 This LU runs the allocator tests
9846 _OP_REQP = ["direction", "mode", "name"]
9848 def CheckPrereq(self):
9849 """Check prerequisites.
9851 This checks the opcode parameters depending on the direction and mode of the test.
9854 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9855 for attr in ["name", "mem_size", "disks", "disk_template",
9856 "os", "tags", "nics", "vcpus"]:
9857 if not hasattr(self.op, attr):
9858 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
9859 attr, errors.ECODE_INVAL)
9860 iname = self.cfg.ExpandInstanceName(self.op.name)
9861 if iname is not None:
9862 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
9863 iname, errors.ECODE_EXISTS)
9864 if not isinstance(self.op.nics, list):
9865 raise errors.OpPrereqError("Invalid parameter 'nics'",
9866 errors.ECODE_INVAL)
9867 for row in self.op.nics:
9868 if (not isinstance(row, dict) or
9869 "mac" not in row or
9870 "ip" not in row or
9871 "bridge" not in row):
9872 raise errors.OpPrereqError("Invalid contents of the 'nics'"
9873 " parameter", errors.ECODE_INVAL)
9874 if not isinstance(self.op.disks, list):
9875 raise errors.OpPrereqError("Invalid parameter 'disks'",
9876 errors.ECODE_INVAL)
9877 for row in self.op.disks:
9878 if (not isinstance(row, dict) or
9879 "size" not in row or
9880 not isinstance(row["size"], int) or
9881 "mode" not in row or
9882 row["mode"] not in ['r', 'w']):
9883 raise errors.OpPrereqError("Invalid contents of the 'disks'"
9884 " parameter", errors.ECODE_INVAL)
9885 if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
9886 self.op.hypervisor = self.cfg.GetHypervisorType()
9887 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9888 if not hasattr(self.op, "name"):
9889 raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
9890 errors.ECODE_INVAL)
9891 fname = _ExpandInstanceName(self.cfg, self.op.name)
9892 self.op.name = fname
9893 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
9894 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9895 if not hasattr(self.op, "evac_nodes"):
9896 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
9897 " opcode input", errors.ECODE_INVAL)
9898 else:
9899 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
9900 self.op.mode, errors.ECODE_INVAL)
9902 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
9903 if not hasattr(self.op, "allocator") or self.op.allocator is None:
9904 raise errors.OpPrereqError("Missing allocator name",
9905 errors.ECODE_INVAL)
9906 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
9907 raise errors.OpPrereqError("Wrong allocator test '%s'" %
9908 self.op.direction, errors.ECODE_INVAL)
9910 def Exec(self, feedback_fn):
9911 """Run the allocator test.
9914 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9915 ial = IAllocator(self.cfg, self.rpc,
9916 mode=self.op.mode,
9917 name=self.op.name,
9918 mem_size=self.op.mem_size,
9919 disks=self.op.disks,
9920 disk_template=self.op.disk_template,
9921 os=self.op.os,
9922 tags=self.op.tags,
9923 nics=self.op.nics,
9924 vcpus=self.op.vcpus,
9925 hypervisor=self.op.hypervisor,
9926 )
9927 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9928 ial = IAllocator(self.cfg, self.rpc,
9929 mode=self.op.mode,
9930 name=self.op.name,
9931 relocate_from=list(self.relocate_from),
9932 )
9933 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9934 ial = IAllocator(self.cfg, self.rpc,
9935 mode=self.op.mode,
9936 evac_nodes=self.op.evac_nodes)
9937 else:
9938 raise errors.ProgrammerError("Unhandled mode %s in"
9939 " LUTestAllocator.Exec", self.op.mode)
9941 if self.op.direction == constants.IALLOCATOR_DIR_IN:
9942 result = ial.in_text
9943 else:
9944 ial.Run(self.op.allocator, validate=False)
9945 result = ial.out_text
9946 return result