4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
import logging
import re
import time

import OpenSSL

from ganeti import ssh
39 from ganeti import utils
40 from ganeti import errors
41 from ganeti import hypervisor
42 from ganeti import locking
43 from ganeti import constants
44 from ganeti import objects
45 from ganeti import serializer
46 from ganeti import ssconf
47 from ganeti import uidpool
48 from ganeti import compat
51 class LogicalUnit(object):
52 """Logical Unit base class.
54 Subclasses must follow these rules:
55 - implement ExpandNames
56 - implement CheckPrereq (except when tasklets are used)
57 - implement Exec (except when tasklets are used)
58 - implement BuildHooksEnv
59 - redefine HPATH and HTYPE
60 - optionally redefine their run requirements:
61 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
63 Note that all commands require root permissions.
65 @ivar dry_run_result: the value (if any) that will be returned to the caller
66 in dry-run mode (signalled by opcode dry_run parameter)
74 def __init__(self, processor, op, context, rpc):
75 """Constructor for LogicalUnit.
    This needs to be overridden in derived classes in order to check op
    validity.
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
86 # Dicts used to declare locking needs to mcpu
87 self.needed_locks = None
88 self.acquired_locks = {}
89 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
91 self.remove_locks = {}
92 # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
96 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
97 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
98 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
100 self.dry_run_result = None
101 # support for generic debug attribute
102 if (not hasattr(self.op, "debug_level") or
103 not isinstance(self.op.debug_level, int)):
104 self.op.debug_level = 0
    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)
115 self.CheckArguments()
118 """Returns the SshRunner object
122 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
125 ssh = property(fget=__GetSSH)
127 def CheckArguments(self):
128 """Check syntactic validity for the opcode arguments.
130 This method is for doing a simple syntactic check and ensure
131 validity of opcode parameters, without any cluster-related
132 checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)
139 The function is allowed to change the self.op attribute so that
140 later methods can no longer worry about missing parameters.
145 def ExpandNames(self):
146 """Expand names for this LU.
148 This method is called before starting to execute the opcode, and it should
149 update all the parameters of the opcode to their canonical form (e.g. a
150 short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.
153 LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:
157 - use an empty dict if you don't need any lock
158 - if you don't need any lock at a particular level omit that level
159 - don't put anything for the BGL level
160 - if you want all locks at a level use locking.ALL_SET as a value
162 If you need to share locks (rather than acquire them exclusively) at one
163 level you can modify self.share_locks, setting a true value (usually 1) for
164 that level. By default locks are not shared.
166 This function can also define a list of tasklets, which then will be
167 executed in order instead of the usual LU-level CheckPrereq and Exec
168 functions, if those are not defined by the LU.
    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None
185 # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
189 self.needed_locks = {} # Exclusive LUs don't need locks.
191 raise NotImplementedError
193 def DeclareLocks(self, level):
194 """Declare LU locking needs for a level
196 While most LUs can just declare their locking needs at ExpandNames time,
197 sometimes there's the need to calculate some locks after having acquired
198 the ones before. This function is called just before acquiring locks at a
199 particular level, but after acquiring the ones at lower levels, and permits
200 such calculations. It can be used to modify self.needed_locks, and by
201 default it does nothing.
203 This function is only called if you have something already set in
204 self.needed_locks for the level.
206 @param level: Locking level which is going to be locked
207 @type level: member of ganeti.locking.LEVELS
211 def CheckPrereq(self):
212 """Check prerequisites for this LU.
214 This method should check that the prerequisites for the execution
215 of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.
219 The method should raise errors.OpPrereqError in case something is
220 not fulfilled. Its return value is ignored.
222 This method should also update all the parameters of the opcode to
223 their canonical form if it hasn't been done by ExpandNames before.
226 if self.tasklets is not None:
227 for (idx, tl) in enumerate(self.tasklets):
228 logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError
234 def Exec(self, feedback_fn):
237 This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.
242 if self.tasklets is not None:
243 for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError
249 def BuildHooksEnv(self):
250 """Build hooks environment for this LU.
    This method should return a three-element tuple consisting of: a dict
253 containing the environment that will be used for running the
254 specific hook for this LU, a list of node names on which the hook
255 should run before the execution, and a list of node names on which
256 the hook should run after the execution.
    The keys of the dict must not be prefixed with 'GANETI_', as this will
259 be handled in the hooks runner. Also note additional keys will be
260 added by the hooks runner. If the LU doesn't define any
261 environment, an empty dict (and not None) should be returned.
    If no nodes are to be returned, an empty list (and not None) should be
    used.
    Note that if the HPATH for a LU class is None, this function will
    not be called.
269 raise NotImplementedError
271 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
272 """Notify the LU about the results of its hooks.
274 This method is called every time a hooks phase is executed, and notifies
275 the Logical Unit about the hooks' result. The LU can then use it to alter
276 its result based on the hooks. By default the method does nothing and the
277 previous result is passed back unchanged but any LU can define it if it
278 wants to use the local cluster hook-scripts somehow.
280 @param phase: one of L{constants.HOOKS_PHASE_POST} or
281 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
282 @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
284 @param lu_result: the previous Exec result this LU had, or None
286 @return: the new Exec result, based on the previous result
    # The API must be kept, thus we ignore the "unused argument" and
    # "could be a function" warnings
292 # pylint: disable-msg=W0613,R0201
295 def _ExpandAndLockInstance(self):
296 """Helper function to expand and lock an instance.
298 Many LUs that work on an instance take its name in self.op.instance_name
299 and need to expand it and then declare the expanded name for locking. This
300 function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.
305 if self.needed_locks is None:
306 self.needed_locks = {}
308 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
309 "_ExpandAndLockInstance called with instance-level locks set"
310 self.op.instance_name = _ExpandInstanceName(self.cfg,
311 self.op.instance_name)
312 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
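  # Illustrative sketch (not part of the original module): a typical caller
  # just does, from its own ExpandNames,
  #
  #   self._ExpandAndLockInstance()
  #   # self.op.instance_name now holds the fully-expanded name, e.g.
  #   # 'instance1.example.tld', and the instance-level lock is declared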
314 def _LockInstancesNodes(self, primary_only=False):
315 """Helper function to declare instances' nodes for locking.
317 This function should be called after locking one or more instances to lock
318 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
319 with all primary or secondary nodes for instances already locked and
320 present in self.needed_locks[locking.LEVEL_INSTANCE].
322 It should be called from DeclareLocks, and for safety only works if
323 self.recalculate_locks[locking.LEVEL_NODE] is set.
325 In the future it may grow parameters to just lock some instance's nodes, or
326 to just lock primaries or secondary nodes, if needed.
    It should be called in DeclareLocks in a way similar to::
330 if level == locking.LEVEL_NODE:
331 self._LockInstancesNodes()
333 @type primary_only: boolean
334 @param primary_only: only lock primary nodes of locked instances
337 assert locking.LEVEL_NODE in self.recalculate_locks, \
338 "_LockInstancesNodes helper function called with no nodes to recalculate"
    # TODO: check if we really have been called with the instance locks held
342 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
343 # future we might want to have different behaviors depending on the value
344 # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
347 instance = self.context.cfg.GetInstanceInfo(instance_name)
348 wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)
352 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
353 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
354 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
355 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
357 del self.recalculate_locks[locking.LEVEL_NODE]
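  # Illustrative sketch (not part of the original module): the usual pairing
  # of _ExpandAndLockInstance and _LockInstancesNodes in a concrete LU; the
  # method bodies below are hypothetical.
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     # node locks are computed later, once the instance lock is held
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes(primary_only=True)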
360 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
361 """Simple LU which runs no hooks.
363 This LU is intended as a parent for other LogicalUnits which will
364 run no hooks, in order to reduce duplicate code.
370 def BuildHooksEnv(self):
371 """Empty BuildHooksEnv for NoHooksLu.
373 This just raises an error.
376 assert False, "BuildHooksEnv called for NoHooksLUs"
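# Illustrative sketch (not part of the original module): a minimal LU
# following the rules listed in the LogicalUnit docstring above. The class
# name, hook path and behaviour are hypothetical.
#
#   class LUExampleOperation(LogicalUnit):
#     HPATH = "example-operation"
#     HTYPE = constants.HTYPE_INSTANCE
#     _OP_REQP = ["instance_name"]
#
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#
#     def CheckPrereq(self):
#       self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
#
#     def BuildHooksEnv(self):
#       env = _BuildInstanceHookEnvByObject(self, self.instance)
#       nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
#       return env, nl, nl
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Doing nothing to %s" % self.instance.name)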
380 """Tasklet base class.
382 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
383 they can mix legacy code with tasklets. Locking needs to be done in the LU,
384 tasklets know nothing about locks.
386 Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec
391 def __init__(self, lu):
398 def CheckPrereq(self):
399 """Check prerequisites for this tasklets.
401 This method should check whether the prerequisites for the execution of
402 this tasklet are fulfilled. It can do internode communication, but it
403 should be idempotent - no cluster or system changes are allowed.
405 The method should raise errors.OpPrereqError in case something is not
406 fulfilled. Its return value is ignored.
408 This method should also update all parameters to their canonical form if it
409 hasn't been done before.
412 raise NotImplementedError
414 def Exec(self, feedback_fn):
415 """Execute the tasklet.
417 This method should implement the actual work. It should raise
418 errors.OpExecError for failures that are somewhat dealt with in code, or
422 raise NotImplementedError
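# Illustrative sketch (not part of the original module): a trivial tasklet
# and the way an LU would use it instead of its own CheckPrereq/Exec; names
# and the attributes assumed on the base class are hypothetical.
#
#   class _ExampleTasklet(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       # assumes the base constructor keeps a reference to the LU's config
#       self.instance = self.cfg.GetInstanceInfo(self.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Touching %s" % self.instance.name)
#
#   # ...and in the owning LU's ExpandNames:
#   #   self.tasklets = [_ExampleTasklet(self, name) for name in names]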
425 def _GetWantedNodes(lu, nodes):
426 """Returns list of checked and expanded node names.
428 @type lu: L{LogicalUnit}
429 @param lu: the logical unit on whose behalf we execute
431 @param nodes: list of node names or None for all nodes
433 @return: the list of nodes, sorted
434 @raise errors.ProgrammerError: if the nodes parameter is wrong type
437 if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
                                 " non-empty list of nodes whose name is to be"
                                 " expanded.")
445 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
446 return utils.NiceSort(wanted)
449 def _GetWantedInstances(lu, instances):
450 """Returns list of checked and expanded instance names.
452 @type lu: L{LogicalUnit}
453 @param lu: the logical unit on whose behalf we execute
454 @type instances: list
455 @param instances: list of instance names or None for all instances
457 @return: the list of instances, sorted
458 @raise errors.OpPrereqError: if the instances parameter is wrong type
459 @raise errors.OpPrereqError: if any of the passed instances is not found
462 if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())

  return wanted
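# Illustrative sketch (not part of the original code): both helpers are
# typically used from an LU's ExpandNames or CheckPrereq, e.g.
#
#   self.wanted_nodes = _GetWantedNodes(self, self.op.nodes)
#   # -> e.g. ["node1.example.tld", "node2.example.tld"], nicely sorted
#   self.wanted_instances = _GetWantedInstances(self, self.op.instances)
#   # -> all instances when self.op.instances is empty or None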
473 def _CheckOutputFields(static, dynamic, selected):
474 """Checks whether all selected fields are valid.
476 @type static: L{utils.FieldSet}
477 @param static: static fields set
478 @type dynamic: L{utils.FieldSet}
479 @param dynamic: dynamic fields set
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
492 def _CheckBooleanOpField(op, name):
493 """Validates boolean opcode parameters.
495 This will ensure that an opcode parameter is either a boolean value,
496 or None (but that it always exists).
499 val = getattr(op, name, None)
500 if not (val is None or isinstance(val, bool)):
501 raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
502 (name, str(val)), errors.ECODE_INVAL)
503 setattr(op, name, val)
506 def _CheckGlobalHvParams(params):
507 """Validates that given hypervisor params are not global ones.
  This will ensure that instances don't get customised versions of
  global parameters.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
516 " be customized at instance level, please modify them at"
517 " cluster level: %s" % utils.CommaJoin(used_globals))
518 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
521 def _CheckNodeOnline(lu, node):
522 """Ensure that a given node is online.
524 @param lu: the LU on behalf of which we make the check
525 @param node: the node to check
526 @raise errors.OpPrereqError: if the node is offline
529 if lu.cfg.GetNodeInfo(node).offline:
530 raise errors.OpPrereqError("Can't use offline node %s" % node,
534 def _CheckNodeNotDrained(lu, node):
535 """Ensure that a given node is not drained.
537 @param lu: the LU on behalf of which we make the check
538 @param node: the node to check
539 @raise errors.OpPrereqError: if the node is drained
542 if lu.cfg.GetNodeInfo(node).drained:
543 raise errors.OpPrereqError("Can't use drained node %s" % node,
547 def _CheckNodeHasOS(lu, node, os_name, force_variant):
548 """Ensure that a node supports a given OS.
550 @param lu: the LU on behalf of which we make the check
551 @param node: the node to check
552 @param os_name: the OS to query about
553 @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS
557 result = lu.rpc.call_os_get(node, os_name)
558 result.Raise("OS '%s' not in supported OS list for node %s" %
560 prereq=True, ecode=errors.ECODE_INVAL)
561 if not force_variant:
562 _CheckOSVariant(result.payload, os_name)
565 def _RequireFileStorage():
566 """Checks that file storage is enabled.
568 @raise errors.OpPrereqError: when file storage is disabled
571 if not constants.ENABLE_FILE_STORAGE:
572 raise errors.OpPrereqError("File storage disabled at configure time",
576 def _CheckDiskTemplate(template):
577 """Ensure a given disk template is valid.
580 if template not in constants.DISK_TEMPLATES:
581 msg = ("Invalid disk template name '%s', valid templates are: %s" %
582 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
583 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
584 if template == constants.DT_FILE:
585 _RequireFileStorage()
588 def _CheckStorageType(storage_type):
589 """Ensure a given storage type is valid.
592 if storage_type not in constants.VALID_STORAGE_TYPES:
593 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
595 if storage_type == constants.ST_FILE:
596 _RequireFileStorage()
600 def _CheckInstanceDown(lu, instance, reason):
601 """Ensure that an instance is not running."""
602 if instance.admin_up:
603 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
604 (instance.name, reason), errors.ECODE_STATE)
606 pnode = instance.primary_node
607 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
608 ins_l.Raise("Can't contact node %s for instance information" % pnode,
609 prereq=True, ecode=errors.ECODE_ENVIRON)
611 if instance.name in ins_l.payload:
612 raise errors.OpPrereqError("Instance %s is running, %s" %
613 (instance.name, reason), errors.ECODE_STATE)
616 def _ExpandItemName(fn, name, kind):
617 """Expand an item name.
619 @param fn: the function to use for expansion
620 @param name: requested item name
621 @param kind: text description ('Node' or 'Instance')
622 @return: the resolved (full) name
623 @raise errors.OpPrereqError: if the item is not found
627 if full_name is None:
628 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
633 def _ExpandNodeName(cfg, name):
634 """Wrapper over L{_ExpandItemName} for nodes."""
635 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
638 def _ExpandInstanceName(cfg, name):
639 """Wrapper over L{_ExpandItemName} for instance."""
640 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
643 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
644 memory, vcpus, nics, disk_template, disks,
645 bep, hvp, hypervisor_name):
646 """Builds instance related env variables for hooks
648 This builds the hook environment from individual variables.
651 @param name: the name of the instance
652 @type primary_node: string
653 @param primary_node: the name of the instance's primary node
654 @type secondary_nodes: list
655 @param secondary_nodes: list of secondary nodes as strings
656 @type os_type: string
657 @param os_type: the name of the instance's OS
658 @type status: boolean
659 @param status: the should_run status of the instance
661 @param memory: the memory size of the instance
663 @param vcpus: the count of VCPUs the instance has
665 @param nics: list of tuples (ip, mac, mode, link) representing
666 the NICs the instance has
667 @type disk_template: string
668 @param disk_template: the disk template of the instance
670 @param disks: the list of (size, mode) pairs
672 @param bep: the backend parameters for the instance
674 @param hvp: the hypervisor parameters for the instance
675 @type hypervisor_name: string
676 @param hypervisor_name: the hypervisor for the instance
678 @return: the hook environment for this instance
687 "INSTANCE_NAME": name,
688 "INSTANCE_PRIMARY": primary_node,
689 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
690 "INSTANCE_OS_TYPE": os_type,
691 "INSTANCE_STATUS": str_status,
692 "INSTANCE_MEMORY": memory,
693 "INSTANCE_VCPUS": vcpus,
694 "INSTANCE_DISK_TEMPLATE": disk_template,
695 "INSTANCE_HYPERVISOR": hypervisor_name,
699 nic_count = len(nics)
700 for idx, (ip, mac, mode, link) in enumerate(nics):
703 env["INSTANCE_NIC%d_IP" % idx] = ip
704 env["INSTANCE_NIC%d_MAC" % idx] = mac
705 env["INSTANCE_NIC%d_MODE" % idx] = mode
706 env["INSTANCE_NIC%d_LINK" % idx] = link
707 if mode == constants.NIC_MODE_BRIDGED:
708 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
712 env["INSTANCE_NIC_COUNT"] = nic_count
715 disk_count = len(disks)
716 for idx, (size, mode) in enumerate(disks):
717 env["INSTANCE_DISK%d_SIZE" % idx] = size
718 env["INSTANCE_DISK%d_MODE" % idx] = mode
722 env["INSTANCE_DISK_COUNT"] = disk_count
724 for source, kind in [(bep, "BE"), (hvp, "HV")]:
725 for key, value in source.items():
726 env["INSTANCE_%s_%s" % (kind, key)] = value
731 def _NICListToTuple(lu, nics):
732 """Build a list of nic information tuples.
734 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
735 value in LUQueryInstanceData.
737 @type lu: L{LogicalUnit}
738 @param lu: the logical unit on whose behalf we execute
739 @type nics: list of L{objects.NIC}
740 @param nics: list of nics to convert to hooks tuples
744 c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
748 filled_params = objects.FillDict(c_nicparams, nic.nicparams)
749 mode = filled_params[constants.NIC_MODE]
750 link = filled_params[constants.NIC_LINK]
751 hooks_nics.append((ip, mac, mode, link))
755 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
756 """Builds instance related env variables for hooks from an object.
758 @type lu: L{LogicalUnit}
759 @param lu: the logical unit on whose behalf we execute
760 @type instance: L{objects.Instance}
761 @param instance: the instance for which we should build the
764 @param override: dictionary with key/values that will override
767 @return: the hook environment dictionary
770 cluster = lu.cfg.GetClusterInfo()
771 bep = cluster.FillBE(instance)
772 hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
775 'primary_node': instance.primary_node,
776 'secondary_nodes': instance.secondary_nodes,
777 'os_type': instance.os,
778 'status': instance.admin_up,
779 'memory': bep[constants.BE_MEMORY],
780 'vcpus': bep[constants.BE_VCPUS],
781 'nics': _NICListToTuple(lu, instance.nics),
782 'disk_template': instance.disk_template,
783 'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
790 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
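# Illustrative sketch (not part of the original code): a typical BuildHooksEnv
# of an instance LU builds on this helper and adds its own keys, e.g.
#
#   def BuildHooksEnv(self):
#     env = _BuildInstanceHookEnvByObject(self, self.instance,
#                                         override={'status': False})
#     env["EXAMPLE_EXTRA_KEY"] = "value"   # hypothetical LU-specific addition
#     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
#     return env, nl, nl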
793 def _AdjustCandidatePool(lu, exceptions):
794 """Adjust the candidate pool after node operations.
797 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
799 lu.LogInfo("Promoted nodes to master candidate role: %s",
800 utils.CommaJoin(node.name for node in mod_list))
801 for name in mod_list:
802 lu.context.ReaddNode(name)
803 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
805 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
809 def _DecideSelfPromotion(lu, exceptions=None):
810 """Decide whether I should promote myself as a master candidate.
813 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
814 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
816 mc_should = min(mc_should + 1, cp_size)
817 return mc_now < mc_should
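# Worked example (illustrative, not from the original code): with
# candidate_pool_size = 10 and GetMasterCandidateStats() reporting
# mc_now = 3, mc_should = 4, the new node bumps mc_should to
# min(4 + 1, 10) = 5; since mc_now (3) < 5, the function returns True and
# the node should promote itself to master candidate.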
820 def _CheckNicsBridgesExist(lu, target_nics, target_node,
821 profile=constants.PP_DEFAULT):
822 """Check that the brigdes needed by a list of nics exist.
825 c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
826 paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
827 for nic in target_nics]
828 brlist = [params[constants.NIC_LINK] for params in paramslist
829 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
831 result = lu.rpc.call_bridges_exist(target_node, brlist)
832 result.Raise("Error checking bridges on destination node '%s'" %
833 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
836 def _CheckInstanceBridgesExist(lu, instance, node=None):
837 """Check that the brigdes needed by an instance exist.
841 node = instance.primary_node
842 _CheckNicsBridgesExist(lu, instance.nics, node)
845 def _CheckOSVariant(os_obj, name):
846 """Check whether an OS name conforms to the os variants specification.
848 @type os_obj: L{objects.OS}
849 @param os_obj: OS object to check
851 @param name: OS name passed by the user, to check for validity
854 if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)
862 if variant not in os_obj.supported_variants:
863 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
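# Illustrative example (not part of the original code): for an OS object
# with supported_variants = ["default", "testing"], the name "myos+testing"
# passes (variant "testing"), a plain "myos" raises "OS name must include a
# variant", and "myos+unknown" raises "Unsupported OS variant".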
866 def _GetNodeInstancesInner(cfg, fn):
867 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
870 def _GetNodeInstances(cfg, node_name):
871 """Returns a list of all primary and secondary instances on a node.
875 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
878 def _GetNodePrimaryInstances(cfg, node_name):
879 """Returns primary instances on a node.
882 return _GetNodeInstancesInner(cfg,
883 lambda inst: node_name == inst.primary_node)
886 def _GetNodeSecondaryInstances(cfg, node_name):
887 """Returns secondary instances on a node.
890 return _GetNodeInstancesInner(cfg,
891 lambda inst: node_name in inst.secondary_nodes)
894 def _GetStorageTypeArgs(cfg, storage_type):
895 """Returns the arguments for a storage type.
898 # Special case for file storage
899 if storage_type == constants.ST_FILE:
900 # storage.FileStorage wants a list of storage directories
901 return [[cfg.GetFileStorageDir()]]
906 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
909 for dev in instance.disks:
910 cfg.SetDiskID(dev, node_name)
912 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
913 result.Raise("Failed to get disk status from node %s" % node_name,
914 prereq=prereq, ecode=errors.ECODE_ENVIRON)
916 for idx, bdev_status in enumerate(result.payload):
917 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
923 def _FormatTimestamp(secs):
924 """Formats a Unix timestamp with the local timezone.
  return time.strftime("%F %T %Z", time.localtime(secs))
930 class LUPostInitCluster(LogicalUnit):
931 """Logical unit for running hooks after cluster initialization.
934 HPATH = "cluster-init"
935 HTYPE = constants.HTYPE_CLUSTER
938 def BuildHooksEnv(self):
942 env = {"OP_TARGET": self.cfg.GetClusterName()}
943 mn = self.cfg.GetMasterNode()
946 def CheckPrereq(self):
947 """No prerequisites to check.
952 def Exec(self, feedback_fn):
959 class LUDestroyCluster(LogicalUnit):
960 """Logical unit for destroying the cluster.
963 HPATH = "cluster-destroy"
964 HTYPE = constants.HTYPE_CLUSTER
967 def BuildHooksEnv(self):
971 env = {"OP_TARGET": self.cfg.GetClusterName()}
974 def CheckPrereq(self):
975 """Check prerequisites.
977 This checks whether the cluster is empty.
979 Any errors are signaled by raising errors.OpPrereqError.
982 master = self.cfg.GetMasterNode()
984 nodelist = self.cfg.GetNodeList()
985 if len(nodelist) != 1 or nodelist[0] != master:
986 raise errors.OpPrereqError("There are still %d node(s) in"
987 " this cluster." % (len(nodelist) - 1),
989 instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
992 " this cluster." % len(instancelist),
995 def Exec(self, feedback_fn):
996 """Destroys the cluster.
999 master = self.cfg.GetMasterNode()
1000 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1002 # Run post hooks on master node before it's removed
1003 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)
1010 result = self.rpc.call_node_stop_master(master, False)
1011 result.Raise("Could not disable the master role")
1013 if modify_ssh_setup:
1014 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1015 utils.CreateBackup(priv_key)
1016 utils.CreateBackup(pub_key)
1021 def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
1022 warn_days=constants.SSL_CERT_EXPIRATION_WARN,
1023 error_days=constants.SSL_CERT_EXPIRATION_ERROR):
1024 """Verifies certificate details for LUVerifyCluster.
  """
  if expired:
    msg = "Certificate %s is expired" % filename
1030 if not_before is not None and not_after is not None:
1031 msg += (" (valid from %s to %s)" %
1032 (_FormatTimestamp(not_before),
1033 _FormatTimestamp(not_after)))
1034 elif not_before is not None:
1035 msg += " (valid from %s)" % _FormatTimestamp(not_before)
1036 elif not_after is not None:
1037 msg += " (valid until %s)" % _FormatTimestamp(not_after)
1039 return (LUVerifyCluster.ETYPE_ERROR, msg)
1041 elif not_before is not None and not_before > now:
1042 return (LUVerifyCluster.ETYPE_WARNING,
1043 "Certificate %s not yet valid (valid from %s)" %
1044 (filename, _FormatTimestamp(not_before)))
1046 elif not_after is not None:
1047 remaining_days = int((not_after - now) / (24 * 3600))
1049 msg = ("Certificate %s expires in %d days" % (filename, remaining_days))
1051 if remaining_days <= error_days:
1052 return (LUVerifyCluster.ETYPE_ERROR, msg)
1054 if remaining_days <= warn_days:
1055 return (LUVerifyCluster.ETYPE_WARNING, msg)
1060 def _VerifyCertificate(filename):
1061 """Verifies a certificate for LUVerifyCluster.
1063 @type filename: string
1064 @param filename: Path to PEM file
1068 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1069 utils.ReadFile(filename))
1070 except Exception, err: # pylint: disable-msg=W0703
1071 return (LUVerifyCluster.ETYPE_ERROR,
1072 "Failed to load X509 certificate %s: %s" % (filename, err))
1074 # Depending on the pyOpenSSL version, this can just return (None, None)
1075 (not_before, not_after) = utils.GetX509CertValidity(cert)
1077 return _VerifyCertificateInner(filename, cert.has_expired(),
1078 not_before, not_after, time.time())
1081 class LUVerifyCluster(LogicalUnit):
1082 """Verifies the cluster status.
1085 HPATH = "cluster-verify"
1086 HTYPE = constants.HTYPE_CLUSTER
1087 _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1090 TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"
1094 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1095 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1096 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1097 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1098 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1099 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1101 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1102 ENODEDRBD = (TNODE, "ENODEDRBD")
1103 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1104 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1105 ENODEHV = (TNODE, "ENODEHV")
1106 ENODELVM = (TNODE, "ENODELVM")
1107 ENODEN1 = (TNODE, "ENODEN1")
1108 ENODENET = (TNODE, "ENODENET")
1109 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1110 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1111 ENODERPC = (TNODE, "ENODERPC")
1112 ENODESSH = (TNODE, "ENODESSH")
1113 ENODEVERSION = (TNODE, "ENODEVERSION")
1114 ENODESETUP = (TNODE, "ENODESETUP")
1115 ENODETIME = (TNODE, "ENODETIME")
1117 ETYPE_FIELD = "code"
1118 ETYPE_ERROR = "ERROR"
1119 ETYPE_WARNING = "WARNING"
1121 class NodeImage(object):
1122 """A class representing the logical and physical status of a node.
1124 @ivar volumes: a structure as returned from
1125 L{ganeti.backend.GetVolumeList} (runtime)
1126 @ivar instances: a list of running instances (runtime)
1127 @ivar pinst: list of configured primary instances (config)
1128 @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dict of {secondary-node: list of instances} of all peers
1130 of this node (config)
1131 @ivar mfree: free memory, as reported by hypervisor (runtime)
1132 @ivar dfree: free disk, as reported by the node (runtime)
1133 @ivar offline: the offline status (config)
1134 @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call failed (overall,
1136 not whether the individual keys were correct) (runtime)
1137 @type lvm_fail: boolean
1138 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1139 @type hyp_fail: boolean
1140 @ivar hyp_fail: whether the RPC call didn't return the instance list
1141 @type ghost: boolean
1142 @ivar ghost: whether this is a known node or not (config)
1145 def __init__(self, offline=False):
1153 self.offline = offline
1154 self.rpc_fail = False
1155 self.lvm_fail = False
1156 self.hyp_fail = False
1159 def ExpandNames(self):
1160 self.needed_locks = {
1161 locking.LEVEL_NODE: locking.ALL_SET,
1162 locking.LEVEL_INSTANCE: locking.ALL_SET,
1164 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1166 def _Error(self, ecode, item, msg, *args, **kwargs):
1167 """Format an error message.
1169 Based on the opcode's error_codes parameter, either format a
1170 parseable error code, or a simpler error string.
1172 This must be called only from Exec and functions called from Exec.
1175 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1177 # first complete the msg
1180 # then format the whole message
1181 if self.op.error_codes:
1182 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1188 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1189 # and finally report it via the feedback_fn
1190 self._feedback_fn(" - %s" % msg)
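  # Illustrative example (not part of the original code): with
  # op.error_codes set, a failed LVM check on node1 would be reported
  # roughly as
  #   - ERROR:ENODELVM:node:node1:unable to check volume groups
  # while without error_codes the simpler form is used:
  #   - ERROR: node node1: unable to check volume groups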
1192 def _ErrorIf(self, cond, *args, **kwargs):
1193 """Log an error message if the passed condition is True.
1196 cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
1199 # do not mark the operation as failed for WARN cases only
1200 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1201 self.bad = self.bad or cond
1203 def _VerifyNode(self, ninfo, nresult):
1204 """Run multiple tests against a node.
1208 - compares ganeti version
1209 - checks vg existence and size > 20G
1210 - checks config file checksum
1211 - checks ssh to other nodes
1213 @type ninfo: L{objects.Node}
1214 @param ninfo: the node to check
1215 @param nresult: the results from the node
1217 @return: whether overall this call was successful (and we can expect
        reasonable values in the response)
1222 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1224 # main result, nresult should be a non-empty dict
1225 test = not nresult or not isinstance(nresult, dict)
1226 _ErrorIf(test, self.ENODERPC, node,
1227 "unable to verify node: no data returned")
1231 # compares ganeti version
1232 local_version = constants.PROTOCOL_VERSION
1233 remote_version = nresult.get("version", None)
1234 test = not (remote_version and
1235 isinstance(remote_version, (list, tuple)) and
1236 len(remote_version) == 2)
1237 _ErrorIf(test, self.ENODERPC, node,
1238 "connection to node returned invalid data")
1242 test = local_version != remote_version[0]
1243 _ErrorIf(test, self.ENODEVERSION, node,
1244 "incompatible protocol versions: master %s,"
1245 " node %s", local_version, remote_version[0])
1249 # node seems compatible, we can actually try to look into its results
1251 # full package version
1252 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1253 self.ENODEVERSION, node,
1254 "software version mismatch: master %s, node %s",
1255 constants.RELEASE_VERSION, remote_version[1],
1256 code=self.ETYPE_WARNING)
1258 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1259 if isinstance(hyp_result, dict):
1260 for hv_name, hv_result in hyp_result.iteritems():
1261 test = hv_result is not None
1262 _ErrorIf(test, self.ENODEHV, node,
1263 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1266 test = nresult.get(constants.NV_NODESETUP,
1267 ["Missing NODESETUP results"])
1268 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1273 def _VerifyNodeTime(self, ninfo, nresult,
1274 nvinfo_starttime, nvinfo_endtime):
1275 """Check the node time.
1277 @type ninfo: L{objects.Node}
1278 @param ninfo: the node to check
1279 @param nresult: the remote results for the node
1280 @param nvinfo_starttime: the start time of the RPC call
1281 @param nvinfo_endtime: the end time of the RPC call
1285 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1287 ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
1290 except (ValueError, TypeError):
1291 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1294 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1295 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1296 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None
1301 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1302 "Node time diverges by at least %s from master node time",
1305 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1306 """Check the node time.
1308 @type ninfo: L{objects.Node}
1309 @param ninfo: the node to check
1310 @param nresult: the remote results for the node
1311 @param vg_name: the configured VG name
1318 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1320 # checks vg existence and size > 20G
1321 vglist = nresult.get(constants.NV_VGLIST, None)
    test = vglist is None
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1325 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1326 constants.MIN_VG_SIZE)
1327 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1330 pvlist = nresult.get(constants.NV_PVLIST, None)
1331 test = pvlist is None
1332 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1334 # check that ':' is not present in PV names, since it's a
1335 # special character for lvcreate (denotes the range of PEs to
1337 for _, pvname, owner_vg in pvlist:
1338 test = ":" in pvname
1339 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1340 " '%s' of VG '%s'", pvname, owner_vg)
1342 def _VerifyNodeNetwork(self, ninfo, nresult):
1343 """Check the node time.
1345 @type ninfo: L{objects.Node}
1346 @param ninfo: the node to check
1347 @param nresult: the remote results for the node
1351 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1353 test = constants.NV_NODELIST not in nresult
1354 _ErrorIf(test, self.ENODESSH, node,
1355 "node hasn't returned node ssh connectivity data")
1357 if nresult[constants.NV_NODELIST]:
1358 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1359 _ErrorIf(True, self.ENODESSH, node,
1360 "ssh communication with node '%s': %s", a_node, a_msg)
1362 test = constants.NV_NODENETTEST not in nresult
1363 _ErrorIf(test, self.ENODENET, node,
1364 "node hasn't returned node tcp connectivity data")
1366 if nresult[constants.NV_NODENETTEST]:
1367 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
1370 "tcp communication with node '%s': %s",
1371 anode, nresult[constants.NV_NODENETTEST][anode])
1373 def _VerifyInstance(self, instance, instanceconfig, node_image):
1374 """Verify an instance.
1376 This function checks to see if the required block devices are
1377 available on the instance's node.
1380 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1381 node_current = instanceconfig.primary_node
1383 node_vol_should = {}
1384 instanceconfig.MapLVsByNode(node_vol_should)
1386 for node in node_vol_should:
1387 n_img = node_image[node]
1388 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1389 # ignore missing volumes on offline or broken nodes
1391 for volume in node_vol_should[node]:
1392 test = volume not in n_img.volumes
1393 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1394 "volume %s missing on node %s", volume, node)
1396 if instanceconfig.admin_up:
1397 pri_img = node_image[node_current]
1398 test = instance not in pri_img.instances and not pri_img.offline
1399 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1400 "instance not running on its primary node %s",
1403 for node, n_img in node_image.items():
1404 if (not node == node_current):
1405 test = instance in n_img.instances
1406 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1407 "instance should not run on node %s", node)
1409 def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1410 """Verify if there are any unknown volumes in the cluster.
1412 The .os, .swap and backup volumes are ignored. All other volumes are
1413 reported as unknown.
1416 for node, n_img in node_image.items():
1417 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1418 # skip non-healthy nodes
1420 for volume in n_img.volumes:
1421 test = (node not in node_vol_should or
1422 volume not in node_vol_should[node])
1423 self._ErrorIf(test, self.ENODEORPHANLV, node,
1424 "volume %s is unknown", volume)
1426 def _VerifyOrphanInstances(self, instancelist, node_image):
1427 """Verify the list of running instances.
1429 This checks what instances are running but unknown to the cluster.
1432 for node, n_img in node_image.items():
1433 for o_inst in n_img.instances:
1434 test = o_inst not in instancelist
1435 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1436 "instance %s on node %s should not exist", o_inst, node)
1438 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1439 """Verify N+1 Memory Resilience.
1441 Check that if one single node dies we can still start all the
1442 instances it was primary for.
1445 for node, n_img in node_image.items():
1446 # This code checks that every node which is now listed as
1447 # secondary has enough memory to host all instances it is
1448 # supposed to should a single other node in the cluster fail.
1449 # FIXME: not ready for failover to an arbitrary node
1450 # FIXME: does not support file-backed instances
1451 # WARNING: we currently take into account down instances as well
1452 # as up ones, considering that even if they're down someone
1453 # might want to start them even in the event of a node failure.
1454 for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
1457 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1458 if bep[constants.BE_AUTO_BALANCE]:
1459 needed_mem += bep[constants.BE_MEMORY]
1460 test = n_img.mfree < needed_mem
1461 self._ErrorIf(test, self.ENODEN1, node,
1462 "not enough memory on to accommodate"
1463 " failovers should peer node %s fail", prinode)
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
1467 """Verifies and computes the node required file checksums.
1469 @type ninfo: L{objects.Node}
1470 @param ninfo: the node to check
1471 @param nresult: the remote results for the node
1472 @param file_list: required list of files
1473 @param local_cksum: dictionary of local files and their checksums
1474 @param master_files: list of files that only masters should have
1478 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1480 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1481 test = not isinstance(remote_cksum, dict)
1482 _ErrorIf(test, self.ENODEFILECHECK, node,
1483 "node hasn't returned file checksum data")
1487 for file_name in file_list:
1488 node_is_mc = ninfo.master_candidate
1489 must_have = (file_name not in master_files) or node_is_mc
1491 test1 = file_name not in remote_cksum
1493 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1495 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1496 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1497 "file '%s' missing", file_name)
1498 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1499 "file '%s' has wrong checksum", file_name)
1500 # not candidate and this is not a must-have file
1501 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1502 "file '%s' should not exist on non master"
1503 " candidates (and the file is outdated)", file_name)
1504 # all good, except non-master/non-must have combination
1505 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1506 "file '%s' should not exist"
1507 " on non master candidates", file_name)
1509 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1510 """Verifies and the node DRBD status.
1512 @type ninfo: L{objects.Node}
1513 @param ninfo: the node to check
1514 @param nresult: the remote results for the node
1515 @param instanceinfo: the dict of instances
1516 @param drbd_map: the DRBD map as returned by
1517 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1521 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1523 # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
1526 test = instance not in instanceinfo
1527 _ErrorIf(test, self.ECLUSTERCFG, None,
1528 "ghost instance '%s' in temporary DRBD map", instance)
1529 # ghost instance should not be running, but otherwise we
1530 # don't give double warnings (both ghost instance and
1531 # unallocated minor in use)
1533 node_drbd[minor] = (instance, False)
1535 instance = instanceinfo[instance]
1536 node_drbd[minor] = (instance.name, instance.admin_up)
1538 # and now check them
1539 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1540 test = not isinstance(used_minors, (tuple, list))
1541 _ErrorIf(test, self.ENODEDRBD, node,
1542 "cannot parse drbd status file: %s", str(used_minors))
1544 # we cannot check drbd status
1547 for minor, (iname, must_exist) in node_drbd.items():
1548 test = minor not in used_minors and must_exist
1549 _ErrorIf(test, self.ENODEDRBD, node,
1550 "drbd minor %d of instance %s is not active", minor, iname)
1551 for minor in used_minors:
1552 test = minor not in node_drbd
1553 _ErrorIf(test, self.ENODEDRBD, node,
1554 "unallocated drbd minor %d is in use", minor)
1556 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1557 """Verifies and updates the node volume data.
1559 This function will update a L{NodeImage}'s internal structures
1560 with data from the remote call.
1562 @type ninfo: L{objects.Node}
1563 @param ninfo: the node to check
1564 @param nresult: the remote results for the node
1565 @param nimg: the node image object
1566 @param vg_name: the configured VG name
1570 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1572 nimg.lvm_fail = True
1573 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1576 elif isinstance(lvdata, basestring):
1577 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1578 utils.SafeEncode(lvdata))
1579 elif not isinstance(lvdata, dict):
1580 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1582 nimg.volumes = lvdata
1583 nimg.lvm_fail = False
1585 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1586 """Verifies and updates the node instance list.
1588 If the listing was successful, then updates this node's instance
1589 list. Otherwise, it marks the RPC call as failed for the instance
1592 @type ninfo: L{objects.Node}
1593 @param ninfo: the node to check
1594 @param nresult: the remote results for the node
1595 @param nimg: the node image object
1598 idata = nresult.get(constants.NV_INSTANCELIST, None)
1599 test = not isinstance(idata, list)
1600 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1601 " (instancelist): %s", utils.SafeEncode(str(idata)))
1603 nimg.hyp_fail = True
1605 nimg.instances = idata
1607 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1608 """Verifies and computes a node information map
1610 @type ninfo: L{objects.Node}
1611 @param ninfo: the node to check
1612 @param nresult: the remote results for the node
1613 @param nimg: the node image object
1614 @param vg_name: the configured VG name
1618 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1620 # try to read free memory (from the hypervisor)
1621 hv_info = nresult.get(constants.NV_HVINFO, None)
1622 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1623 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1626 nimg.mfree = int(hv_info["memory_free"])
1627 except (ValueError, TypeError):
1628 _ErrorIf(True, self.ENODERPC, node,
1629 "node returned invalid nodeinfo, check hypervisor")
1631 # FIXME: devise a free space model for file based instances as well
1632 if vg_name is not None:
1633 test = (constants.NV_VGLIST not in nresult or
1634 vg_name not in nresult[constants.NV_VGLIST])
1635 _ErrorIf(test, self.ENODELVM, node,
1636 "node didn't return data for the volume group '%s'"
1637 " - it is either missing or broken", vg_name)
1640 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1641 except (ValueError, TypeError):
1642 _ErrorIf(True, self.ENODERPC, node,
1643 "node returned invalid LVM info, check LVM status")
1645 def CheckPrereq(self):
1646 """Check prerequisites.
1648 Transform the list of checks we're going to skip into a set and check that
1649 all its members are valid.
1652 self.skip_set = frozenset(self.op.skip_checks)
1653 if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1654 raise errors.OpPrereqError("Invalid checks to be skipped specified",
1657 def BuildHooksEnv(self):
    Cluster-Verify hooks are run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.
1664 all_nodes = self.cfg.GetNodeList()
1666 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1668 for node in self.cfg.GetAllNodesInfo().values():
1669 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1671 return env, [], all_nodes
1673 def Exec(self, feedback_fn):
1674 """Verify integrity of cluster, performing various test on nodes.
1678 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1679 verbose = self.op.verbose
1680 self._feedback_fn = feedback_fn
1681 feedback_fn("* Verifying global settings")
1682 for msg in self.cfg.VerifyConfig():
1683 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1685 # Check the cluster certificates
1686 for cert_filename in constants.ALL_CERT_FILES:
1687 (errcode, msg) = _VerifyCertificate(cert_filename)
1688 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1690 vg_name = self.cfg.GetVGName()
1691 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1692 cluster = self.cfg.GetClusterInfo()
1693 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1694 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1695 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1696 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1697 for iname in instancelist)
1698 i_non_redundant = [] # Non redundant instances
1699 i_non_a_balanced = [] # Non auto-balanced instances
1700 n_offline = 0 # Count of offline nodes
1701 n_drained = 0 # Count of nodes being drained
1702 node_vol_should = {}
1704 # FIXME: verify OS list
1705 # do local checksums
1706 master_files = [constants.CLUSTER_CONF_FILE]
1708 file_names = ssconf.SimpleStore().GetFileList()
1709 file_names.extend(constants.ALL_CERT_FILES)
1710 file_names.extend(master_files)
1711 if cluster.modify_etc_hosts:
1712 file_names.append(constants.ETC_HOSTS)
1714 local_checksums = utils.FingerprintFiles(file_names)
1716 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1717 node_verify_param = {
1718 constants.NV_FILELIST: file_names,
1719 constants.NV_NODELIST: [node.name for node in nodeinfo
1720 if not node.offline],
1721 constants.NV_HYPERVISOR: hypervisors,
1722 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1723 node.secondary_ip) for node in nodeinfo
1724 if not node.offline],
1725 constants.NV_INSTANCELIST: hypervisors,
1726 constants.NV_VERSION: None,
1727 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1728 constants.NV_NODESETUP: None,
1729 constants.NV_TIME: None,
1732 if vg_name is not None:
1733 node_verify_param[constants.NV_VGLIST] = None
1734 node_verify_param[constants.NV_LVLIST] = vg_name
1735 node_verify_param[constants.NV_PVLIST] = [vg_name]
1736 node_verify_param[constants.NV_DRBDLIST] = None
1738 # Build our expected cluster state
1739 node_image = dict((node.name, self.NodeImage(offline=node.offline))
1740 for node in nodeinfo)
1742 for instance in instancelist:
1743 inst_config = instanceinfo[instance]
1745 for nname in inst_config.all_nodes:
1746 if nname not in node_image:
          # ghost node
          gnode = self.NodeImage()
          gnode.ghost = True
          node_image[nname] = gnode
1752 inst_config.MapLVsByNode(node_vol_should)
1754 pnode = inst_config.primary_node
1755 node_image[pnode].pinst.append(instance)
1757 for snode in inst_config.secondary_nodes:
1758 nimg = node_image[snode]
1759 nimg.sinst.append(instance)
1760 if pnode not in nimg.sbp:
1761 nimg.sbp[pnode] = []
1762 nimg.sbp[pnode].append(instance)
1764 # At this point, we have the in-memory data structures complete,
1765 # except for the runtime information, which we'll gather next
1767 # Due to the way our RPC system works, exact response times cannot be
1768 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
1771 nvinfo_starttime = time.time()
1772 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1773 self.cfg.GetClusterName())
1774 nvinfo_endtime = time.time()
1776 master_node = self.cfg.GetMasterNode()
1777 all_drbd_map = self.cfg.ComputeDRBDMap()
1779 feedback_fn("* Verifying node status")
1780 for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]
1786 feedback_fn("* Skipping offline node %s" % (node,))
1790 if node == master_node:
1792 elif node_i.master_candidate:
1793 ntype = "master candidate"
1794 elif node_i.drained:
1800 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1802 msg = all_nvinfo[node].fail_msg
1803 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue
1808 nresult = all_nvinfo[node].payload
1810 nimg.call_ok = self._VerifyNode(node_i, nresult)
1811 self._VerifyNodeNetwork(node_i, nresult)
1812 self._VerifyNodeLVM(node_i, nresult, vg_name)
1813 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1815 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1816 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1818 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1819 self._UpdateNodeInstances(node_i, nresult, nimg)
1820 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1822 feedback_fn("* Verifying instance status")
1823 for instance in instancelist:
1825 feedback_fn("* Verifying instance %s" % instance)
1826 inst_config = instanceinfo[instance]
1827 self._VerifyInstance(instance, inst_config, node_image)
1828 inst_nodes_offline = []
1830 pnode = inst_config.primary_node
1831 pnode_img = node_image[pnode]
1832 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1833 self.ENODERPC, pnode, "instance %s, connection to"
1834 " primary node failed", instance)
1836 if pnode_img.offline:
1837 inst_nodes_offline.append(pnode)
1839 # If the instance is non-redundant we cannot survive losing its primary
1840 # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
1843 # FIXME: does not support file-backed instances
1844 if not inst_config.secondary_nodes:
1845 i_non_redundant.append(instance)
1846 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
1847 instance, "instance has multiple secondary nodes: %s",
1848 utils.CommaJoin(inst_config.secondary_nodes),
1849 code=self.ETYPE_WARNING)
1851 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1852 i_non_a_balanced.append(instance)
1854 for snode in inst_config.secondary_nodes:
1855 s_img = node_image[snode]
1856 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
1857 "instance %s, connection to secondary node failed", instance)
1860 inst_nodes_offline.append(snode)
1862 # warn that the instance lives on offline nodes
1863 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1864 "instance lives on offline node(s) %s",
1865 utils.CommaJoin(inst_nodes_offline))
1866 # ... or ghost nodes
1867 for node in inst_config.all_nodes:
1868 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
1869 "instance lives on ghost node %s", node)
1871 feedback_fn("* Verifying orphan volumes")
1872 self._VerifyOrphanVolumes(node_vol_should, node_image)
1874 feedback_fn("* Verifying orphan instances")
1875 self._VerifyOrphanInstances(instancelist, node_image)
1877 if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1878 feedback_fn("* Verifying N+1 Memory redundancy")
1879 self._VerifyNPlusOneMemory(node_image, instanceinfo)
1881 feedback_fn("* Other Notes")
1883 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
1884 % len(i_non_redundant))
1886 if i_non_a_balanced:
1887 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
1888 % len(i_non_a_balanced))
1891 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
1894 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
1898 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1899 """Analyze the post-hooks' result
1901 This method analyses the hook result, handles it, and sends some
1902 nicely-formatted feedback back to the user.
1904 @param phase: one of L{constants.HOOKS_PHASE_POST} or
1905 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1906 @param hooks_results: the results of the multi-node hooks rpc call
1907 @param feedback_fn: function used to send feedback back to the caller
1908 @param lu_result: previous Exec result
1909 @return: the new Exec result, based on the previous result
1913 # We only really run POST phase hooks, and are only interested in their results
1915 if phase == constants.HOOKS_PHASE_POST:
1916 # Used to change hooks' output to proper indentation
1917 indent_re = re.compile('^', re.M)
1918 feedback_fn("* Hooks Results")
1919 assert hooks_results, "invalid result from hooks"
1921 for node_name in hooks_results:
1922 res = hooks_results[node_name]
1924 test = msg and not res.offline
1925 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1926 "Communication failure in hooks execution: %s", msg)
1927 if res.offline or msg:
1928 # No need to investigate payload if node is offline or gave an error.
1929 # override manually lu_result here as _ErrorIf only
1930 # overrides self.bad
1933 for script, hkr, output in res.payload:
1934 test = hkr == constants.HKR_FAIL
1935 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1936 "Script %s failed, output:", script)
1938 output = indent_re.sub(' ', output)
1939 feedback_fn("%s" % output)
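# For illustration, the MULTILINE '^' substitution above prepends one space
# to every line of the hook output, e.g.:
#   re.compile('^', re.M).sub(' ', "line1\nline2") == " line1\n line2"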
1945 class LUVerifyDisks(NoHooksLU):
1946 """Verifies the cluster disks status.
1952 def ExpandNames(self):
1953 self.needed_locks = {
1954 locking.LEVEL_NODE: locking.ALL_SET,
1955 locking.LEVEL_INSTANCE: locking.ALL_SET,
1957 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1959 def CheckPrereq(self):
1960 """Check prerequisites.
1962 This has no prerequisites.
1967 def Exec(self, feedback_fn):
1968 """Verify integrity of cluster disks.
1970 @rtype: tuple of three items
1971 @return: a tuple of (dict of node-to-node_error, list of instances
1972 which need activate-disks, dict of instance: (node, volume) for missing volumes
1976 result = res_nodes, res_instances, res_missing = {}, [], {}
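# For illustration (hypothetical values), by the end of Exec these hold:
#   res_nodes     == {"node2": "error contacting node"}
#   res_instances == ["inst1"]   # instances needing activate-disks
#   res_missing   == {"inst2": [("node3", "xenvg/lv-data")]}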
1978 vg_name = self.cfg.GetVGName()
1979 nodes = utils.NiceSort(self.cfg.GetNodeList())
1980 instances = [self.cfg.GetInstanceInfo(name)
1981 for name in self.cfg.GetInstanceList()]
1984 for inst in instances:
1986 if (not inst.admin_up or
1987 inst.disk_template not in constants.DTS_NET_MIRROR):
1989 inst.MapLVsByNode(inst_lvs)
1990 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1991 for node, vol_list in inst_lvs.iteritems():
1992 for vol in vol_list:
1993 nv_dict[(node, vol)] = inst
1998 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2002 node_res = node_lvs[node]
2003 if node_res.offline:
2005 msg = node_res.fail_msg
2007 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2008 res_nodes[node] = msg
2011 lvs = node_res.payload
2012 for lv_name, (_, _, lv_online) in lvs.items():
2013 inst = nv_dict.pop((node, lv_name), None)
2014 if (not lv_online and inst is not None
2015 and inst.name not in res_instances):
2016 res_instances.append(inst.name)
2018 # any leftover items in nv_dict are missing LVs, let's arrange the data better
2020 for key, inst in nv_dict.iteritems():
2021 if inst.name not in res_missing:
2022 res_missing[inst.name] = []
2022 res_missing[inst.name].append(key)
return result
2028 class LURepairDiskSizes(NoHooksLU):
2029 """Verifies the cluster disks sizes.
2032 _OP_REQP = ["instances"]
2035 def ExpandNames(self):
2036 if not isinstance(self.op.instances, list):
2037 raise errors.OpPrereqError("Invalid argument type 'instances'",
2040 if self.op.instances:
2041 self.wanted_names = []
2042 for name in self.op.instances:
2043 full_name = _ExpandInstanceName(self.cfg, name)
2044 self.wanted_names.append(full_name)
2045 self.needed_locks = {
2046 locking.LEVEL_NODE: [],
2047 locking.LEVEL_INSTANCE: self.wanted_names,
2049 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2051 self.wanted_names = None
2052 self.needed_locks = {
2053 locking.LEVEL_NODE: locking.ALL_SET,
2054 locking.LEVEL_INSTANCE: locking.ALL_SET,
2056 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2058 def DeclareLocks(self, level):
2059 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2060 self._LockInstancesNodes(primary_only=True)
2062 def CheckPrereq(self):
2063 """Check prerequisites.
2065 This only checks the optional instance list against the existing names.
2068 if self.wanted_names is None:
2069 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2071 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2072 in self.wanted_names]
2074 def _EnsureChildSizes(self, disk):
2075 """Ensure children of the disk have the needed disk size.
2077 This is valid mainly for DRBD8 and fixes an issue where the
2078 children have a smaller disk size.
2080 @param disk: an L{ganeti.objects.Disk} object
2083 if disk.dev_type == constants.LD_DRBD8:
2084 assert disk.children, "Empty children for DRBD8?"
2085 fchild = disk.children[0]
2086 mismatch = fchild.size < disk.size
2088 self.LogInfo("Child disk has size %d, parent %d, fixing",
2089 fchild.size, disk.size)
2090 fchild.size = disk.size
2092 # and we recurse on this child only, not on the metadev
2093 return self._EnsureChildSizes(fchild) or mismatch
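# For illustration: a DRBD8 disk of size 1024 whose first (data) child
# reports size 1000 gets that child bumped to 1024, and the True return
# value tells the caller to write the updated configuration back.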
2097 def Exec(self, feedback_fn):
2098 """Verify the size of cluster disks.
2101 # TODO: check child disks too
2102 # TODO: check differences in size between primary/secondary nodes
2104 for instance in self.wanted_instances:
2105 pnode = instance.primary_node
2106 if pnode not in per_node_disks:
2107 per_node_disks[pnode] = []
2108 for idx, disk in enumerate(instance.disks):
2109 per_node_disks[pnode].append((instance, idx, disk))
2112 for node, dskl in per_node_disks.items():
2113 newl = [v[2].Copy() for v in dskl]
2115 self.cfg.SetDiskID(dsk, node)
2116 result = self.rpc.call_blockdev_getsizes(node, newl)
2118 self.LogWarning("Failure in blockdev_getsizes call to node"
2119 " %s, ignoring", node)
2121 if len(result.data) != len(dskl):
2122 self.LogWarning("Invalid result from node %s, ignoring node results",
2125 for ((instance, idx, disk), size) in zip(dskl, result.data):
2127 self.LogWarning("Disk %d of instance %s did not return size"
2128 " information, ignoring", idx, instance.name)
2130 if not isinstance(size, (int, long)):
2131 self.LogWarning("Disk %d of instance %s did not return valid"
2132 " size information, ignoring", idx, instance.name)
2135 if size != disk.size:
2136 self.LogInfo("Disk %d of instance %s has mismatched size,"
2137 " correcting: recorded %d, actual %d", idx,
2138 instance.name, disk.size, size)
2140 self.cfg.Update(instance, feedback_fn)
2141 changed.append((instance.name, idx, size))
2142 if self._EnsureChildSizes(disk):
2143 self.cfg.Update(instance, feedback_fn)
2144 changed.append((instance.name, idx, disk.size))
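# For illustration (hypothetical values): "changed" collects one
# (instance_name, disk_index, new_size) tuple per corrected disk, e.g.
#   [("inst1", 0, 10240)]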
2148 class LURenameCluster(LogicalUnit):
2149 """Rename the cluster.
2152 HPATH = "cluster-rename"
2153 HTYPE = constants.HTYPE_CLUSTER
2156 def BuildHooksEnv(self):
2161 "OP_TARGET": self.cfg.GetClusterName(),
2162 "NEW_NAME": self.op.name,
2164 mn = self.cfg.GetMasterNode()
2165 all_nodes = self.cfg.GetNodeList()
2166 return env, [mn], all_nodes
2168 def CheckPrereq(self):
2169 """Verify that the passed name is a valid one.
2172 hostname = utils.GetHostInfo(self.op.name)
2174 new_name = hostname.name
2175 self.ip = new_ip = hostname.ip
2176 old_name = self.cfg.GetClusterName()
2177 old_ip = self.cfg.GetMasterIP()
2178 if new_name == old_name and new_ip == old_ip:
2179 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2180 " cluster has changed",
2182 if new_ip != old_ip:
2183 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2184 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2185 " reachable on the network. Aborting." %
2186 new_ip, errors.ECODE_NOTUNIQUE)
2188 self.op.name = new_name
2190 def Exec(self, feedback_fn):
2191 """Rename the cluster.
2194 clustername = self.op.name
ip = self.ip
2197 # shutdown the master IP
2198 master = self.cfg.GetMasterNode()
2199 result = self.rpc.call_node_stop_master(master, False)
2200 result.Raise("Could not disable the master role")
2203 cluster = self.cfg.GetClusterInfo()
2204 cluster.cluster_name = clustername
2205 cluster.master_ip = ip
2206 self.cfg.Update(cluster, feedback_fn)
2208 # update the known hosts file
2209 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2210 node_list = self.cfg.GetNodeList()
2212 node_list.remove(master)
2215 result = self.rpc.call_upload_file(node_list,
2216 constants.SSH_KNOWN_HOSTS_FILE)
2217 for to_node, to_result in result.iteritems():
2218 msg = to_result.fail_msg
2220 msg = ("Copy of file %s to node %s failed: %s" %
2221 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2222 self.proc.LogWarning(msg)
2225 result = self.rpc.call_node_start_master(master, False, False)
2226 msg = result.fail_msg
2228 self.LogWarning("Could not re-enable the master role on"
2229 " the master, please restart manually: %s", msg)
2232 def _RecursiveCheckIfLVMBased(disk):
2233 """Check if the given disk or its children are lvm-based.
2235 @type disk: L{objects.Disk}
2236 @param disk: the disk to check
2238 @return: boolean indicating whether a LD_LV dev_type was found or not
2242 for chdisk in disk.children:
2243 if _RecursiveCheckIfLVMBased(chdisk):
2245 return disk.dev_type == constants.LD_LV
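# For illustration: a DRBD8 disk backed by two logical volumes (data and
# metadata children) is reported as LVM-based, since the recursion finds an
# LD_LV child before looking at the DRBD device itself.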
2248 class LUSetClusterParams(LogicalUnit):
2249 """Change the parameters of the cluster.
2252 HPATH = "cluster-modify"
2253 HTYPE = constants.HTYPE_CLUSTER
2257 def CheckArguments(self):
2261 for attr in ["candidate_pool_size",
2262 "uid_pool", "add_uids", "remove_uids"]:
2263 if not hasattr(self.op, attr):
2264 setattr(self.op, attr, None)
2266 if self.op.candidate_pool_size is not None:
2268 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2269 except (ValueError, TypeError), err:
2270 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2271 str(err), errors.ECODE_INVAL)
2272 if self.op.candidate_pool_size < 1:
2273 raise errors.OpPrereqError("At least one master candidate needed",
2276 _CheckBooleanOpField(self.op, "maintain_node_health")
2278 if self.op.uid_pool:
2279 uidpool.CheckUidPool(self.op.uid_pool)
2281 if self.op.add_uids:
2282 uidpool.CheckUidPool(self.op.add_uids)
2284 if self.op.remove_uids:
2285 uidpool.CheckUidPool(self.op.remove_uids)
2287 def ExpandNames(self):
2288 # FIXME: in the future maybe other cluster params won't require checking on
2289 # all nodes to be modified.
2290 self.needed_locks = {
2291 locking.LEVEL_NODE: locking.ALL_SET,
2293 self.share_locks[locking.LEVEL_NODE] = 1
2295 def BuildHooksEnv(self):
2300 "OP_TARGET": self.cfg.GetClusterName(),
2301 "NEW_VG_NAME": self.op.vg_name,
2303 mn = self.cfg.GetMasterNode()
2304 return env, [mn], [mn]
2306 def CheckPrereq(self):
2307 """Check prerequisites.
2309 This checks whether the given params don't conflict and
2310 if the given volume group is valid.
2313 if self.op.vg_name is not None and not self.op.vg_name:
2314 instances = self.cfg.GetAllInstancesInfo().values()
2315 for inst in instances:
2316 for disk in inst.disks:
2317 if _RecursiveCheckIfLVMBased(disk):
2318 raise errors.OpPrereqError("Cannot disable lvm storage while"
2319 " lvm-based instances exist",
2322 node_list = self.acquired_locks[locking.LEVEL_NODE]
2324 # if vg_name not None, checks given volume group on all nodes
2326 vglist = self.rpc.call_vg_list(node_list)
2327 for node in node_list:
2328 msg = vglist[node].fail_msg
2330 # ignoring down node
2331 self.LogWarning("Error while gathering data on node %s"
2332 " (ignoring node): %s", node, msg)
2334 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2336 constants.MIN_VG_SIZE)
2338 raise errors.OpPrereqError("Error on node '%s': %s" %
2339 (node, vgstatus), errors.ECODE_ENVIRON)
2341 self.cluster = cluster = self.cfg.GetClusterInfo()
2342 # validate params changes
2343 if self.op.beparams:
2344 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2345 self.new_beparams = objects.FillDict(
2346 cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2348 if self.op.nicparams:
2349 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2350 self.new_nicparams = objects.FillDict(
2351 cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2352 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2355 # check all instances for consistency
2356 for instance in self.cfg.GetAllInstancesInfo().values():
2357 for nic_idx, nic in enumerate(instance.nics):
2358 params_copy = copy.deepcopy(nic.nicparams)
2359 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2361 # check parameter syntax
2363 objects.NIC.CheckParameterSyntax(params_filled)
2364 except errors.ConfigurationError, err:
2365 nic_errors.append("Instance %s, nic/%d: %s" %
2366 (instance.name, nic_idx, err))
2368 # if we're moving instances to routed, check that they have an ip
2369 target_mode = params_filled[constants.NIC_MODE]
2370 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2371 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2372 (instance.name, nic_idx))
2374 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2375 "\n".join(nic_errors))
2377 # hypervisor list/parameters
2378 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2379 if self.op.hvparams:
2380 if not isinstance(self.op.hvparams, dict):
2381 raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2383 for hv_name, hv_dict in self.op.hvparams.items():
2384 if hv_name not in self.new_hvparams:
2385 self.new_hvparams[hv_name] = hv_dict
2387 self.new_hvparams[hv_name].update(hv_dict)
2389 # os hypervisor parameters
2390 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2392 if not isinstance(self.op.os_hvp, dict):
2393 raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2395 for os_name, hvs in self.op.os_hvp.items():
2396 if not isinstance(hvs, dict):
2397 raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2398 " input"), errors.ECODE_INVAL)
2399 if os_name not in self.new_os_hvp:
2400 self.new_os_hvp[os_name] = hvs
2402 for hv_name, hv_dict in hvs.items():
2403 if hv_name not in self.new_os_hvp[os_name]:
2404 self.new_os_hvp[os_name][hv_name] = hv_dict
2406 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2408 # changes to the hypervisor list
2409 if self.op.enabled_hypervisors is not None:
2410 self.hv_list = self.op.enabled_hypervisors
2411 if not self.hv_list:
2412 raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2413 " least one member",
2415 invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2417 raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2419 utils.CommaJoin(invalid_hvs),
2421 for hv in self.hv_list:
2422 # if the hypervisor doesn't already exist in the cluster
2423 # hvparams, we initialize it to empty, and then (in both
2424 # cases) we make sure to fill the defaults, as we might not
2425 # have a complete defaults list if the hypervisor wasn't
2427 if hv not in new_hvp:
2429 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2430 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2432 self.hv_list = cluster.enabled_hypervisors
2434 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2435 # either the enabled list has changed, or the parameters have, validate
2436 for hv_name, hv_params in self.new_hvparams.items():
2437 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2438 (self.op.enabled_hypervisors and
2439 hv_name in self.op.enabled_hypervisors)):
2440 # either this is a new hypervisor, or its parameters have changed
2441 hv_class = hypervisor.GetHypervisor(hv_name)
2442 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2443 hv_class.CheckParameterSyntax(hv_params)
2444 _CheckHVParams(self, node_list, hv_name, hv_params)
2447 # no need to check any newly-enabled hypervisors, since the
2448 # defaults have already been checked in the above code-block
2449 for os_name, os_hvp in self.new_os_hvp.items():
2450 for hv_name, hv_params in os_hvp.items():
2451 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2452 # we need to fill in the new os_hvp on top of the actual hv_p
2453 cluster_defaults = self.new_hvparams.get(hv_name, {})
2454 new_osp = objects.FillDict(cluster_defaults, hv_params)
2455 hv_class = hypervisor.GetHypervisor(hv_name)
2456 hv_class.CheckParameterSyntax(new_osp)
2457 _CheckHVParams(self, node_list, hv_name, new_osp)
2460 def Exec(self, feedback_fn):
2461 """Change the parameters of the cluster.
2464 if self.op.vg_name is not None:
2465 new_volume = self.op.vg_name
2468 if new_volume != self.cfg.GetVGName():
2469 self.cfg.SetVGName(new_volume)
2471 feedback_fn("Cluster LVM configuration already in desired"
2472 " state, not changing")
2473 if self.op.hvparams:
2474 self.cluster.hvparams = self.new_hvparams
2476 self.cluster.os_hvp = self.new_os_hvp
2477 if self.op.enabled_hypervisors is not None:
2478 self.cluster.hvparams = self.new_hvparams
2479 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2480 if self.op.beparams:
2481 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2482 if self.op.nicparams:
2483 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2485 if self.op.candidate_pool_size is not None:
2486 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2487 # we need to update the pool size here, otherwise the save will fail
2488 _AdjustCandidatePool(self, [])
2490 if self.op.maintain_node_health is not None:
2491 self.cluster.maintain_node_health = self.op.maintain_node_health
2493 if self.op.add_uids is not None:
2494 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2496 if self.op.remove_uids is not None:
2497 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2499 if self.op.uid_pool is not None:
2500 self.cluster.uid_pool = self.op.uid_pool
2502 self.cfg.Update(self.cluster, feedback_fn)
2505 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2506 """Distribute additional files which are part of the cluster configuration.
2508 ConfigWriter takes care of distributing the config and ssconf files, but
2509 there are more files which should be distributed to all nodes. This function
2510 makes sure those are copied.
2512 @param lu: calling logical unit
2513 @param additional_nodes: list of nodes not in the config to distribute to
2516 # 1. Gather target nodes
2517 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2518 dist_nodes = lu.cfg.GetOnlineNodeList()
2519 if additional_nodes is not None:
2520 dist_nodes.extend(additional_nodes)
2521 if myself.name in dist_nodes:
2522 dist_nodes.remove(myself.name)
2524 # 2. Gather files to distribute
2525 dist_files = set([constants.ETC_HOSTS,
2526 constants.SSH_KNOWN_HOSTS_FILE,
2527 constants.RAPI_CERT_FILE,
2528 constants.RAPI_USERS_FILE,
2529 constants.CONFD_HMAC_KEY,
2532 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2533 for hv_name in enabled_hypervisors:
2534 hv_class = hypervisor.GetHypervisor(hv_name)
2535 dist_files.update(hv_class.GetAncillaryFiles())
2537 # 3. Perform the files upload
2538 for fname in dist_files:
2539 if os.path.exists(fname):
2540 result = lu.rpc.call_upload_file(dist_nodes, fname)
2541 for to_node, to_result in result.items():
2542 msg = to_result.fail_msg
2544 msg = ("Copy of file %s to node %s failed: %s" %
2545 (fname, to_node, msg))
2546 lu.proc.LogWarning(msg)
2549 class LURedistributeConfig(NoHooksLU):
2550 """Force the redistribution of cluster configuration.
2552 This is a very simple LU.
2558 def ExpandNames(self):
2559 self.needed_locks = {
2560 locking.LEVEL_NODE: locking.ALL_SET,
2562 self.share_locks[locking.LEVEL_NODE] = 1
2564 def CheckPrereq(self):
2565 """Check prerequisites.
2569 def Exec(self, feedback_fn):
2570 """Redistribute the configuration.
2573 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2574 _RedistributeAncillaryFiles(self)
2577 def _WaitForSync(lu, instance, oneshot=False):
2578 """Sleep and poll for an instance's disk to sync.
2581 if not instance.disks:
2585 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2587 node = instance.primary_node
2589 for dev in instance.disks:
2590 lu.cfg.SetDiskID(dev, node)
2592 # TODO: Convert to utils.Retry
2595 degr_retries = 10 # in seconds, as we sleep 1 second each time
2599 cumul_degraded = False
2600 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2601 msg = rstats.fail_msg
2603 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2606 raise errors.RemoteError("Can't contact node %s for mirror data,"
2607 " aborting." % node)
2610 rstats = rstats.payload
2612 for i, mstat in enumerate(rstats):
2614 lu.LogWarning("Can't compute data for node %s/%s",
2615 node, instance.disks[i].iv_name)
2618 cumul_degraded = (cumul_degraded or
2619 (mstat.is_degraded and mstat.sync_percent is None))
2620 if mstat.sync_percent is not None:
2622 if mstat.estimated_time is not None:
2623 rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2624 max_time = mstat.estimated_time
2626 rem_time = "no time estimate"
2627 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2628 (instance.disks[i].iv_name, mstat.sync_percent,
2631 # if we're done but degraded, let's do a few small retries, to
2632 # make sure we see a stable and not transient situation; therefore
2633 # we force restart of the loop
2634 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2635 logging.info("Degraded disks found, %d retries left", degr_retries)
2643 time.sleep(min(60, max_time))
2646 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2647 return not cumul_degraded
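# For illustration: with degr_retries starting at 10 and roughly one-second
# sleeps between polls, a transiently degraded mirror gets about ten extra
# status polls before the function settles on returning False.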
2650 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2651 """Check that mirrors are not degraded.
2653 The ldisk parameter, if True, will change the test from the
2654 is_degraded attribute (which represents overall non-ok status for
2655 the device(s)) to the ldisk (representing the local storage status).
2658 lu.cfg.SetDiskID(dev, node)
2662 if on_primary or dev.AssembleOnSecondary():
2663 rstats = lu.rpc.call_blockdev_find(node, dev)
2664 msg = rstats.fail_msg
2666 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2668 elif not rstats.payload:
2669 lu.LogWarning("Can't find disk on node %s", node)
2673 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2675 result = result and not rstats.payload.is_degraded
2678 for child in dev.children:
2679 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
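# For illustration: with ldisk=True only the local storage status is
# checked, so a DRBD device resyncing over the network but with healthy
# local storage (LDS_OKAY) still passes; with ldisk=False the broader
# is_degraded flag is used instead.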
2684 class LUDiagnoseOS(NoHooksLU):
2685 """Logical unit for OS diagnose/query.
2688 _OP_REQP = ["output_fields", "names"]
2690 _FIELDS_STATIC = utils.FieldSet()
2691 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2692 # Fields that need calculation of global os validity
2693 _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2695 def ExpandNames(self):
2697 raise errors.OpPrereqError("Selective OS query not supported",
2700 _CheckOutputFields(static=self._FIELDS_STATIC,
2701 dynamic=self._FIELDS_DYNAMIC,
2702 selected=self.op.output_fields)
2704 # Lock all nodes, in shared mode
2705 # Temporary removal of locks, should be reverted later
2706 # TODO: reintroduce locks when they are lighter-weight
2707 self.needed_locks = {}
2708 #self.share_locks[locking.LEVEL_NODE] = 1
2709 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2711 def CheckPrereq(self):
2712 """Check prerequisites.
2717 def _DiagnoseByOS(rlist):
2718 """Remaps a per-node return list into an a per-os per-node dictionary
2720 @param rlist: a map with node names as keys and OS objects as values
2723 @return: a dictionary with osnames as keys and as value another map, with
2724 nodes as keys and tuples of (path, status, diagnose, variants) as values, eg::
2726 {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2727 (/srv/..., False, "invalid api")],
2728 "node2": [(/srv/..., True, "")]}
2733 # we build here the list of nodes that didn't fail the RPC (at RPC
2734 # level), so that nodes with a non-responding node daemon don't
2735 # make all OSes invalid
2736 good_nodes = [node_name for node_name in rlist
2737 if not rlist[node_name].fail_msg]
2738 for node_name, nr in rlist.items():
2739 if nr.fail_msg or not nr.payload:
2741 for name, path, status, diagnose, variants in nr.payload:
2742 if name not in all_os:
2743 # build a list of nodes for this os containing empty lists
2744 # for each node in node_list
2746 for nname in good_nodes:
2747 all_os[name][nname] = []
2748 all_os[name][node_name].append((path, status, diagnose, variants))
2751 def Exec(self, feedback_fn):
2752 """Compute the list of OSes.
2755 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2756 node_data = self.rpc.call_os_diagnose(valid_nodes)
2757 pol = self._DiagnoseByOS(node_data)
2759 calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2760 calc_variants = "variants" in self.op.output_fields
2762 for os_name, os_data in pol.items():
2767 for osl in os_data.values():
2768 valid = valid and osl and osl[0][1]
2773 node_variants = osl[0][3]
2774 if variants is None:
2775 variants = node_variants
2777 variants = [v for v in variants if v in node_variants]
2779 for field in self.op.output_fields:
2782 elif field == "valid":
2784 elif field == "node_status":
2785 # this is just a copy of the dict
2787 for node_name, nos_list in os_data.items():
2788 val[node_name] = nos_list
2789 elif field == "variants":
2792 raise errors.ParameterError(field)
2799 class LURemoveNode(LogicalUnit):
2800 """Logical unit for removing a node.
2803 HPATH = "node-remove"
2804 HTYPE = constants.HTYPE_NODE
2805 _OP_REQP = ["node_name"]
2807 def BuildHooksEnv(self):
2810 This doesn't run on the target node in the pre phase as a failed
2811 node would then be impossible to remove.
2815 "OP_TARGET": self.op.node_name,
2816 "NODE_NAME": self.op.node_name,
2818 all_nodes = self.cfg.GetNodeList()
2820 all_nodes.remove(self.op.node_name)
2822 logging.warning("Node %s which is about to be removed not found"
2823 " in the all nodes list", self.op.node_name)
2824 return env, all_nodes, all_nodes
2826 def CheckPrereq(self):
2827 """Check prerequisites.
2830 - the node exists in the configuration
2831 - it does not have primary or secondary instances
2832 - it's not the master
2834 Any errors are signaled by raising errors.OpPrereqError.
2837 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2838 node = self.cfg.GetNodeInfo(self.op.node_name)
2839 assert node is not None
2841 instance_list = self.cfg.GetInstanceList()
2843 masternode = self.cfg.GetMasterNode()
2844 if node.name == masternode:
2845 raise errors.OpPrereqError("Node is the master node,"
2846 " you need to failover first.",
2849 for instance_name in instance_list:
2850 instance = self.cfg.GetInstanceInfo(instance_name)
2851 if node.name in instance.all_nodes:
2852 raise errors.OpPrereqError("Instance %s is still running on the node,"
2853 " please remove first." % instance_name,
2855 self.op.node_name = node.name
2858 def Exec(self, feedback_fn):
2859 """Removes the node from the cluster.
2863 logging.info("Stopping the node daemon and removing configs from node %s",
2866 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2868 # Promote nodes to master candidate as needed
2869 _AdjustCandidatePool(self, exceptions=[node.name])
2870 self.context.RemoveNode(node.name)
2872 # Run post hooks on the node before it's removed
2873 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2875 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2877 # pylint: disable-msg=W0702
2878 self.LogWarning("Errors occurred running hooks on %s" % node.name)
2880 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2881 msg = result.fail_msg
2883 self.LogWarning("Errors encountered on the remote node while leaving"
2884 " the cluster: %s", msg)
2886 # Remove node from our /etc/hosts
2887 if self.cfg.GetClusterInfo().modify_etc_hosts:
2888 # FIXME: this should be done via an rpc call to node daemon
2889 utils.RemoveHostFromEtcHosts(node.name)
2890 _RedistributeAncillaryFiles(self)
2893 class LUQueryNodes(NoHooksLU):
2894 """Logical unit for querying nodes.
2897 # pylint: disable-msg=W0142
2898 _OP_REQP = ["output_fields", "names", "use_locking"]
2901 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2902 "master_candidate", "offline", "drained"]
2904 _FIELDS_DYNAMIC = utils.FieldSet(
2906 "mtotal", "mnode", "mfree",
2908 "ctotal", "cnodes", "csockets",
2911 _FIELDS_STATIC = utils.FieldSet(*[
2912 "pinst_cnt", "sinst_cnt",
2913 "pinst_list", "sinst_list",
2914 "pip", "sip", "tags",
2916 "role"] + _SIMPLE_FIELDS
2919 def ExpandNames(self):
2920 _CheckOutputFields(static=self._FIELDS_STATIC,
2921 dynamic=self._FIELDS_DYNAMIC,
2922 selected=self.op.output_fields)
2924 self.needed_locks = {}
2925 self.share_locks[locking.LEVEL_NODE] = 1
2928 self.wanted = _GetWantedNodes(self, self.op.names)
2930 self.wanted = locking.ALL_SET
2932 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2933 self.do_locking = self.do_node_query and self.op.use_locking
2935 # if we don't request only static fields, we need to lock the nodes
2936 self.needed_locks[locking.LEVEL_NODE] = self.wanted
2938 def CheckPrereq(self):
2939 """Check prerequisites.
2942 # The validation of the node list is done in the _GetWantedNodes,
2943 # if non-empty, and if empty, there's no validation to do
2946 def Exec(self, feedback_fn):
2947 """Computes the list of nodes and their attributes.
2950 all_info = self.cfg.GetAllNodesInfo()
2952 nodenames = self.acquired_locks[locking.LEVEL_NODE]
2953 elif self.wanted != locking.ALL_SET:
2954 nodenames = self.wanted
2955 missing = set(nodenames).difference(all_info.keys())
2957 raise errors.OpExecError(
2958 "Some nodes were removed before retrieving their data: %s" % missing)
2960 nodenames = all_info.keys()
2962 nodenames = utils.NiceSort(nodenames)
2963 nodelist = [all_info[name] for name in nodenames]
2965 # begin data gathering
2967 if self.do_node_query:
2969 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2970 self.cfg.GetHypervisorType())
2971 for name in nodenames:
2972 nodeinfo = node_data[name]
2973 if not nodeinfo.fail_msg and nodeinfo.payload:
2974 nodeinfo = nodeinfo.payload
2975 fn = utils.TryConvert
2977 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2978 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2979 "mfree": fn(int, nodeinfo.get('memory_free', None)),
2980 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2981 "dfree": fn(int, nodeinfo.get('vg_free', None)),
2982 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2983 "bootid": nodeinfo.get('bootid', None),
2984 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2985 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2988 live_data[name] = {}
2990 live_data = dict.fromkeys(nodenames, {})
2992 node_to_primary = dict([(name, set()) for name in nodenames])
2993 node_to_secondary = dict([(name, set()) for name in nodenames])
2995 inst_fields = frozenset(("pinst_cnt", "pinst_list",
2996 "sinst_cnt", "sinst_list"))
2997 if inst_fields & frozenset(self.op.output_fields):
2998 inst_data = self.cfg.GetAllInstancesInfo()
3000 for inst in inst_data.values():
3001 if inst.primary_node in node_to_primary:
3002 node_to_primary[inst.primary_node].add(inst.name)
3003 for secnode in inst.secondary_nodes:
3004 if secnode in node_to_secondary:
3005 node_to_secondary[secnode].add(inst.name)
3007 master_node = self.cfg.GetMasterNode()
3009 # end data gathering
3012 for node in nodelist:
3014 for field in self.op.output_fields:
3015 if field in self._SIMPLE_FIELDS:
3016 val = getattr(node, field)
3017 elif field == "pinst_list":
3018 val = list(node_to_primary[node.name])
3019 elif field == "sinst_list":
3020 val = list(node_to_secondary[node.name])
3021 elif field == "pinst_cnt":
3022 val = len(node_to_primary[node.name])
3023 elif field == "sinst_cnt":
3024 val = len(node_to_secondary[node.name])
3025 elif field == "pip":
3026 val = node.primary_ip
3027 elif field == "sip":
3028 val = node.secondary_ip
3029 elif field == "tags":
3030 val = list(node.GetTags())
3031 elif field == "master":
3032 val = node.name == master_node
3033 elif self._FIELDS_DYNAMIC.Matches(field):
3034 val = live_data[node.name].get(field, None)
3035 elif field == "role":
3036 if node.name == master_node:
3038 elif node.master_candidate:
3047 raise errors.ParameterError(field)
3048 node_output.append(val)
3049 output.append(node_output)
3054 class LUQueryNodeVolumes(NoHooksLU):
3055 """Logical unit for getting volumes on node(s).
3058 _OP_REQP = ["nodes", "output_fields"]
3060 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3061 _FIELDS_STATIC = utils.FieldSet("node")
3063 def ExpandNames(self):
3064 _CheckOutputFields(static=self._FIELDS_STATIC,
3065 dynamic=self._FIELDS_DYNAMIC,
3066 selected=self.op.output_fields)
3068 self.needed_locks = {}
3069 self.share_locks[locking.LEVEL_NODE] = 1
3070 if not self.op.nodes:
3071 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3073 self.needed_locks[locking.LEVEL_NODE] = \
3074 _GetWantedNodes(self, self.op.nodes)
3076 def CheckPrereq(self):
3077 """Check prerequisites.
3079 This checks that the fields required are valid output fields.
3082 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3084 def Exec(self, feedback_fn):
3085 """Computes the list of nodes and their attributes.
3088 nodenames = self.nodes
3089 volumes = self.rpc.call_node_volumes(nodenames)
3091 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3092 in self.cfg.GetInstanceList()]
3094 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3097 for node in nodenames:
3098 nresult = volumes[node]
3101 msg = nresult.fail_msg
3103 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3106 node_vols = nresult.payload[:]
3107 node_vols.sort(key=lambda vol: vol['dev'])
3109 for vol in node_vols:
3111 for field in self.op.output_fields:
3114 elif field == "phys":
3118 elif field == "name":
3120 elif field == "size":
3121 val = int(float(vol['size']))
3122 elif field == "instance":
3124 if node not in lv_by_node[inst]:
3126 if vol['name'] in lv_by_node[inst][node]:
3132 raise errors.ParameterError(field)
3133 node_output.append(str(val))
3135 output.append(node_output)
3140 class LUQueryNodeStorage(NoHooksLU):
3141 """Logical unit for getting information on storage units on node(s).
3144 _OP_REQP = ["nodes", "storage_type", "output_fields"]
3146 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3148 def CheckArguments(self):
3149 _CheckStorageType(self.op.storage_type)
3151 _CheckOutputFields(static=self._FIELDS_STATIC,
3152 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3153 selected=self.op.output_fields)
3155 def ExpandNames(self):
3156 self.needed_locks = {}
3157 self.share_locks[locking.LEVEL_NODE] = 1
3160 self.needed_locks[locking.LEVEL_NODE] = \
3161 _GetWantedNodes(self, self.op.nodes)
3163 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3165 def CheckPrereq(self):
3166 """Check prerequisites.
3168 This checks that the fields required are valid output fields.
3171 self.op.name = getattr(self.op, "name", None)
3173 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3175 def Exec(self, feedback_fn):
3176 """Computes the list of nodes and their attributes.
3179 # Always get name to sort by
3180 if constants.SF_NAME in self.op.output_fields:
3181 fields = self.op.output_fields[:]
3183 fields = [constants.SF_NAME] + self.op.output_fields
3185 # Never ask for node or type as it's only known to the LU
3186 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3187 while extra in fields:
3188 fields.remove(extra)
3190 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3191 name_idx = field_idx[constants.SF_NAME]
3193 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3194 data = self.rpc.call_storage_list(self.nodes,
3195 self.op.storage_type, st_args,
3196 self.op.name, fields)
3200 for node in utils.NiceSort(self.nodes):
3201 nresult = data[node]
3205 msg = nresult.fail_msg
3207 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3210 rows = dict([(row[name_idx], row) for row in nresult.payload])
3212 for name in utils.NiceSort(rows.keys()):
3217 for field in self.op.output_fields:
3218 if field == constants.SF_NODE:
3220 elif field == constants.SF_TYPE:
3221 val = self.op.storage_type
3222 elif field in field_idx:
3223 val = row[field_idx[field]]
3225 raise errors.ParameterError(field)
3234 class LUModifyNodeStorage(NoHooksLU):
3235 """Logical unit for modifying a storage volume on a node.
3238 _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3241 def CheckArguments(self):
3242 self.opnode_name = _ExpandNodeName(self.cfg, self.op.node_name)
3244 _CheckStorageType(self.op.storage_type)
3246 def ExpandNames(self):
3247 self.needed_locks = {
3248 locking.LEVEL_NODE: self.op.node_name,
3251 def CheckPrereq(self):
3252 """Check prerequisites.
3255 storage_type = self.op.storage_type
3258 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3260 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3261 " modified" % storage_type,
3264 diff = set(self.op.changes.keys()) - modifiable
3266 raise errors.OpPrereqError("The following fields can not be modified for"
3267 " storage units of type '%s': %r" %
3268 (storage_type, list(diff)),
3271 def Exec(self, feedback_fn):
3272 """Computes the list of nodes and their attributes.
3275 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3276 result = self.rpc.call_storage_modify(self.op.node_name,
3277 self.op.storage_type, st_args,
3278 self.op.name, self.op.changes)
3279 result.Raise("Failed to modify storage unit '%s' on %s" %
3280 (self.op.name, self.op.node_name))
3283 class LUAddNode(LogicalUnit):
3284 """Logical unit for adding node to the cluster.
3288 HTYPE = constants.HTYPE_NODE
3289 _OP_REQP = ["node_name"]
3291 def CheckArguments(self):
3292 # validate/normalize the node name
3293 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3295 def BuildHooksEnv(self):
3298 This will run on all nodes before, and on all nodes + the new node after.
3302 "OP_TARGET": self.op.node_name,
3303 "NODE_NAME": self.op.node_name,
3304 "NODE_PIP": self.op.primary_ip,
3305 "NODE_SIP": self.op.secondary_ip,
3307 nodes_0 = self.cfg.GetNodeList()
3308 nodes_1 = nodes_0 + [self.op.node_name, ]
3309 return env, nodes_0, nodes_1
3311 def CheckPrereq(self):
3312 """Check prerequisites.
3315 - the new node is not already in the config
3317 - its parameters (single/dual homed) match the cluster
3319 Any errors are signaled by raising errors.OpPrereqError.
3322 node_name = self.op.node_name
3325 dns_data = utils.GetHostInfo(node_name)
3327 node = dns_data.name
3328 primary_ip = self.op.primary_ip = dns_data.ip
3329 secondary_ip = getattr(self.op, "secondary_ip", None)
3330 if secondary_ip is None:
3331 secondary_ip = primary_ip
3332 if not utils.IsValidIP(secondary_ip):
3333 raise errors.OpPrereqError("Invalid secondary IP given",
3335 self.op.secondary_ip = secondary_ip
3337 node_list = cfg.GetNodeList()
3338 if not self.op.readd and node in node_list:
3339 raise errors.OpPrereqError("Node %s is already in the configuration" %
3340 node, errors.ECODE_EXISTS)
3341 elif self.op.readd and node not in node_list:
3342 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3345 self.changed_primary_ip = False
3347 for existing_node_name in node_list:
3348 existing_node = cfg.GetNodeInfo(existing_node_name)
3350 if self.op.readd and node == existing_node_name:
3351 if existing_node.secondary_ip != secondary_ip:
3352 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3353 " address configuration as before",
3355 if existing_node.primary_ip != primary_ip:
3356 self.changed_primary_ip = True
3360 if (existing_node.primary_ip == primary_ip or
3361 existing_node.secondary_ip == primary_ip or
3362 existing_node.primary_ip == secondary_ip or
3363 existing_node.secondary_ip == secondary_ip):
3364 raise errors.OpPrereqError("New node ip address(es) conflict with"
3365 " existing node %s" % existing_node.name,
3366 errors.ECODE_NOTUNIQUE)
3368 # check that the type of the node (single versus dual homed) is the
3369 # same as for the master
3370 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3371 master_singlehomed = myself.secondary_ip == myself.primary_ip
3372 newbie_singlehomed = secondary_ip == primary_ip
3373 if master_singlehomed != newbie_singlehomed:
3374 if master_singlehomed:
3375 raise errors.OpPrereqError("The master has no private ip but the"
3376 " new node has one",
3379 raise errors.OpPrereqError("The master has a private ip but the"
3380 " new node doesn't have one",
3383 # checks reachability
3384 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3385 raise errors.OpPrereqError("Node not reachable by ping",
3386 errors.ECODE_ENVIRON)
3388 if not newbie_singlehomed:
3389 # check reachability from my secondary ip to newbie's secondary ip
3390 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3391 source=myself.secondary_ip):
3392 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3393 " based ping to noded port",
3394 errors.ECODE_ENVIRON)
3401 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3404 self.new_node = self.cfg.GetNodeInfo(node)
3405 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3407 self.new_node = objects.Node(name=node,
3408 primary_ip=primary_ip,
3409 secondary_ip=secondary_ip,
3410 master_candidate=self.master_candidate,
3411 offline=False, drained=False)
3413 def Exec(self, feedback_fn):
3414 """Adds the new node to the cluster.
3417 new_node = self.new_node
3418 node = new_node.name
3420 # for re-adds, reset the offline/drained/master-candidate flags;
3421 # we need to reset here, otherwise offline would prevent RPC calls
3422 # later in the procedure; this also means that if the re-add
3423 # fails, we are left with a non-offlined, broken node
3425 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3426 self.LogInfo("Readding a node, the offline/drained flags were reset")
3427 # if we demote the node, we do cleanup later in the procedure
3428 new_node.master_candidate = self.master_candidate
3429 if self.changed_primary_ip:
3430 new_node.primary_ip = self.op.primary_ip
3432 # notify the user about any possible mc promotion
3433 if new_node.master_candidate:
3434 self.LogInfo("Node will be a master candidate")
3436 # check connectivity
3437 result = self.rpc.call_version([node])[node]
3438 result.Raise("Can't get version information from node %s" % node)
3439 if constants.PROTOCOL_VERSION == result.payload:
3440 logging.info("Communication to node %s fine, sw version %s match",
3441 node, result.payload)
3443 raise errors.OpExecError("Version mismatch master version %s,"
3444 " node version %s" %
3445 (constants.PROTOCOL_VERSION, result.payload))
3448 if self.cfg.GetClusterInfo().modify_ssh_setup:
3449 logging.info("Copy ssh key to node %s", node)
3450 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3452 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3453 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3457 keyarray.append(utils.ReadFile(i))
3459 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3460 keyarray[2], keyarray[3], keyarray[4],
3462 result.Raise("Cannot transfer ssh keys to the new node")
3464 # Add node to our /etc/hosts, and add key to known_hosts
3465 if self.cfg.GetClusterInfo().modify_etc_hosts:
3466 # FIXME: this should be done via an rpc call to node daemon
3467 utils.AddHostToEtcHosts(new_node.name)
3469 if new_node.secondary_ip != new_node.primary_ip:
3470 result = self.rpc.call_node_has_ip_address(new_node.name,
3471 new_node.secondary_ip)
3472 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3473 prereq=True, ecode=errors.ECODE_ENVIRON)
3474 if not result.payload:
3475 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3476 " you gave (%s). Please fix and re-run this"
3477 " command." % new_node.secondary_ip)
3479 node_verify_list = [self.cfg.GetMasterNode()]
3480 node_verify_param = {
3481 constants.NV_NODELIST: [node],
3482 # TODO: do a node-net-test as well?
3485 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3486 self.cfg.GetClusterName())
3487 for verifier in node_verify_list:
3488 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3489 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3491 for failed in nl_payload:
3492 feedback_fn("ssh/hostname verification failed"
3493 " (checking from %s): %s" %
3494 (verifier, nl_payload[failed]))
3495 raise errors.OpExecError("ssh/hostname verification failed.")
3498 _RedistributeAncillaryFiles(self)
3499 self.context.ReaddNode(new_node)
3500 # make sure we redistribute the config
3501 self.cfg.Update(new_node, feedback_fn)
3502 # and make sure the new node will not have old files around
3503 if not new_node.master_candidate:
3504 result = self.rpc.call_node_demote_from_mc(new_node.name)
3505 msg = result.fail_msg
3507 self.LogWarning("Node failed to demote itself from master"
3508 " candidate status: %s" % msg)
3510 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3511 self.context.AddNode(new_node, self.proc.GetECId())
3514 class LUSetNodeParams(LogicalUnit):
3515 """Modifies the parameters of a node.
3518 HPATH = "node-modify"
3519 HTYPE = constants.HTYPE_NODE
3520 _OP_REQP = ["node_name"]
3523 def CheckArguments(self):
3524 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3525 _CheckBooleanOpField(self.op, 'master_candidate')
3526 _CheckBooleanOpField(self.op, 'offline')
3527 _CheckBooleanOpField(self.op, 'drained')
3528 _CheckBooleanOpField(self.op, 'auto_promote')
3529 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3530 if all_mods.count(None) == 3:
3531 raise errors.OpPrereqError("Please pass at least one modification",
3533 if all_mods.count(True) > 1:
3534 raise errors.OpPrereqError("Can't set the node into more than one"
3535 " state at the same time",
3538 # Boolean value that tells us whether we're offlining or draining the node
3539 self.offline_or_drain = (self.op.offline == True or
3540 self.op.drained == True)
3541 self.deoffline_or_drain = (self.op.offline == False or
3542 self.op.drained == False)
3543 self.might_demote = (self.op.master_candidate == False or
3544 self.offline_or_drain)
3546 self.lock_all = self.op.auto_promote and self.might_demote
3549 def ExpandNames(self):
3551 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3553 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3555 def BuildHooksEnv(self):
3558 This runs on the master node.
3562 "OP_TARGET": self.op.node_name,
3563 "MASTER_CANDIDATE": str(self.op.master_candidate),
3564 "OFFLINE": str(self.op.offline),
3565 "DRAINED": str(self.op.drained),
3567 nl = [self.cfg.GetMasterNode(),
3571 def CheckPrereq(self):
3572 """Check prerequisites.
3574 This only checks the instance list against the existing names.
3577 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3579 if (self.op.master_candidate is not None or
3580 self.op.drained is not None or
3581 self.op.offline is not None):
3582 # we can't change the master's node flags
3583 if self.op.node_name == self.cfg.GetMasterNode():
3584 raise errors.OpPrereqError("The master role can be changed"
3585 " only via masterfailover",
3589 if node.master_candidate and self.might_demote and not self.lock_all:
3590 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3591 # check if after removing the current node, we're missing master candidates
3593 (mc_remaining, mc_should, _) = \
3594 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3595 if mc_remaining < mc_should:
3596 raise errors.OpPrereqError("Not enough master candidates, please"
3597 " pass auto_promote to allow promotion",
3600 if (self.op.master_candidate == True and
3601 ((node.offline and not self.op.offline == False) or
3602 (node.drained and not self.op.drained == False))):
3603 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3604 " to master_candidate" % node.name,
3607 # If we're being deofflined/drained, we'll MC ourself if needed
3608 if (self.deoffline_or_drain and not self.offline_or_drain and not
3609 self.op.master_candidate == True and not node.master_candidate):
3610 self.op.master_candidate = _DecideSelfPromotion(self)
3611 if self.op.master_candidate:
3612 self.LogInfo("Autopromoting node to master candidate")
3616 def Exec(self, feedback_fn):
3625 if self.op.offline is not None:
3626 node.offline = self.op.offline
3627 result.append(("offline", str(self.op.offline)))
3628 if self.op.offline == True:
3629 if node.master_candidate:
3630 node.master_candidate = False
3632 result.append(("master_candidate", "auto-demotion due to offline"))
3634 node.drained = False
3635 result.append(("drained", "clear drained status due to offline"))
3637 if self.op.master_candidate is not None:
3638 node.master_candidate = self.op.master_candidate
3640 result.append(("master_candidate", str(self.op.master_candidate)))
3641 if self.op.master_candidate == False:
3642 rrc = self.rpc.call_node_demote_from_mc(node.name)
3645 self.LogWarning("Node failed to demote itself: %s" % msg)
3647 if self.op.drained is not None:
3648 node.drained = self.op.drained
3649 result.append(("drained", str(self.op.drained)))
3650 if self.op.drained == True:
3651 if node.master_candidate:
3652 node.master_candidate = False
3654 result.append(("master_candidate", "auto-demotion due to drain"))
3655 rrc = self.rpc.call_node_demote_from_mc(node.name)
3658 self.LogWarning("Node failed to demote itself: %s" % msg)
3660 node.offline = False
3661 result.append(("offline", "clear offline status due to drain"))
3663 # we locked all nodes, we adjust the CP before updating this node
3665 _AdjustCandidatePool(self, [node.name])
3667 # this will trigger configuration file update, if needed
3668 self.cfg.Update(node, feedback_fn)
3670 # this will trigger job queue propagation or cleanup
3672 self.context.ReaddNode(node)
3677 class LUPowercycleNode(NoHooksLU):
3678 """Powercycles a node.
3681 _OP_REQP = ["node_name", "force"]
3684 def CheckArguments(self):
3685 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3686 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3687 raise errors.OpPrereqError("The node is the master and the force"
3688 " parameter was not set",
3691 def ExpandNames(self):
3692 """Locking for PowercycleNode.
3694 This is a last-resort option and shouldn't block on other
3695 jobs. Therefore, we grab no locks.
3698 self.needed_locks = {}
3700 def CheckPrereq(self):
3701 """Check prerequisites.
3703 This LU has no prereqs.
3708 def Exec(self, feedback_fn):
3712 result = self.rpc.call_node_powercycle(self.op.node_name,
3713 self.cfg.GetHypervisorType())
3714 result.Raise("Failed to schedule the reboot")
3715 return result.payload
3718 class LUQueryClusterInfo(NoHooksLU):
3719 """Query cluster configuration.
3725 def ExpandNames(self):
3726 self.needed_locks = {}
3728 def CheckPrereq(self):
3729 """No prerequsites needed for this LU.
3734 def Exec(self, feedback_fn):
3735 """Return cluster config.
3738 cluster = self.cfg.GetClusterInfo()
3741 # Filter just for enabled hypervisors
3742 for os_name, hv_dict in cluster.os_hvp.items():
3743 os_hvp[os_name] = {}
3744 for hv_name, hv_params in hv_dict.items():
3745 if hv_name in cluster.enabled_hypervisors:
3746 os_hvp[os_name][hv_name] = hv_params
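# For illustration: with enabled_hypervisors == ["xen-pvm"], a per-OS
# override such as {"debian-etch": {"kvm": {...}}} is dropped from the
# os_hvp mapping returned by this query.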
3749 "software_version": constants.RELEASE_VERSION,
3750 "protocol_version": constants.PROTOCOL_VERSION,
3751 "config_version": constants.CONFIG_VERSION,
3752 "os_api_version": max(constants.OS_API_VERSIONS),
3753 "export_version": constants.EXPORT_VERSION,
3754 "architecture": (platform.architecture()[0], platform.machine()),
3755 "name": cluster.cluster_name,
3756 "master": cluster.master_node,
3757 "default_hypervisor": cluster.enabled_hypervisors[0],
3758 "enabled_hypervisors": cluster.enabled_hypervisors,
3759 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3760 for hypervisor_name in cluster.enabled_hypervisors]),
3762 "beparams": cluster.beparams,
3763 "nicparams": cluster.nicparams,
3764 "candidate_pool_size": cluster.candidate_pool_size,
3765 "master_netdev": cluster.master_netdev,
3766 "volume_group_name": cluster.volume_group_name,
3767 "file_storage_dir": cluster.file_storage_dir,
3768 "maintain_node_health": cluster.maintain_node_health,
3769 "ctime": cluster.ctime,
3770 "mtime": cluster.mtime,
3771 "uuid": cluster.uuid,
3772 "tags": list(cluster.GetTags()),
3773 "uid_pool": cluster.uid_pool,
3779 class LUQueryConfigValues(NoHooksLU):
3780 """Return configuration values.
3785 _FIELDS_DYNAMIC = utils.FieldSet()
3786 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3789 def ExpandNames(self):
3790 self.needed_locks = {}
3792 _CheckOutputFields(static=self._FIELDS_STATIC,
3793 dynamic=self._FIELDS_DYNAMIC,
3794 selected=self.op.output_fields)
3796 def CheckPrereq(self):
3797 """No prerequisites.
3802 def Exec(self, feedback_fn):
3803 """Dump a representation of the cluster config to the standard output.
3807 for field in self.op.output_fields:
3808 if field == "cluster_name":
3809 entry = self.cfg.GetClusterName()
3810 elif field == "master_node":
3811 entry = self.cfg.GetMasterNode()
3812 elif field == "drain_flag":
3813 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3814 elif field == "watcher_pause":
3815 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3817 raise errors.ParameterError(field)
3818 values.append(entry)
3822 class LUActivateInstanceDisks(NoHooksLU):
3823 """Bring up an instance's disks.
3826 _OP_REQP = ["instance_name"]
3829 def ExpandNames(self):
3830 self._ExpandAndLockInstance()
3831 self.needed_locks[locking.LEVEL_NODE] = []
3832 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3834 def DeclareLocks(self, level):
3835 if level == locking.LEVEL_NODE:
3836 self._LockInstancesNodes()
3838 def CheckPrereq(self):
3839 """Check prerequisites.
3841 This checks that the instance is in the cluster.
3844 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3845 assert self.instance is not None, \
3846 "Cannot retrieve locked instance %s" % self.op.instance_name
3847 _CheckNodeOnline(self, self.instance.primary_node)
3848 if not hasattr(self.op, "ignore_size"):
3849 self.op.ignore_size = False
3851 def Exec(self, feedback_fn):
3852 """Activate the disks.
3855 disks_ok, disks_info = \
3856 _AssembleInstanceDisks(self, self.instance,
3857 ignore_size=self.op.ignore_size)
3859 raise errors.OpExecError("Cannot activate block devices")
3864 def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
ignore_size=False):
3866 """Prepare the block devices for an instance.
3868 This sets up the block devices on all nodes.
3870 @type lu: L{LogicalUnit}
3871 @param lu: the logical unit on whose behalf we execute
3872 @type instance: L{objects.Instance}
3873 @param instance: the instance whose disks we are assembling
3874 @type ignore_secondaries: boolean
3875 @param ignore_secondaries: if true, errors on secondary nodes
3876 won't result in an error return from the function
3877 @type ignore_size: boolean
3878 @param ignore_size: if true, the current known size of the disk
3879 will not be used during the disk activation, useful for cases
3880 when the size is wrong
3881 @return: a tuple (disks_ok, device_info); disks_ok is False if the
3882 operation failed, and device_info is a list of
3883 (host, instance_visible_name, node_visible_name) tuples mapping node devices to instance devices
3888 iname = instance.name
3889 # With the two-pass mechanism we try to reduce the window of
3890 # opportunity for the race condition of switching DRBD to primary
3891 # before the handshake has occurred, but we do not eliminate it
3893 # The proper fix would be to wait (with some limits) until the
3894 # connection has been made and drbd transitions from WFConnection
3895 # into any other network-connected state (Connected, SyncTarget,
3898 # 1st pass, assemble on all nodes in secondary mode
3899 for inst_disk in instance.disks:
3900 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3902 node_disk = node_disk.Copy()
3903 node_disk.UnsetSize()
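# note: dropping the recorded size here is what makes ignore_size useful
# when the configured size no longer matches the on-disk reality (see the
# ignore_size description in the docstring above)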
3904 lu.cfg.SetDiskID(node_disk, node)
3905 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3906 msg = result.fail_msg
3908 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3909 " (is_primary=False, pass=1): %s",
3910 inst_disk.iv_name, node, msg)
3911 if not ignore_secondaries:
3914 # FIXME: race condition on drbd migration to primary
3916 # 2nd pass, do only the primary node
3917 for inst_disk in instance.disks:
3920 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3921 if node != instance.primary_node:
3924 node_disk = node_disk.Copy()
3925 node_disk.UnsetSize()
3926 lu.cfg.SetDiskID(node_disk, node)
3927 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3928 msg = result.fail_msg
3930 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3931 " (is_primary=True, pass=2): %s",
3932 inst_disk.iv_name, node, msg)
3935 dev_path = result.payload
3937 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3939 # leave the disks configured for the primary node
3940 # this is a workaround; a better fix would be to improve the
3941 # logical/physical id handling
3942 for disk in instance.disks:
3943 lu.cfg.SetDiskID(disk, instance.primary_node)
3945 return disks_ok, device_info
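# Illustrative usage sketch (not part of the original module), mirroring how
# LUActivateInstanceDisks.Exec above consumes the return value; feedback_fn is
# assumed to be the usual per-LU feedback callback:
#
#   disks_ok, device_info = _AssembleInstanceDisks(self, self.instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     feedback_fn("  %s: %s is visible as %s" % (node, iv_name, dev_path))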
3948 def _StartInstanceDisks(lu, instance, force):
3949 """Start the disks of an instance.
3952 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3953 ignore_secondaries=force)
3955 _ShutdownInstanceDisks(lu, instance)
3956 if force is not None and not force:
3957 lu.proc.LogWarning("", hint="If the message above refers to a"
3959 " you can retry the operation using '--force'.")
3960 raise errors.OpExecError("Disk consistency error")
3963 class LUDeactivateInstanceDisks(NoHooksLU):
3964 """Shutdown an instance's disks.
3967 _OP_REQP = ["instance_name"]
3970 def ExpandNames(self):
3971 self._ExpandAndLockInstance()
3972 self.needed_locks[locking.LEVEL_NODE] = []
3973 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3975 def DeclareLocks(self, level):
3976 if level == locking.LEVEL_NODE:
3977 self._LockInstancesNodes()
3979 def CheckPrereq(self):
3980 """Check prerequisites.
3982 This checks that the instance is in the cluster.
3985 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3986 assert self.instance is not None, \
3987 "Cannot retrieve locked instance %s" % self.op.instance_name
3989 def Exec(self, feedback_fn):
3990 """Deactivate the disks
3993 instance = self.instance
3994 _SafeShutdownInstanceDisks(self, instance)
3997 def _SafeShutdownInstanceDisks(lu, instance):
3998 """Shutdown block devices of an instance.
4000 This function checks if an instance is running, before calling
4001 _ShutdownInstanceDisks.
4004 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4005 _ShutdownInstanceDisks(lu, instance)
4008 def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
4009 """Shutdown block devices of an instance.
4011 This does the shutdown on all nodes of the instance.
4013 If ignore_primary is false, errors on the primary node are
4018 for disk in instance.disks:
4019 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4020 lu.cfg.SetDiskID(top_disk, node)
4021 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4022 msg = result.fail_msg
4024 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4025 disk.iv_name, node, msg)
4026 if not ignore_primary or node != instance.primary_node:
4031 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4032 """Checks if a node has enough free memory.
4034 This function checks if a given node has the needed amount of free
4035 memory. If the node has less memory than requested, or we cannot get the
4036 information from the node, this function raises an OpPrereqError
4039 @type lu: C{LogicalUnit}
4040 @param lu: a logical unit from which we get configuration data
4042 @param node: the node to check
4043 @type reason: C{str}
4044 @param reason: string to use in the error message
4045 @type requested: C{int}
4046 @param requested: the amount of memory in MiB to check for
4047 @type hypervisor_name: C{str}
4048 @param hypervisor_name: the hypervisor to ask for memory stats
4049 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4050 we cannot check the node
4053 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4054 nodeinfo[node].Raise("Can't get data from node %s" % node,
4055 prereq=True, ecode=errors.ECODE_ENVIRON)
4056 free_mem = nodeinfo[node].payload.get('memory_free', None)
4057 if not isinstance(free_mem, int):
4058 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4059 " was '%s'" % (node, free_mem),
4060 errors.ECODE_ENVIRON)
4061 if requested > free_mem:
4062 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4063 " needed %s MiB, available %s MiB" %
4064 (node, reason, requested, free_mem),
4068 def _CheckNodesFreeDisk(lu, nodenames, requested):
4069 """Checks if nodes have enough free disk space in the default VG.
4071 This function checks if all given nodes have the needed amount of
4072 free disk space. If any node has less, or we cannot get the
4073 information from the node, this function raises an OpPrereqError
4076 @type lu: C{LogicalUnit}
4077 @param lu: a logical unit from which we get configuration data
4078 @type nodenames: C{list}
4079 @param nodenames: the list of node names to check
4080 @type requested: C{int}
4081 @param requested: the amount of disk in MiB to check for
4082 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4083 we cannot check the node
4086 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4087 lu.cfg.GetHypervisorType())
4088 for node in nodenames:
4089 info = nodeinfo[node]
4090 info.Raise("Cannot get current information from node %s" % node,
4091 prereq=True, ecode=errors.ECODE_ENVIRON)
4092 vg_free = info.payload.get("vg_free", None)
4093 if not isinstance(vg_free, int):
4094 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4095 " result was '%s'" % (node, vg_free),
4096 errors.ECODE_ENVIRON)
4097 if requested > vg_free:
4098 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4099 " required %d MiB, available %d MiB" %
4100 (node, requested, vg_free),
4104 class LUStartupInstance(LogicalUnit):
4105 """Starts an instance.
4108 HPATH = "instance-start"
4109 HTYPE = constants.HTYPE_INSTANCE
4110 _OP_REQP = ["instance_name", "force"]
4113 def ExpandNames(self):
4114 self._ExpandAndLockInstance()
4116 def BuildHooksEnv(self):
4119 This runs on master, primary and secondary nodes of the instance.
4123 "FORCE": self.op.force,
4125 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4126 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4129 def CheckPrereq(self):
4130 """Check prerequisites.
4132 This checks that the instance is in the cluster.
4135 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4136 assert self.instance is not None, \
4137 "Cannot retrieve locked instance %s" % self.op.instance_name
4140 self.beparams = getattr(self.op, "beparams", {})
4142 if not isinstance(self.beparams, dict):
4143 raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4144 " dict" % (type(self.beparams), ),
4146 # fill the beparams dict
4147 utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
4148 self.op.beparams = self.beparams
4151 self.hvparams = getattr(self.op, "hvparams", {})
4153 if not isinstance(self.hvparams, dict):
4154 raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4155 " dict" % (type(self.hvparams), ),
4158 # check hypervisor parameter syntax (locally)
4159 cluster = self.cfg.GetClusterInfo()
4160 utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
4161 filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
4163 filled_hvp.update(self.hvparams)
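# note: the hvparams supplied with the opcode act as temporary overrides on
# top of the cluster/instance defaults filled in just above, and the merged
# result is what gets validated below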
4164 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4165 hv_type.CheckParameterSyntax(filled_hvp)
4166 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4167 self.op.hvparams = self.hvparams
4169 _CheckNodeOnline(self, instance.primary_node)
4171 bep = self.cfg.GetClusterInfo().FillBE(instance)
4172 # check bridges existence
4173 _CheckInstanceBridgesExist(self, instance)
4175 remote_info = self.rpc.call_instance_info(instance.primary_node,
4177 instance.hypervisor)
4178 remote_info.Raise("Error checking node %s" % instance.primary_node,
4179 prereq=True, ecode=errors.ECODE_ENVIRON)
4180 if not remote_info.payload: # not running already
4181 _CheckNodeFreeMemory(self, instance.primary_node,
4182 "starting instance %s" % instance.name,
4183 bep[constants.BE_MEMORY], instance.hypervisor)
4185 def Exec(self, feedback_fn):
4186 """Start the instance.
4189 instance = self.instance
4190 force = self.op.force
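# note: the administrative 'up' state is recorded first; if the actual start
# below fails, the disks are shut down again and an OpExecError is raised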
4192 self.cfg.MarkInstanceUp(instance.name)
4194 node_current = instance.primary_node
4196 _StartInstanceDisks(self, instance, force)
4198 result = self.rpc.call_instance_start(node_current, instance,
4199 self.hvparams, self.beparams)
4200 msg = result.fail_msg
4202 _ShutdownInstanceDisks(self, instance)
4203 raise errors.OpExecError("Could not start instance: %s" % msg)
4206 class LURebootInstance(LogicalUnit):
4207 """Reboot an instance.
4210 HPATH = "instance-reboot"
4211 HTYPE = constants.HTYPE_INSTANCE
4212 _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
4215 def CheckArguments(self):
4216 """Check the arguments.
4219 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4220 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4222 def ExpandNames(self):
4223 if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
4224 constants.INSTANCE_REBOOT_HARD,
4225 constants.INSTANCE_REBOOT_FULL]:
4226 raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
4227 (constants.INSTANCE_REBOOT_SOFT,
4228 constants.INSTANCE_REBOOT_HARD,
4229 constants.INSTANCE_REBOOT_FULL))
4230 self._ExpandAndLockInstance()
4232 def BuildHooksEnv(self):
4235 This runs on master, primary and secondary nodes of the instance.
4239 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4240 "REBOOT_TYPE": self.op.reboot_type,
4241 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4243 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4244 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4247 def CheckPrereq(self):
4248 """Check prerequisites.
4250 This checks that the instance is in the cluster.
4253 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4254 assert self.instance is not None, \
4255 "Cannot retrieve locked instance %s" % self.op.instance_name
4257 _CheckNodeOnline(self, instance.primary_node)
4259 # check bridges existence
4260 _CheckInstanceBridgesExist(self, instance)
4262 def Exec(self, feedback_fn):
4263 """Reboot the instance.
4266 instance = self.instance
4267 ignore_secondaries = self.op.ignore_secondaries
4268 reboot_type = self.op.reboot_type
4270 node_current = instance.primary_node
4272 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4273 constants.INSTANCE_REBOOT_HARD]:
4274 for disk in instance.disks:
4275 self.cfg.SetDiskID(disk, node_current)
4276 result = self.rpc.call_instance_reboot(node_current, instance,
4278 self.shutdown_timeout)
4279 result.Raise("Could not reboot instance")
4281 result = self.rpc.call_instance_shutdown(node_current, instance,
4282 self.shutdown_timeout)
4283 result.Raise("Could not shutdown instance for full reboot")
4284 _ShutdownInstanceDisks(self, instance)
4285 _StartInstanceDisks(self, instance, ignore_secondaries)
4286 result = self.rpc.call_instance_start(node_current, instance, None, None)
4287 msg = result.fail_msg
4289 _ShutdownInstanceDisks(self, instance)
4290 raise errors.OpExecError("Could not start instance for"
4291 " full reboot: %s" % msg)
4293 self.cfg.MarkInstanceUp(instance.name)
4296 class LUShutdownInstance(LogicalUnit):
4297 """Shutdown an instance.
4300 HPATH = "instance-stop"
4301 HTYPE = constants.HTYPE_INSTANCE
4302 _OP_REQP = ["instance_name"]
4305 def CheckArguments(self):
4306 """Check the arguments.
4309 self.timeout = getattr(self.op, "timeout",
4310 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4312 def ExpandNames(self):
4313 self._ExpandAndLockInstance()
4315 def BuildHooksEnv(self):
4318 This runs on master, primary and secondary nodes of the instance.
4321 env = _BuildInstanceHookEnvByObject(self, self.instance)
4322 env["TIMEOUT"] = self.timeout
4323 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4326 def CheckPrereq(self):
4327 """Check prerequisites.
4329 This checks that the instance is in the cluster.
4332 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4333 assert self.instance is not None, \
4334 "Cannot retrieve locked instance %s" % self.op.instance_name
4335 _CheckNodeOnline(self, self.instance.primary_node)
4337 def Exec(self, feedback_fn):
4338 """Shutdown the instance.
4341 instance = self.instance
4342 node_current = instance.primary_node
4343 timeout = self.timeout
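# note: the administrative 'down' state is recorded before the shutdown RPC;
# a failure of the RPC below only results in a warning, and the disks are
# shut down regardless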
4344 self.cfg.MarkInstanceDown(instance.name)
4345 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4346 msg = result.fail_msg
4348 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4350 _ShutdownInstanceDisks(self, instance)
4353 class LUReinstallInstance(LogicalUnit):
4354 """Reinstall an instance.
4357 HPATH = "instance-reinstall"
4358 HTYPE = constants.HTYPE_INSTANCE
4359 _OP_REQP = ["instance_name"]
4362 def ExpandNames(self):
4363 self._ExpandAndLockInstance()
4365 def BuildHooksEnv(self):
4368 This runs on master, primary and secondary nodes of the instance.
4371 env = _BuildInstanceHookEnvByObject(self, self.instance)
4372 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4375 def CheckPrereq(self):
4376 """Check prerequisites.
4378 This checks that the instance is in the cluster and is not running.
4381 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4382 assert instance is not None, \
4383 "Cannot retrieve locked instance %s" % self.op.instance_name
4384 _CheckNodeOnline(self, instance.primary_node)
4386 if instance.disk_template == constants.DT_DISKLESS:
4387 raise errors.OpPrereqError("Instance '%s' has no disks" %
4388 self.op.instance_name,
4390 _CheckInstanceDown(self, instance, "cannot reinstall")
4392 self.op.os_type = getattr(self.op, "os_type", None)
4393 self.op.force_variant = getattr(self.op, "force_variant", False)
4394 if self.op.os_type is not None:
4396 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4397 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4399 self.instance = instance
4401 def Exec(self, feedback_fn):
4402 """Reinstall the instance.
4405 inst = self.instance
4407 if self.op.os_type is not None:
4408 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4409 inst.os = self.op.os_type
4410 self.cfg.Update(inst, feedback_fn)
4412 _StartInstanceDisks(self, inst, None)
4414 feedback_fn("Running the instance OS create scripts...")
4415 # FIXME: pass debug option from opcode to backend
4416 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4417 self.op.debug_level)
4418 result.Raise("Could not install OS for instance %s on node %s" %
4419 (inst.name, inst.primary_node))
4421 _ShutdownInstanceDisks(self, inst)
4424 class LURecreateInstanceDisks(LogicalUnit):
4425 """Recreate an instance's missing disks.
4428 HPATH = "instance-recreate-disks"
4429 HTYPE = constants.HTYPE_INSTANCE
4430 _OP_REQP = ["instance_name", "disks"]
4433 def CheckArguments(self):
4434 """Check the arguments.
4437 if not isinstance(self.op.disks, list):
4438 raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4439 for item in self.op.disks:
4440 if (not isinstance(item, int) or
4442 raise errors.OpPrereqError("Invalid disk specification '%s'" %
4443 str(item), errors.ECODE_INVAL)
4445 def ExpandNames(self):
4446 self._ExpandAndLockInstance()
4448 def BuildHooksEnv(self):
4451 This runs on master, primary and secondary nodes of the instance.
4454 env = _BuildInstanceHookEnvByObject(self, self.instance)
4455 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4458 def CheckPrereq(self):
4459 """Check prerequisites.
4461 This checks that the instance is in the cluster and is not running.
4464 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4465 assert instance is not None, \
4466 "Cannot retrieve locked instance %s" % self.op.instance_name
4467 _CheckNodeOnline(self, instance.primary_node)
4469 if instance.disk_template == constants.DT_DISKLESS:
4470 raise errors.OpPrereqError("Instance '%s' has no disks" %
4471 self.op.instance_name, errors.ECODE_INVAL)
4472 _CheckInstanceDown(self, instance, "cannot recreate disks")
4474 if not self.op.disks:
4475 self.op.disks = range(len(instance.disks))
4477 for idx in self.op.disks:
4478 if idx >= len(instance.disks):
4479 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4482 self.instance = instance
4484 def Exec(self, feedback_fn):
4485 """Recreate the disks.
4489 for idx, _ in enumerate(self.instance.disks):
4490 if idx not in self.op.disks: # disk idx has not been passed in
4494 _CreateDisks(self, self.instance, to_skip=to_skip)
4497 class LURenameInstance(LogicalUnit):
4498 """Rename an instance.
4501 HPATH = "instance-rename"
4502 HTYPE = constants.HTYPE_INSTANCE
4503 _OP_REQP = ["instance_name", "new_name"]
4505 def BuildHooksEnv(self):
4508 This runs on master, primary and secondary nodes of the instance.
4511 env = _BuildInstanceHookEnvByObject(self, self.instance)
4512 env["INSTANCE_NEW_NAME"] = self.op.new_name
4513 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4516 def CheckPrereq(self):
4517 """Check prerequisites.
4519 This checks that the instance is in the cluster and is not running.
4522 self.op.instance_name = _ExpandInstanceName(self.cfg,
4523 self.op.instance_name)
4524 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4525 assert instance is not None
4526 _CheckNodeOnline(self, instance.primary_node)
4527 _CheckInstanceDown(self, instance, "cannot rename")
4528 self.instance = instance
4530 # new name verification
4531 name_info = utils.GetHostInfo(self.op.new_name)
4533 self.op.new_name = new_name = name_info.name
4534 instance_list = self.cfg.GetInstanceList()
4535 if new_name in instance_list:
4536 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4537 new_name, errors.ECODE_EXISTS)
4539 if not getattr(self.op, "ignore_ip", False):
4540 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4541 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4542 (name_info.ip, new_name),
4543 errors.ECODE_NOTUNIQUE)
4546 def Exec(self, feedback_fn):
4547 """Reinstall the instance.
4550 inst = self.instance
4551 old_name = inst.name
4553 if inst.disk_template == constants.DT_FILE:
4554 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4556 self.cfg.RenameInstance(inst.name, self.op.new_name)
4557 # Change the instance lock. This is definitely safe while we hold the BGL
4558 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4559 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4561 # re-read the instance from the configuration after rename
4562 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4564 if inst.disk_template == constants.DT_FILE:
4565 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4566 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4567 old_file_storage_dir,
4568 new_file_storage_dir)
4569 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4570 " (but the instance has been renamed in Ganeti)" %
4571 (inst.primary_node, old_file_storage_dir,
4572 new_file_storage_dir))
4574 _StartInstanceDisks(self, inst, None)
4576 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4577 old_name, self.op.debug_level)
4578 msg = result.fail_msg
4580 msg = ("Could not run OS rename script for instance %s on node %s"
4581 " (but the instance has been renamed in Ganeti): %s" %
4582 (inst.name, inst.primary_node, msg))
4583 self.proc.LogWarning(msg)
4585 _ShutdownInstanceDisks(self, inst)
4588 class LURemoveInstance(LogicalUnit):
4589 """Remove an instance.
4592 HPATH = "instance-remove"
4593 HTYPE = constants.HTYPE_INSTANCE
4594 _OP_REQP = ["instance_name", "ignore_failures"]
4597 def CheckArguments(self):
4598 """Check the arguments.
4601 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4602 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4604 def ExpandNames(self):
4605 self._ExpandAndLockInstance()
4606 self.needed_locks[locking.LEVEL_NODE] = []
4607 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4609 def DeclareLocks(self, level):
4610 if level == locking.LEVEL_NODE:
4611 self._LockInstancesNodes()
4613 def BuildHooksEnv(self):
4616 This runs on master, primary and secondary nodes of the instance.
4619 env = _BuildInstanceHookEnvByObject(self, self.instance)
4620 env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4621 nl = [self.cfg.GetMasterNode()]
4622 nl_post = list(self.instance.all_nodes) + nl
4623 return env, nl, nl_post
4625 def CheckPrereq(self):
4626 """Check prerequisites.
4628 This checks that the instance is in the cluster.
4631 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4632 assert self.instance is not None, \
4633 "Cannot retrieve locked instance %s" % self.op.instance_name
4635 def Exec(self, feedback_fn):
4636 """Remove the instance.
4639 instance = self.instance
4640 logging.info("Shutting down instance %s on node %s",
4641 instance.name, instance.primary_node)
4643 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4644 self.shutdown_timeout)
4645 msg = result.fail_msg
4647 if self.op.ignore_failures:
4648 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4650 raise errors.OpExecError("Could not shutdown instance %s on"
4652 (instance.name, instance.primary_node, msg))
4654 logging.info("Removing block devices for instance %s", instance.name)
4656 if not _RemoveDisks(self, instance):
4657 if self.op.ignore_failures:
4658 feedback_fn("Warning: can't remove instance's disks")
4660 raise errors.OpExecError("Can't remove instance's disks")
4662 logging.info("Removing instance %s out of cluster config", instance.name)
4664 self.cfg.RemoveInstance(instance.name)
4665 self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4668 class LUQueryInstances(NoHooksLU):
4669 """Logical unit for querying instances.
4672 # pylint: disable-msg=W0142
4673 _OP_REQP = ["output_fields", "names", "use_locking"]
4675 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4676 "serial_no", "ctime", "mtime", "uuid"]
4677 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4679 "disk_template", "ip", "mac", "bridge",
4680 "nic_mode", "nic_link",
4681 "sda_size", "sdb_size", "vcpus", "tags",
4682 "network_port", "beparams",
4683 r"(disk)\.(size)/([0-9]+)",
4684 r"(disk)\.(sizes)", "disk_usage",
4685 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4686 r"(nic)\.(bridge)/([0-9]+)",
4687 r"(nic)\.(macs|ips|modes|links|bridges)",
4688 r"(disk|nic)\.(count)",
4690 ] + _SIMPLE_FIELDS +
4692 for name in constants.HVS_PARAMETERS
4693 if name not in constants.HVC_GLOBALS] +
4695 for name in constants.BES_PARAMETERS])
4696 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
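# note: the regex-style static fields above, e.g. r"(disk)\.(size)/([0-9]+)",
# match names such as "disk.size/0"; Exec below relies on the capture groups
# returned by st_match.groups() to dispatch on them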
4699 def ExpandNames(self):
4700 _CheckOutputFields(static=self._FIELDS_STATIC,
4701 dynamic=self._FIELDS_DYNAMIC,
4702 selected=self.op.output_fields)
4704 self.needed_locks = {}
4705 self.share_locks[locking.LEVEL_INSTANCE] = 1
4706 self.share_locks[locking.LEVEL_NODE] = 1
4709 self.wanted = _GetWantedInstances(self, self.op.names)
4711 self.wanted = locking.ALL_SET
4713 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4714 self.do_locking = self.do_node_query and self.op.use_locking
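# note: live (node) queries, and therefore node locks, are only needed when a
# non-static output field was requested and the caller asked for locking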
4716 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4717 self.needed_locks[locking.LEVEL_NODE] = []
4718 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4720 def DeclareLocks(self, level):
4721 if level == locking.LEVEL_NODE and self.do_locking:
4722 self._LockInstancesNodes()
4724 def CheckPrereq(self):
4725 """Check prerequisites.
4730 def Exec(self, feedback_fn):
4731 """Computes the list of nodes and their attributes.
4734 # pylint: disable-msg=R0912
4735 # way too many branches here
4736 all_info = self.cfg.GetAllInstancesInfo()
4737 if self.wanted == locking.ALL_SET:
4738 # caller didn't specify instance names, so ordering is not important
4740 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4742 instance_names = all_info.keys()
4743 instance_names = utils.NiceSort(instance_names)
4745 # caller did specify names, so we must keep the ordering
4747 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4749 tgt_set = all_info.keys()
4750 missing = set(self.wanted).difference(tgt_set)
4752 raise errors.OpExecError("Some instances were removed before"
4753 " retrieving their data: %s" % missing)
4754 instance_names = self.wanted
4756 instance_list = [all_info[iname] for iname in instance_names]
4758 # begin data gathering
4760 nodes = frozenset([inst.primary_node for inst in instance_list])
4761 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4765 if self.do_node_query:
4767 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4769 result = node_data[name]
4771 # offline nodes will be in both lists
4772 off_nodes.append(name)
4774 bad_nodes.append(name)
4777 live_data.update(result.payload)
4778 # else no instance is alive
4780 live_data = dict([(name, {}) for name in instance_names])
4782 # end data gathering
4787 cluster = self.cfg.GetClusterInfo()
4788 for instance in instance_list:
4790 i_hv = cluster.FillHV(instance, skip_globals=True)
4791 i_be = cluster.FillBE(instance)
4792 i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4793 nic.nicparams) for nic in instance.nics]
4794 for field in self.op.output_fields:
4795 st_match = self._FIELDS_STATIC.Matches(field)
4796 if field in self._SIMPLE_FIELDS:
4797 val = getattr(instance, field)
4798 elif field == "pnode":
4799 val = instance.primary_node
4800 elif field == "snodes":
4801 val = list(instance.secondary_nodes)
4802 elif field == "admin_state":
4803 val = instance.admin_up
4804 elif field == "oper_state":
4805 if instance.primary_node in bad_nodes:
4808 val = bool(live_data.get(instance.name))
4809 elif field == "status":
4810 if instance.primary_node in off_nodes:
4811 val = "ERROR_nodeoffline"
4812 elif instance.primary_node in bad_nodes:
4813 val = "ERROR_nodedown"
4815 running = bool(live_data.get(instance.name))
4817 if instance.admin_up:
4822 if instance.admin_up:
4826 elif field == "oper_ram":
4827 if instance.primary_node in bad_nodes:
4829 elif instance.name in live_data:
4830 val = live_data[instance.name].get("memory", "?")
4833 elif field == "vcpus":
4834 val = i_be[constants.BE_VCPUS]
4835 elif field == "disk_template":
4836 val = instance.disk_template
4839 val = instance.nics[0].ip
4842 elif field == "nic_mode":
4844 val = i_nicp[0][constants.NIC_MODE]
4847 elif field == "nic_link":
4849 val = i_nicp[0][constants.NIC_LINK]
4852 elif field == "bridge":
4853 if (instance.nics and
4854 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4855 val = i_nicp[0][constants.NIC_LINK]
4858 elif field == "mac":
4860 val = instance.nics[0].mac
4863 elif field == "sda_size" or field == "sdb_size":
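# map the 'sda'/'sdb' field names to disk indices 0 and 1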
4864 idx = ord(field[2]) - ord('a')
4866 val = instance.FindDisk(idx).size
4867 except errors.OpPrereqError:
4869 elif field == "disk_usage": # total disk usage per node
4870 disk_sizes = [{'size': disk.size} for disk in instance.disks]
4871 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4872 elif field == "tags":
4873 val = list(instance.GetTags())
4874 elif field == "hvparams":
4876 elif (field.startswith(HVPREFIX) and
4877 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4878 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4879 val = i_hv.get(field[len(HVPREFIX):], None)
4880 elif field == "beparams":
4882 elif (field.startswith(BEPREFIX) and
4883 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4884 val = i_be.get(field[len(BEPREFIX):], None)
4885 elif st_match and st_match.groups():
4886 # matches a variable list
4887 st_groups = st_match.groups()
4888 if st_groups and st_groups[0] == "disk":
4889 if st_groups[1] == "count":
4890 val = len(instance.disks)
4891 elif st_groups[1] == "sizes":
4892 val = [disk.size for disk in instance.disks]
4893 elif st_groups[1] == "size":
4895 val = instance.FindDisk(st_groups[2]).size
4896 except errors.OpPrereqError:
4899 assert False, "Unhandled disk parameter"
4900 elif st_groups[0] == "nic":
4901 if st_groups[1] == "count":
4902 val = len(instance.nics)
4903 elif st_groups[1] == "macs":
4904 val = [nic.mac for nic in instance.nics]
4905 elif st_groups[1] == "ips":
4906 val = [nic.ip for nic in instance.nics]
4907 elif st_groups[1] == "modes":
4908 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4909 elif st_groups[1] == "links":
4910 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4911 elif st_groups[1] == "bridges":
4914 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4915 val.append(nicp[constants.NIC_LINK])
4920 nic_idx = int(st_groups[2])
4921 if nic_idx >= len(instance.nics):
4924 if st_groups[1] == "mac":
4925 val = instance.nics[nic_idx].mac
4926 elif st_groups[1] == "ip":
4927 val = instance.nics[nic_idx].ip
4928 elif st_groups[1] == "mode":
4929 val = i_nicp[nic_idx][constants.NIC_MODE]
4930 elif st_groups[1] == "link":
4931 val = i_nicp[nic_idx][constants.NIC_LINK]
4932 elif st_groups[1] == "bridge":
4933 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4934 if nic_mode == constants.NIC_MODE_BRIDGED:
4935 val = i_nicp[nic_idx][constants.NIC_LINK]
4939 assert False, "Unhandled NIC parameter"
4941 assert False, ("Declared but unhandled variable parameter '%s'" %
4944 assert False, "Declared but unhandled parameter '%s'" % field
4951 class LUFailoverInstance(LogicalUnit):
4952 """Failover an instance.
4955 HPATH = "instance-failover"
4956 HTYPE = constants.HTYPE_INSTANCE
4957 _OP_REQP = ["instance_name", "ignore_consistency"]
4960 def CheckArguments(self):
4961 """Check the arguments.
4964 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4965 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4967 def ExpandNames(self):
4968 self._ExpandAndLockInstance()
4969 self.needed_locks[locking.LEVEL_NODE] = []
4970 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4972 def DeclareLocks(self, level):
4973 if level == locking.LEVEL_NODE:
4974 self._LockInstancesNodes()
4976 def BuildHooksEnv(self):
4979 This runs on master, primary and secondary nodes of the instance.
4982 instance = self.instance
4983 source_node = instance.primary_node
4984 target_node = instance.secondary_nodes[0]
4986 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4987 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4988 "OLD_PRIMARY": source_node,
4989 "OLD_SECONDARY": target_node,
4990 "NEW_PRIMARY": target_node,
4991 "NEW_SECONDARY": source_node,
4993 env.update(_BuildInstanceHookEnvByObject(self, instance))
4994 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4996 nl_post.append(source_node)
4997 return env, nl, nl_post
4999 def CheckPrereq(self):
5000 """Check prerequisites.
5002 This checks that the instance is in the cluster.
5005 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5006 assert self.instance is not None, \
5007 "Cannot retrieve locked instance %s" % self.op.instance_name
5009 bep = self.cfg.GetClusterInfo().FillBE(instance)
5010 if instance.disk_template not in constants.DTS_NET_MIRROR:
5011 raise errors.OpPrereqError("Instance's disk layout is not"
5012 " network mirrored, cannot failover.",
5015 secondary_nodes = instance.secondary_nodes
5016 if not secondary_nodes:
5017 raise errors.ProgrammerError("no secondary node but using "
5018 "a mirrored disk template")
5020 target_node = secondary_nodes[0]
5021 _CheckNodeOnline(self, target_node)
5022 _CheckNodeNotDrained(self, target_node)
5023 if instance.admin_up:
5024 # check memory requirements on the secondary node
5025 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5026 instance.name, bep[constants.BE_MEMORY],
5027 instance.hypervisor)
5029 self.LogInfo("Not checking memory on the secondary node as"
5030 " instance will not be started")
5032 # check bridge existence
5033 _CheckInstanceBridgesExist(self, instance, node=target_node)
5035 def Exec(self, feedback_fn):
5036 """Failover an instance.
5038 The failover is done by shutting it down on its present node and
5039 starting it on the secondary.
5042 instance = self.instance
5044 source_node = instance.primary_node
5045 target_node = instance.secondary_nodes[0]
5047 if instance.admin_up:
5048 feedback_fn("* checking disk consistency between source and target")
5049 for dev in instance.disks:
5050 # for drbd, these are drbd over lvm
5051 if not _CheckDiskConsistency(self, dev, target_node, False):
5052 if not self.op.ignore_consistency:
5053 raise errors.OpExecError("Disk %s is degraded on target node,"
5054 " aborting failover." % dev.iv_name)
5056 feedback_fn("* not checking disk consistency as instance is not running")
5058 feedback_fn("* shutting down instance on source node")
5059 logging.info("Shutting down instance %s on node %s",
5060 instance.name, source_node)
5062 result = self.rpc.call_instance_shutdown(source_node, instance,
5063 self.shutdown_timeout)
5064 msg = result.fail_msg
5066 if self.op.ignore_consistency:
5067 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5068 " Proceeding anyway. Please make sure node"
5069 " %s is down. Error details: %s",
5070 instance.name, source_node, source_node, msg)
5072 raise errors.OpExecError("Could not shutdown instance %s on"
5074 (instance.name, source_node, msg))
5076 feedback_fn("* deactivating the instance's disks on source node")
5077 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5078 raise errors.OpExecError("Can't shut down the instance's disks.")
5080 instance.primary_node = target_node
5081 # distribute new instance config to the other nodes
5082 self.cfg.Update(instance, feedback_fn)
5084 # Only start the instance if it's marked as up
5085 if instance.admin_up:
5086 feedback_fn("* activating the instance's disks on target node")
5087 logging.info("Starting instance %s on node %s",
5088 instance.name, target_node)
5090 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5091 ignore_secondaries=True)
5093 _ShutdownInstanceDisks(self, instance)
5094 raise errors.OpExecError("Can't activate the instance's disks")
5096 feedback_fn("* starting the instance on the target node")
5097 result = self.rpc.call_instance_start(target_node, instance, None, None)
5098 msg = result.fail_msg
5100 _ShutdownInstanceDisks(self, instance)
5101 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5102 (instance.name, target_node, msg))
5105 class LUMigrateInstance(LogicalUnit):
5106 """Migrate an instance.
5108 This is migration without shutting the instance down, as opposed to
5109 failover, which is done with a shutdown.
5112 HPATH = "instance-migrate"
5113 HTYPE = constants.HTYPE_INSTANCE
5114 _OP_REQP = ["instance_name", "live", "cleanup"]
5118 def ExpandNames(self):
5119 self._ExpandAndLockInstance()
5121 self.needed_locks[locking.LEVEL_NODE] = []
5122 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5124 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5125 self.op.live, self.op.cleanup)
5126 self.tasklets = [self._migrater]
5128 def DeclareLocks(self, level):
5129 if level == locking.LEVEL_NODE:
5130 self._LockInstancesNodes()
5132 def BuildHooksEnv(self):
5135 This runs on master, primary and secondary nodes of the instance.
5138 instance = self._migrater.instance
5139 source_node = instance.primary_node
5140 target_node = instance.secondary_nodes[0]
5141 env = _BuildInstanceHookEnvByObject(self, instance)
5142 env["MIGRATE_LIVE"] = self.op.live
5143 env["MIGRATE_CLEANUP"] = self.op.cleanup
5145 "OLD_PRIMARY": source_node,
5146 "OLD_SECONDARY": target_node,
5147 "NEW_PRIMARY": target_node,
5148 "NEW_SECONDARY": source_node,
5150 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5152 nl_post.append(source_node)
5153 return env, nl, nl_post
5156 class LUMoveInstance(LogicalUnit):
5157 """Move an instance by data-copying.
5160 HPATH = "instance-move"
5161 HTYPE = constants.HTYPE_INSTANCE
5162 _OP_REQP = ["instance_name", "target_node"]
5165 def CheckArguments(self):
5166 """Check the arguments.
5169 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5170 constants.DEFAULT_SHUTDOWN_TIMEOUT)
5172 def ExpandNames(self):
5173 self._ExpandAndLockInstance()
5174 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5175 self.op.target_node = target_node
5176 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5177 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5179 def DeclareLocks(self, level):
5180 if level == locking.LEVEL_NODE:
5181 self._LockInstancesNodes(primary_only=True)
5183 def BuildHooksEnv(self):
5186 This runs on master, primary and secondary nodes of the instance.
5190 "TARGET_NODE": self.op.target_node,
5191 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5193 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5194 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5195 self.op.target_node]
5198 def CheckPrereq(self):
5199 """Check prerequisites.
5201 This checks that the instance is in the cluster.
5204 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5205 assert self.instance is not None, \
5206 "Cannot retrieve locked instance %s" % self.op.instance_name
5208 node = self.cfg.GetNodeInfo(self.op.target_node)
5209 assert node is not None, \
5210 "Cannot retrieve locked node %s" % self.op.target_node
5212 self.target_node = target_node = node.name
5214 if target_node == instance.primary_node:
5215 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5216 (instance.name, target_node),
5219 bep = self.cfg.GetClusterInfo().FillBE(instance)
5221 for idx, dsk in enumerate(instance.disks):
5222 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5223 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5224 " cannot copy" % idx, errors.ECODE_STATE)
5226 _CheckNodeOnline(self, target_node)
5227 _CheckNodeNotDrained(self, target_node)
5229 if instance.admin_up:
5230 # check memory requirements on the target node
5231 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5232 instance.name, bep[constants.BE_MEMORY],
5233 instance.hypervisor)
5235 self.LogInfo("Not checking memory on the secondary node as"
5236 " instance will not be started")
5238 # check bridge existence
5239 _CheckInstanceBridgesExist(self, instance, node=target_node)
5241 def Exec(self, feedback_fn):
5242 """Move an instance.
5244 The move is done by shutting it down on its present node, copying
5245 the data over (slow) and starting it on the new node.
5248 instance = self.instance
5250 source_node = instance.primary_node
5251 target_node = self.target_node
5253 self.LogInfo("Shutting down instance %s on source node %s",
5254 instance.name, source_node)
5256 result = self.rpc.call_instance_shutdown(source_node, instance,
5257 self.shutdown_timeout)
5258 msg = result.fail_msg
5260 if self.op.ignore_consistency:
5261 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5262 " Proceeding anyway. Please make sure node"
5263 " %s is down. Error details: %s",
5264 instance.name, source_node, source_node, msg)
5266 raise errors.OpExecError("Could not shutdown instance %s on"
5268 (instance.name, source_node, msg))
5270 # create the target disks
5272 _CreateDisks(self, instance, target_node=target_node)
5273 except errors.OpExecError:
5274 self.LogWarning("Device creation failed, reverting...")
5276 _RemoveDisks(self, instance, target_node=target_node)
5278 self.cfg.ReleaseDRBDMinors(instance.name)
5281 cluster_name = self.cfg.GetClusterInfo().cluster_name
5284 # activate, get path, copy the data over
5285 for idx, disk in enumerate(instance.disks):
5286 self.LogInfo("Copying data for disk %d", idx)
5287 result = self.rpc.call_blockdev_assemble(target_node, disk,
5288 instance.name, True)
5290 self.LogWarning("Can't assemble newly created disk %d: %s",
5291 idx, result.fail_msg)
5292 errs.append(result.fail_msg)
5294 dev_path = result.payload
5295 result = self.rpc.call_blockdev_export(source_node, disk,
5296 target_node, dev_path,
5299 self.LogWarning("Can't copy data over for disk %d: %s",
5300 idx, result.fail_msg)
5301 errs.append(result.fail_msg)
5305 self.LogWarning("Some disks failed to copy, aborting")
5307 _RemoveDisks(self, instance, target_node=target_node)
5309 self.cfg.ReleaseDRBDMinors(instance.name)
5310 raise errors.OpExecError("Errors during disk copy: %s" %
5313 instance.primary_node = target_node
5314 self.cfg.Update(instance, feedback_fn)
5316 self.LogInfo("Removing the disks on the original node")
5317 _RemoveDisks(self, instance, target_node=source_node)
5319 # Only start the instance if it's marked as up
5320 if instance.admin_up:
5321 self.LogInfo("Starting instance %s on node %s",
5322 instance.name, target_node)
5324 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5325 ignore_secondaries=True)
5327 _ShutdownInstanceDisks(self, instance)
5328 raise errors.OpExecError("Can't activate the instance's disks")
5330 result = self.rpc.call_instance_start(target_node, instance, None, None)
5331 msg = result.fail_msg
5333 _ShutdownInstanceDisks(self, instance)
5334 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5335 (instance.name, target_node, msg))
5338 class LUMigrateNode(LogicalUnit):
5339 """Migrate all instances from a node.
5342 HPATH = "node-migrate"
5343 HTYPE = constants.HTYPE_NODE
5344 _OP_REQP = ["node_name", "live"]
5347 def ExpandNames(self):
5348 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5350 self.needed_locks = {
5351 locking.LEVEL_NODE: [self.op.node_name],
5354 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5356 # Create tasklets for migrating instances for all instances on this node
5360 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5361 logging.debug("Migrating instance %s", inst.name)
5362 names.append(inst.name)
5364 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5366 self.tasklets = tasklets
5368 # Declare instance locks
5369 self.needed_locks[locking.LEVEL_INSTANCE] = names
5371 def DeclareLocks(self, level):
5372 if level == locking.LEVEL_NODE:
5373 self._LockInstancesNodes()
5375 def BuildHooksEnv(self):
5378 This runs on the master, the primary and all the secondaries.
5382 "NODE_NAME": self.op.node_name,
5385 nl = [self.cfg.GetMasterNode()]
5387 return (env, nl, nl)
5390 class TLMigrateInstance(Tasklet):
5391 def __init__(self, lu, instance_name, live, cleanup):
5392 """Initializes this class.
5395 Tasklet.__init__(self, lu)
5398 self.instance_name = instance_name
5400 self.cleanup = cleanup
5402 def CheckPrereq(self):
5403 """Check prerequisites.
5405 This checks that the instance is in the cluster.
5408 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5409 instance = self.cfg.GetInstanceInfo(instance_name)
5410 assert instance is not None
5412 if instance.disk_template != constants.DT_DRBD8:
5413 raise errors.OpPrereqError("Instance's disk layout is not"
5414 " drbd8, cannot migrate.", errors.ECODE_STATE)
5416 secondary_nodes = instance.secondary_nodes
5417 if not secondary_nodes:
5418 raise errors.ConfigurationError("No secondary node but using"
5419 " drbd8 disk template")
5421 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5423 target_node = secondary_nodes[0]
5424 # check memory requirements on the secondary node
5425 _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5426 instance.name, i_be[constants.BE_MEMORY],
5427 instance.hypervisor)
5429 # check bridge existence
5430 _CheckInstanceBridgesExist(self, instance, node=target_node)
5432 if not self.cleanup:
5433 _CheckNodeNotDrained(self, target_node)
5434 result = self.rpc.call_instance_migratable(instance.primary_node,
5436 result.Raise("Can't migrate, please use failover",
5437 prereq=True, ecode=errors.ECODE_STATE)
5439 self.instance = instance
5441 def _WaitUntilSync(self):
5442 """Poll with custom rpc for disk sync.
5444 This uses our own step-based rpc call.
5447 self.feedback_fn("* wait until resync is done")
5451 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5453 self.instance.disks)
5455 for node, nres in result.items():
5456 nres.Raise("Cannot resync disks on node %s" % node)
5457 node_done, node_percent = nres.payload
5458 all_done = all_done and node_done
5459 if node_percent is not None:
5460 min_percent = min(min_percent, node_percent)
5462 if min_percent < 100:
5463 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5466 def _EnsureSecondary(self, node):
5467 """Demote a node to secondary.
5470 self.feedback_fn("* switching node %s to secondary mode" % node)
5472 for dev in self.instance.disks:
5473 self.cfg.SetDiskID(dev, node)
5475 result = self.rpc.call_blockdev_close(node, self.instance.name,
5476 self.instance.disks)
5477 result.Raise("Cannot change disk to secondary on node %s" % node)
5479 def _GoStandalone(self):
5480 """Disconnect from the network.
5483 self.feedback_fn("* changing into standalone mode")
5484 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5485 self.instance.disks)
5486 for node, nres in result.items():
5487 nres.Raise("Cannot disconnect disks node %s" % node)
5489 def _GoReconnect(self, multimaster):
5490 """Reconnect to the network.
5496 msg = "single-master"
5497 self.feedback_fn("* changing disks into %s mode" % msg)
5498 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5499 self.instance.disks,
5500 self.instance.name, multimaster)
5501 for node, nres in result.items():
5502 nres.Raise("Cannot change disks config on node %s" % node)
5504 def _ExecCleanup(self):
5505 """Try to cleanup after a failed migration.
5507 The cleanup is done by:
5508 - check that the instance is running only on one node
5509 (and update the config if needed)
5510 - change disks on its secondary node to secondary
5511 - wait until disks are fully synchronized
5512 - disconnect from the network
5513 - change disks into single-master mode
5514 - wait again until disks are fully synchronized
5517 instance = self.instance
5518 target_node = self.target_node
5519 source_node = self.source_node
5521 # check running on only one node
5522 self.feedback_fn("* checking where the instance actually runs"
5523 " (if this hangs, the hypervisor might be in"
5525 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5526 for node, result in ins_l.items():
5527 result.Raise("Can't contact node %s" % node)
5529 runningon_source = instance.name in ins_l[source_node].payload
5530 runningon_target = instance.name in ins_l[target_node].payload
5532 if runningon_source and runningon_target:
5533 raise errors.OpExecError("Instance seems to be running on two nodes,"
5534 " or the hypervisor is confused. You will have"
5535 " to ensure manually that it runs only on one"
5536 " and restart this operation.")
5538 if not (runningon_source or runningon_target):
5539 raise errors.OpExecError("Instance does not seem to be running at all."
5540 " In this case, it's safer to repair by"
5541 " running 'gnt-instance stop' to ensure disk"
5542 " shutdown, and then restarting it.")
5544 if runningon_target:
5545 # the migration has actually succeeded, we need to update the config
5546 self.feedback_fn("* instance running on secondary node (%s),"
5547 " updating config" % target_node)
5548 instance.primary_node = target_node
5549 self.cfg.Update(instance, self.feedback_fn)
5550 demoted_node = source_node
5552 self.feedback_fn("* instance confirmed to be running on its"
5553 " primary node (%s)" % source_node)
5554 demoted_node = target_node
5556 self._EnsureSecondary(demoted_node)
5558 self._WaitUntilSync()
5559 except errors.OpExecError:
5560 # we ignore errors here, since if the device is standalone, it
5561 # won't be able to sync
5563 self._GoStandalone()
5564 self._GoReconnect(False)
5565 self._WaitUntilSync()
5567 self.feedback_fn("* done")
5569 def _RevertDiskStatus(self):
5570 """Try to revert the disk status after a failed migration.
5573 target_node = self.target_node
5575 self._EnsureSecondary(target_node)
5576 self._GoStandalone()
5577 self._GoReconnect(False)
5578 self._WaitUntilSync()
5579 except errors.OpExecError, err:
5580 self.lu.LogWarning("Migration failed and I can't reconnect the"
5581 " drives: error '%s'\n"
5582 "Please look and recover the instance status" %
5585 def _AbortMigration(self):
5586 """Call the hypervisor code to abort a started migration.
5589 instance = self.instance
5590 target_node = self.target_node
5591 migration_info = self.migration_info
5593 abort_result = self.rpc.call_finalize_migration(target_node,
5597 abort_msg = abort_result.fail_msg
5599 logging.error("Aborting migration failed on target node %s: %s",
5600 target_node, abort_msg)
5601 # Don't raise an exception here, as we still have to try to revert the
5602 # disk status, even if this step failed.
5604 def _ExecMigration(self):
5605 """Migrate an instance.
5607 The migrate is done by:
5608 - change the disks into dual-master mode
5609 - wait until disks are fully synchronized again
5610 - migrate the instance
5611 - change disks on the new secondary node (the old primary) to secondary
5612 - wait until disks are fully synchronized
5613 - change disks into single-master mode
5616 instance = self.instance
5617 target_node = self.target_node
5618 source_node = self.source_node
5620 self.feedback_fn("* checking disk consistency between source and target")
5621 for dev in instance.disks:
5622 if not _CheckDiskConsistency(self, dev, target_node, False):
5623 raise errors.OpExecError("Disk %s is degraded or not fully"
5624 " synchronized on target node,"
5625 " aborting migrate." % dev.iv_name)
5627 # First get the migration information from the remote node
5628 result = self.rpc.call_migration_info(source_node, instance)
5629 msg = result.fail_msg
5631 log_err = ("Failed fetching source migration information from %s: %s" %
5633 logging.error(log_err)
5634 raise errors.OpExecError(log_err)
5636 self.migration_info = migration_info = result.payload
5638 # Then switch the disks to master/master mode
5639 self._EnsureSecondary(target_node)
5640 self._GoStandalone()
5641 self._GoReconnect(True)
5642 self._WaitUntilSync()
5644 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5645 result = self.rpc.call_accept_instance(target_node,
5648 self.nodes_ip[target_node])
5650 msg = result.fail_msg
5652 logging.error("Instance pre-migration failed, trying to revert"
5653 " disk status: %s", msg)
5654 self.feedback_fn("Pre-migration failed, aborting")
5655 self._AbortMigration()
5656 self._RevertDiskStatus()
5657 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5658 (instance.name, msg))
5660 self.feedback_fn("* migrating instance to %s" % target_node)
5662 result = self.rpc.call_instance_migrate(source_node, instance,
5663 self.nodes_ip[target_node],
5665 msg = result.fail_msg
5667 logging.error("Instance migration failed, trying to revert"
5668 " disk status: %s", msg)
5669 self.feedback_fn("Migration failed, aborting")
5670 self._AbortMigration()
5671 self._RevertDiskStatus()
5672 raise errors.OpExecError("Could not migrate instance %s: %s" %
5673 (instance.name, msg))
5676 instance.primary_node = target_node
5677 # distribute new instance config to the other nodes
5678 self.cfg.Update(instance, self.feedback_fn)
5680 result = self.rpc.call_finalize_migration(target_node,
5684 msg = result.fail_msg
5686 logging.error("Instance migration succeeded, but finalization failed:"
5688 raise errors.OpExecError("Could not finalize instance migration: %s" %
5691 self._EnsureSecondary(source_node)
5692 self._WaitUntilSync()
5693 self._GoStandalone()
5694 self._GoReconnect(False)
5695 self._WaitUntilSync()
5697 self.feedback_fn("* done")
5699 def Exec(self, feedback_fn):
5700 """Perform the migration.
5703 feedback_fn("Migrating instance %s" % self.instance.name)
5705 self.feedback_fn = feedback_fn
5707 self.source_node = self.instance.primary_node
5708 self.target_node = self.instance.secondary_nodes[0]
5709 self.all_nodes = [self.source_node, self.target_node]
5711 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5712 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5716 return self._ExecCleanup()
5718 return self._ExecMigration()
5721 def _CreateBlockDev(lu, node, instance, device, force_create,
5723 """Create a tree of block devices on a given node.
5725 If this device type has to be created on secondaries, create it and
5728 If not, just recurse to children keeping the same 'force' value.
5730 @param lu: the lu on whose behalf we execute
5731 @param node: the node on which to create the device
5732 @type instance: L{objects.Instance}
5733 @param instance: the instance which owns the device
5734 @type device: L{objects.Disk}
5735 @param device: the device to create
5736 @type force_create: boolean
5737 @param force_create: whether to force creation of this device; this
5738 will be changed to True whenever we find a device for which
5739 CreateOnSecondary() returns True
5740 @param info: the extra 'metadata' we should attach to the device
5741 (this will be represented as a LVM tag)
5742 @type force_open: boolean
5743 @param force_open: this parameter will be passed to the
5744 L{backend.BlockdevCreate} function where it specifies
5745 whether we run on primary or not, and it affects both
5746 the child assembly and the device own Open() execution
5749 if device.CreateOnSecondary():
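# note: device types that must also exist on the secondary nodes force the
# creation of this device and, via force_create, of its whole subtree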
5753 for child in device.children:
5754 _CreateBlockDev(lu, node, instance, child, force_create,
5757 if not force_create:
5760 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5763 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5764 """Create a single block device on a given node.
5766 This will not recurse over children of the device, so they must be
5769 @param lu: the lu on whose behalf we execute
5770 @param node: the node on which to create the device
5771 @type instance: L{objects.Instance}
5772 @param instance: the instance which owns the device
5773 @type device: L{objects.Disk}
5774 @param device: the device to create
5775 @param info: the extra 'metadata' we should attach to the device
5776 (this will be represented as a LVM tag)
5777 @type force_open: boolean
5778 @param force_open: this parameter will be passed to the
5779 L{backend.BlockdevCreate} function where it specifies
5780 whether we run on primary or not, and it affects both
5781 the child assembly and the device own Open() execution
5784 lu.cfg.SetDiskID(device, node)
5785 result = lu.rpc.call_blockdev_create(node, device, device.size,
5786 instance.name, force_open, info)
5787 result.Raise("Can't create block device %s on"
5788 " node %s for instance %s" % (device, node, instance.name))
5789 if device.physical_id is None:
5790 device.physical_id = result.payload
5793 def _GenerateUniqueNames(lu, exts):
5794 """Generate a suitable LV name.
5796 This will generate a unique logical volume name for each of the given extensions.
5801 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5802 results.append("%s%s" % (new_id, val))
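# Illustrative example (values assumed): for exts == [".disk0", ".disk1"] the
# result is a list of two names of the form "<unique-id>.disk0" and
# "<unique-id>.disk1", each built from its own freshly generated unique ID.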
5806 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5808 """Generate a drbd8 device complete with its children.
5811 port = lu.cfg.AllocatePort()
5812 vgname = lu.cfg.GetVGName()
5813 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5814 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5815 logical_id=(vgname, names[0]))
5816 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5817 logical_id=(vgname, names[1]))
5818 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5819 logical_id=(primary, secondary, port,
5822 children=[dev_data, dev_meta],
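# Editorial sketch of the object tree built above (field values illustrative):
#
#   Disk(LD_DRBD8, size=<size>,
#        logical_id=(primary, secondary, port, minor_p, minor_s, secret),
#        children=[Disk(LD_LV, size=<size>, logical_id=(vgname, names[0])),   # data
#                  Disk(LD_LV, size=128,    logical_id=(vgname, names[1]))])  # meta
#
# i.e. every DRBD8 disk is backed by a data LV of the requested size plus a
# 128 MB metadata LV, created on both of the nodes involved.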
5827 def _GenerateDiskTemplate(lu, template_name,
5828 instance_name, primary_node,
5829 secondary_nodes, disk_info,
5830 file_storage_dir, file_driver,
5832 """Generate the entire disk layout for a given template type.
5835 #TODO: compute space requirements
5837 vgname = lu.cfg.GetVGName()
5838 disk_count = len(disk_info)
5840 if template_name == constants.DT_DISKLESS:
5842 elif template_name == constants.DT_PLAIN:
5843 if len(secondary_nodes) != 0:
5844 raise errors.ProgrammerError("Wrong template configuration")
5846 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5847 for i in range(disk_count)])
5848 for idx, disk in enumerate(disk_info):
5849 disk_index = idx + base_index
5850 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5851 logical_id=(vgname, names[idx]),
5852 iv_name="disk/%d" % disk_index,
5854 disks.append(disk_dev)
5855 elif template_name == constants.DT_DRBD8:
5856 if len(secondary_nodes) != 1:
5857 raise errors.ProgrammerError("Wrong template configuration")
5858 remote_node = secondary_nodes[0]
5859 minors = lu.cfg.AllocateDRBDMinor(
5860 [primary_node, remote_node] * len(disk_info), instance_name)
5863 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5864 for i in range(disk_count)]):
5865 names.append(lv_prefix + "_data")
5866 names.append(lv_prefix + "_meta")
5867 for idx, disk in enumerate(disk_info):
5868 disk_index = idx + base_index
5869 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5870 disk["size"], names[idx*2:idx*2+2],
5871 "disk/%d" % disk_index,
5872 minors[idx*2], minors[idx*2+1])
5873 disk_dev.mode = disk["mode"]
5874 disks.append(disk_dev)
5875 elif template_name == constants.DT_FILE:
5876 if len(secondary_nodes) != 0:
5877 raise errors.ProgrammerError("Wrong template configuration")
5879 _RequireFileStorage()
5881 for idx, disk in enumerate(disk_info):
5882 disk_index = idx + base_index
5883 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5884 iv_name="disk/%d" % disk_index,
5885 logical_id=(file_driver,
5886 "%s/disk%d" % (file_storage_dir,
5889 disks.append(disk_dev)
5891 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
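# Illustrative example (values assumed): with template_name == DT_PLAIN and
# disk_info == [{"size": 1024, "mode": "rw"}, {"size": 2048, "mode": "rw"}],
# the result is two LD_LV disks in the cluster volume group, named
# "<unique-id>.disk0" / "<unique-id>.disk1" with iv_name "disk/0" / "disk/1".
# For DT_DRBD8 every entry instead becomes a full DRBD8 branch as built by
# _GenerateDRBD8Branch above, and for DT_DISKLESS no disks are generated.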
5895 def _GetInstanceInfoText(instance):
5896 """Compute that text that should be added to the disk's metadata.
5899 return "originstname+%s" % instance.name
5902 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5903 """Create all disks for an instance.
5905 This abstracts away some work from AddInstance.
5907 @type lu: L{LogicalUnit}
5908 @param lu: the logical unit on whose behalf we execute
5909 @type instance: L{objects.Instance}
5910 @param instance: the instance whose disks we should create
5912 @param to_skip: list of indices to skip
5913 @type target_node: string
5914 @param target_node: if passed, overrides the target node for creation
5916 @return: the success of the creation
5919 info = _GetInstanceInfoText(instance)
5920 if target_node is None:
5921 pnode = instance.primary_node
5922 all_nodes = instance.all_nodes
5927 if instance.disk_template == constants.DT_FILE:
5928 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5929 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5931 result.Raise("Failed to create directory '%s' on"
5932 " node %s" % (file_storage_dir, pnode))
5934 # Note: this needs to be kept in sync with adding of disks in
5935 # LUSetInstanceParams
5936 for idx, device in enumerate(instance.disks):
5937 if to_skip and idx in to_skip:
5939 logging.info("Creating volume %s for instance %s",
5940 device.iv_name, instance.name)
5942 for node in all_nodes:
5943 f_create = node == pnode
5944 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
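# Editorial sketch of the per-node flags used above (illustrative): for an
# instance with primary "node1" and secondary "node2" the inner loop issues
#
#   _CreateBlockDev(lu, "node1", instance, device, True,  info, True)
#   _CreateBlockDev(lu, "node2", instance, device, False, info, False)
#
# i.e. creation and opening are only forced on the primary node; on the
# secondary the decision is left to the device type (CreateOnSecondary).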
5947 def _RemoveDisks(lu, instance, target_node=None):
5948 """Remove all disks for an instance.
5950 This abstracts away some work from `AddInstance()` and
5951 `RemoveInstance()`. Note that in case some of the devices couldn't
5952 be removed, the removal will continue with the other ones (compare
5953 with `_CreateDisks()`).
5955 @type lu: L{LogicalUnit}
5956 @param lu: the logical unit on whose behalf we execute
5957 @type instance: L{objects.Instance}
5958 @param instance: the instance whose disks we should remove
5959 @type target_node: string
5960 @param target_node: used to override the node on which to remove the disks
5962 @return: the success of the removal
5965 logging.info("Removing block devices for instance %s", instance.name)
5968 for device in instance.disks:
5970 edata = [(target_node, device)]
5972 edata = device.ComputeNodeTree(instance.primary_node)
5973 for node, disk in edata:
5974 lu.cfg.SetDiskID(disk, node)
5975 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5977 lu.LogWarning("Could not remove block device %s on node %s,"
5978 " continuing anyway: %s", device.iv_name, node, msg)
5981 if instance.disk_template == constants.DT_FILE:
5982 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5986 tgt = instance.primary_node
5987 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5989 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5990 file_storage_dir, instance.primary_node, result.fail_msg)
5996 def _ComputeDiskSize(disk_template, disks):
5997 """Compute disk size requirements in the volume group
6000 # Required free disk space as a function of disk template and disk sizes
6002 constants.DT_DISKLESS: None,
6003 constants.DT_PLAIN: sum(d["size"] for d in disks),
6004 # 128 MB is added for DRBD metadata for each disk
6005 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6006 constants.DT_FILE: None,
6009 if disk_template not in req_size_dict:
6010 raise errors.ProgrammerError("Disk template '%s' size requirement"
6011 " is unknown" % disk_template)
6013 return req_size_dict[disk_template]
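# Worked example (illustrative): for two disks of 1024 MB and 2048 MB the
# required free space per node is
#
#   DT_PLAIN:  1024 + 2048                  == 3072 MB
#   DT_DRBD8:  (1024 + 128) + (2048 + 128)  == 3328 MB
#
# while DT_DISKLESS and DT_FILE have no volume group requirement (None).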
6016 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6017 """Hypervisor parameter validation.
6019 This function abstracts the hypervisor parameter validation to be
6020 used in both instance create and instance modify.
6022 @type lu: L{LogicalUnit}
6023 @param lu: the logical unit for which we check
6024 @type nodenames: list
6025 @param nodenames: the list of nodes on which we should check
6026 @type hvname: string
6027 @param hvname: the name of the hypervisor we should use
6028 @type hvparams: dict
6029 @param hvparams: the parameters which we need to check
6030 @raise errors.OpPrereqError: if the parameters are not valid
6033 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6036 for node in nodenames:
6040 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6043 class LUCreateInstance(LogicalUnit):
6044 """Create an instance.
6047 HPATH = "instance-add"
6048 HTYPE = constants.HTYPE_INSTANCE
6049 _OP_REQP = ["instance_name", "disks",
6051 "wait_for_sync", "ip_check", "nics",
6052 "hvparams", "beparams"]
6055 def CheckArguments(self):
6059 # set optional parameters to None if they don't exist
6060 for attr in ["pnode", "snode", "iallocator", "hypervisor",
6061 "disk_template", "identify_defaults"]:
6062 if not hasattr(self.op, attr):
6063 setattr(self.op, attr, None)
6065 # do not require name_check to ease forward/backward compatibility
6067 if not hasattr(self.op, "name_check"):
6068 self.op.name_check = True
6069 if not hasattr(self.op, "no_install"):
6070 self.op.no_install = False
6071 if self.op.no_install and self.op.start:
6072 self.LogInfo("No-installation mode selected, disabling startup")
6073 self.op.start = False
6074 # validate/normalize the instance name
6075 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6076 if self.op.ip_check and not self.op.name_check:
6077 # TODO: make the ip check more flexible and not depend on the name check
6078 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6080 # check disk information: either all adopt, or no adopt
6081 has_adopt = has_no_adopt = False
6082 for disk in self.op.disks:
6087 if has_adopt and has_no_adopt:
6088 raise errors.OpPrereqError("Either all disks are adopted or none is",
6091 if self.op.disk_template != constants.DT_PLAIN:
6092 raise errors.OpPrereqError("Disk adoption is only supported for the"
6093 " 'plain' disk template",
6095 if self.op.iallocator is not None:
6096 raise errors.OpPrereqError("Disk adoption not allowed with an"
6097 " iallocator script", errors.ECODE_INVAL)
6098 if self.op.mode == constants.INSTANCE_IMPORT:
6099 raise errors.OpPrereqError("Disk adoption not allowed for"
6100 " instance import", errors.ECODE_INVAL)
6102 self.adopt_disks = has_adopt
6104 # verify creation mode
6105 if self.op.mode not in (constants.INSTANCE_CREATE,
6106 constants.INSTANCE_IMPORT):
6107 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6108 self.op.mode, errors.ECODE_INVAL)
6110 # instance name verification
6111 if self.op.name_check:
6112 self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6113 self.op.instance_name = self.hostname1.name
6114 # used in CheckPrereq for ip ping check
6115 self.check_ip = self.hostname1.ip
6117 self.check_ip = None
6119 # file storage checks
6120 if (self.op.file_driver and
6121 not self.op.file_driver in constants.FILE_DRIVER):
6122 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6123 self.op.file_driver, errors.ECODE_INVAL)
6125 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6126 raise errors.OpPrereqError("File storage directory path not absolute",
6129 ### Node/iallocator related checks
6130 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6131 raise errors.OpPrereqError("One and only one of iallocator and primary"
6132 " node must be given",
6135 if self.op.mode == constants.INSTANCE_IMPORT:
6136 # On import force_variant must be True, because if we forced it at
6137 # initial install, our only chance when importing it back is that it
6139 self.op.force_variant = True
6141 if self.op.no_install:
6142 self.LogInfo("No-installation mode has no effect during import")
6144 else: # INSTANCE_CREATE
6145 if getattr(self.op, "os_type", None) is None:
6146 raise errors.OpPrereqError("No guest OS specified",
6148 self.op.force_variant = getattr(self.op, "force_variant", False)
6149 if self.op.disk_template is None:
6150 raise errors.OpPrereqError("No disk template specified",
6153 def ExpandNames(self):
6154 """ExpandNames for CreateInstance.
6156 Figure out the right locks for instance creation.
6159 self.needed_locks = {}
6161 instance_name = self.op.instance_name
6162 # this is just a preventive check, but someone might still add this
6163 # instance in the meantime, and creation will fail at lock-add time
6164 if instance_name in self.cfg.GetInstanceList():
6165 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6166 instance_name, errors.ECODE_EXISTS)
6168 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6170 if self.op.iallocator:
6171 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6173 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6174 nodelist = [self.op.pnode]
6175 if self.op.snode is not None:
6176 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6177 nodelist.append(self.op.snode)
6178 self.needed_locks[locking.LEVEL_NODE] = nodelist
6180 # in case of import lock the source node too
6181 if self.op.mode == constants.INSTANCE_IMPORT:
6182 src_node = getattr(self.op, "src_node", None)
6183 src_path = getattr(self.op, "src_path", None)
6185 if src_path is None:
6186 self.op.src_path = src_path = self.op.instance_name
6188 if src_node is None:
6189 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6190 self.op.src_node = None
6191 if os.path.isabs(src_path):
6192 raise errors.OpPrereqError("Importing an instance from an absolute"
6193 " path requires a source node option.",
6196 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6197 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6198 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6199 if not os.path.isabs(src_path):
6200 self.op.src_path = src_path = \
6201 utils.PathJoin(constants.EXPORT_DIR, src_path)
6203 def _RunAllocator(self):
6204 """Run the allocator based on input opcode.
6207 nics = [n.ToDict() for n in self.nics]
6208 ial = IAllocator(self.cfg, self.rpc,
6209 mode=constants.IALLOCATOR_MODE_ALLOC,
6210 name=self.op.instance_name,
6211 disk_template=self.op.disk_template,
6214 vcpus=self.be_full[constants.BE_VCPUS],
6215 mem_size=self.be_full[constants.BE_MEMORY],
6218 hypervisor=self.op.hypervisor,
6221 ial.Run(self.op.iallocator)
6224 raise errors.OpPrereqError("Can't compute nodes using"
6225 " iallocator '%s': %s" %
6226 (self.op.iallocator, ial.info),
6228 if len(ial.result) != ial.required_nodes:
6229 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6230 " of nodes (%s), required %s" %
6231 (self.op.iallocator, len(ial.result),
6232 ial.required_nodes), errors.ECODE_FAULT)
6233 self.op.pnode = ial.result[0]
6234 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6235 self.op.instance_name, self.op.iallocator,
6236 utils.CommaJoin(ial.result))
6237 if ial.required_nodes == 2:
6238 self.op.snode = ial.result[1]
6240 def BuildHooksEnv(self):
6243 This runs on the master, primary and secondary nodes of the instance.
6247 "ADD_MODE": self.op.mode,
6249 if self.op.mode == constants.INSTANCE_IMPORT:
6250 env["SRC_NODE"] = self.op.src_node
6251 env["SRC_PATH"] = self.op.src_path
6252 env["SRC_IMAGES"] = self.src_images
6254 env.update(_BuildInstanceHookEnv(
6255 name=self.op.instance_name,
6256 primary_node=self.op.pnode,
6257 secondary_nodes=self.secondaries,
6258 status=self.op.start,
6259 os_type=self.op.os_type,
6260 memory=self.be_full[constants.BE_MEMORY],
6261 vcpus=self.be_full[constants.BE_VCPUS],
6262 nics=_NICListToTuple(self, self.nics),
6263 disk_template=self.op.disk_template,
6264 disks=[(d["size"], d["mode"]) for d in self.disks],
6267 hypervisor_name=self.op.hypervisor,
6270 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6274 def _ReadExportInfo(self):
6275 """Reads the export information from disk.
6277 It will override the opcode source node and path with the actual
6278 information, if these two were not specified before.
6280 @return: the export information
6283 assert self.op.mode == constants.INSTANCE_IMPORT
6285 src_node = self.op.src_node
6286 src_path = self.op.src_path
6288 if src_node is None:
6289 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6290 exp_list = self.rpc.call_export_list(locked_nodes)
6292 for node in exp_list:
6293 if exp_list[node].fail_msg:
6295 if src_path in exp_list[node].payload:
6297 self.op.src_node = src_node = node
6298 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6302 raise errors.OpPrereqError("No export found for relative path %s" %
6303 src_path, errors.ECODE_INVAL)
6305 _CheckNodeOnline(self, src_node)
6306 result = self.rpc.call_export_info(src_node, src_path)
6307 result.Raise("No export or invalid export found in dir %s" % src_path)
6309 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6310 if not export_info.has_section(constants.INISECT_EXP):
6311 raise errors.ProgrammerError("Corrupted export config",
6312 errors.ECODE_ENVIRON)
6314 ei_version = export_info.get(constants.INISECT_EXP, "version")
6315 if (int(ei_version) != constants.EXPORT_VERSION):
6316 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6317 (ei_version, constants.EXPORT_VERSION),
6318 errors.ECODE_ENVIRON)
6321 def _ReadExportParams(self, einfo):
6322 """Use export parameters as defaults.
6324 In case the opcode doesn't specify (and thus override) some instance
6325 parameters, then try to use them from the export information, if
6329 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6331 if self.op.disk_template is None:
6332 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6333 self.op.disk_template = einfo.get(constants.INISECT_INS,
6336 raise errors.OpPrereqError("No disk template specified and the export"
6337 " is missing the disk_template information",
6340 if not self.op.disks:
6341 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6343 # TODO: import the disk iv_name too
6344 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6345 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6346 disks.append({"size": disk_sz})
6347 self.op.disks = disks
6349 raise errors.OpPrereqError("No disk info specified and the export"
6350 " is missing the disk information",
6353 if (not self.op.nics and
6354 einfo.has_option(constants.INISECT_INS, "nic_count")):
6356 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6358 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6359 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6364 if (self.op.hypervisor is None and
6365 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6366 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6367 if einfo.has_section(constants.INISECT_HYP):
6368 # use the export parameters but do not override the ones
6369 # specified by the user
6370 for name, value in einfo.items(constants.INISECT_HYP):
6371 if name not in self.op.hvparams:
6372 self.op.hvparams[name] = value
6374 if einfo.has_section(constants.INISECT_BEP):
6375 # use the parameters, without overriding
6376 for name, value in einfo.items(constants.INISECT_BEP):
6377 if name not in self.op.beparams:
6378 self.op.beparams[name] = value
6380 # try to read the parameters old style, from the main section
6381 for name in constants.BES_PARAMETERS:
6382 if (name not in self.op.beparams and
6383 einfo.has_option(constants.INISECT_INS, name)):
6384 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
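# Editorial sketch of the export-info keys consumed above (illustrative; the
# literal section names are defined by the INISECT_* constants elsewhere):
#
#   [INISECT_EXP]  version
#   [INISECT_INS]  name, os, disk_template, disk_count, disk<N>_size,
#                  disk<N>_dump, nic_count, nic<N>_mac, nic<N>_ip,
#                  nic<N>_<nicparam>, hypervisor, old-style backend parameters
#   [INISECT_HYP]  hypervisor parameters merged into self.op.hvparams
#   [INISECT_BEP]  backend parameters merged into self.op.beparams
#
# Values coming from the export never override what the opcode already
# specifies explicitly.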
6386 def _RevertToDefaults(self, cluster):
6387 """Revert the instance parameters to the default values.
6391 hv_defs = cluster.GetHVDefaults(self.op.hypervisor, self.op.os_type)
6392 for name in self.op.hvparams.keys():
6393 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6394 del self.op.hvparams[name]
6396 be_defs = cluster.beparams.get(constants.PP_DEFAULT, {})
6397 for name in self.op.beparams.keys():
6398 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6399 del self.op.beparams[name]
6401 nic_defs = cluster.nicparams.get(constants.PP_DEFAULT, {})
6402 for nic in self.op.nics:
6403 for name in constants.NICS_PARAMETERS:
6404 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6407 def CheckPrereq(self):
6408 """Check prerequisites.
6411 if self.op.mode == constants.INSTANCE_IMPORT:
6412 export_info = self._ReadExportInfo()
6413 self._ReadExportParams(export_info)
6415 _CheckDiskTemplate(self.op.disk_template)
6417 if (not self.cfg.GetVGName() and
6418 self.op.disk_template not in constants.DTS_NOT_LVM):
6419 raise errors.OpPrereqError("Cluster does not support lvm-based"
6420 " instances", errors.ECODE_STATE)
6422 if self.op.hypervisor is None:
6423 self.op.hypervisor = self.cfg.GetHypervisorType()
6425 cluster = self.cfg.GetClusterInfo()
6426 enabled_hvs = cluster.enabled_hypervisors
6427 if self.op.hypervisor not in enabled_hvs:
6428 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6429 " cluster (%s)" % (self.op.hypervisor,
6430 ",".join(enabled_hvs)),
6433 # check hypervisor parameter syntax (locally)
6434 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6435 filled_hvp = objects.FillDict(cluster.GetHVDefaults(self.op.hypervisor,
6438 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6439 hv_type.CheckParameterSyntax(filled_hvp)
6440 self.hv_full = filled_hvp
6441 # check that we don't specify global parameters on an instance
6442 _CheckGlobalHvParams(self.op.hvparams)
6444 # fill and remember the beparams dict
6445 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6446 self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6449 # now that hvp/bep are in final format, let's reset to defaults,
6451 if self.op.identify_defaults:
6452 self._RevertToDefaults(cluster)
6456 for idx, nic in enumerate(self.op.nics):
6457 nic_mode_req = nic.get("mode", None)
6458 nic_mode = nic_mode_req
6459 if nic_mode is None:
6460 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6462 # in routed mode, for the first nic, the default ip is 'auto'
6463 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6464 default_ip_mode = constants.VALUE_AUTO
6466 default_ip_mode = constants.VALUE_NONE
6468 # ip validity checks
6469 ip = nic.get("ip", default_ip_mode)
6470 if ip is None or ip.lower() == constants.VALUE_NONE:
6472 elif ip.lower() == constants.VALUE_AUTO:
6473 if not self.op.name_check:
6474 raise errors.OpPrereqError("IP address set to auto but name checks"
6475 " have been skipped. Aborting.",
6477 nic_ip = self.hostname1.ip
6479 if not utils.IsValidIP(ip):
6480 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6481 " like a valid IP" % ip,
6485 # TODO: check the ip address for uniqueness
6486 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6487 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6490 # MAC address verification
6491 mac = nic.get("mac", constants.VALUE_AUTO)
6492 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6493 mac = utils.NormalizeAndValidateMac(mac)
6496 self.cfg.ReserveMAC(mac, self.proc.GetECId())
6497 except errors.ReservationError:
6498 raise errors.OpPrereqError("MAC address %s already in use"
6499 " in cluster" % mac,
6500 errors.ECODE_NOTUNIQUE)
6502 # bridge verification
6503 bridge = nic.get("bridge", None)
6504 link = nic.get("link", None)
6506 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6507 " at the same time", errors.ECODE_INVAL)
6508 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6509 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6516 nicparams[constants.NIC_MODE] = nic_mode_req
6518 nicparams[constants.NIC_LINK] = link
6520 check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6522 objects.NIC.CheckParameterSyntax(check_params)
6523 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6525 # disk checks/pre-build
6527 for disk in self.op.disks:
6528 mode = disk.get("mode", constants.DISK_RDWR)
6529 if mode not in constants.DISK_ACCESS_SET:
6530 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6531 mode, errors.ECODE_INVAL)
6532 size = disk.get("size", None)
6534 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6537 except (TypeError, ValueError):
6538 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6540 new_disk = {"size": size, "mode": mode}
6542 new_disk["adopt"] = disk["adopt"]
6543 self.disks.append(new_disk)
6545 if self.op.mode == constants.INSTANCE_IMPORT:
6547 # Check that the new instance doesn't have fewer disks than the export
6548 instance_disks = len(self.disks)
6549 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6550 if instance_disks < export_disks:
6551 raise errors.OpPrereqError("Not enough disks to import."
6552 " (instance: %d, export: %d)" %
6553 (instance_disks, export_disks),
6557 for idx in range(export_disks):
6558 option = 'disk%d_dump' % idx
6559 if export_info.has_option(constants.INISECT_INS, option):
6560 # FIXME: are the old os-es, disk sizes, etc. useful?
6561 export_name = export_info.get(constants.INISECT_INS, option)
6562 image = utils.PathJoin(self.op.src_path, export_name)
6563 disk_images.append(image)
6565 disk_images.append(False)
6567 self.src_images = disk_images
6569 old_name = export_info.get(constants.INISECT_INS, 'name')
6571 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6572 except (TypeError, ValueError), err:
6573 raise errors.OpPrereqError("Invalid export file, nic_count is not"
6574 " an integer: %s" % str(err),
6576 if self.op.instance_name == old_name:
6577 for idx, nic in enumerate(self.nics):
6578 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6579 nic_mac_ini = 'nic%d_mac' % idx
6580 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6582 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6584 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6585 if self.op.ip_check:
6586 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6587 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6588 (self.check_ip, self.op.instance_name),
6589 errors.ECODE_NOTUNIQUE)
6591 #### mac address generation
6592 # By generating the mac address here, both the allocator and the hooks get
6593 # the real final mac address rather than the 'auto' or 'generate' value.
6594 # There is a race condition between the generation and the instance object
6595 # creation, which means that we know the mac is valid now, but we're not
6596 # sure it will be when we actually add the instance. If things go bad
6597 # adding the instance will abort because of a duplicate mac, and the
6598 # creation job will fail.
6599 for nic in self.nics:
6600 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6601 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6605 if self.op.iallocator is not None:
6606 self._RunAllocator()
6608 #### node related checks
6610 # check primary node
6611 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6612 assert self.pnode is not None, \
6613 "Cannot retrieve locked node %s" % self.op.pnode
6615 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6616 pnode.name, errors.ECODE_STATE)
6618 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6619 pnode.name, errors.ECODE_STATE)
6621 self.secondaries = []
6623 # mirror node verification
6624 if self.op.disk_template in constants.DTS_NET_MIRROR:
6625 if self.op.snode is None:
6626 raise errors.OpPrereqError("The networked disk templates need"
6627 " a mirror node", errors.ECODE_INVAL)
6628 if self.op.snode == pnode.name:
6629 raise errors.OpPrereqError("The secondary node cannot be the"
6630 " primary node.", errors.ECODE_INVAL)
6631 _CheckNodeOnline(self, self.op.snode)
6632 _CheckNodeNotDrained(self, self.op.snode)
6633 self.secondaries.append(self.op.snode)
6635 nodenames = [pnode.name] + self.secondaries
6637 req_size = _ComputeDiskSize(self.op.disk_template,
6640 # Check lv size requirements, if not adopting
6641 if req_size is not None and not self.adopt_disks:
6642 _CheckNodesFreeDisk(self, nodenames, req_size)
6644 if self.adopt_disks: # instead, we must check the adoption data
6645 all_lvs = set([i["adopt"] for i in self.disks])
6646 if len(all_lvs) != len(self.disks):
6647 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6649 for lv_name in all_lvs:
6651 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6652 except errors.ReservationError:
6653 raise errors.OpPrereqError("LV named %s used by another instance" %
6654 lv_name, errors.ECODE_NOTUNIQUE)
6656 node_lvs = self.rpc.call_lv_list([pnode.name],
6657 self.cfg.GetVGName())[pnode.name]
6658 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6659 node_lvs = node_lvs.payload
6660 delta = all_lvs.difference(node_lvs.keys())
6662 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6663 utils.CommaJoin(delta),
6665 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6667 raise errors.OpPrereqError("Online logical volumes found, cannot"
6668 " adopt: %s" % utils.CommaJoin(online_lvs),
6671 # update the size of the disks based on what is found
6671 for dsk in self.disks:
6672 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6674 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6676 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6678 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6680 # memory check on primary node
6682 _CheckNodeFreeMemory(self, self.pnode.name,
6683 "creating instance %s" % self.op.instance_name,
6684 self.be_full[constants.BE_MEMORY],
6687 self.dry_run_result = list(nodenames)
6689 def Exec(self, feedback_fn):
6690 """Create and add the instance to the cluster.
6693 instance = self.op.instance_name
6694 pnode_name = self.pnode.name
6696 ht_kind = self.op.hypervisor
6697 if ht_kind in constants.HTS_REQ_PORT:
6698 network_port = self.cfg.AllocatePort()
6702 if constants.ENABLE_FILE_STORAGE:
6703 # this is needed because os.path.join does not accept None arguments
6704 if self.op.file_storage_dir is None:
6705 string_file_storage_dir = ""
6707 string_file_storage_dir = self.op.file_storage_dir
6709 # build the full file storage dir path
6710 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6711 string_file_storage_dir, instance)
6713 file_storage_dir = ""
6716 disks = _GenerateDiskTemplate(self,
6717 self.op.disk_template,
6718 instance, pnode_name,
6722 self.op.file_driver,
6725 iobj = objects.Instance(name=instance, os=self.op.os_type,
6726 primary_node=pnode_name,
6727 nics=self.nics, disks=disks,
6728 disk_template=self.op.disk_template,
6730 network_port=network_port,
6731 beparams=self.op.beparams,
6732 hvparams=self.op.hvparams,
6733 hypervisor=self.op.hypervisor,
6736 if self.adopt_disks:
6737 # rename LVs to the newly-generated names; we need to construct
6738 # 'fake' LV disks with the old data, plus the new unique_id
6739 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6741 for t_dsk, a_dsk in zip (tmp_disks, self.disks):
6742 rename_to.append(t_dsk.logical_id)
6743 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6744 self.cfg.SetDiskID(t_dsk, pnode_name)
6745 result = self.rpc.call_blockdev_rename(pnode_name,
6746 zip(tmp_disks, rename_to))
6747 result.Raise("Failed to rename adoped LVs")
6749 feedback_fn("* creating instance disks...")
6751 _CreateDisks(self, iobj)
6752 except errors.OpExecError:
6753 self.LogWarning("Device creation failed, reverting...")
6755 _RemoveDisks(self, iobj)
6757 self.cfg.ReleaseDRBDMinors(instance)
6760 feedback_fn("adding instance %s to cluster config" % instance)
6762 self.cfg.AddInstance(iobj, self.proc.GetECId())
6764 # Declare that we don't want to remove the instance lock anymore, as we've
6765 # added the instance to the config
6766 del self.remove_locks[locking.LEVEL_INSTANCE]
6767 # Unlock all the nodes
6768 if self.op.mode == constants.INSTANCE_IMPORT:
6769 nodes_keep = [self.op.src_node]
6770 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6771 if node != self.op.src_node]
6772 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6773 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6775 self.context.glm.release(locking.LEVEL_NODE)
6776 del self.acquired_locks[locking.LEVEL_NODE]
6778 if self.op.wait_for_sync:
6779 disk_abort = not _WaitForSync(self, iobj)
6780 elif iobj.disk_template in constants.DTS_NET_MIRROR:
6781 # make sure the disks are not degraded (still sync-ing is ok)
6783 feedback_fn("* checking mirrors status")
6784 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6789 _RemoveDisks(self, iobj)
6790 self.cfg.RemoveInstance(iobj.name)
6791 # Make sure the instance lock gets removed
6792 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6793 raise errors.OpExecError("There are some degraded disks for"
6796 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6797 if self.op.mode == constants.INSTANCE_CREATE:
6798 if not self.op.no_install:
6799 feedback_fn("* running the instance OS create scripts...")
6800 # FIXME: pass debug option from opcode to backend
6801 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6802 self.op.debug_level)
6803 result.Raise("Could not add os for instance %s"
6804 " on node %s" % (instance, pnode_name))
6806 elif self.op.mode == constants.INSTANCE_IMPORT:
6807 feedback_fn("* running the instance OS import scripts...")
6808 src_node = self.op.src_node
6809 src_images = self.src_images
6810 cluster_name = self.cfg.GetClusterName()
6811 # FIXME: pass debug option from opcode to backend
6812 import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6813 src_node, src_images,
6815 self.op.debug_level)
6816 msg = import_result.fail_msg
6818 self.LogWarning("Error while importing the disk images for instance"
6819 " %s on node %s: %s" % (instance, pnode_name, msg))
6821 # also checked in the prereq part
6822 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6826 iobj.admin_up = True
6827 self.cfg.Update(iobj, feedback_fn)
6828 logging.info("Starting instance %s on node %s", instance, pnode_name)
6829 feedback_fn("* starting instance...")
6830 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6831 result.Raise("Could not start instance")
6833 return list(iobj.all_nodes)
6836 class LUConnectConsole(NoHooksLU):
6837 """Connect to an instance's console.
6839 This is somewhat special in that it returns the command line that
6840 you need to run on the master node in order to connect to the
6844 _OP_REQP = ["instance_name"]
6847 def ExpandNames(self):
6848 self._ExpandAndLockInstance()
6850 def CheckPrereq(self):
6851 """Check prerequisites.
6853 This checks that the instance is in the cluster.
6856 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6857 assert self.instance is not None, \
6858 "Cannot retrieve locked instance %s" % self.op.instance_name
6859 _CheckNodeOnline(self, self.instance.primary_node)
6861 def Exec(self, feedback_fn):
6862 """Connect to the console of an instance
6865 instance = self.instance
6866 node = instance.primary_node
6868 node_insts = self.rpc.call_instance_list([node],
6869 [instance.hypervisor])[node]
6870 node_insts.Raise("Can't get node information from %s" % node)
6872 if instance.name not in node_insts.payload:
6873 raise errors.OpExecError("Instance %s is not running." % instance.name)
6875 logging.debug("Connecting to console of %s on %s", instance.name, node)
6877 hyper = hypervisor.GetHypervisor(instance.hypervisor)
6878 cluster = self.cfg.GetClusterInfo()
6879 # beparams and hvparams are passed separately, to avoid editing the
6880 # instance and then saving the defaults in the instance itself.
6881 hvparams = cluster.FillHV(instance)
6882 beparams = cluster.FillBE(instance)
6883 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6886 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
6889 class LUReplaceDisks(LogicalUnit):
6890 """Replace the disks of an instance.
6893 HPATH = "mirrors-replace"
6894 HTYPE = constants.HTYPE_INSTANCE
6895 _OP_REQP = ["instance_name", "mode", "disks"]
6898 def CheckArguments(self):
6899 if not hasattr(self.op, "remote_node"):
6900 self.op.remote_node = None
6901 if not hasattr(self.op, "iallocator"):
6902 self.op.iallocator = None
6903 if not hasattr(self.op, "early_release"):
6904 self.op.early_release = False
6906 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6909 def ExpandNames(self):
6910 self._ExpandAndLockInstance()
6912 if self.op.iallocator is not None:
6913 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6915 elif self.op.remote_node is not None:
6916 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6917 self.op.remote_node = remote_node
6919 # Warning: do not remove the locking of the new secondary here
6920 # unless DRBD8.AddChildren is changed to work in parallel;
6921 # currently it doesn't since parallel invocations of
6922 # FindUnusedMinor will conflict
6923 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6924 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6927 self.needed_locks[locking.LEVEL_NODE] = []
6928 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6930 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6931 self.op.iallocator, self.op.remote_node,
6932 self.op.disks, False, self.op.early_release)
6934 self.tasklets = [self.replacer]
6936 def DeclareLocks(self, level):
6937 # If we're not already locking all nodes in the set we have to declare the
6938 # instance's primary/secondary nodes.
6939 if (level == locking.LEVEL_NODE and
6940 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6941 self._LockInstancesNodes()
6943 def BuildHooksEnv(self):
6946 This runs on the master, the primary and all the secondaries.
6949 instance = self.replacer.instance
6951 "MODE": self.op.mode,
6952 "NEW_SECONDARY": self.op.remote_node,
6953 "OLD_SECONDARY": instance.secondary_nodes[0],
6955 env.update(_BuildInstanceHookEnvByObject(self, instance))
6957 self.cfg.GetMasterNode(),
6958 instance.primary_node,
6960 if self.op.remote_node is not None:
6961 nl.append(self.op.remote_node)
6965 class LUEvacuateNode(LogicalUnit):
6966 """Relocate the secondary instances from a node.
6969 HPATH = "node-evacuate"
6970 HTYPE = constants.HTYPE_NODE
6971 _OP_REQP = ["node_name"]
6974 def CheckArguments(self):
6975 if not hasattr(self.op, "remote_node"):
6976 self.op.remote_node = None
6977 if not hasattr(self.op, "iallocator"):
6978 self.op.iallocator = None
6979 if not hasattr(self.op, "early_release"):
6980 self.op.early_release = False
6982 TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6983 self.op.remote_node,
6986 def ExpandNames(self):
6987 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6989 self.needed_locks = {}
6991 # Declare node locks
6992 if self.op.iallocator is not None:
6993 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6995 elif self.op.remote_node is not None:
6996 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6998 # Warning: do not remove the locking of the new secondary here
6999 # unless DRBD8.AddChildren is changed to work in parallel;
7000 # currently it doesn't since parallel invocations of
7001 # FindUnusedMinor will conflict
7002 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
7003 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7006 raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
7008 # Create tasklets for replacing disks for all secondary instances on this
7013 for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
7014 logging.debug("Replacing disks for instance %s", inst.name)
7015 names.append(inst.name)
7017 replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
7018 self.op.iallocator, self.op.remote_node, [],
7019 True, self.op.early_release)
7020 tasklets.append(replacer)
7022 self.tasklets = tasklets
7023 self.instance_names = names
7025 # Declare instance locks
7026 self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
7028 def DeclareLocks(self, level):
7029 # If we're not already locking all nodes in the set we have to declare the
7030 # instance's primary/secondary nodes.
7031 if (level == locking.LEVEL_NODE and
7032 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7033 self._LockInstancesNodes()
7035 def BuildHooksEnv(self):
7038 This runs on the master, the primary and all the secondaries.
7042 "NODE_NAME": self.op.node_name,
7045 nl = [self.cfg.GetMasterNode()]
7047 if self.op.remote_node is not None:
7048 env["NEW_SECONDARY"] = self.op.remote_node
7049 nl.append(self.op.remote_node)
7051 return (env, nl, nl)
7054 class TLReplaceDisks(Tasklet):
7055 """Replaces disks for an instance.
7057 Note: Locking is not within the scope of this class.
7060 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7061 disks, delay_iallocator, early_release):
7062 """Initializes this class.
7065 Tasklet.__init__(self, lu)
7068 self.instance_name = instance_name
7070 self.iallocator_name = iallocator_name
7071 self.remote_node = remote_node
7073 self.delay_iallocator = delay_iallocator
7074 self.early_release = early_release
7077 self.instance = None
7078 self.new_node = None
7079 self.target_node = None
7080 self.other_node = None
7081 self.remote_node_info = None
7082 self.node_secondary_ip = None
7085 def CheckArguments(mode, remote_node, iallocator):
7086 """Helper function for users of this class.
7089 # check for valid parameter combination
7090 if mode == constants.REPLACE_DISK_CHG:
7091 if remote_node is None and iallocator is None:
7092 raise errors.OpPrereqError("When changing the secondary either an"
7093 " iallocator script must be used or the"
7094 " new node given", errors.ECODE_INVAL)
7096 if remote_node is not None and iallocator is not None:
7097 raise errors.OpPrereqError("Give either the iallocator or the new"
7098 " secondary, not both", errors.ECODE_INVAL)
7100 elif remote_node is not None or iallocator is not None:
7101 # Not replacing the secondary
7102 raise errors.OpPrereqError("The iallocator and new node options can"
7103 " only be used when changing the"
7104 " secondary node", errors.ECODE_INVAL)
7107 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7108 """Compute a new secondary node using an IAllocator.
7111 ial = IAllocator(lu.cfg, lu.rpc,
7112 mode=constants.IALLOCATOR_MODE_RELOC,
7114 relocate_from=relocate_from)
7116 ial.Run(iallocator_name)
7119 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7120 " %s" % (iallocator_name, ial.info),
7123 if len(ial.result) != ial.required_nodes:
7124 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7125 " of nodes (%s), required %s" %
7127 len(ial.result), ial.required_nodes),
7130 remote_node_name = ial.result[0]
7132 lu.LogInfo("Selected new secondary for instance '%s': %s",
7133 instance_name, remote_node_name)
7135 return remote_node_name
7137 def _FindFaultyDisks(self, node_name):
7138 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7141 def CheckPrereq(self):
7142 """Check prerequisites.
7144 This checks that the instance is in the cluster.
7147 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7148 assert instance is not None, \
7149 "Cannot retrieve locked instance %s" % self.instance_name
7151 if instance.disk_template != constants.DT_DRBD8:
7152 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7153 " instances", errors.ECODE_INVAL)
7155 if len(instance.secondary_nodes) != 1:
7156 raise errors.OpPrereqError("The instance has a strange layout,"
7157 " expected one secondary but found %d" %
7158 len(instance.secondary_nodes),
7161 if not self.delay_iallocator:
7162 self._CheckPrereq2()
7164 def _CheckPrereq2(self):
7165 """Check prerequisites, second part.
7167 This function should always be part of CheckPrereq. It was separated and is
7168 now called from Exec because during node evacuation iallocator was only
7169 called with an unmodified cluster model, not taking planned changes into
7173 instance = self.instance
7174 secondary_node = instance.secondary_nodes[0]
7176 if self.iallocator_name is None:
7177 remote_node = self.remote_node
7179 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7180 instance.name, instance.secondary_nodes)
7182 if remote_node is not None:
7183 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7184 assert self.remote_node_info is not None, \
7185 "Cannot retrieve locked node %s" % remote_node
7187 self.remote_node_info = None
7189 if remote_node == self.instance.primary_node:
7190 raise errors.OpPrereqError("The specified node is the primary node of"
7191 " the instance.", errors.ECODE_INVAL)
7193 if remote_node == secondary_node:
7194 raise errors.OpPrereqError("The specified node is already the"
7195 " secondary node of the instance.",
7198 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7199 constants.REPLACE_DISK_CHG):
7200 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7203 if self.mode == constants.REPLACE_DISK_AUTO:
7204 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7205 faulty_secondary = self._FindFaultyDisks(secondary_node)
7207 if faulty_primary and faulty_secondary:
7208 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7209 " one node and can not be repaired"
7210 " automatically" % self.instance_name,
7214 self.disks = faulty_primary
7215 self.target_node = instance.primary_node
7216 self.other_node = secondary_node
7217 check_nodes = [self.target_node, self.other_node]
7218 elif faulty_secondary:
7219 self.disks = faulty_secondary
7220 self.target_node = secondary_node
7221 self.other_node = instance.primary_node
7222 check_nodes = [self.target_node, self.other_node]
7228 # Non-automatic modes
7229 if self.mode == constants.REPLACE_DISK_PRI:
7230 self.target_node = instance.primary_node
7231 self.other_node = secondary_node
7232 check_nodes = [self.target_node, self.other_node]
7234 elif self.mode == constants.REPLACE_DISK_SEC:
7235 self.target_node = secondary_node
7236 self.other_node = instance.primary_node
7237 check_nodes = [self.target_node, self.other_node]
7239 elif self.mode == constants.REPLACE_DISK_CHG:
7240 self.new_node = remote_node
7241 self.other_node = instance.primary_node
7242 self.target_node = secondary_node
7243 check_nodes = [self.new_node, self.other_node]
7245 _CheckNodeNotDrained(self.lu, remote_node)
7247 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7248 assert old_node_info is not None
7249 if old_node_info.offline and not self.early_release:
7250 # doesn't make sense to delay the release
7251 self.early_release = True
7252 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7253 " early-release mode", secondary_node)
7256 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7259 # If not specified, all disks should be replaced
7261 self.disks = range(len(self.instance.disks))
7263 for node in check_nodes:
7264 _CheckNodeOnline(self.lu, node)
7266 # Check whether disks are valid
7267 for disk_idx in self.disks:
7268 instance.FindDisk(disk_idx)
7270 # Get secondary node IP addresses
7273 for node_name in [self.target_node, self.other_node, self.new_node]:
7274 if node_name is not None:
7275 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7277 self.node_secondary_ip = node_2nd_ip
7279 def Exec(self, feedback_fn):
7280 """Execute disk replacement.
7282 This dispatches the disk replacement to the appropriate handler.
7285 if self.delay_iallocator:
7286 self._CheckPrereq2()
7289 feedback_fn("No disks need replacement")
7292 feedback_fn("Replacing disk(s) %s for %s" %
7293 (utils.CommaJoin(self.disks), self.instance.name))
7295 activate_disks = (not self.instance.admin_up)
7297 # Activate the instance disks if we're replacing them on a down instance
7299 _StartInstanceDisks(self.lu, self.instance, True)
7302 # Should we replace the secondary node?
7303 if self.new_node is not None:
7304 fn = self._ExecDrbd8Secondary
7306 fn = self._ExecDrbd8DiskOnly
7308 return fn(feedback_fn)
7311 # Deactivate the instance disks if we're replacing them on a
7314 _SafeShutdownInstanceDisks(self.lu, self.instance)
7316 def _CheckVolumeGroup(self, nodes):
7317 self.lu.LogInfo("Checking volume groups")
7319 vgname = self.cfg.GetVGName()
7321 # Make sure the volume group exists on all involved nodes
7322 results = self.rpc.call_vg_list(nodes)
7324 raise errors.OpExecError("Can't list volume groups on the nodes")
7328 res.Raise("Error checking node %s" % node)
7329 if vgname not in res.payload:
7330 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7333 def _CheckDisksExistence(self, nodes):
7334 # Check disk existence
7335 for idx, dev in enumerate(self.instance.disks):
7336 if idx not in self.disks:
7340 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7341 self.cfg.SetDiskID(dev, node)
7343 result = self.rpc.call_blockdev_find(node, dev)
7345 msg = result.fail_msg
7346 if msg or not result.payload:
7348 msg = "disk not found"
7349 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7352 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7353 for idx, dev in enumerate(self.instance.disks):
7354 if idx not in self.disks:
7357 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7360 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7362 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7363 " replace disks for instance %s" %
7364 (node_name, self.instance.name))
7366 def _CreateNewStorage(self, node_name):
7367 vgname = self.cfg.GetVGName()
7370 for idx, dev in enumerate(self.instance.disks):
7371 if idx not in self.disks:
7374 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7376 self.cfg.SetDiskID(dev, node_name)
7378 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7379 names = _GenerateUniqueNames(self.lu, lv_names)
7381 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7382 logical_id=(vgname, names[0]))
7383 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7384 logical_id=(vgname, names[1]))
7386 new_lvs = [lv_data, lv_meta]
7387 old_lvs = dev.children
7388 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7390 # we pass force_create=True to force the LVM creation
7391 for new_lv in new_lvs:
7392 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7393 _GetInstanceInfoText(self.instance), False)
7397 def _CheckDevices(self, node_name, iv_names):
7398 for name, (dev, _, _) in iv_names.iteritems():
7399 self.cfg.SetDiskID(dev, node_name)
7401 result = self.rpc.call_blockdev_find(node_name, dev)
7403 msg = result.fail_msg
7404 if msg or not result.payload:
7406 msg = "disk not found"
7407 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7410 if result.payload.is_degraded:
7411 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7413 def _RemoveOldStorage(self, node_name, iv_names):
7414 for name, (_, old_lvs, _) in iv_names.iteritems():
7415 self.lu.LogInfo("Remove logical volumes for %s" % name)
7418 self.cfg.SetDiskID(lv, node_name)
7420 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7422 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7423 hint="remove unused LVs manually")
7425 def _ReleaseNodeLock(self, node_name):
7426 """Releases the lock for a given node."""
7427 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7429 def _ExecDrbd8DiskOnly(self, feedback_fn):
7430 """Replace a disk on the primary or secondary for DRBD 8.
7432 The algorithm for replace is quite complicated:
7434 1. for each disk to be replaced:
7436 1. create new LVs on the target node with unique names
7437 1. detach old LVs from the drbd device
7438 1. rename old LVs to name_replaced.<time_t>
7439 1. rename new LVs to old LVs
7440 1. attach the new LVs (with the old names now) to the drbd device
7442 1. wait for sync across all devices
7444 1. for each modified disk:
7446 1. remove old LVs (which have the name name_replaced.<time_t>)
7448 Failures are not very well handled.
7453 # Step: check device activation
7454 self.lu.LogStep(1, steps_total, "Check device existence")
7455 self._CheckDisksExistence([self.other_node, self.target_node])
7456 self._CheckVolumeGroup([self.target_node, self.other_node])
7458 # Step: check other node consistency
7459 self.lu.LogStep(2, steps_total, "Check peer consistency")
7460 self._CheckDisksConsistency(self.other_node,
7461 self.other_node == self.instance.primary_node,
7464 # Step: create new storage
7465 self.lu.LogStep(3, steps_total, "Allocate new storage")
7466 iv_names = self._CreateNewStorage(self.target_node)
7468 # Step: for each lv, detach+rename*2+attach
7469 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7470 for dev, old_lvs, new_lvs in iv_names.itervalues():
7471 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7473 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7475 result.Raise("Can't detach drbd from local storage on node"
7476 " %s for device %s" % (self.target_node, dev.iv_name))
7478 #cfg.Update(instance)
7480 # ok, we created the new LVs, so now we know we have the needed
7481 # storage; as such, we proceed on the target node to rename
7482 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7483 # using the assumption that logical_id == physical_id (which in
7484 # turn is the unique_id on that node)
7486 # FIXME(iustin): use a better name for the replaced LVs
7487 temp_suffix = int(time.time())
7488 ren_fn = lambda d, suff: (d.physical_id[0],
7489 d.physical_id[1] + "_replaced-%s" % suff)
7491 # Build the rename list based on what LVs exist on the node
7492 rename_old_to_new = []
7493 for to_ren in old_lvs:
7494 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7495 if not result.fail_msg and result.payload:
7497 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7499 self.lu.LogInfo("Renaming the old LVs on the target node")
7500 result = self.rpc.call_blockdev_rename(self.target_node,
7502 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7504 # Now we rename the new LVs to the old LVs
7505 self.lu.LogInfo("Renaming the new LVs on the target node")
7506 rename_new_to_old = [(new, old.physical_id)
7507 for old, new in zip(old_lvs, new_lvs)]
7508 result = self.rpc.call_blockdev_rename(self.target_node,
7510 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7512 for old, new in zip(old_lvs, new_lvs):
7513 new.logical_id = old.logical_id
7514 self.cfg.SetDiskID(new, self.target_node)
7516 for disk in old_lvs:
7517 disk.logical_id = ren_fn(disk, temp_suffix)
7518 self.cfg.SetDiskID(disk, self.target_node)
7520 # Now that the new lvs have the old name, we can add them to the device
7521 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7522 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7524 msg = result.fail_msg
7526 for new_lv in new_lvs:
7527 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7530 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7531 hint=("cleanup manually the unused logical"
7533 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7535 dev.children = new_lvs
7537 self.cfg.Update(self.instance, feedback_fn)
7540 if self.early_release:
7541 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7543 self._RemoveOldStorage(self.target_node, iv_names)
7544 # WARNING: we release both node locks here, do not do other RPCs
7545 # than WaitForSync to the primary node
7546 self._ReleaseNodeLock([self.target_node, self.other_node])
7549 # This can fail as the old devices are degraded and _WaitForSync
7550 # does a combined result over all disks, so we don't check its return value
7551 self.lu.LogStep(cstep, steps_total, "Sync devices")
7553 _WaitForSync(self.lu, self.instance)
7555 # Check all devices manually
7556 self._CheckDevices(self.instance.primary_node, iv_names)
7558 # Step: remove old storage
7559 if not self.early_release:
7560 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7562 self._RemoveOldStorage(self.target_node, iv_names)
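# Editorial sketch of the detach/rename/attach dance performed above for one
# disk (illustrative only; error handling, config updates and physical_id
# bookkeeping are omitted):
#
#   call_blockdev_removechildren(target_node, drbd_dev, old_lvs)
#   call_blockdev_rename(target_node, [(old_lv, "<name>_replaced-<ts>"), ...])
#   call_blockdev_rename(target_node, [(new_lv, <old name>), ...])
#   call_blockdev_addchildren(target_node, drbd_dev, new_lvs)
#
# after which the DRBD device resynchronizes onto the freshly created LVs and
# the renamed old LVs are removed, either before the sync (early_release) or
# once the sync has finished.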
7564 def _ExecDrbd8Secondary(self, feedback_fn):
7565 """Replace the secondary node for DRBD 8.
7567 The algorithm for replace is quite complicated:
7568 - for all disks of the instance:
7569 - create new LVs on the new node with same names
7570 - shutdown the drbd device on the old secondary
7571 - disconnect the drbd network on the primary
7572 - create the drbd device on the new secondary
7573 - network attach the drbd on the primary, using an artifice:
7574 the drbd code for Attach() will connect to the network if it
7575 finds a device which is connected to the good local disks but not network enabled
7577 - wait for sync across all devices
7578 - remove all disks from the old secondary
7580 Failures are not very well handled.
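(In outline, the attach trick works as follows: the primary's DRBD devices
are first switched to standalone mode via disconnect, the instance
configuration is then rewritten so the disks point at the new secondary, and
the final attach call re-establishes the network connection, as implemented
step by step below.)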
7585 # Step: check device activation
7586 self.lu.LogStep(1, steps_total, "Check device existence")
7587 self._CheckDisksExistence([self.instance.primary_node])
7588 self._CheckVolumeGroup([self.instance.primary_node])
7590 # Step: check other node consistency
7591 self.lu.LogStep(2, steps_total, "Check peer consistency")
7592 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7594 # Step: create new storage
7595 self.lu.LogStep(3, steps_total, "Allocate new storage")
7596 for idx, dev in enumerate(self.instance.disks):
7597 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7598 (self.new_node, idx))
7599 # we pass force_create=True to force LVM creation
7600 for new_lv in dev.children:
7601 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7602 _GetInstanceInfoText(self.instance), False)
7604 # Step 4: drbd minors and drbd setup changes
7605 # after this, we must manually remove the drbd minors on both the
7606 # error and the success paths
7607 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7608 minors = self.cfg.AllocateDRBDMinor([self.new_node
7609 for dev in self.instance.disks],
7611 logging.debug("Allocated minors %r", minors)
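# (One minor is reserved on the new node for each instance disk; the
# ReleaseDRBDMinors calls on the failure paths below undo this reservation.)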
7614 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7615 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7616 (self.new_node, idx))
7617 # create new devices on new_node; note that we create two IDs:
7618 # one without port, so the drbd will be activated without
7619 # networking information on the new node at this stage, and one
7620 # with network, for the later activation in step 4
7621 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7622 if self.instance.primary_node == o_node1:
7625 assert self.instance.primary_node == o_node2, "Three-node instance?"
7628 new_alone_id = (self.instance.primary_node, self.new_node, None,
7629 p_minor, new_minor, o_secret)
7630 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7631 p_minor, new_minor, o_secret)
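# (A DRBD logical_id is the 6-tuple (nodeA, nodeB, port, minorA, minorB,
# secret); new_alone_id deliberately carries None instead of the port so the
# device comes up without networking, while new_net_id is the fully networked
# variant used for the later re-attach.)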
7633 iv_names[idx] = (dev, dev.children, new_net_id)
7634 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7636 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7637 logical_id=new_alone_id,
7638 children=dev.children,
7641 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7642 _GetInstanceInfoText(self.instance), False)
7643 except errors.GenericError:
7644 self.cfg.ReleaseDRBDMinors(self.instance.name)
7647 # We have new devices, shutdown the drbd on the old secondary
7648 for idx, dev in enumerate(self.instance.disks):
7649 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7650 self.cfg.SetDiskID(dev, self.target_node)
7651 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7653 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7654 "node: %s" % (idx, msg),
7655 hint=("Please cleanup this device manually as"
7656 " soon as possible"))
7658 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7659 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7660 self.node_secondary_ip,
7661 self.instance.disks)\
7662 [self.instance.primary_node]
7664 msg = result.fail_msg
7666 # detaches didn't succeed (unlikely)
7667 self.cfg.ReleaseDRBDMinors(self.instance.name)
7668 raise errors.OpExecError("Can't detach the disks from the network on"
7669 " old node: %s" % (msg,))
7671 # if we managed to detach at least one, we update all the disks of
7672 # the instance to point to the new secondary
7673 self.lu.LogInfo("Updating instance configuration")
7674 for dev, _, new_logical_id in iv_names.itervalues():
7675 dev.logical_id = new_logical_id
7676 self.cfg.SetDiskID(dev, self.instance.primary_node)
7678 self.cfg.Update(self.instance, feedback_fn)
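# From the configuration's point of view the new secondary is now
# authoritative; the actual DRBD network attach to it happens just below.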
7680 # and now perform the drbd attach
7681 self.lu.LogInfo("Attaching primary drbds to new secondary"
7682 " (standalone => connected)")
7683 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7685 self.node_secondary_ip,
7686 self.instance.disks,
7689 for to_node, to_result in result.items():
7690 msg = to_result.fail_msg
7692 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7694 hint=("please do a gnt-instance info to see the"
7695 " status of disks"))
7697 if self.early_release:
7698 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7700 self._RemoveOldStorage(self.target_node, iv_names)
7701 # WARNING: we release all node locks here, do not do other RPCs
7702 # than WaitForSync to the primary node
7703 self._ReleaseNodeLock([self.instance.primary_node,
7708 # This can fail as the old devices are degraded and _WaitForSync
7709 # returns a combined result over all disks, so we don't check its return value
7710 self.lu.LogStep(cstep, steps_total, "Sync devices")
7712 _WaitForSync(self.lu, self.instance)
7714 # Check all devices manually
7715 self._CheckDevices(self.instance.primary_node, iv_names)
7717 # Step: remove old storage
7718 if not self.early_release:
7719 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7720 self._RemoveOldStorage(self.target_node, iv_names)
7723 class LURepairNodeStorage(NoHooksLU):
7724 """Repairs the volume group on a node.
7727 _OP_REQP = ["node_name"]
7730 def CheckArguments(self):
7731 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7733 _CheckStorageType(self.op.storage_type)
7735 def ExpandNames(self):
7736 self.needed_locks = {
7737 locking.LEVEL_NODE: [self.op.node_name],
7740 def _CheckFaultyDisks(self, instance, node_name):
7741 """Ensure faulty disks abort the opcode or at least warn."""
7743 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7745 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7746 " node '%s'" % (instance.name, node_name),
7748 except errors.OpPrereqError, err:
7749 if self.op.ignore_consistency:
7750 self.proc.LogWarning(str(err.args[0]))
7754 def CheckPrereq(self):
7755 """Check prerequisites.
7758 storage_type = self.op.storage_type
7760 if (constants.SO_FIX_CONSISTENCY not in
7761 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7762 raise errors.OpPrereqError("Storage units of type '%s' can not be"
7763 " repaired" % storage_type,
7766 # Check whether any instance on this node has faulty disks
7767 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7768 if not inst.admin_up:
7770 check_nodes = set(inst.all_nodes)
7771 check_nodes.discard(self.op.node_name)
7772 for inst_node_name in check_nodes:
7773 self._CheckFaultyDisks(inst, inst_node_name)
7775 def Exec(self, feedback_fn):
7776 feedback_fn("Repairing storage unit '%s' on %s ..." %
7777 (self.op.name, self.op.node_name))
7779 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7780 result = self.rpc.call_storage_execute(self.op.node_name,
7781 self.op.storage_type, st_args,
7783 constants.SO_FIX_CONSISTENCY)
7784 result.Raise("Failed to repair storage unit '%s' on %s" %
7785 (self.op.name, self.op.node_name))
7788 class LUNodeEvacuationStrategy(NoHooksLU):
7789 """Computes the node evacuation strategy.
7792 _OP_REQP = ["nodes"]
7795 def CheckArguments(self):
7796 if not hasattr(self.op, "remote_node"):
7797 self.op.remote_node = None
7798 if not hasattr(self.op, "iallocator"):
7799 self.op.iallocator = None
7800 if self.op.remote_node is not None and self.op.iallocator is not None:
7801 raise errors.OpPrereqError("Give either the iallocator or the new"
7802 " secondary, not both", errors.ECODE_INVAL)
7804 def ExpandNames(self):
7805 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7806 self.needed_locks = locks = {}
7807 if self.op.remote_node is None:
7808 locks[locking.LEVEL_NODE] = locking.ALL_SET
7810 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7811 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7813 def CheckPrereq(self):
7816 def Exec(self, feedback_fn):
7817 if self.op.remote_node is not None:
7819 for node in self.op.nodes:
7820 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7823 if i.primary_node == self.op.remote_node:
7824 raise errors.OpPrereqError("Node %s is the primary node of"
7825 " instance %s, cannot use it as"
7827 (self.op.remote_node, i.name),
7829 result.append([i.name, self.op.remote_node])
7831 ial = IAllocator(self.cfg, self.rpc,
7832 mode=constants.IALLOCATOR_MODE_MEVAC,
7833 evac_nodes=self.op.nodes)
7834 ial.Run(self.op.iallocator, validate=True)
7836 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
7842 class LUGrowDisk(LogicalUnit):
7843 """Grow a disk of an instance.
7847 HTYPE = constants.HTYPE_INSTANCE
7848 _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7851 def ExpandNames(self):
7852 self._ExpandAndLockInstance()
7853 self.needed_locks[locking.LEVEL_NODE] = []
7854 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7856 def DeclareLocks(self, level):
7857 if level == locking.LEVEL_NODE:
7858 self._LockInstancesNodes()
7860 def BuildHooksEnv(self):
7863 This runs on the master, the primary and all the secondaries.
7867 "DISK": self.op.disk,
7868 "AMOUNT": self.op.amount,
7870 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7871 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7874 def CheckPrereq(self):
7875 """Check prerequisites.
7877 This checks that the instance is in the cluster.
7880 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7881 assert instance is not None, \
7882 "Cannot retrieve locked instance %s" % self.op.instance_name
7883 nodenames = list(instance.all_nodes)
7884 for node in nodenames:
7885 _CheckNodeOnline(self, node)
7888 self.instance = instance
7890 if instance.disk_template not in constants.DTS_GROWABLE:
7891 raise errors.OpPrereqError("Instance's disk layout does not support"
7892 " growing.", errors.ECODE_INVAL)
7894 self.disk = instance.FindDisk(self.op.disk)
7896 if instance.disk_template != constants.DT_FILE:
7897 # TODO: check the free disk space for file, when that feature will be implemented
7899 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
7901 def Exec(self, feedback_fn):
7902 """Execute disk grow.
7905 instance = self.instance
7907 for node in instance.all_nodes:
7908 self.cfg.SetDiskID(disk, node)
7909 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7910 result.Raise("Grow request failed to node %s" % node)
7912 # TODO: Rewrite code to work properly
7913 # DRBD goes into sync mode for a short amount of time after executing the
7914 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7915 # calling "resize" in sync mode fails. Sleeping for a short amount of
7916 # time is a work-around.
7919 disk.RecordGrow(self.op.amount)
7920 self.cfg.Update(instance, feedback_fn)
7921 if self.op.wait_for_sync:
7922 disk_abort = not _WaitForSync(self, instance)
7924 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7925 " status.\nPlease check the instance.")
7928 class LUQueryInstanceData(NoHooksLU):
7929 """Query runtime instance data.
7932 _OP_REQP = ["instances", "static"]
7935 def ExpandNames(self):
7936 self.needed_locks = {}
7937 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7939 if not isinstance(self.op.instances, list):
7940 raise errors.OpPrereqError("Invalid argument type 'instances'",
7943 if self.op.instances:
7944 self.wanted_names = []
7945 for name in self.op.instances:
7946 full_name = _ExpandInstanceName(self.cfg, name)
7947 self.wanted_names.append(full_name)
7948 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7950 self.wanted_names = None
7951 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7953 self.needed_locks[locking.LEVEL_NODE] = []
7954 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7956 def DeclareLocks(self, level):
7957 if level == locking.LEVEL_NODE:
7958 self._LockInstancesNodes()
7960 def CheckPrereq(self):
7961 """Check prerequisites.
7963 This only checks the optional instance list against the existing names.
7966 if self.wanted_names is None:
7967 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7969 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7970 in self.wanted_names]
7973 def _ComputeBlockdevStatus(self, node, instance_name, dev):
7974 """Returns the status of a block device
7977 if self.op.static or not node:
7980 self.cfg.SetDiskID(dev, node)
7982 result = self.rpc.call_blockdev_find(node, dev)
7986 result.Raise("Can't compute disk status for %s" % instance_name)
7988 status = result.payload
7992 return (status.dev_path, status.major, status.minor,
7993 status.sync_percent, status.estimated_time,
7994 status.is_degraded, status.ldisk_status)
7996 def _ComputeDiskStatus(self, instance, snode, dev):
7997 """Compute block device status.
8000 if dev.dev_type in constants.LDS_DRBD:
8001 # we change the snode then (otherwise we use the one passed in)
8002 if dev.logical_id[0] == instance.primary_node:
8003 snode = dev.logical_id[1]
8005 snode = dev.logical_id[0]
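# (For DRBD devices the first two elements of logical_id are the two node
# names, so whichever of them is not the primary is treated as the secondary.)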
8007 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8009 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8012 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8013 for child in dev.children]
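# (The recursion makes nested devices, e.g. the LVs backing a DRBD disk,
# show up in the "children" entry of the returned dict.)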
8018 "iv_name": dev.iv_name,
8019 "dev_type": dev.dev_type,
8020 "logical_id": dev.logical_id,
8021 "physical_id": dev.physical_id,
8022 "pstatus": dev_pstatus,
8023 "sstatus": dev_sstatus,
8024 "children": dev_children,
8031 def Exec(self, feedback_fn):
8032 """Gather and return data"""
8035 cluster = self.cfg.GetClusterInfo()
8037 for instance in self.wanted_instances:
8038 if not self.op.static:
8039 remote_info = self.rpc.call_instance_info(instance.primary_node,
8041 instance.hypervisor)
8042 remote_info.Raise("Error checking node %s" % instance.primary_node)
8043 remote_info = remote_info.payload
8044 if remote_info and "state" in remote_info:
8047 remote_state = "down"
8050 if instance.admin_up:
8053 config_state = "down"
8055 disks = [self._ComputeDiskStatus(instance, None, device)
8056 for device in instance.disks]
8059 "name": instance.name,
8060 "config_state": config_state,
8061 "run_state": remote_state,
8062 "pnode": instance.primary_node,
8063 "snodes": instance.secondary_nodes,
8065 # this happens to be the same format used for hooks
8066 "nics": _NICListToTuple(self, instance.nics),
8068 "hypervisor": instance.hypervisor,
8069 "network_port": instance.network_port,
8070 "hv_instance": instance.hvparams,
8071 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8072 "be_instance": instance.beparams,
8073 "be_actual": cluster.FillBE(instance),
8074 "serial_no": instance.serial_no,
8075 "mtime": instance.mtime,
8076 "ctime": instance.ctime,
8077 "uuid": instance.uuid,
8080 result[instance.name] = idict
8085 class LUSetInstanceParams(LogicalUnit):
8086 """Modifies an instances's parameters.
8089 HPATH = "instance-modify"
8090 HTYPE = constants.HTYPE_INSTANCE
8091 _OP_REQP = ["instance_name"]
8094 def CheckArguments(self):
8095 if not hasattr(self.op, 'nics'):
8097 if not hasattr(self.op, 'disks'):
8099 if not hasattr(self.op, 'beparams'):
8100 self.op.beparams = {}
8101 if not hasattr(self.op, 'hvparams'):
8102 self.op.hvparams = {}
8103 if not hasattr(self.op, "disk_template"):
8104 self.op.disk_template = None
8105 if not hasattr(self.op, "remote_node"):
8106 self.op.remote_node = None
8107 if not hasattr(self.op, "os_name"):
8108 self.op.os_name = None
8109 if not hasattr(self.op, "force_variant"):
8110 self.op.force_variant = False
8111 self.op.force = getattr(self.op, "force", False)
8112 if not (self.op.nics or self.op.disks or self.op.disk_template or
8113 self.op.hvparams or self.op.beparams or self.op.os_name):
8114 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8116 if self.op.hvparams:
8117 _CheckGlobalHvParams(self.op.hvparams)
8121 for disk_op, disk_dict in self.op.disks:
8122 if disk_op == constants.DDM_REMOVE:
8125 elif disk_op == constants.DDM_ADD:
8128 if not isinstance(disk_op, int):
8129 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8130 if not isinstance(disk_dict, dict):
8131 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8132 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8134 if disk_op == constants.DDM_ADD:
8135 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8136 if mode not in constants.DISK_ACCESS_SET:
8137 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8139 size = disk_dict.get('size', None)
8141 raise errors.OpPrereqError("Required disk parameter size missing",
8145 except (TypeError, ValueError), err:
8146 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8147 str(err), errors.ECODE_INVAL)
8148 disk_dict['size'] = size
8150 # modification of disk
8151 if 'size' in disk_dict:
8152 raise errors.OpPrereqError("Disk size change not possible, use"
8153 " grow-disk", errors.ECODE_INVAL)
8155 if disk_addremove > 1:
8156 raise errors.OpPrereqError("Only one disk add or remove operation"
8157 " supported at a time", errors.ECODE_INVAL)
8159 if self.op.disks and self.op.disk_template is not None:
8160 raise errors.OpPrereqError("Disk template conversion and other disk"
8161 " changes not supported at the same time",
8164 if self.op.disk_template:
8165 _CheckDiskTemplate(self.op.disk_template)
8166 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8167 self.op.remote_node is None):
8168 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8169 " one requires specifying a secondary node",
8174 for nic_op, nic_dict in self.op.nics:
8175 if nic_op == constants.DDM_REMOVE:
8178 elif nic_op == constants.DDM_ADD:
8181 if not isinstance(nic_op, int):
8182 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8183 if not isinstance(nic_dict, dict):
8184 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8185 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8187 # nic_dict should be a dict
8188 nic_ip = nic_dict.get('ip', None)
8189 if nic_ip is not None:
8190 if nic_ip.lower() == constants.VALUE_NONE:
8191 nic_dict['ip'] = None
8193 if not utils.IsValidIP(nic_ip):
8194 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8197 nic_bridge = nic_dict.get('bridge', None)
8198 nic_link = nic_dict.get('link', None)
8199 if nic_bridge and nic_link:
8200 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8201 " at the same time", errors.ECODE_INVAL)
8202 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8203 nic_dict['bridge'] = None
8204 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8205 nic_dict['link'] = None
8207 if nic_op == constants.DDM_ADD:
8208 nic_mac = nic_dict.get('mac', None)
8210 nic_dict['mac'] = constants.VALUE_AUTO
8212 if 'mac' in nic_dict:
8213 nic_mac = nic_dict['mac']
8214 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8215 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8217 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8218 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8219 " modifying an existing nic",
8222 if nic_addremove > 1:
8223 raise errors.OpPrereqError("Only one NIC add or remove operation"
8224 " supported at a time", errors.ECODE_INVAL)
8226 def ExpandNames(self):
8227 self._ExpandAndLockInstance()
8228 self.needed_locks[locking.LEVEL_NODE] = []
8229 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8231 def DeclareLocks(self, level):
8232 if level == locking.LEVEL_NODE:
8233 self._LockInstancesNodes()
8234 if self.op.disk_template and self.op.remote_node:
8235 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8236 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8238 def BuildHooksEnv(self):
8241 This runs on the master, primary and secondaries.
8245 if constants.BE_MEMORY in self.be_new:
8246 args['memory'] = self.be_new[constants.BE_MEMORY]
8247 if constants.BE_VCPUS in self.be_new:
8248 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8249 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8250 # information at all.
8253 nic_override = dict(self.op.nics)
8254 c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
8255 for idx, nic in enumerate(self.instance.nics):
8256 if idx in nic_override:
8257 this_nic_override = nic_override[idx]
8259 this_nic_override = {}
8260 if 'ip' in this_nic_override:
8261 ip = this_nic_override['ip']
8264 if 'mac' in this_nic_override:
8265 mac = this_nic_override['mac']
8268 if idx in self.nic_pnew:
8269 nicparams = self.nic_pnew[idx]
8271 nicparams = objects.FillDict(c_nicparams, nic.nicparams)
8272 mode = nicparams[constants.NIC_MODE]
8273 link = nicparams[constants.NIC_LINK]
8274 args['nics'].append((ip, mac, mode, link))
8275 if constants.DDM_ADD in nic_override:
8276 ip = nic_override[constants.DDM_ADD].get('ip', None)
8277 mac = nic_override[constants.DDM_ADD]['mac']
8278 nicparams = self.nic_pnew[constants.DDM_ADD]
8279 mode = nicparams[constants.NIC_MODE]
8280 link = nicparams[constants.NIC_LINK]
8281 args['nics'].append((ip, mac, mode, link))
8282 elif constants.DDM_REMOVE in nic_override:
8283 del args['nics'][-1]
8285 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8286 if self.op.disk_template:
8287 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8288 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8292 def _GetUpdatedParams(old_params, update_dict,
8293 default_values, parameter_types):
8294 """Return the new params dict for the given params.
8296 @type old_params: dict
8297 @param old_params: old parameters
8298 @type update_dict: dict
8299 @param update_dict: dict containing new parameter values,
8300 or constants.VALUE_DEFAULT to reset the
8301 parameter to its default value
8302 @type default_values: dict
8303 @param default_values: default values for the filled parameters
8304 @type parameter_types: dict
8305 @param parameter_types: dict mapping target dict keys to types
8306 in constants.ENFORCEABLE_TYPES
8307 @rtype: (dict, dict)
8308 @return: (new_parameters, filled_parameters)
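For example (illustrative values only): with old_params={"a": 1},
update_dict={"a": constants.VALUE_DEFAULT, "b": 2} and
default_values={"a": 10, "b": 20}, the result would be
({"b": 2}, {"a": 10, "b": 2}).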
8311 params_copy = copy.deepcopy(old_params)
8312 for key, val in update_dict.iteritems():
8313 if val == constants.VALUE_DEFAULT:
8315 del params_copy[key]
8319 params_copy[key] = val
8320 utils.ForceDictType(params_copy, parameter_types)
8321 params_filled = objects.FillDict(default_values, params_copy)
8322 return (params_copy, params_filled)
8324 def CheckPrereq(self):
8325 """Check prerequisites.
8327 This only checks the instance list against the existing names.
8330 self.force = self.op.force
8332 # checking the new params on the primary/secondary nodes
8334 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8335 cluster = self.cluster = self.cfg.GetClusterInfo()
8336 assert self.instance is not None, \
8337 "Cannot retrieve locked instance %s" % self.op.instance_name
8338 pnode = instance.primary_node
8339 nodelist = list(instance.all_nodes)
8341 if self.op.disk_template:
8342 if instance.disk_template == self.op.disk_template:
8343 raise errors.OpPrereqError("Instance already has disk template %s" %
8344 instance.disk_template, errors.ECODE_INVAL)
8346 if (instance.disk_template,
8347 self.op.disk_template) not in self._DISK_CONVERSIONS:
8348 raise errors.OpPrereqError("Unsupported disk template conversion from"
8349 " %s to %s" % (instance.disk_template,
8350 self.op.disk_template),
8352 if self.op.disk_template in constants.DTS_NET_MIRROR:
8353 _CheckNodeOnline(self, self.op.remote_node)
8354 _CheckNodeNotDrained(self, self.op.remote_node)
8355 disks = [{"size": d.size} for d in instance.disks]
8356 required = _ComputeDiskSize(self.op.disk_template, disks)
8357 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8358 _CheckInstanceDown(self, instance, "cannot change disk template")
8360 # hvparams processing
8361 if self.op.hvparams:
8362 i_hvdict, hv_new = self._GetUpdatedParams(
8363 instance.hvparams, self.op.hvparams,
8364 cluster.hvparams[instance.hypervisor],
8365 constants.HVS_PARAMETER_TYPES)
8367 hypervisor.GetHypervisor(
8368 instance.hypervisor).CheckParameterSyntax(hv_new)
8369 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8370 self.hv_new = hv_new # the new actual values
8371 self.hv_inst = i_hvdict # the new dict (without defaults)
8373 self.hv_new = self.hv_inst = {}
8375 # beparams processing
8376 if self.op.beparams:
8377 i_bedict, be_new = self._GetUpdatedParams(
8378 instance.beparams, self.op.beparams,
8379 cluster.beparams[constants.PP_DEFAULT],
8380 constants.BES_PARAMETER_TYPES)
8381 self.be_new = be_new # the new actual values
8382 self.be_inst = i_bedict # the new dict (without defaults)
8384 self.be_new = self.be_inst = {}
8388 if constants.BE_MEMORY in self.op.beparams and not self.force:
8389 mem_check_list = [pnode]
8390 if be_new[constants.BE_AUTO_BALANCE]:
8391 # either we changed auto_balance to yes or it was from before
8392 mem_check_list.extend(instance.secondary_nodes)
8393 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8394 instance.hypervisor)
8395 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8396 instance.hypervisor)
8397 pninfo = nodeinfo[pnode]
8398 msg = pninfo.fail_msg
8400 # Assume the primary node is unreachable and go ahead
8401 self.warn.append("Can't get info from primary node %s: %s" %
8403 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8404 self.warn.append("Node data from primary node %s doesn't contain"
8405 " free memory information" % pnode)
8406 elif instance_info.fail_msg:
8407 self.warn.append("Can't get instance runtime information: %s" %
8408 instance_info.fail_msg)
8410 if instance_info.payload:
8411 current_mem = int(instance_info.payload['memory'])
8413 # Assume instance not running
8414 # (there is a slight race condition here, but it's not very probable,
8415 # and we have no other way to check)
8417 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8418 pninfo.payload['memory_free'])
8420 raise errors.OpPrereqError("This change will prevent the instance"
8421 " from starting, due to %d MB of memory"
8422 " missing on its primary node" % miss_mem,
8425 if be_new[constants.BE_AUTO_BALANCE]:
8426 for node, nres in nodeinfo.items():
8427 if node not in instance.secondary_nodes:
8431 self.warn.append("Can't get info from secondary node %s: %s" %
8433 elif not isinstance(nres.payload.get('memory_free', None), int):
8434 self.warn.append("Secondary node %s didn't return free"
8435 " memory information" % node)
8436 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8437 self.warn.append("Not enough memory to failover instance to"
8438 " secondary node %s" % node)
8443 for nic_op, nic_dict in self.op.nics:
8444 if nic_op == constants.DDM_REMOVE:
8445 if not instance.nics:
8446 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8449 if nic_op != constants.DDM_ADD:
8451 if not instance.nics:
8452 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8453 " no NICs" % nic_op,
8455 if nic_op < 0 or nic_op >= len(instance.nics):
8456 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8458 (nic_op, len(instance.nics) - 1),
8460 old_nic_params = instance.nics[nic_op].nicparams
8461 old_nic_ip = instance.nics[nic_op].ip
8466 update_params_dict = dict([(key, nic_dict[key])
8467 for key in constants.NICS_PARAMETERS
8468 if key in nic_dict])
8470 if 'bridge' in nic_dict:
8471 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8473 new_nic_params, new_filled_nic_params = \
8474 self._GetUpdatedParams(old_nic_params, update_params_dict,
8475 cluster.nicparams[constants.PP_DEFAULT],
8476 constants.NICS_PARAMETER_TYPES)
8477 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8478 self.nic_pinst[nic_op] = new_nic_params
8479 self.nic_pnew[nic_op] = new_filled_nic_params
8480 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8482 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8483 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8484 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8486 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8488 self.warn.append(msg)
8490 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8491 if new_nic_mode == constants.NIC_MODE_ROUTED:
8492 if 'ip' in nic_dict:
8493 nic_ip = nic_dict['ip']
8497 raise errors.OpPrereqError('Cannot set the nic ip to None'
8498 ' on a routed nic', errors.ECODE_INVAL)
8499 if 'mac' in nic_dict:
8500 nic_mac = nic_dict['mac']
8502 raise errors.OpPrereqError('Cannot set the nic mac to None',
8504 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8505 # otherwise generate the mac
8506 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8508 # or validate/reserve the current one
8510 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8511 except errors.ReservationError:
8512 raise errors.OpPrereqError("MAC address %s already in use"
8513 " in cluster" % nic_mac,
8514 errors.ECODE_NOTUNIQUE)
8517 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8518 raise errors.OpPrereqError("Disk operations not supported for"
8519 " diskless instances",
8521 for disk_op, _ in self.op.disks:
8522 if disk_op == constants.DDM_REMOVE:
8523 if len(instance.disks) == 1:
8524 raise errors.OpPrereqError("Cannot remove the last disk of"
8525 " an instance", errors.ECODE_INVAL)
8526 _CheckInstanceDown(self, instance, "cannot remove disks")
8528 if (disk_op == constants.DDM_ADD and
8529 len(instance.disks) >= constants.MAX_DISKS):
8530 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8531 " add more" % constants.MAX_DISKS,
8533 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8535 if disk_op < 0 or disk_op >= len(instance.disks):
8536 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8538 (disk_op, len(instance.disks)),
8542 if self.op.os_name and not self.op.force:
8543 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8544 self.op.force_variant)
8548 def _ConvertPlainToDrbd(self, feedback_fn):
8549 """Converts an instance from plain to drbd.
8552 feedback_fn("Converting template to drbd")
8553 instance = self.instance
8554 pnode = instance.primary_node
8555 snode = self.op.remote_node
8557 # create a fake disk info for _GenerateDiskTemplate
8558 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8559 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8560 instance.name, pnode, [snode],
8561 disk_info, None, None, 0)
8562 info = _GetInstanceInfoText(instance)
8563 feedback_fn("Creating aditional volumes...")
8564 # first, create the missing data and meta devices
8565 for disk in new_disks:
8566 # unfortunately this is... not too nice
8567 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8569 for child in disk.children:
8570 _CreateSingleBlockDev(self, snode, instance, child, info, True)
8571 # at this stage, all new LVs have been created, we can rename the old ones
8573 feedback_fn("Renaming original volumes...")
8574 rename_list = [(o, n.children[0].logical_id)
8575 for (o, n) in zip(instance.disks, new_disks)]
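# (Each original plain LV is renamed to the logical_id expected by the data
# child of its new DRBD disk, so the existing data is reused in place rather
# than copied.)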
8576 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8577 result.Raise("Failed to rename original LVs")
8579 feedback_fn("Initializing DRBD devices...")
8580 # all child devices are in place, we can now create the DRBD devices
8581 for disk in new_disks:
8582 for node in [pnode, snode]:
8583 f_create = node == pnode
8584 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8586 # at this point, the instance has been modified
8587 instance.disk_template = constants.DT_DRBD8
8588 instance.disks = new_disks
8589 self.cfg.Update(instance, feedback_fn)
8591 # disks are created, waiting for sync
8592 disk_abort = not _WaitForSync(self, instance)
8594 raise errors.OpExecError("There are some degraded disks for"
8595 " this instance, please cleanup manually")
8597 def _ConvertDrbdToPlain(self, feedback_fn):
8598 """Converts an instance from drbd to plain.
8601 instance = self.instance
8602 assert len(instance.secondary_nodes) == 1
8603 pnode = instance.primary_node
8604 snode = instance.secondary_nodes[0]
8605 feedback_fn("Converting template to plain")
8607 old_disks = instance.disks
8608 new_disks = [d.children[0] for d in old_disks]
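# (children[0] is the data LV of each DRBD disk and becomes the plain disk
# as-is; the meta LV, children[1], is removed further down.)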
8610 # copy over size and mode
8611 for parent, child in zip(old_disks, new_disks):
8612 child.size = parent.size
8613 child.mode = parent.mode
8615 # update instance structure
8616 instance.disks = new_disks
8617 instance.disk_template = constants.DT_PLAIN
8618 self.cfg.Update(instance, feedback_fn)
8620 feedback_fn("Removing volumes on the secondary node...")
8621 for disk in old_disks:
8622 self.cfg.SetDiskID(disk, snode)
8623 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8625 self.LogWarning("Could not remove block device %s on node %s,"
8626 " continuing anyway: %s", disk.iv_name, snode, msg)
8628 feedback_fn("Removing unneeded volumes on the primary node...")
8629 for idx, disk in enumerate(old_disks):
8630 meta = disk.children[1]
8631 self.cfg.SetDiskID(meta, pnode)
8632 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8634 self.LogWarning("Could not remove metadata for disk %d on node %s,"
8635 " continuing anyway: %s", idx, pnode, msg)
8638 def Exec(self, feedback_fn):
8639 """Modifies an instance.
8641 All parameters take effect only at the next restart of the instance.
8644 # Process here the warnings from CheckPrereq, as we don't have a
8645 # feedback_fn there.
8646 for warn in self.warn:
8647 feedback_fn("WARNING: %s" % warn)
8650 instance = self.instance
8652 for disk_op, disk_dict in self.op.disks:
8653 if disk_op == constants.DDM_REMOVE:
8654 # remove the last disk
8655 device = instance.disks.pop()
8656 device_idx = len(instance.disks)
8657 for node, disk in device.ComputeNodeTree(instance.primary_node):
8658 self.cfg.SetDiskID(disk, node)
8659 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8661 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8662 " continuing anyway", device_idx, node, msg)
8663 result.append(("disk/%d" % device_idx, "remove"))
8664 elif disk_op == constants.DDM_ADD:
8666 if instance.disk_template == constants.DT_FILE:
8667 file_driver, file_path = instance.disks[0].logical_id
8668 file_path = os.path.dirname(file_path)
8670 file_driver = file_path = None
8671 disk_idx_base = len(instance.disks)
8672 new_disk = _GenerateDiskTemplate(self,
8673 instance.disk_template,
8674 instance.name, instance.primary_node,
8675 instance.secondary_nodes,
8680 instance.disks.append(new_disk)
8681 info = _GetInstanceInfoText(instance)
8683 logging.info("Creating volume %s for instance %s",
8684 new_disk.iv_name, instance.name)
8685 # Note: this needs to be kept in sync with _CreateDisks
8687 for node in instance.all_nodes:
8688 f_create = node == instance.primary_node
8690 _CreateBlockDev(self, node, instance, new_disk,
8691 f_create, info, f_create)
8692 except errors.OpExecError, err:
8693 self.LogWarning("Failed to create volume %s (%s) on"
8695 new_disk.iv_name, new_disk, node, err)
8696 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8697 (new_disk.size, new_disk.mode)))
8699 # change a given disk
8700 instance.disks[disk_op].mode = disk_dict['mode']
8701 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8703 if self.op.disk_template:
8704 r_shut = _ShutdownInstanceDisks(self, instance)
8706 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8707 " proceed with disk template conversion")
8708 mode = (instance.disk_template, self.op.disk_template)
8710 self._DISK_CONVERSIONS[mode](self, feedback_fn)
8712 self.cfg.ReleaseDRBDMinors(instance.name)
8714 result.append(("disk_template", self.op.disk_template))
8717 for nic_op, nic_dict in self.op.nics:
8718 if nic_op == constants.DDM_REMOVE:
8719 # remove the last nic
8720 del instance.nics[-1]
8721 result.append(("nic.%d" % len(instance.nics), "remove"))
8722 elif nic_op == constants.DDM_ADD:
8723 # mac and bridge should be set by now
8724 mac = nic_dict['mac']
8725 ip = nic_dict.get('ip', None)
8726 nicparams = self.nic_pinst[constants.DDM_ADD]
8727 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8728 instance.nics.append(new_nic)
8729 result.append(("nic.%d" % (len(instance.nics) - 1),
8730 "add:mac=%s,ip=%s,mode=%s,link=%s" %
8731 (new_nic.mac, new_nic.ip,
8732 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8733 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8736 for key in 'mac', 'ip':
8738 setattr(instance.nics[nic_op], key, nic_dict[key])
8739 if nic_op in self.nic_pinst:
8740 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8741 for key, val in nic_dict.iteritems():
8742 result.append(("nic.%s/%d" % (key, nic_op), val))
8745 if self.op.hvparams:
8746 instance.hvparams = self.hv_inst
8747 for key, val in self.op.hvparams.iteritems():
8748 result.append(("hv/%s" % key, val))
8751 if self.op.beparams:
8752 instance.beparams = self.be_inst
8753 for key, val in self.op.beparams.iteritems():
8754 result.append(("be/%s" % key, val))
8758 instance.os = self.op.os_name
8760 self.cfg.Update(instance, feedback_fn)
8764 _DISK_CONVERSIONS = {
8765 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8766 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8769 class LUQueryExports(NoHooksLU):
8770 """Query the exports list
8773 _OP_REQP = ['nodes']
8776 def ExpandNames(self):
8777 self.needed_locks = {}
8778 self.share_locks[locking.LEVEL_NODE] = 1
8779 if not self.op.nodes:
8780 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8782 self.needed_locks[locking.LEVEL_NODE] = \
8783 _GetWantedNodes(self, self.op.nodes)
8785 def CheckPrereq(self):
8786 """Check prerequisites.
8789 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8791 def Exec(self, feedback_fn):
8792 """Compute the list of all the exported system images.
8795 @return: a dictionary with the structure node->(export-list)
8796 where export-list is a list of the instances exported on
8800 rpcresult = self.rpc.call_export_list(self.nodes)
8802 for node in rpcresult:
8803 if rpcresult[node].fail_msg:
8804 result[node] = False
8806 result[node] = rpcresult[node].payload
8811 class LUExportInstance(LogicalUnit):
8812 """Export an instance to an image in the cluster.
8815 HPATH = "instance-export"
8816 HTYPE = constants.HTYPE_INSTANCE
8817 _OP_REQP = ["instance_name", "target_node", "shutdown"]
8820 def CheckArguments(self):
8821 """Check the arguments.
8824 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
8825 constants.DEFAULT_SHUTDOWN_TIMEOUT)
8827 def ExpandNames(self):
8828 self._ExpandAndLockInstance()
8829 # FIXME: lock only instance primary and destination node
8831 # Sad but true, for now we have to lock all nodes, as we don't know where
8832 # the previous export might be, and in this LU we search for it and
8833 # remove it from its current node. In the future we could fix this by:
8834 # - making a tasklet to search (share-lock all), then create the new one,
8835 # then one to remove, after
8836 # - removing the removal operation altogether
8837 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8839 def DeclareLocks(self, level):
8840 """Last minute lock declaration."""
8841 # All nodes are locked anyway, so nothing to do here.
8843 def BuildHooksEnv(self):
8846 This will run on the master, primary node and target node.
8850 "EXPORT_NODE": self.op.target_node,
8851 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
8852 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
8854 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8855 nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
8856 self.op.target_node]
8859 def CheckPrereq(self):
8860 """Check prerequisites.
8862 This checks that the instance and node names are valid.
8865 instance_name = self.op.instance_name
8866 self.instance = self.cfg.GetInstanceInfo(instance_name)
8867 assert self.instance is not None, \
8868 "Cannot retrieve locked instance %s" % self.op.instance_name
8869 _CheckNodeOnline(self, self.instance.primary_node)
8871 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8872 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
8873 assert self.dst_node is not None
8875 _CheckNodeOnline(self, self.dst_node.name)
8876 _CheckNodeNotDrained(self, self.dst_node.name)
8878 # instance disk type verification
8879 for disk in self.instance.disks:
8880 if disk.dev_type == constants.LD_FILE:
8881 raise errors.OpPrereqError("Export not supported for instances with"
8882 " file-based disks", errors.ECODE_INVAL)
8884 def _CreateSnapshots(self, feedback_fn):
8885 """Creates an LVM snapshot for every disk of the instance.
8887 @return: List of snapshots as L{objects.Disk} instances
8890 instance = self.instance
8891 src_node = instance.primary_node
8893 vgname = self.cfg.GetVGName()
8897 for idx, disk in enumerate(instance.disks):
8898 feedback_fn("Creating a snapshot of disk/%s on node %s" %
8901 # result.payload will be a snapshot of an lvm leaf of the one we passed
8903 result = self.rpc.call_blockdev_snapshot(src_node, disk)
8904 msg = result.fail_msg
8906 self.LogWarning("Could not snapshot disk/%s on node %s: %s",
8908 snap_disks.append(False)
8910 disk_id = (vgname, result.payload)
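# (result.payload is the name of the snapshot LV; paired with the volume
# group it forms the logical/physical id of the temporary disk used for the
# export below.)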
8911 new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
8912 logical_id=disk_id, physical_id=disk_id,
8913 iv_name=disk.iv_name)
8914 snap_disks.append(new_dev)
8918 def _RemoveSnapshot(self, feedback_fn, snap_disks, disk_index):
8919 """Removes an LVM snapshot.
8921 @type snap_disks: list
8922 @param snap_disks: The list of all snapshots as returned by
8924 @type disk_index: number
8925 @param disk_index: Index of the snapshot to be removed
8927 @return: Whether removal was successful or not
8930 disk = snap_disks[disk_index]
8932 src_node = self.instance.primary_node
8934 feedback_fn("Removing snapshot of disk/%s on node %s" %
8935 (disk_index, src_node))
8937 result = self.rpc.call_blockdev_remove(src_node, disk)
8938 if not result.fail_msg:
8941 self.LogWarning("Could not remove snapshot for disk/%d from node"
8942 " %s: %s", disk_index, src_node, result.fail_msg)
8946 def _CleanupExports(self, feedback_fn):
8947 """Removes exports of current instance from all other nodes.
8949 If an instance in a cluster with nodes A..D was exported to node C, its
8950 exports will be removed from the nodes A, B and D.
8953 nodelist = self.cfg.GetNodeList()
8954 nodelist.remove(self.dst_node.name)
8956 # on one-node clusters nodelist will be empty after the removal
8957 # if we proceed, the backup would be removed because OpQueryExports
8958 # substitutes an empty list with the full cluster node list.
8959 iname = self.instance.name
8961 feedback_fn("Removing old exports for instance %s" % iname)
8962 exportlist = self.rpc.call_export_list(nodelist)
8963 for node in exportlist:
8964 if exportlist[node].fail_msg:
8966 if iname in exportlist[node].payload:
8967 msg = self.rpc.call_export_remove(node, iname).fail_msg
8969 self.LogWarning("Could not remove older export for instance %s"
8970 " on node %s: %s", iname, node, msg)
8972 def Exec(self, feedback_fn):
8973 """Export an instance to an image in the cluster.
8976 instance = self.instance
8977 dst_node = self.dst_node
8978 src_node = instance.primary_node
8980 if self.op.shutdown:
8981 # shutdown the instance, but not the disks
8982 feedback_fn("Shutting down instance %s" % instance.name)
8983 result = self.rpc.call_instance_shutdown(src_node, instance,
8984 self.shutdown_timeout)
8985 result.Raise("Could not shutdown instance %s on"
8986 " node %s" % (instance.name, src_node))
8988 # set the disks ID correctly since call_instance_start needs the
8989 # correct drbd minor to create the symlinks
8990 for disk in instance.disks:
8991 self.cfg.SetDiskID(disk, src_node)
8993 activate_disks = (not instance.admin_up)
8996 # Activate the instance disks if we're exporting a stopped instance
8997 feedback_fn("Activating disks for %s" % instance.name)
8998 _StartInstanceDisks(self, instance, None)
9003 removed_snaps = [False] * len(instance.disks)
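# (Track, per disk, whether its snapshot has already been removed, so that
# the cleanup pass at the end only removes the ones still around.)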
9008 snap_disks = self._CreateSnapshots(feedback_fn)
9010 if self.op.shutdown and instance.admin_up:
9011 feedback_fn("Starting instance %s" % instance.name)
9012 result = self.rpc.call_instance_start(src_node, instance,
9014 msg = result.fail_msg
9016 _ShutdownInstanceDisks(self, instance)
9017 raise errors.OpExecError("Could not start instance: %s" % msg)
9019 assert len(snap_disks) == len(instance.disks)
9020 assert len(removed_snaps) == len(instance.disks)
9022 # TODO: check for size
9024 cluster_name = self.cfg.GetClusterName()
9025 for idx, dev in enumerate(snap_disks):
9026 feedback_fn("Exporting snapshot %s from %s to %s" %
9027 (idx, src_node, dst_node.name))
9029 # FIXME: pass debug from opcode to backend
9030 result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
9031 instance, cluster_name,
9032 idx, self.op.debug_level)
9033 msg = result.fail_msg
9035 self.LogWarning("Could not export disk/%s from node %s to"
9036 " node %s: %s", idx, src_node, dst_node.name, msg)
9037 dresults.append(False)
9039 dresults.append(True)
9042 if self._RemoveSnapshot(feedback_fn, snap_disks, idx):
9043 removed_snaps[idx] = True
9045 dresults.append(False)
9047 assert len(dresults) == len(instance.disks)
9049 # Check for backwards compatibility
9050 assert compat.all(isinstance(i, bool) for i in dresults), \
9051 "Not all results are boolean: %r" % dresults
9053 feedback_fn("Finalizing export on %s" % dst_node.name)
9054 result = self.rpc.call_finalize_export(dst_node.name, instance,
9056 msg = result.fail_msg
9059 self.LogWarning("Could not finalize export for instance %s"
9060 " on node %s: %s", instance.name, dst_node.name, msg)
9063 # Remove all snapshots
9064 assert len(removed_snaps) == len(instance.disks)
9065 for idx, removed in enumerate(removed_snaps):
9067 self._RemoveSnapshot(feedback_fn, snap_disks, idx)
9071 feedback_fn("Deactivating disks for %s" % instance.name)
9072 _ShutdownInstanceDisks(self, instance)
9074 self._CleanupExports(feedback_fn)
9076 return fin_resu, dresults
9079 class LURemoveExport(NoHooksLU):
9080 """Remove exports related to the named instance.
9083 _OP_REQP = ["instance_name"]
9086 def ExpandNames(self):
9087 self.needed_locks = {}
9088 # We need all nodes to be locked in order for RemoveExport to work, but we
9089 # don't need to lock the instance itself, as nothing will happen to it (and
9090 # we can also remove exports for a removed instance)
9091 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9093 def CheckPrereq(self):
9094 """Check prerequisites.
9098 def Exec(self, feedback_fn):
9099 """Remove any export.
9102 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9103 # If the instance was not found we'll try with the name that was passed in.
9104 # This will only work if it was an FQDN, though.
9106 if not instance_name:
9108 instance_name = self.op.instance_name
9110 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9111 exportlist = self.rpc.call_export_list(locked_nodes)
9113 for node in exportlist:
9114 msg = exportlist[node].fail_msg
9116 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9118 if instance_name in exportlist[node].payload:
9120 result = self.rpc.call_export_remove(node, instance_name)
9121 msg = result.fail_msg
9123 logging.error("Could not remove export for instance %s"
9124 " on node %s: %s", instance_name, node, msg)
9126 if fqdn_warn and not found:
9127 feedback_fn("Export not found. If trying to remove an export belonging"
9128 " to a deleted instance please use its Fully Qualified"
9132 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9135 This is an abstract class which is the parent of all the other tags LUs.
9139 def ExpandNames(self):
9140 self.needed_locks = {}
9141 if self.op.kind == constants.TAG_NODE:
9142 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9143 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9144 elif self.op.kind == constants.TAG_INSTANCE:
9145 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9146 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9148 def CheckPrereq(self):
9149 """Check prerequisites.
9152 if self.op.kind == constants.TAG_CLUSTER:
9153 self.target = self.cfg.GetClusterInfo()
9154 elif self.op.kind == constants.TAG_NODE:
9155 self.target = self.cfg.GetNodeInfo(self.op.name)
9156 elif self.op.kind == constants.TAG_INSTANCE:
9157 self.target = self.cfg.GetInstanceInfo(self.op.name)
9159 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9160 str(self.op.kind), errors.ECODE_INVAL)
9163 class LUGetTags(TagsLU):
9164 """Returns the tags of a given object.
9167 _OP_REQP = ["kind", "name"]
9170 def Exec(self, feedback_fn):
9171 """Returns the tag list.
9174 return list(self.target.GetTags())
9177 class LUSearchTags(NoHooksLU):
9178 """Searches the tags for a given pattern.
9181 _OP_REQP = ["pattern"]
9184 def ExpandNames(self):
9185 self.needed_locks = {}
9187 def CheckPrereq(self):
9188 """Check prerequisites.
9190 This checks the pattern passed for validity by compiling it.
9194 self.re = re.compile(self.op.pattern)
9195 except re.error, err:
9196 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9197 (self.op.pattern, err), errors.ECODE_INVAL)
9199 def Exec(self, feedback_fn):
9200 """Returns the tag list.
9204 tgts = [("/cluster", cfg.GetClusterInfo())]
9205 ilist = cfg.GetAllInstancesInfo().values()
9206 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9207 nlist = cfg.GetAllNodesInfo().values()
9208 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9210 for path, target in tgts:
9211 for tag in target.GetTags():
9212 if self.re.search(tag):
9213 results.append((path, tag))
9217 class LUAddTags(TagsLU):
9218 """Sets a tag on a given object.
9221 _OP_REQP = ["kind", "name", "tags"]
9224 def CheckPrereq(self):
9225 """Check prerequisites.
9227 This checks the type and length of the tag name and value.
9230 TagsLU.CheckPrereq(self)
9231 for tag in self.op.tags:
9232 objects.TaggableObject.ValidateTag(tag)
9234 def Exec(self, feedback_fn):
9239 for tag in self.op.tags:
9240 self.target.AddTag(tag)
9241 except errors.TagError, err:
9242 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9243 self.cfg.Update(self.target, feedback_fn)
9246 class LUDelTags(TagsLU):
9247 """Delete a list of tags from a given object.
9250 _OP_REQP = ["kind", "name", "tags"]
9253 def CheckPrereq(self):
9254 """Check prerequisites.
9256 This checks that we have the given tag.
9259 TagsLU.CheckPrereq(self)
9260 for tag in self.op.tags:
9261 objects.TaggableObject.ValidateTag(tag)
9262 del_tags = frozenset(self.op.tags)
9263 cur_tags = self.target.GetTags()
9264 if not del_tags <= cur_tags:
9265 diff_tags = del_tags - cur_tags
9266 diff_names = ["'%s'" % tag for tag in diff_tags]
9268 raise errors.OpPrereqError("Tag(s) %s not found" %
9269 (",".join(diff_names)), errors.ECODE_NOENT)
9271 def Exec(self, feedback_fn):
9272 """Remove the tag from the object.
9275 for tag in self.op.tags:
9276 self.target.RemoveTag(tag)
9277 self.cfg.Update(self.target, feedback_fn)
9280 class LUTestDelay(NoHooksLU):
9281 """Sleep for a specified amount of time.
9283 This LU sleeps on the master and/or nodes for a specified amount of time.
9287 _OP_REQP = ["duration", "on_master", "on_nodes"]
9290 def ExpandNames(self):
9291 """Expand names and set required locks.
9293 This expands the node list, if any.
9296 self.needed_locks = {}
9297 if self.op.on_nodes:
9298 # _GetWantedNodes can be used here, but is not always appropriate to use
9299 # this way in ExpandNames. Check the LogicalUnit.ExpandNames docstring for more information.
9301 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9302 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9304 def CheckPrereq(self):
9305 """Check prerequisites.
9309 def Exec(self, feedback_fn):
9310 """Do the actual sleep.
9313 if self.op.on_master:
9314 if not utils.TestDelay(self.op.duration):
9315 raise errors.OpExecError("Error during master delay test")
9316 if self.op.on_nodes:
9317 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9318 for node, node_result in result.items():
9319 node_result.Raise("Failure during rpc call to node %s" % node)
9322 class IAllocator(object):
9323 """IAllocator framework.
9325 An IAllocator instance has three sets of attributes:
9326 - cfg that is needed to query the cluster
9327 - input data (all members of the _KEYS class attribute are required)
9328 - four buffer attributes (in|out_data|text), that represent the
9329 input (to the external script) in text and data structure format,
9330 and the output from it, again in two formats
9331 - the result variables from the script (success, info, nodes) for
9335 # pylint: disable-msg=R0902
9336 # lots of instance attributes
9338 "name", "mem_size", "disks", "disk_template",
9339 "os", "tags", "nics", "vcpus", "hypervisor",
9342 "name", "relocate_from",
9348 def __init__(self, cfg, rpc, mode, **kwargs):
9351 # init buffer variables
9352 self.in_text = self.out_text = self.in_data = self.out_data = None
9353 # init all input fields so that pylint is happy
9355 self.mem_size = self.disks = self.disk_template = None
9356 self.os = self.tags = self.nics = self.vcpus = None
9357 self.hypervisor = None
9358 self.relocate_from = None
9360 self.evac_nodes = None
9362 self.required_nodes = None
9363 # init result fields
9364 self.success = self.info = self.result = None
9365 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9366 keyset = self._ALLO_KEYS
9367 fn = self._AddNewInstance
9368 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9369 keyset = self._RELO_KEYS
9370 fn = self._AddRelocateInstance
9371 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9372 keyset = self._EVAC_KEYS
9373 fn = self._AddEvacuateNodes
9375 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9376 " IAllocator" % self.mode)
9378 if key not in keyset:
9379 raise errors.ProgrammerError("Invalid input parameter '%s' to"
9380 " IAllocator" % key)
9381 setattr(self, key, kwargs[key])
9384 if key not in kwargs:
9385 raise errors.ProgrammerError("Missing input parameter '%s' to"
9386 " IAllocator" % key)
9387 self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data
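
  # For orientation, a rough sketch (the values are made up) of what
  # self.in_data looks like after _ComputeClusterData; the "nodes" and
  # "instances" sub-dicts are keyed by name and hold the per-object data
  # built above:
  #
  #   {
  #     "version": constants.IALLOCATOR_VERSION,
  #     "cluster_name": "cluster.example.com",
  #     "cluster_tags": [],
  #     "enabled_hypervisors": ["xen-pvm"],
  #     "nodes": {
  #       "node1.example.com": {"total_memory": 4096, "free_memory": 2048,
  #                             "total_disk": 102400, "free_disk": 51200,
  #                             "total_cpus": 4, "offline": False, ...},
  #     },
  #     "instances": {
  #       "instance1.example.com": {"memory": 512, "vcpus": 1,
  #                                 "disk_template": "drbd", ...},
  #     },
  #   }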

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
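
  # The request section depends on the mode; continuing the sketch above,
  # for a relocation the serialized input would roughly contain (illustrative
  # values only):
  #
  #   self.in_data["request"] = {
  #     "type": constants.IALLOCATOR_MODE_RELOC,
  #     "name": "instance1.example.com",
  #     "relocate_from": ["node2.example.com"],
  #     "disk_space_total": 1152,
  #     "required_nodes": 1,
  #   }
  #
  # and self.in_text is simply serializer.Dump() of the whole structure.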

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
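
  # Note that call_fn only needs to be a callable taking the same arguments
  # as the call above (master node name, allocator name, input text); a unit
  # test could therefore pass a stub returning a canned result object instead
  # of invoking the external script on the master node.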

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
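
  # A well-formed reply from the script is a JSON object with at least the
  # three keys checked above; for example (illustrative only), a successful
  # two-node allocation would look roughly like:
  #
  #   {
  #     "success": true,
  #     "info": "allocation successful",
  #     "result": ["node1.example.com", "node2.example.com"]
  #   }
  #
  # Replies from older scripts that still use the "nodes" key are rewritten
  # to "result" above before the check.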


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
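
  # Illustrative only: the opcode driving this LU would be submitted roughly
  # along these lines (the field values are hypothetical); the "allocator"
  # field matters only for the "out" direction, where the external script is
  # actually executed:
  #
  #   op = opcodes.OpTestAllocator(direction=constants.IALLOCATOR_DIR_OUT,
  #                                mode=constants.IALLOCATOR_MODE_ALLOC,
  #                                name="instance1.example.com",
  #                                allocator="hail",
  #                                mem_size=512, vcpus=1, os="debian-image",
  #                                tags=[], disk_template=constants.DT_PLAIN,
  #                                disks=[{"size": 1024, "mode": "w"}],
  #                                nics=[{"mac": "auto", "ip": None,
  #                                       "bridge": None}])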

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode '%s' in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text