4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
# Standard library and third-party modules used later in this module
import time
import re
import logging

import OpenSSL
38 from ganeti import ssh
39 from ganeti import utils
40 from ganeti import errors
41 from ganeti import hypervisor
42 from ganeti import locking
43 from ganeti import constants
44 from ganeti import objects
45 from ganeti import serializer
46 from ganeti import ssconf
47 from ganeti import uidpool
50 class LogicalUnit(object):
51 """Logical Unit base class.
53 Subclasses must follow these rules:
54 - implement ExpandNames
55 - implement CheckPrereq (except when tasklets are used)
56 - implement Exec (except when tasklets are used)
57 - implement BuildHooksEnv
58 - redefine HPATH and HTYPE
59 - optionally redefine their run requirements:
60 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
62 Note that all commands require root permissions.
64 @ivar dry_run_result: the value (if any) that will be returned to the caller
65 in dry-run mode (signalled by opcode dry_run parameter)
73 def __init__(self, processor, op, context, rpc):
74 """Constructor for LogicalUnit.
76 This needs to be overridden in derived classes in order to check op
82 self.cfg = context.cfg
83 self.context = context
85 # Dicts used to declare locking needs to mcpu
86 self.needed_locks = None
87 self.acquired_locks = {}
88 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
90 self.remove_locks = {}
91 # Used to force good behavior when calling helper functions
92 self.recalculate_locks = {}
95 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
96 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
97 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
99 self.dry_run_result = None
100 # support for generic debug attribute
101 if (not hasattr(self.op, "debug_level") or
102 not isinstance(self.op.debug_level, int)):
103 self.op.debug_level = 0
108 for attr_name in self._OP_REQP:
109 attr_val = getattr(op, attr_name, None)
111 raise errors.OpPrereqError("Required parameter '%s' missing" %
112 attr_name, errors.ECODE_INVAL)
114 self.CheckArguments()
117 """Returns the SshRunner object
121 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
124 ssh = property(fget=__GetSSH)
126 def CheckArguments(self):
127 """Check syntactic validity for the opcode arguments.
129 This method is for doing a simple syntactic check and ensuring
130 validity of opcode parameters, without any cluster-related
131 checks. While the same can be accomplished in ExpandNames and/or
132 CheckPrereq, doing these separately is better because:
134 - ExpandNames is left as purely a lock-related function
135 - CheckPrereq is run after we have acquired locks (and possible
138 The function is allowed to change the self.op attribute so that
139 later methods need not worry about missing parameters.
144 def ExpandNames(self):
145 """Expand names for this LU.
147 This method is called before starting to execute the opcode, and it should
148 update all the parameters of the opcode to their canonical form (e.g. a
149 short node name must be fully expanded after this method has successfully
150 completed). This way locking, hooks, logging, etc. can work correctly.
152 LUs which implement this method must also populate the self.needed_locks
153 member, as a dict with lock levels as keys, and a list of needed lock names
156 - use an empty dict if you don't need any lock
157 - if you don't need any lock at a particular level omit that level
158 - don't put anything for the BGL level
159 - if you want all locks at a level use locking.ALL_SET as a value
161 If you need to share locks (rather than acquire them exclusively) at one
162 level you can modify self.share_locks, setting a true value (usually 1) for
163 that level. By default locks are not shared.
165 This function can also define a list of tasklets, which then will be
166 executed in order instead of the usual LU-level CheckPrereq and Exec
167 functions, if those are not defined by the LU.
171 # Acquire all nodes and one instance
172 self.needed_locks = {
173 locking.LEVEL_NODE: locking.ALL_SET,
174 locking.LEVEL_INSTANCE: ['instance1.example.tld'],
176 # Acquire just two nodes
177 self.needed_locks = {
178 locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
181 self.needed_locks = {} # No, you can't leave it to the default value None
184 # The implementation of this method is mandatory only if the new LU is
185 # concurrent, so that old LUs don't need to be changed all at the same time.
188 self.needed_locks = {} # Exclusive LUs don't need locks.
190 raise NotImplementedError
192 def DeclareLocks(self, level):
193 """Declare LU locking needs for a level
195 While most LUs can just declare their locking needs at ExpandNames time,
196 sometimes there's the need to calculate some locks after having acquired
197 the ones before. This function is called just before acquiring locks at a
198 particular level, but after acquiring the ones at lower levels, and permits
199 such calculations. It can be used to modify self.needed_locks, and by
200 default it does nothing.
202 This function is only called if you have something already set in
203 self.needed_locks for the level.
205 @param level: Locking level which is going to be locked
206 @type level: member of ganeti.locking.LEVELS
210 def CheckPrereq(self):
211 """Check prerequisites for this LU.
213 This method should check that the prerequisites for the execution
214 of this LU are fulfilled. It can do internode communication, but
215 it should be idempotent - no cluster or system changes are allowed.
218 The method should raise errors.OpPrereqError in case something is
219 not fulfilled. Its return value is ignored.
221 This method should also update all the parameters of the opcode to
222 their canonical form if it hasn't been done by ExpandNames before.
225 if self.tasklets is not None:
226 for (idx, tl) in enumerate(self.tasklets):
227 logging.debug("Checking prerequisites for tasklet %s/%s",
228 idx + 1, len(self.tasklets))
231 raise NotImplementedError
233 def Exec(self, feedback_fn):
236 This method should implement the actual work. It should raise
237 errors.OpExecError for failures that are somewhat dealt with in
241 if self.tasklets is not None:
242 for (idx, tl) in enumerate(self.tasklets):
243 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
246 raise NotImplementedError
248 def BuildHooksEnv(self):
249 """Build hooks environment for this LU.
251 This method should return a three-element tuple consisting of: a dict
252 containing the environment that will be used for running the
253 specific hook for this LU, a list of node names on which the hook
254 should run before the execution, and a list of node names on which
255 the hook should run after the execution.
257 The keys of the dict must not be prefixed with 'GANETI_', as this will
258 be handled in the hooks runner. Also note additional keys will be
259 added by the hooks runner. If the LU doesn't define any
260 environment, an empty dict (and not None) should be returned.
262 If the hook should run on no nodes, an empty list (and not None) should be returned.
264 Note that if the HPATH for a LU class is None, this function will
268 raise NotImplementedError
270 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
271 """Notify the LU about the results of its hooks.
273 This method is called every time a hooks phase is executed, and notifies
274 the Logical Unit about the hooks' result. The LU can then use it to alter
275 its result based on the hooks. By default the method does nothing and the
276 previous result is passed back unchanged but any LU can define it if it
277 wants to use the local cluster hook-scripts somehow.
279 @param phase: one of L{constants.HOOKS_PHASE_POST} or
280 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
281 @param hook_results: the results of the multi-node hooks rpc call
282 @param feedback_fn: function used to send feedback back to the caller
283 @param lu_result: the previous Exec result this LU had, or None
285 @return: the new Exec result, based on the previous result
289 # API must be kept, thus we ignore the unused-argument and
290 # could-be-a-function warnings
291 # pylint: disable-msg=W0613,R0201
294 def _ExpandAndLockInstance(self):
295 """Helper function to expand and lock an instance.
297 Many LUs that work on an instance take its name in self.op.instance_name
298 and need to expand it and then declare the expanded name for locking. This
299 function does it, and then updates self.op.instance_name to the expanded
300 name. It also initializes needed_locks as a dict, if this hasn't been done
304 if self.needed_locks is None:
305 self.needed_locks = {}
307 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
308 "_ExpandAndLockInstance called with instance-level locks set"
309 self.op.instance_name = _ExpandInstanceName(self.cfg,
310 self.op.instance_name)
311 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
313 def _LockInstancesNodes(self, primary_only=False):
314 """Helper function to declare instances' nodes for locking.
316 This function should be called after locking one or more instances to lock
317 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
318 with all primary or secondary nodes for instances already locked and
319 present in self.needed_locks[locking.LEVEL_INSTANCE].
321 It should be called from DeclareLocks, and for safety only works if
322 self.recalculate_locks[locking.LEVEL_NODE] is set.
324 In the future it may grow parameters to lock only some instances' nodes, or
325 to lock only primary or secondary nodes, if needed.
327 It should be called from DeclareLocks in a way similar to::
329 if level == locking.LEVEL_NODE:
330 self._LockInstancesNodes()
332 @type primary_only: boolean
333 @param primary_only: only lock primary nodes of locked instances
336 assert locking.LEVEL_NODE in self.recalculate_locks, \
337 "_LockInstancesNodes helper function called with no nodes to recalculate"
339 # TODO: check if we really have been called with the instance locks held
341 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
342 # future we might want to have different behaviors depending on the value
343 # of self.recalculate_locks[locking.LEVEL_NODE]
345 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
346 instance = self.context.cfg.GetInstanceInfo(instance_name)
347 wanted_nodes.append(instance.primary_node)
349 wanted_nodes.extend(instance.secondary_nodes)
351 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
352 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
353 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
354 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
356 del self.recalculate_locks[locking.LEVEL_NODE]
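# Illustrative sketch, not part of the original code: a typical concurrent LU
# declares the instance lock in ExpandNames and recalculates node locks via
# DeclareLocks and the helper above (field names are only examples):
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     # node locks are computed later, once the instance lock is held
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()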
359 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
360 """Simple LU which runs no hooks.
362 This LU is intended as a parent for other LogicalUnits which will
363 run no hooks, in order to reduce duplicate code.
369 def BuildHooksEnv(self):
370 """Empty BuildHooksEnv for NoHooksLu.
372 This just raises an error.
375 assert False, "BuildHooksEnv called for NoHooksLUs"
379 """Tasklet base class.
381 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
382 they can mix legacy code with tasklets. Locking needs to be done in the LU,
383 tasklets know nothing about locks.
385 Subclasses must follow these rules:
386 - Implement CheckPrereq
390 def __init__(self, lu):
397 def CheckPrereq(self):
398 """Check prerequisites for this tasklets.
400 This method should check whether the prerequisites for the execution of
401 this tasklet are fulfilled. It can do internode communication, but it
402 should be idempotent - no cluster or system changes are allowed.
404 The method should raise errors.OpPrereqError in case something is not
405 fulfilled. Its return value is ignored.
407 This method should also update all parameters to their canonical form if it
408 hasn't been done before.
411 raise NotImplementedError
413 def Exec(self, feedback_fn):
414 """Execute the tasklet.
416 This method should implement the actual work. It should raise
417 errors.OpExecError for failures that are somewhat dealt with in code, or
421 raise NotImplementedError
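# Illustrative sketch, not part of the original code: an LU that delegates its
# work to tasklets builds the list in ExpandNames (the tasklet class named
# below is hypothetical); LogicalUnit.CheckPrereq and Exec then iterate over
# self.tasklets instead of requiring LU-level implementations:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.tasklets = [_ExampleInstanceTasklet(self, self.op.instance_name)]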
424 def _GetWantedNodes(lu, nodes):
425 """Returns list of checked and expanded node names.
427 @type lu: L{LogicalUnit}
428 @param lu: the logical unit on whose behalf we execute
430 @param nodes: list of node names or None for all nodes
432 @return: the list of nodes, sorted
433 @raise errors.ProgrammerError: if the nodes parameter is wrong type
436 if not isinstance(nodes, list):
437 raise errors.OpPrereqError("Invalid argument type 'nodes'",
441 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
442 " non-empty list of nodes whose name is to be expanded.")
444 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
445 return utils.NiceSort(wanted)
448 def _GetWantedInstances(lu, instances):
449 """Returns list of checked and expanded instance names.
451 @type lu: L{LogicalUnit}
452 @param lu: the logical unit on whose behalf we execute
453 @type instances: list
454 @param instances: list of instance names or None for all instances
456 @return: the list of instances, sorted
457 @raise errors.OpPrereqError: if the instances parameter is wrong type
458 @raise errors.OpPrereqError: if any of the passed instances is not found
461 if not isinstance(instances, list):
462 raise errors.OpPrereqError("Invalid argument type 'instances'",
466 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
468 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
472 def _CheckOutputFields(static, dynamic, selected):
473 """Checks whether all selected fields are valid.
475 @type static: L{utils.FieldSet}
476 @param static: static fields set
477 @type dynamic: L{utils.FieldSet}
478 @param dynamic: dynamic fields set
485 delta = f.NonMatching(selected)
487 raise errors.OpPrereqError("Unknown output fields selected: %s"
488 % ",".join(delta), errors.ECODE_INVAL)
491 def _CheckBooleanOpField(op, name):
492 """Validates boolean opcode parameters.
494 This will ensure that an opcode parameter is either a boolean value,
495 or None (but that it always exists).
498 val = getattr(op, name, None)
499 if not (val is None or isinstance(val, bool)):
500 raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
501 (name, str(val)), errors.ECODE_INVAL)
502 setattr(op, name, val)
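# Illustrative use, not part of the original code: an LU's CheckArguments can
# normalise an optional boolean opcode field (the field name is only an
# example) before relying on it:
#
#   _CheckBooleanOpField(self.op, "ignore_secondaries")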
505 def _CheckGlobalHvParams(params):
506 """Validates that given hypervisor params are not global ones.
508 This will ensure that instances don't get customised versions of
512 used_globals = constants.HVC_GLOBALS.intersection(params)
514 msg = ("The following hypervisor parameters are global and cannot"
515 " be customized at instance level, please modify them at"
516 " cluster level: %s" % utils.CommaJoin(used_globals))
517 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
520 def _CheckNodeOnline(lu, node):
521 """Ensure that a given node is online.
523 @param lu: the LU on behalf of which we make the check
524 @param node: the node to check
525 @raise errors.OpPrereqError: if the node is offline
528 if lu.cfg.GetNodeInfo(node).offline:
529 raise errors.OpPrereqError("Can't use offline node %s" % node,
533 def _CheckNodeNotDrained(lu, node):
534 """Ensure that a given node is not drained.
536 @param lu: the LU on behalf of which we make the check
537 @param node: the node to check
538 @raise errors.OpPrereqError: if the node is drained
541 if lu.cfg.GetNodeInfo(node).drained:
542 raise errors.OpPrereqError("Can't use drained node %s" % node,
546 def _CheckNodeHasOS(lu, node, os_name, force_variant):
547 """Ensure that a node supports a given OS.
549 @param lu: the LU on behalf of which we make the check
550 @param node: the node to check
551 @param os_name: the OS to query about
552 @param force_variant: whether to ignore variant errors
553 @raise errors.OpPrereqError: if the node is not supporting the OS
556 result = lu.rpc.call_os_get(node, os_name)
557 result.Raise("OS '%s' not in supported OS list for node %s" %
559 prereq=True, ecode=errors.ECODE_INVAL)
560 if not force_variant:
561 _CheckOSVariant(result.payload, os_name)
564 def _RequireFileStorage():
565 """Checks that file storage is enabled.
567 @raise errors.OpPrereqError: when file storage is disabled
570 if not constants.ENABLE_FILE_STORAGE:
571 raise errors.OpPrereqError("File storage disabled at configure time",
575 def _CheckDiskTemplate(template):
576 """Ensure a given disk template is valid.
579 if template not in constants.DISK_TEMPLATES:
580 msg = ("Invalid disk template name '%s', valid templates are: %s" %
581 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
582 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
583 if template == constants.DT_FILE:
584 _RequireFileStorage()
587 def _CheckStorageType(storage_type):
588 """Ensure a given storage type is valid.
591 if storage_type not in constants.VALID_STORAGE_TYPES:
592 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
594 if storage_type == constants.ST_FILE:
595 _RequireFileStorage()
599 def _CheckInstanceDown(lu, instance, reason):
600 """Ensure that an instance is not running."""
601 if instance.admin_up:
602 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
603 (instance.name, reason), errors.ECODE_STATE)
605 pnode = instance.primary_node
606 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
607 ins_l.Raise("Can't contact node %s for instance information" % pnode,
608 prereq=True, ecode=errors.ECODE_ENVIRON)
610 if instance.name in ins_l.payload:
611 raise errors.OpPrereqError("Instance %s is running, %s" %
612 (instance.name, reason), errors.ECODE_STATE)
615 def _ExpandItemName(fn, name, kind):
616 """Expand an item name.
618 @param fn: the function to use for expansion
619 @param name: requested item name
620 @param kind: text description ('Node' or 'Instance')
621 @return: the resolved (full) name
622 @raise errors.OpPrereqError: if the item is not found
626 if full_name is None:
627 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
632 def _ExpandNodeName(cfg, name):
633 """Wrapper over L{_ExpandItemName} for nodes."""
634 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
637 def _ExpandInstanceName(cfg, name):
638 """Wrapper over L{_ExpandItemName} for instance."""
639 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
642 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
643 memory, vcpus, nics, disk_template, disks,
644 bep, hvp, hypervisor_name):
645 """Builds instance related env variables for hooks
647 This builds the hook environment from individual variables.
650 @param name: the name of the instance
651 @type primary_node: string
652 @param primary_node: the name of the instance's primary node
653 @type secondary_nodes: list
654 @param secondary_nodes: list of secondary nodes as strings
655 @type os_type: string
656 @param os_type: the name of the instance's OS
657 @type status: boolean
658 @param status: the should_run status of the instance
660 @param memory: the memory size of the instance
662 @param vcpus: the count of VCPUs the instance has
664 @param nics: list of tuples (ip, mac, mode, link) representing
665 the NICs the instance has
666 @type disk_template: string
667 @param disk_template: the disk template of the instance
669 @param disks: the list of (size, mode) pairs
671 @param bep: the backend parameters for the instance
673 @param hvp: the hypervisor parameters for the instance
674 @type hypervisor_name: string
675 @param hypervisor_name: the hypervisor for the instance
677 @return: the hook environment for this instance
686 "INSTANCE_NAME": name,
687 "INSTANCE_PRIMARY": primary_node,
688 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
689 "INSTANCE_OS_TYPE": os_type,
690 "INSTANCE_STATUS": str_status,
691 "INSTANCE_MEMORY": memory,
692 "INSTANCE_VCPUS": vcpus,
693 "INSTANCE_DISK_TEMPLATE": disk_template,
694 "INSTANCE_HYPERVISOR": hypervisor_name,
698 nic_count = len(nics)
699 for idx, (ip, mac, mode, link) in enumerate(nics):
702 env["INSTANCE_NIC%d_IP" % idx] = ip
703 env["INSTANCE_NIC%d_MAC" % idx] = mac
704 env["INSTANCE_NIC%d_MODE" % idx] = mode
705 env["INSTANCE_NIC%d_LINK" % idx] = link
706 if mode == constants.NIC_MODE_BRIDGED:
707 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
711 env["INSTANCE_NIC_COUNT"] = nic_count
714 disk_count = len(disks)
715 for idx, (size, mode) in enumerate(disks):
716 env["INSTANCE_DISK%d_SIZE" % idx] = size
717 env["INSTANCE_DISK%d_MODE" % idx] = mode
721 env["INSTANCE_DISK_COUNT"] = disk_count
723 for source, kind in [(bep, "BE"), (hvp, "HV")]:
724 for key, value in source.items():
725 env["INSTANCE_%s_%s" % (kind, key)] = value
730 def _NICListToTuple(lu, nics):
731 """Build a list of nic information tuples.
733 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
734 value in LUQueryInstanceData.
736 @type lu: L{LogicalUnit}
737 @param lu: the logical unit on whose behalf we execute
738 @type nics: list of L{objects.NIC}
739 @param nics: list of nics to convert to hooks tuples
743 c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
747 filled_params = objects.FillDict(c_nicparams, nic.nicparams)
748 mode = filled_params[constants.NIC_MODE]
749 link = filled_params[constants.NIC_LINK]
750 hooks_nics.append((ip, mac, mode, link))
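# For illustration: each element of the returned list is an (ip, mac, mode,
# link) tuple, e.g. (None, "aa:00:00:35:ac:9f", constants.NIC_MODE_BRIDGED,
# "xen-br0") for a bridged NIC; the MAC and link values here are made up.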
754 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
755 """Builds instance related env variables for hooks from an object.
757 @type lu: L{LogicalUnit}
758 @param lu: the logical unit on whose behalf we execute
759 @type instance: L{objects.Instance}
760 @param instance: the instance for which we should build the
763 @param override: dictionary with key/values that will override
766 @return: the hook environment dictionary
769 cluster = lu.cfg.GetClusterInfo()
770 bep = cluster.FillBE(instance)
771 hvp = cluster.FillHV(instance)
773 'name': instance.name,
774 'primary_node': instance.primary_node,
775 'secondary_nodes': instance.secondary_nodes,
776 'os_type': instance.os,
777 'status': instance.admin_up,
778 'memory': bep[constants.BE_MEMORY],
779 'vcpus': bep[constants.BE_VCPUS],
780 'nics': _NICListToTuple(lu, instance.nics),
781 'disk_template': instance.disk_template,
782 'disks': [(disk.size, disk.mode) for disk in instance.disks],
785 'hypervisor_name': instance.hypervisor,
788 args.update(override)
789 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
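# Illustrative use, not part of the original code: an instance LU's
# BuildHooksEnv can typically just do
#
#   env = _BuildInstanceHookEnvByObject(self, self.instance)
#
# and then extend the returned dict with any operation-specific keys.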
792 def _AdjustCandidatePool(lu, exceptions):
793 """Adjust the candidate pool after node operations.
796 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
798 lu.LogInfo("Promoted nodes to master candidate role: %s",
799 utils.CommaJoin(node.name for node in mod_list))
800 for name in mod_list:
801 lu.context.ReaddNode(name)
802 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
804 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
808 def _DecideSelfPromotion(lu, exceptions=None):
809 """Decide whether I should promote myself as a master candidate.
812 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
813 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
814 # the new node will increase mc_max with one, so:
815 mc_should = min(mc_should + 1, cp_size)
816 return mc_now < mc_should
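# Worked example with assumed numbers: if candidate_pool_size is 10 and
# GetMasterCandidateStats reports mc_now=3, mc_should=3, then adding this node
# bumps mc_should to min(3 + 1, 10) = 4, and 3 < 4 means the node promotes
# itself to master candidate.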
819 def _CheckNicsBridgesExist(lu, target_nics, target_node,
820 profile=constants.PP_DEFAULT):
821 """Check that the brigdes needed by a list of nics exist.
824 c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
825 paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
826 for nic in target_nics]
827 brlist = [params[constants.NIC_LINK] for params in paramslist
828 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
830 result = lu.rpc.call_bridges_exist(target_node, brlist)
831 result.Raise("Error checking bridges on destination node '%s'" %
832 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
835 def _CheckInstanceBridgesExist(lu, instance, node=None):
836 """Check that the brigdes needed by an instance exist.
840 node = instance.primary_node
841 _CheckNicsBridgesExist(lu, instance.nics, node)
844 def _CheckOSVariant(os_obj, name):
845 """Check whether an OS name conforms to the os variants specification.
847 @type os_obj: L{objects.OS}
848 @param os_obj: OS object to check
850 @param name: OS name passed by the user, to check for validity
853 if not os_obj.supported_variants:
856 variant = name.split("+", 1)[1]
858 raise errors.OpPrereqError("OS name must include a variant",
861 if variant not in os_obj.supported_variants:
862 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
865 def _GetNodeInstancesInner(cfg, fn):
866 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
869 def _GetNodeInstances(cfg, node_name):
870 """Returns a list of all primary and secondary instances on a node.
874 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
877 def _GetNodePrimaryInstances(cfg, node_name):
878 """Returns primary instances on a node.
881 return _GetNodeInstancesInner(cfg,
882 lambda inst: node_name == inst.primary_node)
885 def _GetNodeSecondaryInstances(cfg, node_name):
886 """Returns secondary instances on a node.
889 return _GetNodeInstancesInner(cfg,
890 lambda inst: node_name in inst.secondary_nodes)
893 def _GetStorageTypeArgs(cfg, storage_type):
894 """Returns the arguments for a storage type.
897 # Special case for file storage
898 if storage_type == constants.ST_FILE:
899 # storage.FileStorage wants a list of storage directories
900 return [[cfg.GetFileStorageDir()]]
905 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
908 for dev in instance.disks:
909 cfg.SetDiskID(dev, node_name)
911 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
912 result.Raise("Failed to get disk status from node %s" % node_name,
913 prereq=prereq, ecode=errors.ECODE_ENVIRON)
915 for idx, bdev_status in enumerate(result.payload):
916 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
922 def _FormatTimestamp(secs):
923 """Formats a Unix timestamp with the local timezone.
926 return time.strftime("%F %T %Z", time.gmtime(secs))
929 class LUPostInitCluster(LogicalUnit):
930 """Logical unit for running hooks after cluster initialization.
933 HPATH = "cluster-init"
934 HTYPE = constants.HTYPE_CLUSTER
937 def BuildHooksEnv(self):
941 env = {"OP_TARGET": self.cfg.GetClusterName()}
942 mn = self.cfg.GetMasterNode()
945 def CheckPrereq(self):
946 """No prerequisites to check.
951 def Exec(self, feedback_fn):
958 class LUDestroyCluster(LogicalUnit):
959 """Logical unit for destroying the cluster.
962 HPATH = "cluster-destroy"
963 HTYPE = constants.HTYPE_CLUSTER
966 def BuildHooksEnv(self):
970 env = {"OP_TARGET": self.cfg.GetClusterName()}
973 def CheckPrereq(self):
974 """Check prerequisites.
976 This checks whether the cluster is empty.
978 Any errors are signaled by raising errors.OpPrereqError.
981 master = self.cfg.GetMasterNode()
983 nodelist = self.cfg.GetNodeList()
984 if len(nodelist) != 1 or nodelist[0] != master:
985 raise errors.OpPrereqError("There are still %d node(s) in"
986 " this cluster." % (len(nodelist) - 1),
988 instancelist = self.cfg.GetInstanceList()
990 raise errors.OpPrereqError("There are still %d instance(s) in"
991 " this cluster." % len(instancelist),
994 def Exec(self, feedback_fn):
995 """Destroys the cluster.
998 master = self.cfg.GetMasterNode()
999 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1001 # Run post hooks on master node before it's removed
1002 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1004 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1006 # pylint: disable-msg=W0702
1007 self.LogWarning("Errors occurred running hooks on %s" % master)
1009 result = self.rpc.call_node_stop_master(master, False)
1010 result.Raise("Could not disable the master role")
1012 if modify_ssh_setup:
1013 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1014 utils.CreateBackup(priv_key)
1015 utils.CreateBackup(pub_key)
1020 def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
1021 warn_days=constants.SSL_CERT_EXPIRATION_WARN,
1022 error_days=constants.SSL_CERT_EXPIRATION_ERROR):
1023 """Verifies certificate details for LUVerifyCluster.
1027 msg = "Certificate %s is expired" % filename
1029 if not_before is not None and not_after is not None:
1030 msg += (" (valid from %s to %s)" %
1031 (_FormatTimestamp(not_before),
1032 _FormatTimestamp(not_after)))
1033 elif not_before is not None:
1034 msg += " (valid from %s)" % _FormatTimestamp(not_before)
1035 elif not_after is not None:
1036 msg += " (valid until %s)" % _FormatTimestamp(not_after)
1038 return (LUVerifyCluster.ETYPE_ERROR, msg)
1040 elif not_before is not None and not_before > now:
1041 return (LUVerifyCluster.ETYPE_WARNING,
1042 "Certificate %s not yet valid (valid from %s)" %
1043 (filename, _FormatTimestamp(not_before)))
1045 elif not_after is not None:
1046 remaining_days = int((not_after - now) / (24 * 3600))
1048 msg = ("Certificate %s expires in %d days" % (filename, remaining_days))
1050 if remaining_days <= error_days:
1051 return (LUVerifyCluster.ETYPE_ERROR, msg)
1053 if remaining_days <= warn_days:
1054 return (LUVerifyCluster.ETYPE_WARNING, msg)
1059 def _VerifyCertificate(filename):
1060 """Verifies a certificate for LUVerifyCluster.
1062 @type filename: string
1063 @param filename: Path to PEM file
1067 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1068 utils.ReadFile(filename))
1069 except Exception, err: # pylint: disable-msg=W0703
1070 return (LUVerifyCluster.ETYPE_ERROR,
1071 "Failed to load X509 certificate %s: %s" % (filename, err))
1073 # Depending on the pyOpenSSL version, this can just return (None, None)
1074 (not_before, not_after) = utils.GetX509CertValidity(cert)
1076 return _VerifyCertificateInner(filename, cert.has_expired(),
1077 not_before, not_after, time.time())
1080 class LUVerifyCluster(LogicalUnit):
1081 """Verifies the cluster status.
1084 HPATH = "cluster-verify"
1085 HTYPE = constants.HTYPE_CLUSTER
1086 _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1089 TCLUSTER = "cluster"
1091 TINSTANCE = "instance"
1093 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1094 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1095 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1096 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1097 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1098 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1100 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1101 ENODEDRBD = (TNODE, "ENODEDRBD")
1102 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1103 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1104 ENODEHV = (TNODE, "ENODEHV")
1105 ENODELVM = (TNODE, "ENODELVM")
1106 ENODEN1 = (TNODE, "ENODEN1")
1107 ENODENET = (TNODE, "ENODENET")
1108 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1109 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1110 ENODERPC = (TNODE, "ENODERPC")
1111 ENODESSH = (TNODE, "ENODESSH")
1112 ENODEVERSION = (TNODE, "ENODEVERSION")
1113 ENODESETUP = (TNODE, "ENODESETUP")
1114 ENODETIME = (TNODE, "ENODETIME")
1116 ETYPE_FIELD = "code"
1117 ETYPE_ERROR = "ERROR"
1118 ETYPE_WARNING = "WARNING"
1120 class NodeImage(object):
1121 """A class representing the logical and physical status of a node.
1123 @ivar volumes: a structure as returned from
1124 L{ganeti.backend.GetVolumeList} (runtime)
1125 @ivar instances: a list of running instances (runtime)
1126 @ivar pinst: list of configured primary instances (config)
1127 @ivar sinst: list of configured secondary instances (config)
1128 @ivar sbp: dictionary of {primary-node: list of instances} for all peers
1129 of this node (config)
1130 @ivar mfree: free memory, as reported by hypervisor (runtime)
1131 @ivar dfree: free disk, as reported by the node (runtime)
1132 @ivar offline: the offline status (config)
1133 @type rpc_fail: boolean
1134 @ivar rpc_fail: whether the RPC verify call failed (overall,
1135 not whether the individual keys were correct) (runtime)
1136 @type lvm_fail: boolean
1137 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1138 @type hyp_fail: boolean
1139 @ivar hyp_fail: whether the RPC call didn't return the instance list
1140 @type ghost: boolean
1141 @ivar ghost: whether this is a known node or not (config)
1144 def __init__(self, offline=False):
1152 self.offline = offline
1153 self.rpc_fail = False
1154 self.lvm_fail = False
1155 self.hyp_fail = False
1158 def ExpandNames(self):
1159 self.needed_locks = {
1160 locking.LEVEL_NODE: locking.ALL_SET,
1161 locking.LEVEL_INSTANCE: locking.ALL_SET,
1163 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1165 def _Error(self, ecode, item, msg, *args, **kwargs):
1166 """Format an error message.
1168 Based on the opcode's error_codes parameter, either format a
1169 parseable error code, or a simpler error string.
1171 This must be called only from Exec and functions called from Exec.
1174 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1176 # first complete the msg
1179 # then format the whole message
1180 if self.op.error_codes:
1181 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1187 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1188 # and finally report it via the feedback_fn
1189 self._feedback_fn(" - %s" % msg)
1191 def _ErrorIf(self, cond, *args, **kwargs):
1192 """Log an error message if the passed condition is True.
1195 cond = bool(cond) or self.op.debug_simulate_errors
1197 self._Error(*args, **kwargs)
1198 # do not mark the operation as failed for WARN cases only
1199 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1200 self.bad = self.bad or cond
1202 def _VerifyNode(self, ninfo, nresult):
1203 """Run multiple tests against a node.
1207 - compares ganeti version
1208 - checks vg existence and size > 20G
1209 - checks config file checksum
1210 - checks ssh to other nodes
1212 @type ninfo: L{objects.Node}
1213 @param ninfo: the node to check
1214 @param nresult: the results from the node
1216 @return: whether overall this call was successful (and we can expect
1217 reasonable values in the response)
1221 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1223 # main result, nresult should be a non-empty dict
1224 test = not nresult or not isinstance(nresult, dict)
1225 _ErrorIf(test, self.ENODERPC, node,
1226 "unable to verify node: no data returned")
1230 # compares ganeti version
1231 local_version = constants.PROTOCOL_VERSION
1232 remote_version = nresult.get("version", None)
1233 test = not (remote_version and
1234 isinstance(remote_version, (list, tuple)) and
1235 len(remote_version) == 2)
1236 _ErrorIf(test, self.ENODERPC, node,
1237 "connection to node returned invalid data")
1241 test = local_version != remote_version[0]
1242 _ErrorIf(test, self.ENODEVERSION, node,
1243 "incompatible protocol versions: master %s,"
1244 " node %s", local_version, remote_version[0])
1248 # node seems compatible, we can actually try to look into its results
1250 # full package version
1251 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1252 self.ENODEVERSION, node,
1253 "software version mismatch: master %s, node %s",
1254 constants.RELEASE_VERSION, remote_version[1],
1255 code=self.ETYPE_WARNING)
1257 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1258 if isinstance(hyp_result, dict):
1259 for hv_name, hv_result in hyp_result.iteritems():
1260 test = hv_result is not None
1261 _ErrorIf(test, self.ENODEHV, node,
1262 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1265 test = nresult.get(constants.NV_NODESETUP,
1266 ["Missing NODESETUP results"])
1267 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1272 def _VerifyNodeTime(self, ninfo, nresult,
1273 nvinfo_starttime, nvinfo_endtime):
1274 """Check the node time.
1276 @type ninfo: L{objects.Node}
1277 @param ninfo: the node to check
1278 @param nresult: the remote results for the node
1279 @param nvinfo_starttime: the start time of the RPC call
1280 @param nvinfo_endtime: the end time of the RPC call
1284 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1286 ntime = nresult.get(constants.NV_TIME, None)
1288 ntime_merged = utils.MergeTime(ntime)
1289 except (ValueError, TypeError):
1290 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1293 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1294 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1295 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1296 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1300 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1301 "Node time diverges by at least %s from master node time",
1304 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1305 """Check the node time.
1307 @type ninfo: L{objects.Node}
1308 @param ninfo: the node to check
1309 @param nresult: the remote results for the node
1310 @param vg_name: the configured VG name
1317 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1319 # checks vg existence and size > 20G
1320 vglist = nresult.get(constants.NV_VGLIST, None)
1322 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1324 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1325 constants.MIN_VG_SIZE)
1326 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1329 pvlist = nresult.get(constants.NV_PVLIST, None)
1330 test = pvlist is None
1331 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1333 # check that ':' is not present in PV names, since it's a
1334 # special character for lvcreate (denotes the range of PEs to
1336 for _, pvname, owner_vg in pvlist:
1337 test = ":" in pvname
1338 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1339 " '%s' of VG '%s'", pvname, owner_vg)
1341 def _VerifyNodeNetwork(self, ninfo, nresult):
1342 """Check the node time.
1344 @type ninfo: L{objects.Node}
1345 @param ninfo: the node to check
1346 @param nresult: the remote results for the node
1350 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1352 test = constants.NV_NODELIST not in nresult
1353 _ErrorIf(test, self.ENODESSH, node,
1354 "node hasn't returned node ssh connectivity data")
1356 if nresult[constants.NV_NODELIST]:
1357 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1358 _ErrorIf(True, self.ENODESSH, node,
1359 "ssh communication with node '%s': %s", a_node, a_msg)
1361 test = constants.NV_NODENETTEST not in nresult
1362 _ErrorIf(test, self.ENODENET, node,
1363 "node hasn't returned node tcp connectivity data")
1365 if nresult[constants.NV_NODENETTEST]:
1366 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1368 _ErrorIf(True, self.ENODENET, node,
1369 "tcp communication with node '%s': %s",
1370 anode, nresult[constants.NV_NODENETTEST][anode])
1372 def _VerifyInstance(self, instance, instanceconfig, node_image):
1373 """Verify an instance.
1375 This function checks to see if the required block devices are
1376 available on the instance's node.
1379 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1380 node_current = instanceconfig.primary_node
1382 node_vol_should = {}
1383 instanceconfig.MapLVsByNode(node_vol_should)
1385 for node in node_vol_should:
1386 n_img = node_image[node]
1387 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1388 # ignore missing volumes on offline or broken nodes
1390 for volume in node_vol_should[node]:
1391 test = volume not in n_img.volumes
1392 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1393 "volume %s missing on node %s", volume, node)
1395 if instanceconfig.admin_up:
1396 pri_img = node_image[node_current]
1397 test = instance not in pri_img.instances and not pri_img.offline
1398 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1399 "instance not running on its primary node %s",
1402 for node, n_img in node_image.items():
1403 if node != node_current:
1404 test = instance in n_img.instances
1405 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1406 "instance should not run on node %s", node)
1408 def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1409 """Verify if there are any unknown volumes in the cluster.
1411 The .os, .swap and backup volumes are ignored. All other volumes are
1412 reported as unknown.
1415 for node, n_img in node_image.items():
1416 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1417 # skip non-healthy nodes
1419 for volume in n_img.volumes:
1420 test = (node not in node_vol_should or
1421 volume not in node_vol_should[node])
1422 self._ErrorIf(test, self.ENODEORPHANLV, node,
1423 "volume %s is unknown", volume)
1425 def _VerifyOrphanInstances(self, instancelist, node_image):
1426 """Verify the list of running instances.
1428 This checks what instances are running but unknown to the cluster.
1431 for node, n_img in node_image.items():
1432 for o_inst in n_img.instances:
1433 test = o_inst not in instancelist
1434 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1435 "instance %s on node %s should not exist", o_inst, node)
1437 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1438 """Verify N+1 Memory Resilience.
1440 Check that if one single node dies we can still start all the
1441 instances it was primary for.
1444 for node, n_img in node_image.items():
1445 # This code checks that every node which is now listed as
1446 # secondary has enough memory to host all instances it is
1447 # supposed to should a single other node in the cluster fail.
1448 # FIXME: not ready for failover to an arbitrary node
1449 # FIXME: does not support file-backed instances
1450 # WARNING: we currently take into account down instances as well
1451 # as up ones, considering that even if they're down someone
1452 # might want to start them even in the event of a node failure.
1453 for prinode, instances in n_img.sbp.items():
1455 for instance in instances:
1456 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1457 if bep[constants.BE_AUTO_BALANCE]:
1458 needed_mem += bep[constants.BE_MEMORY]
1459 test = n_img.mfree < needed_mem
1460 self._ErrorIf(test, self.ENODEN1, node,
1461 "not enough memory on to accommodate"
1462 " failovers should peer node %s fail", prinode)
1464 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1466 """Verifies and computes the node required file checksums.
1468 @type ninfo: L{objects.Node}
1469 @param ninfo: the node to check
1470 @param nresult: the remote results for the node
1471 @param file_list: required list of files
1472 @param local_cksum: dictionary of local files and their checksums
1473 @param master_files: list of files that only masters should have
1477 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1479 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1480 test = not isinstance(remote_cksum, dict)
1481 _ErrorIf(test, self.ENODEFILECHECK, node,
1482 "node hasn't returned file checksum data")
1486 for file_name in file_list:
1487 node_is_mc = ninfo.master_candidate
1488 must_have = (file_name not in master_files) or node_is_mc
1490 test1 = file_name not in remote_cksum
1492 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1494 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1495 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1496 "file '%s' missing", file_name)
1497 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1498 "file '%s' has wrong checksum", file_name)
1499 # not candidate and this is not a must-have file
1500 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1501 "file '%s' should not exist on non master"
1502 " candidates (and the file is outdated)", file_name)
1503 # all good, except non-master/non-must have combination
1504 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1505 "file '%s' should not exist"
1506 " on non master candidates", file_name)
1508 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1509 """Verifies and the node DRBD status.
1511 @type ninfo: L{objects.Node}
1512 @param ninfo: the node to check
1513 @param nresult: the remote results for the node
1514 @param instanceinfo: the dict of instances
1515 @param drbd_map: the DRBD map as returned by
1516 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1520 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1522 # compute the DRBD minors
1524 for minor, instance in drbd_map[node].items():
1525 test = instance not in instanceinfo
1526 _ErrorIf(test, self.ECLUSTERCFG, None,
1527 "ghost instance '%s' in temporary DRBD map", instance)
1528 # ghost instance should not be running, but otherwise we
1529 # don't give double warnings (both ghost instance and
1530 # unallocated minor in use)
1532 node_drbd[minor] = (instance, False)
1534 instance = instanceinfo[instance]
1535 node_drbd[minor] = (instance.name, instance.admin_up)
1537 # and now check them
1538 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1539 test = not isinstance(used_minors, (tuple, list))
1540 _ErrorIf(test, self.ENODEDRBD, node,
1541 "cannot parse drbd status file: %s", str(used_minors))
1543 # we cannot check drbd status
1546 for minor, (iname, must_exist) in node_drbd.items():
1547 test = minor not in used_minors and must_exist
1548 _ErrorIf(test, self.ENODEDRBD, node,
1549 "drbd minor %d of instance %s is not active", minor, iname)
1550 for minor in used_minors:
1551 test = minor not in node_drbd
1552 _ErrorIf(test, self.ENODEDRBD, node,
1553 "unallocated drbd minor %d is in use", minor)
1555 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1556 """Verifies and updates the node volume data.
1558 This function will update a L{NodeImage}'s internal structures
1559 with data from the remote call.
1561 @type ninfo: L{objects.Node}
1562 @param ninfo: the node to check
1563 @param nresult: the remote results for the node
1564 @param nimg: the node image object
1565 @param vg_name: the configured VG name
1569 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1571 nimg.lvm_fail = True
1572 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1575 elif isinstance(lvdata, basestring):
1576 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1577 utils.SafeEncode(lvdata))
1578 elif not isinstance(lvdata, dict):
1579 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1581 nimg.volumes = lvdata
1582 nimg.lvm_fail = False
1584 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1585 """Verifies and updates the node instance list.
1587 If the listing was successful, then updates this node's instance
1588 list. Otherwise, it marks the RPC call as failed for the instance
1591 @type ninfo: L{objects.Node}
1592 @param ninfo: the node to check
1593 @param nresult: the remote results for the node
1594 @param nimg: the node image object
1597 idata = nresult.get(constants.NV_INSTANCELIST, None)
1598 test = not isinstance(idata, list)
1599 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1600 " (instancelist): %s", utils.SafeEncode(str(idata)))
1602 nimg.hyp_fail = True
1604 nimg.instances = idata
1606 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1607 """Verifies and computes a node information map
1609 @type ninfo: L{objects.Node}
1610 @param ninfo: the node to check
1611 @param nresult: the remote results for the node
1612 @param nimg: the node image object
1613 @param vg_name: the configured VG name
1617 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1619 # try to read free memory (from the hypervisor)
1620 hv_info = nresult.get(constants.NV_HVINFO, None)
1621 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1622 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1625 nimg.mfree = int(hv_info["memory_free"])
1626 except (ValueError, TypeError):
1627 _ErrorIf(True, self.ENODERPC, node,
1628 "node returned invalid nodeinfo, check hypervisor")
1630 # FIXME: devise a free space model for file based instances as well
1631 if vg_name is not None:
1632 test = (constants.NV_VGLIST not in nresult or
1633 vg_name not in nresult[constants.NV_VGLIST])
1634 _ErrorIf(test, self.ENODELVM, node,
1635 "node didn't return data for the volume group '%s'"
1636 " - it is either missing or broken", vg_name)
1639 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1640 except (ValueError, TypeError):
1641 _ErrorIf(True, self.ENODERPC, node,
1642 "node returned invalid LVM info, check LVM status")
1644 def CheckPrereq(self):
1645 """Check prerequisites.
1647 Transform the list of checks we're going to skip into a set and check that
1648 all its members are valid.
1651 self.skip_set = frozenset(self.op.skip_checks)
1652 if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1653 raise errors.OpPrereqError("Invalid checks to be skipped specified",
1656 def BuildHooksEnv(self):
1659 Cluster-Verify hooks just run in the post phase; their failure is logged in
1660 the verify output and makes the verification fail.
1663 all_nodes = self.cfg.GetNodeList()
1665 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1667 for node in self.cfg.GetAllNodesInfo().values():
1668 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1670 return env, [], all_nodes
1672 def Exec(self, feedback_fn):
1673 """Verify integrity of cluster, performing various test on nodes.
1677 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1678 verbose = self.op.verbose
1679 self._feedback_fn = feedback_fn
1680 feedback_fn("* Verifying global settings")
1681 for msg in self.cfg.VerifyConfig():
1682 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1684 # Check the cluster certificates
1685 for cert_filename in constants.ALL_CERT_FILES:
1686 (errcode, msg) = _VerifyCertificate(cert_filename)
1687 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1689 vg_name = self.cfg.GetVGName()
1690 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1691 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1692 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1693 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1694 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1695 for iname in instancelist)
1696 i_non_redundant = [] # Non redundant instances
1697 i_non_a_balanced = [] # Non auto-balanced instances
1698 n_offline = 0 # Count of offline nodes
1699 n_drained = 0 # Count of nodes being drained
1700 node_vol_should = {}
1702 # FIXME: verify OS list
1703 # do local checksums
1704 master_files = [constants.CLUSTER_CONF_FILE]
1706 file_names = ssconf.SimpleStore().GetFileList()
1707 file_names.extend(constants.ALL_CERT_FILES)
1708 file_names.extend(master_files)
1710 local_checksums = utils.FingerprintFiles(file_names)
1712 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1713 node_verify_param = {
1714 constants.NV_FILELIST: file_names,
1715 constants.NV_NODELIST: [node.name for node in nodeinfo
1716 if not node.offline],
1717 constants.NV_HYPERVISOR: hypervisors,
1718 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1719 node.secondary_ip) for node in nodeinfo
1720 if not node.offline],
1721 constants.NV_INSTANCELIST: hypervisors,
1722 constants.NV_VERSION: None,
1723 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1724 constants.NV_NODESETUP: None,
1725 constants.NV_TIME: None,
1728 if vg_name is not None:
1729 node_verify_param[constants.NV_VGLIST] = None
1730 node_verify_param[constants.NV_LVLIST] = vg_name
1731 node_verify_param[constants.NV_PVLIST] = [vg_name]
1732 node_verify_param[constants.NV_DRBDLIST] = None
1734 # Build our expected cluster state
1735 node_image = dict((node.name, self.NodeImage(offline=node.offline))
1736 for node in nodeinfo)
1738 for instance in instancelist:
1739 inst_config = instanceinfo[instance]
1741 for nname in inst_config.all_nodes:
1742 if nname not in node_image:
1744 gnode = self.NodeImage()
1746 node_image[nname] = gnode
1748 inst_config.MapLVsByNode(node_vol_should)
1750 pnode = inst_config.primary_node
1751 node_image[pnode].pinst.append(instance)
1753 for snode in inst_config.secondary_nodes:
1754 nimg = node_image[snode]
1755 nimg.sinst.append(instance)
1756 if pnode not in nimg.sbp:
1757 nimg.sbp[pnode] = []
1758 nimg.sbp[pnode].append(instance)
1760 # At this point, we have the in-memory data structures complete,
1761 # except for the runtime information, which we'll gather next
1763 # Due to the way our RPC system works, exact response times cannot be
1764 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1765 # time before and after executing the request, we can at least have a time
1767 nvinfo_starttime = time.time()
1768 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1769 self.cfg.GetClusterName())
1770 nvinfo_endtime = time.time()
1772 cluster = self.cfg.GetClusterInfo()
1773 master_node = self.cfg.GetMasterNode()
1774 all_drbd_map = self.cfg.ComputeDRBDMap()
1776 feedback_fn("* Verifying node status")
1777 for node_i in nodeinfo:
1779 nimg = node_image[node]
1783 feedback_fn("* Skipping offline node %s" % (node,))
1787 if node == master_node:
1789 elif node_i.master_candidate:
1790 ntype = "master candidate"
1791 elif node_i.drained:
1797 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1799 msg = all_nvinfo[node].fail_msg
1800 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1802 nimg.rpc_fail = True
1805 nresult = all_nvinfo[node].payload
1807 nimg.call_ok = self._VerifyNode(node_i, nresult)
1808 self._VerifyNodeNetwork(node_i, nresult)
1809 self._VerifyNodeLVM(node_i, nresult, vg_name)
1810 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1812 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1813 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1815 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1816 self._UpdateNodeInstances(node_i, nresult, nimg)
1817 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1819 feedback_fn("* Verifying instance status")
1820 for instance in instancelist:
1822 feedback_fn("* Verifying instance %s" % instance)
1823 inst_config = instanceinfo[instance]
1824 self._VerifyInstance(instance, inst_config, node_image)
1825 inst_nodes_offline = []
1827 pnode = inst_config.primary_node
1828 pnode_img = node_image[pnode]
1829 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1830 self.ENODERPC, pnode, "instance %s, connection to"
1831 " primary node failed", instance)
1833 if pnode_img.offline:
1834 inst_nodes_offline.append(pnode)
1836 # If the instance is non-redundant we cannot survive losing its primary
1837 # node, so we are not N+1 compliant. On the other hand we have no disk
1838 # templates with more than one secondary so that situation is not well
1840 # FIXME: does not support file-backed instances
1841 if not inst_config.secondary_nodes:
1842 i_non_redundant.append(instance)
1843 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
1844 instance, "instance has multiple secondary nodes: %s",
1845 utils.CommaJoin(inst_config.secondary_nodes),
1846 code=self.ETYPE_WARNING)
1848 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1849 i_non_a_balanced.append(instance)
1851 for snode in inst_config.secondary_nodes:
1852 s_img = node_image[snode]
1853 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
1854 "instance %s, connection to secondary node failed", instance)
1857 inst_nodes_offline.append(snode)
1859 # warn that the instance lives on offline nodes
1860 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1861 "instance lives on offline node(s) %s",
1862 utils.CommaJoin(inst_nodes_offline))
1863 # ... or ghost nodes
1864 for node in inst_config.all_nodes:
1865 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
1866 "instance lives on ghost node %s", node)
1868 feedback_fn("* Verifying orphan volumes")
1869 self._VerifyOrphanVolumes(node_vol_should, node_image)
1871 feedback_fn("* Verifying orphan instances")
1872 self._VerifyOrphanInstances(instancelist, node_image)
1874 if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1875 feedback_fn("* Verifying N+1 Memory redundancy")
1876 self._VerifyNPlusOneMemory(node_image, instanceinfo)
1878 feedback_fn("* Other Notes")
1880 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
1881 % len(i_non_redundant))
1883 if i_non_a_balanced:
1884 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
1885 % len(i_non_a_balanced))
1888 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
1891 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
1895 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1896 """Analyze the post-hooks' result
1898 This method analyses the hook result, handles it, and sends some
1899 nicely-formatted feedback back to the user.
1901 @param phase: one of L{constants.HOOKS_PHASE_POST} or
1902 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1903 @param hooks_results: the results of the multi-node hooks rpc call
1904 @param feedback_fn: function used to send feedback back to the caller
1905 @param lu_result: previous Exec result
1906 @return: the new Exec result, based on the previous result
1910 # We only really run POST phase hooks, and are only interested in
1911 # their results
1912 if phase == constants.HOOKS_PHASE_POST:
1913 # Used to change hooks' output to proper indentation
1914 indent_re = re.compile('^', re.M)
1915 feedback_fn("* Hooks Results")
1916 assert hooks_results, "invalid result from hooks"
1918 for node_name in hooks_results:
1919 res = hooks_results[node_name]
1921 test = msg and not res.offline
1922 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1923 "Communication failure in hooks execution: %s", msg)
1924 if res.offline or msg:
1925 # No need to investigate payload if node is offline or gave an error.
1926 # manually override lu_result here, as _ErrorIf only
1927 # overrides self.bad
1930 for script, hkr, output in res.payload:
1931 test = hkr == constants.HKR_FAIL
1932 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1933 "Script %s failed, output:", script)
1935 output = indent_re.sub(' ', output)
1936 feedback_fn("%s" % output)
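# Illustrative sketch of the post-hook analysis above, assuming each entry in
# hooks_results is an RPC result with fail_msg/offline attributes and a
# payload of (script, status, output) tuples; the node name and script names
# below are hypothetical:
#
#   hooks_results = {"node1.example.com": rpc_result}
#   # rpc_result.payload == [("10-check-net", constants.HKR_SUCCESS, "ok"),
#   #                        ("20-check-disk", constants.HKR_FAIL, "disk full")]
#   lu_result = lu.HooksCallBack(constants.HOOKS_PHASE_POST, hooks_results,
#                                feedback_fn, lu_result)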
1942 class LUVerifyDisks(NoHooksLU):
1943 """Verifies the cluster disks status.
1949 def ExpandNames(self):
1950 self.needed_locks = {
1951 locking.LEVEL_NODE: locking.ALL_SET,
1952 locking.LEVEL_INSTANCE: locking.ALL_SET,
1954 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1956 def CheckPrereq(self):
1957 """Check prerequisites.
1959 This has no prerequisites.
1964 def Exec(self, feedback_fn):
1965 """Verify integrity of cluster disks.
1967 @rtype: tuple of three items
1968 @return: a tuple of (dict of node-to-node_error, list of instances
1969 which need activate-disks, dict of instance: (node, volume) for
1970 missing volumes
1973 result = res_nodes, res_instances, res_missing = {}, [], {}
1975 vg_name = self.cfg.GetVGName()
1976 nodes = utils.NiceSort(self.cfg.GetNodeList())
1977 instances = [self.cfg.GetInstanceInfo(name)
1978 for name in self.cfg.GetInstanceList()]
1981 for inst in instances:
1983 if (not inst.admin_up or
1984 inst.disk_template not in constants.DTS_NET_MIRROR):
1986 inst.MapLVsByNode(inst_lvs)
1987 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1988 for node, vol_list in inst_lvs.iteritems():
1989 for vol in vol_list:
1990 nv_dict[(node, vol)] = inst
1995 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
1999 node_res = node_lvs[node]
2000 if node_res.offline:
2002 msg = node_res.fail_msg
2004 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2005 res_nodes[node] = msg
2008 lvs = node_res.payload
2009 for lv_name, (_, _, lv_online) in lvs.items():
2010 inst = nv_dict.pop((node, lv_name), None)
2011 if (not lv_online and inst is not None
2012 and inst.name not in res_instances):
2013 res_instances.append(inst.name)
2015 # any leftover items in nv_dict are missing LVs, let's arrange the
2016 # data better
2017 for key, inst in nv_dict.iteritems():
2018 if inst.name not in res_missing:
2019 res_missing[inst.name] = []
2020 res_missing[inst.name].append(key)
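# Hedged sketch of the result assembled above (all names and values are
# illustrative): res_nodes maps failing nodes to error messages, res_instances
# lists instances needing "activate-disks", and res_missing maps instances to
# their missing (node, volume) pairs, e.g.:
#
#   ({"node2.example.com": "rpc timeout"},
#    ["instance1"],
#    {"instance2": [("node3.example.com", "xenvg/disk0_data")]})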
2025 class LURepairDiskSizes(NoHooksLU):
2026 """Verifies the cluster disks sizes.
2029 _OP_REQP = ["instances"]
2032 def ExpandNames(self):
2033 if not isinstance(self.op.instances, list):
2034 raise errors.OpPrereqError("Invalid argument type 'instances'",
2037 if self.op.instances:
2038 self.wanted_names = []
2039 for name in self.op.instances:
2040 full_name = _ExpandInstanceName(self.cfg, name)
2041 self.wanted_names.append(full_name)
2042 self.needed_locks = {
2043 locking.LEVEL_NODE: [],
2044 locking.LEVEL_INSTANCE: self.wanted_names,
2046 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2048 self.wanted_names = None
2049 self.needed_locks = {
2050 locking.LEVEL_NODE: locking.ALL_SET,
2051 locking.LEVEL_INSTANCE: locking.ALL_SET,
2053 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2055 def DeclareLocks(self, level):
2056 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2057 self._LockInstancesNodes(primary_only=True)
2059 def CheckPrereq(self):
2060 """Check prerequisites.
2062 This only checks the optional instance list against the existing names.
2065 if self.wanted_names is None:
2066 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2068 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2069 in self.wanted_names]
2071 def _EnsureChildSizes(self, disk):
2072 """Ensure children of the disk have the needed disk size.
2074 This is valid mainly for DRBD8 and fixes an issue where the
2075 children have a smaller disk size.
2077 @param disk: an L{ganeti.objects.Disk} object
2080 if disk.dev_type == constants.LD_DRBD8:
2081 assert disk.children, "Empty children for DRBD8?"
2082 fchild = disk.children[0]
2083 mismatch = fchild.size < disk.size
2085 self.LogInfo("Child disk has size %d, parent %d, fixing",
2086 fchild.size, disk.size)
2087 fchild.size = disk.size
2089 # and we recurse on this child only, not on the metadev
2090 return self._EnsureChildSizes(fchild) or mismatch
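# Hedged usage sketch: for a DRBD8 disk whose data child was recorded with a
# smaller size than its parent, the helper above bumps the child's recorded
# size and reports that a change was made.  The constructor arguments are
# illustrative only:
#
#   drbd = objects.Disk(dev_type=constants.LD_DRBD8, size=10240,
#                       children=[objects.Disk(dev_type=constants.LD_LV,
#                                               size=10000),
#                                 objects.Disk(dev_type=constants.LD_LV,
#                                               size=128)])
#   self._EnsureChildSizes(drbd)  # True; children[0].size becomes 10240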
2094 def Exec(self, feedback_fn):
2095 """Verify the size of cluster disks.
2098 # TODO: check child disks too
2099 # TODO: check differences in size between primary/secondary nodes
2101 for instance in self.wanted_instances:
2102 pnode = instance.primary_node
2103 if pnode not in per_node_disks:
2104 per_node_disks[pnode] = []
2105 for idx, disk in enumerate(instance.disks):
2106 per_node_disks[pnode].append((instance, idx, disk))
2109 for node, dskl in per_node_disks.items():
2110 newl = [v[2].Copy() for v in dskl]
2112 self.cfg.SetDiskID(dsk, node)
2113 result = self.rpc.call_blockdev_getsizes(node, newl)
2115 self.LogWarning("Failure in blockdev_getsizes call to node"
2116 " %s, ignoring", node)
2118 if len(result.data) != len(dskl):
2119 self.LogWarning("Invalid result from node %s, ignoring node results",
2122 for ((instance, idx, disk), size) in zip(dskl, result.data):
2124 self.LogWarning("Disk %d of instance %s did not return size"
2125 " information, ignoring", idx, instance.name)
2127 if not isinstance(size, (int, long)):
2128 self.LogWarning("Disk %d of instance %s did not return valid"
2129 " size information, ignoring", idx, instance.name)
2132 if size != disk.size:
2133 self.LogInfo("Disk %d of instance %s has mismatched size,"
2134 " correcting: recorded %d, actual %d", idx,
2135 instance.name, disk.size, size)
2137 self.cfg.Update(instance, feedback_fn)
2138 changed.append((instance.name, idx, size))
2139 if self._EnsureChildSizes(disk):
2140 self.cfg.Update(instance, feedback_fn)
2141 changed.append((instance.name, idx, disk.size))
2145 class LURenameCluster(LogicalUnit):
2146 """Rename the cluster.
2149 HPATH = "cluster-rename"
2150 HTYPE = constants.HTYPE_CLUSTER
2153 def BuildHooksEnv(self):
2158 "OP_TARGET": self.cfg.GetClusterName(),
2159 "NEW_NAME": self.op.name,
2161 mn = self.cfg.GetMasterNode()
2162 all_nodes = self.cfg.GetNodeList()
2163 return env, [mn], all_nodes
2165 def CheckPrereq(self):
2166 """Verify that the passed name is a valid one.
2169 hostname = utils.GetHostInfo(self.op.name)
2171 new_name = hostname.name
2172 self.ip = new_ip = hostname.ip
2173 old_name = self.cfg.GetClusterName()
2174 old_ip = self.cfg.GetMasterIP()
2175 if new_name == old_name and new_ip == old_ip:
2176 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2177 " cluster has changed",
2179 if new_ip != old_ip:
2180 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2181 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2182 " reachable on the network. Aborting." %
2183 new_ip, errors.ECODE_NOTUNIQUE)
2185 self.op.name = new_name
2187 def Exec(self, feedback_fn):
2188 """Rename the cluster.
2191 clustername = self.op.name
2194 # shutdown the master IP
2195 master = self.cfg.GetMasterNode()
2196 result = self.rpc.call_node_stop_master(master, False)
2197 result.Raise("Could not disable the master role")
2200 cluster = self.cfg.GetClusterInfo()
2201 cluster.cluster_name = clustername
2202 cluster.master_ip = ip
2203 self.cfg.Update(cluster, feedback_fn)
2205 # update the known hosts file
2206 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2207 node_list = self.cfg.GetNodeList()
2209 node_list.remove(master)
2212 result = self.rpc.call_upload_file(node_list,
2213 constants.SSH_KNOWN_HOSTS_FILE)
2214 for to_node, to_result in result.iteritems():
2215 msg = to_result.fail_msg
2217 msg = ("Copy of file %s to node %s failed: %s" %
2218 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2219 self.proc.LogWarning(msg)
2222 result = self.rpc.call_node_start_master(master, False, False)
2223 msg = result.fail_msg
2225 self.LogWarning("Could not re-enable the master role on"
2226 " the master, please restart manually: %s", msg)
2229 def _RecursiveCheckIfLVMBased(disk):
2230 """Check if the given disk or its children are lvm-based.
2232 @type disk: L{objects.Disk}
2233 @param disk: the disk to check
2235 @return: boolean indicating whether a LD_LV dev_type was found or not
2239 for chdisk in disk.children:
2240 if _RecursiveCheckIfLVMBased(chdisk):
2242 return disk.dev_type == constants.LD_LV
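# Hedged sketch: the recursion above answers "does any device in this disk
# tree sit on LVM?", e.g. a DRBD8 disk whose children are logical volumes is
# reported as LVM-based because the walk bottoms out at a child whose
# dev_type is constants.LD_LV:
#
#   _RecursiveCheckIfLVMBased(drbd_disk)   # True for drbd-over-lvm setups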
2245 class LUSetClusterParams(LogicalUnit):
2246 """Change the parameters of the cluster.
2249 HPATH = "cluster-modify"
2250 HTYPE = constants.HTYPE_CLUSTER
2254 def CheckArguments(self):
2258 for attr in ["candidate_pool_size",
2259 "uid_pool", "add_uids", "remove_uids"]:
2260 if not hasattr(self.op, attr):
2261 setattr(self.op, attr, None)
2263 if self.op.candidate_pool_size is not None:
2265 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2266 except (ValueError, TypeError), err:
2267 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2268 str(err), errors.ECODE_INVAL)
2269 if self.op.candidate_pool_size < 1:
2270 raise errors.OpPrereqError("At least one master candidate needed",
2273 _CheckBooleanOpField(self.op, "maintain_node_health")
2275 if self.op.uid_pool:
2276 uidpool.CheckUidPool(self.op.uid_pool)
2278 if self.op.add_uids:
2279 uidpool.CheckUidPool(self.op.add_uids)
2281 if self.op.remove_uids:
2282 uidpool.CheckUidPool(self.op.remove_uids)
2284 def ExpandNames(self):
2285 # FIXME: in the future maybe other cluster params won't require checking on
2286 # all nodes to be modified.
2287 self.needed_locks = {
2288 locking.LEVEL_NODE: locking.ALL_SET,
2290 self.share_locks[locking.LEVEL_NODE] = 1
2292 def BuildHooksEnv(self):
2297 "OP_TARGET": self.cfg.GetClusterName(),
2298 "NEW_VG_NAME": self.op.vg_name,
2300 mn = self.cfg.GetMasterNode()
2301 return env, [mn], [mn]
2303 def CheckPrereq(self):
2304 """Check prerequisites.
2306 This checks whether the given params don't conflict and
2307 if the given volume group is valid.
2310 if self.op.vg_name is not None and not self.op.vg_name:
2311 instances = self.cfg.GetAllInstancesInfo().values()
2312 for inst in instances:
2313 for disk in inst.disks:
2314 if _RecursiveCheckIfLVMBased(disk):
2315 raise errors.OpPrereqError("Cannot disable lvm storage while"
2316 " lvm-based instances exist",
2319 node_list = self.acquired_locks[locking.LEVEL_NODE]
2321 # if vg_name is not None, check the given volume group on all nodes
2323 vglist = self.rpc.call_vg_list(node_list)
2324 for node in node_list:
2325 msg = vglist[node].fail_msg
2327 # ignoring down node
2328 self.LogWarning("Error while gathering data on node %s"
2329 " (ignoring node): %s", node, msg)
2331 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2333 constants.MIN_VG_SIZE)
2335 raise errors.OpPrereqError("Error on node '%s': %s" %
2336 (node, vgstatus), errors.ECODE_ENVIRON)
2338 self.cluster = cluster = self.cfg.GetClusterInfo()
2339 # validate params changes
2340 if self.op.beparams:
2341 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2342 self.new_beparams = objects.FillDict(
2343 cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2345 if self.op.nicparams:
2346 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2347 self.new_nicparams = objects.FillDict(
2348 cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2349 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2352 # check all instances for consistency
2353 for instance in self.cfg.GetAllInstancesInfo().values():
2354 for nic_idx, nic in enumerate(instance.nics):
2355 params_copy = copy.deepcopy(nic.nicparams)
2356 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2358 # check parameter syntax
2360 objects.NIC.CheckParameterSyntax(params_filled)
2361 except errors.ConfigurationError, err:
2362 nic_errors.append("Instance %s, nic/%d: %s" %
2363 (instance.name, nic_idx, err))
2365 # if we're moving instances to routed, check that they have an ip
2366 target_mode = params_filled[constants.NIC_MODE]
2367 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2368 nic_errors.append("Instance %s, nic/%d: routed nic with no ip" %
2369 (instance.name, nic_idx))
2371 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2372 "\n".join(nic_errors))
2374 # hypervisor list/parameters
2375 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2376 if self.op.hvparams:
2377 if not isinstance(self.op.hvparams, dict):
2378 raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2380 for hv_name, hv_dict in self.op.hvparams.items():
2381 if hv_name not in self.new_hvparams:
2382 self.new_hvparams[hv_name] = hv_dict
2384 self.new_hvparams[hv_name].update(hv_dict)
2386 # os hypervisor parameters
2387 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2389 if not isinstance(self.op.os_hvp, dict):
2390 raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2392 for os_name, hvs in self.op.os_hvp.items():
2393 if not isinstance(hvs, dict):
2394 raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2395 " input"), errors.ECODE_INVAL)
2396 if os_name not in self.new_os_hvp:
2397 self.new_os_hvp[os_name] = hvs
2399 for hv_name, hv_dict in hvs.items():
2400 if hv_name not in self.new_os_hvp[os_name]:
2401 self.new_os_hvp[os_name][hv_name] = hv_dict
2403 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2405 # changes to the hypervisor list
2406 if self.op.enabled_hypervisors is not None:
2407 self.hv_list = self.op.enabled_hypervisors
2408 if not self.hv_list:
2409 raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2410 " least one member",
2412 invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2414 raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2416 utils.CommaJoin(invalid_hvs),
2418 for hv in self.hv_list:
2419 # if the hypervisor doesn't already exist in the cluster
2420 # hvparams, we initialize it to empty, and then (in both
2421 # cases) we make sure to fill the defaults, as we might not
2422 # have a complete defaults list if the hypervisor wasn't
2423 # enabled before
2424 if hv not in new_hvp:
2426 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2427 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2429 self.hv_list = cluster.enabled_hypervisors
2431 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2432 # either the enabled list has changed, or the parameters have, validate
2433 for hv_name, hv_params in self.new_hvparams.items():
2434 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2435 (self.op.enabled_hypervisors and
2436 hv_name in self.op.enabled_hypervisors)):
2437 # either this is a new hypervisor, or its parameters have changed
2438 hv_class = hypervisor.GetHypervisor(hv_name)
2439 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2440 hv_class.CheckParameterSyntax(hv_params)
2441 _CheckHVParams(self, node_list, hv_name, hv_params)
2444 # no need to check any newly-enabled hypervisors, since the
2445 # defaults have already been checked in the above code-block
2446 for os_name, os_hvp in self.new_os_hvp.items():
2447 for hv_name, hv_params in os_hvp.items():
2448 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2449 # we need to fill in the new os_hvp on top of the actual hv_p
2450 cluster_defaults = self.new_hvparams.get(hv_name, {})
2451 new_osp = objects.FillDict(cluster_defaults, hv_params)
2452 hv_class = hypervisor.GetHypervisor(hv_name)
2453 hv_class.CheckParameterSyntax(new_osp)
2454 _CheckHVParams(self, node_list, hv_name, new_osp)
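# Hedged sketch of the parameter layering used above: cluster-level hvparams
# act as the base dictionary and per-OS overrides are filled on top, so an
# OS-specific value wins over the cluster default.  Keys and values are
# illustrative:
#
#   cluster_hv = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda1"}
#   os_hv      = {"kernel_path": "/boot/vmlinuz-debian"}
#   objects.FillDict(cluster_hv, os_hv)
#   # == {"kernel_path": "/boot/vmlinuz-debian", "root_path": "/dev/sda1"}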
2457 def Exec(self, feedback_fn):
2458 """Change the parameters of the cluster.
2461 if self.op.vg_name is not None:
2462 new_volume = self.op.vg_name
2465 if new_volume != self.cfg.GetVGName():
2466 self.cfg.SetVGName(new_volume)
2468 feedback_fn("Cluster LVM configuration already in desired"
2469 " state, not changing")
2470 if self.op.hvparams:
2471 self.cluster.hvparams = self.new_hvparams
2473 self.cluster.os_hvp = self.new_os_hvp
2474 if self.op.enabled_hypervisors is not None:
2475 self.cluster.hvparams = self.new_hvparams
2476 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2477 if self.op.beparams:
2478 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2479 if self.op.nicparams:
2480 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2482 if self.op.candidate_pool_size is not None:
2483 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2484 # we need to update the pool size here, otherwise the save will fail
2485 _AdjustCandidatePool(self, [])
2487 if self.op.maintain_node_health is not None:
2488 self.cluster.maintain_node_health = self.op.maintain_node_health
2490 if self.op.add_uids is not None:
2491 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2493 if self.op.remove_uids is not None:
2494 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2496 if self.op.uid_pool is not None:
2497 self.cluster.uid_pool = self.op.uid_pool
2499 self.cfg.Update(self.cluster, feedback_fn)
2502 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2503 """Distribute additional files which are part of the cluster configuration.
2505 ConfigWriter takes care of distributing the config and ssconf files, but
2506 there are more files which should be distributed to all nodes. This function
2507 makes sure those are copied.
2509 @param lu: calling logical unit
2510 @param additional_nodes: list of nodes not in the config to distribute to
2513 # 1. Gather target nodes
2514 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2515 dist_nodes = lu.cfg.GetOnlineNodeList()
2516 if additional_nodes is not None:
2517 dist_nodes.extend(additional_nodes)
2518 if myself.name in dist_nodes:
2519 dist_nodes.remove(myself.name)
2521 # 2. Gather files to distribute
2522 dist_files = set([constants.ETC_HOSTS,
2523 constants.SSH_KNOWN_HOSTS_FILE,
2524 constants.RAPI_CERT_FILE,
2525 constants.RAPI_USERS_FILE,
2526 constants.CONFD_HMAC_KEY,
2529 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2530 for hv_name in enabled_hypervisors:
2531 hv_class = hypervisor.GetHypervisor(hv_name)
2532 dist_files.update(hv_class.GetAncillaryFiles())
2534 # 3. Perform the files upload
2535 for fname in dist_files:
2536 if os.path.exists(fname):
2537 result = lu.rpc.call_upload_file(dist_nodes, fname)
2538 for to_node, to_result in result.items():
2539 msg = to_result.fail_msg
2541 msg = ("Copy of file %s to node %s failed: %s" %
2542 (fname, to_node, msg))
2543 lu.proc.LogWarning(msg)
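# Hedged usage sketch: LUs that change cluster-wide state call this helper to
# push the ancillary files out, optionally including nodes that are not yet
# in the configuration (see LUAddNode.Exec further down):
#
#   _RedistributeAncillaryFiles(self, additional_nodes=[new_node.name])
#
# Only files that exist on the master are uploaded; optional files such as
# the RAPI users file are skipped by the os.path.exists() test above.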
2546 class LURedistributeConfig(NoHooksLU):
2547 """Force the redistribution of cluster configuration.
2549 This is a very simple LU.
2555 def ExpandNames(self):
2556 self.needed_locks = {
2557 locking.LEVEL_NODE: locking.ALL_SET,
2559 self.share_locks[locking.LEVEL_NODE] = 1
2561 def CheckPrereq(self):
2562 """Check prerequisites.
2566 def Exec(self, feedback_fn):
2567 """Redistribute the configuration.
2570 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2571 _RedistributeAncillaryFiles(self)
2574 def _WaitForSync(lu, instance, oneshot=False):
2575 """Sleep and poll for an instance's disk to sync.
2578 if not instance.disks:
2582 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2584 node = instance.primary_node
2586 for dev in instance.disks:
2587 lu.cfg.SetDiskID(dev, node)
2589 # TODO: Convert to utils.Retry
2592 degr_retries = 10 # in seconds, as we sleep 1 second each time
2596 cumul_degraded = False
2597 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2598 msg = rstats.fail_msg
2600 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2603 raise errors.RemoteError("Can't contact node %s for mirror data,"
2604 " aborting." % node)
2607 rstats = rstats.payload
2609 for i, mstat in enumerate(rstats):
2611 lu.LogWarning("Can't compute data for node %s/%s",
2612 node, instance.disks[i].iv_name)
2615 cumul_degraded = (cumul_degraded or
2616 (mstat.is_degraded and mstat.sync_percent is None))
2617 if mstat.sync_percent is not None:
2619 if mstat.estimated_time is not None:
2620 rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2621 max_time = mstat.estimated_time
2623 rem_time = "no time estimate"
2624 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2625 (instance.disks[i].iv_name, mstat.sync_percent,
2628 # if we're done but degraded, let's do a few small retries, to
2629 # make sure we see a stable and not transient situation; therefore
2630 # we force restart of the loop
2631 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2632 logging.info("Degraded disks found, %d retries left", degr_retries)
2640 time.sleep(min(60, max_time))
2643 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2644 return not cumul_degraded
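# Hedged usage sketch: callers typically block until an instance's mirrors
# have caught up and treat a degraded result as an error, e.g.:
#
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disks of instance %s are degraded" %
#                              instance.name)
#
# With oneshot=True a single status pass is made instead of waiting for the
# sync to complete.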
2647 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2648 """Check that mirrors are not degraded.
2650 The ldisk parameter, if True, will change the test from the
2651 is_degraded attribute (which represents overall non-ok status for
2652 the device(s)) to the ldisk (representing the local storage status).
2655 lu.cfg.SetDiskID(dev, node)
2659 if on_primary or dev.AssembleOnSecondary():
2660 rstats = lu.rpc.call_blockdev_find(node, dev)
2661 msg = rstats.fail_msg
2663 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2665 elif not rstats.payload:
2666 lu.LogWarning("Can't find disk on node %s", node)
2670 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2672 result = result and not rstats.payload.is_degraded
2675 for child in dev.children:
2676 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
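# Hedged usage sketch (the node variables are illustrative): before a
# failover or migration, callers check both sides of a DRBD mirror, using
# ldisk=True when only the local storage state matters:
#
#   ok = _CheckDiskConsistency(self, dev, instance.primary_node, True)
#   ok = ok and _CheckDiskConsistency(self, dev, secondary_node, False,
#                                     ldisk=True)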
2681 class LUDiagnoseOS(NoHooksLU):
2682 """Logical unit for OS diagnose/query.
2685 _OP_REQP = ["output_fields", "names"]
2687 _FIELDS_STATIC = utils.FieldSet()
2688 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2689 # Fields that need calculation of global os validity
2690 _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2692 def ExpandNames(self):
2694 raise errors.OpPrereqError("Selective OS query not supported",
2697 _CheckOutputFields(static=self._FIELDS_STATIC,
2698 dynamic=self._FIELDS_DYNAMIC,
2699 selected=self.op.output_fields)
2701 # Lock all nodes, in shared mode
2702 # Temporary removal of locks, should be reverted later
2703 # TODO: reintroduce locks when they are lighter-weight
2704 self.needed_locks = {}
2705 #self.share_locks[locking.LEVEL_NODE] = 1
2706 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2708 def CheckPrereq(self):
2709 """Check prerequisites.
2714 def _DiagnoseByOS(rlist):
2715 """Remaps a per-node return list into an a per-os per-node dictionary
2717 @param rlist: a map with node names as keys and OS objects as values
2720 @return: a dictionary with osnames as keys and, as values, another map with
2721 nodes as keys and tuples of (path, status, diagnose) as values, eg::
2723 {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2724 (/srv/..., False, "invalid api")],
2725 "node2": [(/srv/..., True, "")]}
2730 # we build here the list of nodes that didn't fail the RPC (at RPC
2731 # level), so that nodes with a non-responding node daemon don't
2732 # make all OSes invalid
2733 good_nodes = [node_name for node_name in rlist
2734 if not rlist[node_name].fail_msg]
2735 for node_name, nr in rlist.items():
2736 if nr.fail_msg or not nr.payload:
2738 for name, path, status, diagnose, variants in nr.payload:
2739 if name not in all_os:
2740 # build a list of nodes for this os containing empty lists
2741 # for each node in node_list
2743 for nname in good_nodes:
2744 all_os[name][nname] = []
2745 all_os[name][node_name].append((path, status, diagnose, variants))
2748 def Exec(self, feedback_fn):
2749 """Compute the list of OSes.
2752 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2753 node_data = self.rpc.call_os_diagnose(valid_nodes)
2754 pol = self._DiagnoseByOS(node_data)
2756 calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2757 calc_variants = "variants" in self.op.output_fields
2759 for os_name, os_data in pol.items():
2764 for osl in os_data.values():
2765 valid = valid and osl and osl[0][1]
2770 node_variants = osl[0][3]
2771 if variants is None:
2772 variants = node_variants
2774 variants = [v for v in variants if v in node_variants]
2776 for field in self.op.output_fields:
2779 elif field == "valid":
2781 elif field == "node_status":
2782 # this is just a copy of the dict
2784 for node_name, nos_list in os_data.items():
2785 val[node_name] = nos_list
2786 elif field == "variants":
2789 raise errors.ParameterError(field)
2796 class LURemoveNode(LogicalUnit):
2797 """Logical unit for removing a node.
2800 HPATH = "node-remove"
2801 HTYPE = constants.HTYPE_NODE
2802 _OP_REQP = ["node_name"]
2804 def BuildHooksEnv(self):
2807 This doesn't run on the target node in the pre phase as a failed
2808 node would then be impossible to remove.
2812 "OP_TARGET": self.op.node_name,
2813 "NODE_NAME": self.op.node_name,
2815 all_nodes = self.cfg.GetNodeList()
2817 all_nodes.remove(self.op.node_name)
2819 logging.warning("Node %s which is about to be removed not found"
2820 " in the all nodes list", self.op.node_name)
2821 return env, all_nodes, all_nodes
2823 def CheckPrereq(self):
2824 """Check prerequisites.
2827 - the node exists in the configuration
2828 - it does not have primary or secondary instances
2829 - it's not the master
2831 Any errors are signaled by raising errors.OpPrereqError.
2834 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2835 node = self.cfg.GetNodeInfo(self.op.node_name)
2836 assert node is not None
2838 instance_list = self.cfg.GetInstanceList()
2840 masternode = self.cfg.GetMasterNode()
2841 if node.name == masternode:
2842 raise errors.OpPrereqError("Node is the master node,"
2843 " you need to failover first.",
2846 for instance_name in instance_list:
2847 instance = self.cfg.GetInstanceInfo(instance_name)
2848 if node.name in instance.all_nodes:
2849 raise errors.OpPrereqError("Instance %s is still running on the node,"
2850 " please remove first." % instance_name,
2852 self.op.node_name = node.name
2855 def Exec(self, feedback_fn):
2856 """Removes the node from the cluster.
2860 logging.info("Stopping the node daemon and removing configs from node %s",
2863 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2865 # Promote nodes to master candidate as needed
2866 _AdjustCandidatePool(self, exceptions=[node.name])
2867 self.context.RemoveNode(node.name)
2869 # Run post hooks on the node before it's removed
2870 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2872 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2874 # pylint: disable-msg=W0702
2875 self.LogWarning("Errors occurred running hooks on %s" % node.name)
2877 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2878 msg = result.fail_msg
2880 self.LogWarning("Errors encountered on the remote node while leaving"
2881 " the cluster: %s", msg)
2884 class LUQueryNodes(NoHooksLU):
2885 """Logical unit for querying nodes.
2888 # pylint: disable-msg=W0142
2889 _OP_REQP = ["output_fields", "names", "use_locking"]
2892 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2893 "master_candidate", "offline", "drained"]
2895 _FIELDS_DYNAMIC = utils.FieldSet(
2897 "mtotal", "mnode", "mfree",
2899 "ctotal", "cnodes", "csockets",
2902 _FIELDS_STATIC = utils.FieldSet(*[
2903 "pinst_cnt", "sinst_cnt",
2904 "pinst_list", "sinst_list",
2905 "pip", "sip", "tags",
2907 "role"] + _SIMPLE_FIELDS
2910 def ExpandNames(self):
2911 _CheckOutputFields(static=self._FIELDS_STATIC,
2912 dynamic=self._FIELDS_DYNAMIC,
2913 selected=self.op.output_fields)
2915 self.needed_locks = {}
2916 self.share_locks[locking.LEVEL_NODE] = 1
2919 self.wanted = _GetWantedNodes(self, self.op.names)
2921 self.wanted = locking.ALL_SET
2923 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2924 self.do_locking = self.do_node_query and self.op.use_locking
2926 # if we don't request only static fields, we need to lock the nodes
2927 self.needed_locks[locking.LEVEL_NODE] = self.wanted
2929 def CheckPrereq(self):
2930 """Check prerequisites.
2933 # The validation of the node list is done in _GetWantedNodes
2934 # if non-empty; if empty, there's no validation to do
2937 def Exec(self, feedback_fn):
2938 """Computes the list of nodes and their attributes.
2941 all_info = self.cfg.GetAllNodesInfo()
2943 nodenames = self.acquired_locks[locking.LEVEL_NODE]
2944 elif self.wanted != locking.ALL_SET:
2945 nodenames = self.wanted
2946 missing = set(nodenames).difference(all_info.keys())
2948 raise errors.OpExecError(
2949 "Some nodes were removed before retrieving their data: %s" % missing)
2951 nodenames = all_info.keys()
2953 nodenames = utils.NiceSort(nodenames)
2954 nodelist = [all_info[name] for name in nodenames]
2956 # begin data gathering
2958 if self.do_node_query:
2960 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2961 self.cfg.GetHypervisorType())
2962 for name in nodenames:
2963 nodeinfo = node_data[name]
2964 if not nodeinfo.fail_msg and nodeinfo.payload:
2965 nodeinfo = nodeinfo.payload
2966 fn = utils.TryConvert
2968 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2969 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2970 "mfree": fn(int, nodeinfo.get('memory_free', None)),
2971 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2972 "dfree": fn(int, nodeinfo.get('vg_free', None)),
2973 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2974 "bootid": nodeinfo.get('bootid', None),
2975 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2976 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2979 live_data[name] = {}
2981 live_data = dict.fromkeys(nodenames, {})
2983 node_to_primary = dict([(name, set()) for name in nodenames])
2984 node_to_secondary = dict([(name, set()) for name in nodenames])
2986 inst_fields = frozenset(("pinst_cnt", "pinst_list",
2987 "sinst_cnt", "sinst_list"))
2988 if inst_fields & frozenset(self.op.output_fields):
2989 inst_data = self.cfg.GetAllInstancesInfo()
2991 for inst in inst_data.values():
2992 if inst.primary_node in node_to_primary:
2993 node_to_primary[inst.primary_node].add(inst.name)
2994 for secnode in inst.secondary_nodes:
2995 if secnode in node_to_secondary:
2996 node_to_secondary[secnode].add(inst.name)
2998 master_node = self.cfg.GetMasterNode()
3000 # end data gathering
3003 for node in nodelist:
3005 for field in self.op.output_fields:
3006 if field in self._SIMPLE_FIELDS:
3007 val = getattr(node, field)
3008 elif field == "pinst_list":
3009 val = list(node_to_primary[node.name])
3010 elif field == "sinst_list":
3011 val = list(node_to_secondary[node.name])
3012 elif field == "pinst_cnt":
3013 val = len(node_to_primary[node.name])
3014 elif field == "sinst_cnt":
3015 val = len(node_to_secondary[node.name])
3016 elif field == "pip":
3017 val = node.primary_ip
3018 elif field == "sip":
3019 val = node.secondary_ip
3020 elif field == "tags":
3021 val = list(node.GetTags())
3022 elif field == "master":
3023 val = node.name == master_node
3024 elif self._FIELDS_DYNAMIC.Matches(field):
3025 val = live_data[node.name].get(field, None)
3026 elif field == "role":
3027 if node.name == master_node:
3029 elif node.master_candidate:
3038 raise errors.ParameterError(field)
3039 node_output.append(val)
3040 output.append(node_output)
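# Hedged sketch of the query result built above: one row per node, with one
# entry per requested field in the order of self.op.output_fields.  Field
# names and values are illustrative:
#
#   output_fields = ["name", "pinst_cnt", "mfree"]
#   output == [["node1.example.com", 3, 7680],
#              ["node2.example.com", 1, 15872]]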
3045 class LUQueryNodeVolumes(NoHooksLU):
3046 """Logical unit for getting volumes on node(s).
3049 _OP_REQP = ["nodes", "output_fields"]
3051 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3052 _FIELDS_STATIC = utils.FieldSet("node")
3054 def ExpandNames(self):
3055 _CheckOutputFields(static=self._FIELDS_STATIC,
3056 dynamic=self._FIELDS_DYNAMIC,
3057 selected=self.op.output_fields)
3059 self.needed_locks = {}
3060 self.share_locks[locking.LEVEL_NODE] = 1
3061 if not self.op.nodes:
3062 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3064 self.needed_locks[locking.LEVEL_NODE] = \
3065 _GetWantedNodes(self, self.op.nodes)
3067 def CheckPrereq(self):
3068 """Check prerequisites.
3070 This checks that the fields required are valid output fields.
3073 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3075 def Exec(self, feedback_fn):
3076 """Computes the list of nodes and their attributes.
3079 nodenames = self.nodes
3080 volumes = self.rpc.call_node_volumes(nodenames)
3082 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3083 in self.cfg.GetInstanceList()]
3085 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3088 for node in nodenames:
3089 nresult = volumes[node]
3092 msg = nresult.fail_msg
3094 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3097 node_vols = nresult.payload[:]
3098 node_vols.sort(key=lambda vol: vol['dev'])
3100 for vol in node_vols:
3102 for field in self.op.output_fields:
3105 elif field == "phys":
3109 elif field == "name":
3111 elif field == "size":
3112 val = int(float(vol['size']))
3113 elif field == "instance":
3115 if node not in lv_by_node[inst]:
3117 if vol['name'] in lv_by_node[inst][node]:
3123 raise errors.ParameterError(field)
3124 node_output.append(str(val))
3126 output.append(node_output)
3131 class LUQueryNodeStorage(NoHooksLU):
3132 """Logical unit for getting information on storage units on node(s).
3135 _OP_REQP = ["nodes", "storage_type", "output_fields"]
3137 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3139 def CheckArguments(self):
3140 _CheckStorageType(self.op.storage_type)
3142 _CheckOutputFields(static=self._FIELDS_STATIC,
3143 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3144 selected=self.op.output_fields)
3146 def ExpandNames(self):
3147 self.needed_locks = {}
3148 self.share_locks[locking.LEVEL_NODE] = 1
3151 self.needed_locks[locking.LEVEL_NODE] = \
3152 _GetWantedNodes(self, self.op.nodes)
3154 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3156 def CheckPrereq(self):
3157 """Check prerequisites.
3159 This checks that the fields required are valid output fields.
3162 self.op.name = getattr(self.op, "name", None)
3164 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3166 def Exec(self, feedback_fn):
3167 """Computes the list of nodes and their attributes.
3170 # Always get name to sort by
3171 if constants.SF_NAME in self.op.output_fields:
3172 fields = self.op.output_fields[:]
3174 fields = [constants.SF_NAME] + self.op.output_fields
3176 # Never ask for node or type as it's only known to the LU
3177 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3178 while extra in fields:
3179 fields.remove(extra)
3181 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3182 name_idx = field_idx[constants.SF_NAME]
3184 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3185 data = self.rpc.call_storage_list(self.nodes,
3186 self.op.storage_type, st_args,
3187 self.op.name, fields)
3191 for node in utils.NiceSort(self.nodes):
3192 nresult = data[node]
3196 msg = nresult.fail_msg
3198 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3201 rows = dict([(row[name_idx], row) for row in nresult.payload])
3203 for name in utils.NiceSort(rows.keys()):
3208 for field in self.op.output_fields:
3209 if field == constants.SF_NODE:
3211 elif field == constants.SF_TYPE:
3212 val = self.op.storage_type
3213 elif field in field_idx:
3214 val = row[field_idx[field]]
3216 raise errors.ParameterError(field)
3225 class LUModifyNodeStorage(NoHooksLU):
3226 """Logical unit for modifying a storage volume on a node.
3229 _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3232 def CheckArguments(self):
3233 self.opnode_name = _ExpandNodeName(self.cfg, self.op.node_name)
3235 _CheckStorageType(self.op.storage_type)
3237 def ExpandNames(self):
3238 self.needed_locks = {
3239 locking.LEVEL_NODE: self.op.node_name,
3242 def CheckPrereq(self):
3243 """Check prerequisites.
3246 storage_type = self.op.storage_type
3249 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3251 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3252 " modified" % storage_type,
3255 diff = set(self.op.changes.keys()) - modifiable
3257 raise errors.OpPrereqError("The following fields can not be modified for"
3258 " storage units of type '%s': %r" %
3259 (storage_type, list(diff)),
3262 def Exec(self, feedback_fn):
3263 """Computes the list of nodes and their attributes.
3266 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3267 result = self.rpc.call_storage_modify(self.op.node_name,
3268 self.op.storage_type, st_args,
3269 self.op.name, self.op.changes)
3270 result.Raise("Failed to modify storage unit '%s' on %s" %
3271 (self.op.name, self.op.node_name))
3274 class LUAddNode(LogicalUnit):
3275 """Logical unit for adding node to the cluster.
3279 HTYPE = constants.HTYPE_NODE
3280 _OP_REQP = ["node_name"]
3282 def CheckArguments(self):
3283 # validate/normalize the node name
3284 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3286 def BuildHooksEnv(self):
3289 This will run on all nodes before, and on all nodes + the new node after.
3293 "OP_TARGET": self.op.node_name,
3294 "NODE_NAME": self.op.node_name,
3295 "NODE_PIP": self.op.primary_ip,
3296 "NODE_SIP": self.op.secondary_ip,
3298 nodes_0 = self.cfg.GetNodeList()
3299 nodes_1 = nodes_0 + [self.op.node_name, ]
3300 return env, nodes_0, nodes_1
3302 def CheckPrereq(self):
3303 """Check prerequisites.
3306 - the new node is not already in the config
3308 - its parameters (single/dual homed) match the cluster
3310 Any errors are signaled by raising errors.OpPrereqError.
3313 node_name = self.op.node_name
3316 dns_data = utils.GetHostInfo(node_name)
3318 node = dns_data.name
3319 primary_ip = self.op.primary_ip = dns_data.ip
3320 secondary_ip = getattr(self.op, "secondary_ip", None)
3321 if secondary_ip is None:
3322 secondary_ip = primary_ip
3323 if not utils.IsValidIP(secondary_ip):
3324 raise errors.OpPrereqError("Invalid secondary IP given",
3326 self.op.secondary_ip = secondary_ip
3328 node_list = cfg.GetNodeList()
3329 if not self.op.readd and node in node_list:
3330 raise errors.OpPrereqError("Node %s is already in the configuration" %
3331 node, errors.ECODE_EXISTS)
3332 elif self.op.readd and node not in node_list:
3333 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3336 self.changed_primary_ip = False
3338 for existing_node_name in node_list:
3339 existing_node = cfg.GetNodeInfo(existing_node_name)
3341 if self.op.readd and node == existing_node_name:
3342 if existing_node.secondary_ip != secondary_ip:
3343 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3344 " address configuration as before",
3346 if existing_node.primary_ip != primary_ip:
3347 self.changed_primary_ip = True
3351 if (existing_node.primary_ip == primary_ip or
3352 existing_node.secondary_ip == primary_ip or
3353 existing_node.primary_ip == secondary_ip or
3354 existing_node.secondary_ip == secondary_ip):
3355 raise errors.OpPrereqError("New node ip address(es) conflict with"
3356 " existing node %s" % existing_node.name,
3357 errors.ECODE_NOTUNIQUE)
3359 # check that the type of the node (single versus dual homed) is the
3360 # same as for the master
3361 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3362 master_singlehomed = myself.secondary_ip == myself.primary_ip
3363 newbie_singlehomed = secondary_ip == primary_ip
3364 if master_singlehomed != newbie_singlehomed:
3365 if master_singlehomed:
3366 raise errors.OpPrereqError("The master has no private ip but the"
3367 " new node has one",
3370 raise errors.OpPrereqError("The master has a private ip but the"
3371 " new node doesn't have one",
3374 # checks reachability
3375 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3376 raise errors.OpPrereqError("Node not reachable by ping",
3377 errors.ECODE_ENVIRON)
3379 if not newbie_singlehomed:
3380 # check reachability from my secondary ip to newbie's secondary ip
3381 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3382 source=myself.secondary_ip):
3383 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3384 " based ping to noded port",
3385 errors.ECODE_ENVIRON)
3392 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3395 self.new_node = self.cfg.GetNodeInfo(node)
3396 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3398 self.new_node = objects.Node(name=node,
3399 primary_ip=primary_ip,
3400 secondary_ip=secondary_ip,
3401 master_candidate=self.master_candidate,
3402 offline=False, drained=False)
3404 def Exec(self, feedback_fn):
3405 """Adds the new node to the cluster.
3408 new_node = self.new_node
3409 node = new_node.name
3411 # for re-adds, reset the offline/drained/master-candidate flags;
3412 # we need to reset here, otherwise offline would prevent RPC calls
3413 # later in the procedure; this also means that if the re-add
3414 # fails, we are left with a non-offlined, broken node
3416 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3417 self.LogInfo("Readding a node, the offline/drained flags were reset")
3418 # if we demote the node, we do cleanup later in the procedure
3419 new_node.master_candidate = self.master_candidate
3420 if self.changed_primary_ip:
3421 new_node.primary_ip = self.op.primary_ip
3423 # notify the user about any possible mc promotion
3424 if new_node.master_candidate:
3425 self.LogInfo("Node will be a master candidate")
3427 # check connectivity
3428 result = self.rpc.call_version([node])[node]
3429 result.Raise("Can't get version information from node %s" % node)
3430 if constants.PROTOCOL_VERSION == result.payload:
3431 logging.info("Communication to node %s fine, sw version %s match",
3432 node, result.payload)
3434 raise errors.OpExecError("Version mismatch master version %s,"
3435 " node version %s" %
3436 (constants.PROTOCOL_VERSION, result.payload))
3439 if self.cfg.GetClusterInfo().modify_ssh_setup:
3440 logging.info("Copy ssh key to node %s", node)
3441 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3443 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3444 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3448 keyarray.append(utils.ReadFile(i))
3450 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3451 keyarray[2], keyarray[3], keyarray[4],
3453 result.Raise("Cannot transfer ssh keys to the new node")
3455 # Add node to our /etc/hosts, and add key to known_hosts
3456 if self.cfg.GetClusterInfo().modify_etc_hosts:
3457 utils.AddHostToEtcHosts(new_node.name)
3459 if new_node.secondary_ip != new_node.primary_ip:
3460 result = self.rpc.call_node_has_ip_address(new_node.name,
3461 new_node.secondary_ip)
3462 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3463 prereq=True, ecode=errors.ECODE_ENVIRON)
3464 if not result.payload:
3465 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3466 " you gave (%s). Please fix and re-run this"
3467 " command." % new_node.secondary_ip)
3469 node_verify_list = [self.cfg.GetMasterNode()]
3470 node_verify_param = {
3471 constants.NV_NODELIST: [node],
3472 # TODO: do a node-net-test as well?
3475 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3476 self.cfg.GetClusterName())
3477 for verifier in node_verify_list:
3478 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3479 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3481 for failed in nl_payload:
3482 feedback_fn("ssh/hostname verification failed"
3483 " (checking from %s): %s" %
3484 (verifier, nl_payload[failed]))
3485 raise errors.OpExecError("ssh/hostname verification failed.")
3488 _RedistributeAncillaryFiles(self)
3489 self.context.ReaddNode(new_node)
3490 # make sure we redistribute the config
3491 self.cfg.Update(new_node, feedback_fn)
3492 # and make sure the new node will not have old files around
3493 if not new_node.master_candidate:
3494 result = self.rpc.call_node_demote_from_mc(new_node.name)
3495 msg = result.fail_msg
3497 self.LogWarning("Node failed to demote itself from master"
3498 " candidate status: %s" % msg)
3500 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3501 self.context.AddNode(new_node, self.proc.GetECId())
3504 class LUSetNodeParams(LogicalUnit):
3505 """Modifies the parameters of a node.
3508 HPATH = "node-modify"
3509 HTYPE = constants.HTYPE_NODE
3510 _OP_REQP = ["node_name"]
3513 def CheckArguments(self):
3514 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3515 _CheckBooleanOpField(self.op, 'master_candidate')
3516 _CheckBooleanOpField(self.op, 'offline')
3517 _CheckBooleanOpField(self.op, 'drained')
3518 _CheckBooleanOpField(self.op, 'auto_promote')
3519 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3520 if all_mods.count(None) == 3:
3521 raise errors.OpPrereqError("Please pass at least one modification",
3523 if all_mods.count(True) > 1:
3524 raise errors.OpPrereqError("Can't set the node into more than one"
3525 " state at the same time",
3528 # Boolean value that tells us whether we're offlining or draining the node
3529 self.offline_or_drain = (self.op.offline == True or
3530 self.op.drained == True)
3531 self.deoffline_or_drain = (self.op.offline == False or
3532 self.op.drained == False)
3533 self.might_demote = (self.op.master_candidate == False or
3534 self.offline_or_drain)
3536 self.lock_all = self.op.auto_promote and self.might_demote
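# Hedged sketch of the flag bookkeeping above (opcode attributes shown with
# illustrative values):
#
#   op.offline=True            -> offline_or_drain=True,  might_demote=True
#   op.drained=False           -> deoffline_or_drain=True
#   op.master_candidate=False  -> might_demote=True
#   op.auto_promote=True with might_demote=True -> lock_all=True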
3539 def ExpandNames(self):
3541 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3543 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3545 def BuildHooksEnv(self):
3548 This runs on the master node.
3552 "OP_TARGET": self.op.node_name,
3553 "MASTER_CANDIDATE": str(self.op.master_candidate),
3554 "OFFLINE": str(self.op.offline),
3555 "DRAINED": str(self.op.drained),
3557 nl = [self.cfg.GetMasterNode(),
3561 def CheckPrereq(self):
3562 """Check prerequisites.
3564 This checks the requested flag changes against the node's current state.
3567 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3569 if (self.op.master_candidate is not None or
3570 self.op.drained is not None or
3571 self.op.offline is not None):
3572 # we can't change the master's node flags
3573 if self.op.node_name == self.cfg.GetMasterNode():
3574 raise errors.OpPrereqError("The master role can be changed"
3575 " only via masterfailover",
3579 if node.master_candidate and self.might_demote and not self.lock_all:
3580 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3581 # check if after removing the current node, we're missing master
3582 # candidates
3583 (mc_remaining, mc_should, _) = \
3584 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3585 if mc_remaining < mc_should:
3586 raise errors.OpPrereqError("Not enough master candidates, please"
3587 " pass auto_promote to allow promotion",
3590 if (self.op.master_candidate == True and
3591 ((node.offline and not self.op.offline == False) or
3592 (node.drained and not self.op.drained == False))):
3593 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3594 " to master_candidate" % node.name,
3597 # If we're being deofflined/drained, we'll MC ourself if needed
3598 if (self.deoffline_or_drain and not self.offline_or_drain and not
3599 self.op.master_candidate == True and not node.master_candidate):
3600 self.op.master_candidate = _DecideSelfPromotion(self)
3601 if self.op.master_candidate:
3602 self.LogInfo("Autopromoting node to master candidate")
3606 def Exec(self, feedback_fn):
3615 if self.op.offline is not None:
3616 node.offline = self.op.offline
3617 result.append(("offline", str(self.op.offline)))
3618 if self.op.offline == True:
3619 if node.master_candidate:
3620 node.master_candidate = False
3622 result.append(("master_candidate", "auto-demotion due to offline"))
3624 node.drained = False
3625 result.append(("drained", "clear drained status due to offline"))
3627 if self.op.master_candidate is not None:
3628 node.master_candidate = self.op.master_candidate
3630 result.append(("master_candidate", str(self.op.master_candidate)))
3631 if self.op.master_candidate == False:
3632 rrc = self.rpc.call_node_demote_from_mc(node.name)
3635 self.LogWarning("Node failed to demote itself: %s" % msg)
3637 if self.op.drained is not None:
3638 node.drained = self.op.drained
3639 result.append(("drained", str(self.op.drained)))
3640 if self.op.drained == True:
3641 if node.master_candidate:
3642 node.master_candidate = False
3644 result.append(("master_candidate", "auto-demotion due to drain"))
3645 rrc = self.rpc.call_node_demote_from_mc(node.name)
3648 self.LogWarning("Node failed to demote itself: %s" % msg)
3650 node.offline = False
3651 result.append(("offline", "clear offline status due to drain"))
3653 # we locked all nodes, so adjust the candidate pool before updating this node
3655 _AdjustCandidatePool(self, [node.name])
3657 # this will trigger configuration file update, if needed
3658 self.cfg.Update(node, feedback_fn)
3660 # this will trigger job queue propagation or cleanup
3662 self.context.ReaddNode(node)
3667 class LUPowercycleNode(NoHooksLU):
3668 """Powercycles a node.
3671 _OP_REQP = ["node_name", "force"]
3674 def CheckArguments(self):
3675 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3676 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3677 raise errors.OpPrereqError("The node is the master and the force"
3678 " parameter was not set",
3681 def ExpandNames(self):
3682 """Locking for PowercycleNode.
3684 This is a last-resort option and shouldn't block on other
3685 jobs. Therefore, we grab no locks.
3688 self.needed_locks = {}
3690 def CheckPrereq(self):
3691 """Check prerequisites.
3693 This LU has no prereqs.
3698 def Exec(self, feedback_fn):
3702 result = self.rpc.call_node_powercycle(self.op.node_name,
3703 self.cfg.GetHypervisorType())
3704 result.Raise("Failed to schedule the reboot")
3705 return result.payload
3708 class LUQueryClusterInfo(NoHooksLU):
3709 """Query cluster configuration.
3715 def ExpandNames(self):
3716 self.needed_locks = {}
3718 def CheckPrereq(self):
3719 """No prerequsites needed for this LU.
3724 def Exec(self, feedback_fn):
3725 """Return cluster config.
3728 cluster = self.cfg.GetClusterInfo()
3731 # Filter just for enabled hypervisors
3732 for os_name, hv_dict in cluster.os_hvp.items():
3733 os_hvp[os_name] = {}
3734 for hv_name, hv_params in hv_dict.items():
3735 if hv_name in cluster.enabled_hypervisors:
3736 os_hvp[os_name][hv_name] = hv_params
3739 "software_version": constants.RELEASE_VERSION,
3740 "protocol_version": constants.PROTOCOL_VERSION,
3741 "config_version": constants.CONFIG_VERSION,
3742 "os_api_version": max(constants.OS_API_VERSIONS),
3743 "export_version": constants.EXPORT_VERSION,
3744 "architecture": (platform.architecture()[0], platform.machine()),
3745 "name": cluster.cluster_name,
3746 "master": cluster.master_node,
3747 "default_hypervisor": cluster.enabled_hypervisors[0],
3748 "enabled_hypervisors": cluster.enabled_hypervisors,
3749 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3750 for hypervisor_name in cluster.enabled_hypervisors]),
3752 "beparams": cluster.beparams,
3753 "nicparams": cluster.nicparams,
3754 "candidate_pool_size": cluster.candidate_pool_size,
3755 "master_netdev": cluster.master_netdev,
3756 "volume_group_name": cluster.volume_group_name,
3757 "file_storage_dir": cluster.file_storage_dir,
3758 "maintain_node_health": cluster.maintain_node_health,
3759 "ctime": cluster.ctime,
3760 "mtime": cluster.mtime,
3761 "uuid": cluster.uuid,
3762 "tags": list(cluster.GetTags()),
3763 "uid_pool": cluster.uid_pool,
3769 class LUQueryConfigValues(NoHooksLU):
3770 """Return configuration values.
3775 _FIELDS_DYNAMIC = utils.FieldSet()
3776 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3779 def ExpandNames(self):
3780 self.needed_locks = {}
3782 _CheckOutputFields(static=self._FIELDS_STATIC,
3783 dynamic=self._FIELDS_DYNAMIC,
3784 selected=self.op.output_fields)
3786 def CheckPrereq(self):
3787 """No prerequisites.
3792 def Exec(self, feedback_fn):
3793 """Dump a representation of the cluster config to the standard output.
3797 for field in self.op.output_fields:
3798 if field == "cluster_name":
3799 entry = self.cfg.GetClusterName()
3800 elif field == "master_node":
3801 entry = self.cfg.GetMasterNode()
3802 elif field == "drain_flag":
3803 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3804 elif field == "watcher_pause":
3805 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3807 raise errors.ParameterError(field)
3808 values.append(entry)
3812 class LUActivateInstanceDisks(NoHooksLU):
3813 """Bring up an instance's disks.
3816 _OP_REQP = ["instance_name"]
3819 def ExpandNames(self):
3820 self._ExpandAndLockInstance()
3821 self.needed_locks[locking.LEVEL_NODE] = []
3822 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3824 def DeclareLocks(self, level):
3825 if level == locking.LEVEL_NODE:
3826 self._LockInstancesNodes()
3828 def CheckPrereq(self):
3829 """Check prerequisites.
3831 This checks that the instance is in the cluster.
3834 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3835 assert self.instance is not None, \
3836 "Cannot retrieve locked instance %s" % self.op.instance_name
3837 _CheckNodeOnline(self, self.instance.primary_node)
3838 if not hasattr(self.op, "ignore_size"):
3839 self.op.ignore_size = False
3841 def Exec(self, feedback_fn):
3842 """Activate the disks.
3845 disks_ok, disks_info = \
3846 _AssembleInstanceDisks(self, self.instance,
3847 ignore_size=self.op.ignore_size)
3849 raise errors.OpExecError("Cannot activate block devices")
3854 def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3856 """Prepare the block devices for an instance.
3858 This sets up the block devices on all nodes.
3860 @type lu: L{LogicalUnit}
3861 @param lu: the logical unit on whose behalf we execute
3862 @type instance: L{objects.Instance}
3863 @param instance: the instance for whose disks we assemble
3864 @type ignore_secondaries: boolean
3865 @param ignore_secondaries: if true, errors on secondary nodes
3866 won't result in an error return from the function
3867 @type ignore_size: boolean
3868 @param ignore_size: if true, the current known size of the disk
3869 will not be used during the disk activation, useful for cases
3870 when the size is wrong
3871 @return: a tuple of (disks_ok, device_info), where device_info is a list
3872 of (host, instance_visible_name, node_visible_name) tuples
3873 with the mapping from node devices to instance devices
3878 iname = instance.name
3879 # With the two-pass mechanism we try to reduce the window of
3880 # opportunity for the race condition of switching DRBD to primary
3881 # before handshaking occurred, but we do not eliminate it
3883 # The proper fix would be to wait (with some limits) until the
3884 # connection has been made and drbd transitions from WFConnection
3885 # into any other network-connected state (Connected, SyncTarget,
3888 # 1st pass, assemble on all nodes in secondary mode
3889 for inst_disk in instance.disks:
3890 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3892 node_disk = node_disk.Copy()
3893 node_disk.UnsetSize()
3894 lu.cfg.SetDiskID(node_disk, node)
3895 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3896 msg = result.fail_msg
3898 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3899 " (is_primary=False, pass=1): %s",
3900 inst_disk.iv_name, node, msg)
3901 if not ignore_secondaries:
3904 # FIXME: race condition on drbd migration to primary
3906 # 2nd pass, do only the primary node
3907 for inst_disk in instance.disks:
3910 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3911 if node != instance.primary_node:
3914 node_disk = node_disk.Copy()
3915 node_disk.UnsetSize()
3916 lu.cfg.SetDiskID(node_disk, node)
3917 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3918 msg = result.fail_msg
3920 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3921 " (is_primary=True, pass=2): %s",
3922 inst_disk.iv_name, node, msg)
3925 dev_path = result.payload
3927 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3929 # leave the disks configured for the primary node
3930 # this is a workaround that would be fixed better by
3931 # improving the logical/physical id handling
3932 for disk in instance.disks:
3933 lu.cfg.SetDiskID(disk, instance.primary_node)
3935 return disks_ok, device_info
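# Editorial sketch (not part of the original module): a typical caller unpacks
# the (disks_ok, device_info) pair returned above and aborts when assembly
# failed; the variable names below are hypothetical.
#
#   disks_ok, device_info = _AssembleInstanceDisks(self, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     feedback_fn("disk %s on %s mapped to %s" % (iv_name, node, dev_path))
#
# LUActivateInstanceDisks.Exec above is the real in-tree usage of this helper.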
3938 def _StartInstanceDisks(lu, instance, force):
3939 """Start the disks of an instance.
3942 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3943 ignore_secondaries=force)
3945 _ShutdownInstanceDisks(lu, instance)
3946 if force is not None and not force:
3947 lu.proc.LogWarning("", hint="If the message above refers to a"
3949 " you can retry the operation using '--force'.")
3950 raise errors.OpExecError("Disk consistency error")
3953 class LUDeactivateInstanceDisks(NoHooksLU):
3954 """Shutdown an instance's disks.
3957 _OP_REQP = ["instance_name"]
3960 def ExpandNames(self):
3961 self._ExpandAndLockInstance()
3962 self.needed_locks[locking.LEVEL_NODE] = []
3963 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3965 def DeclareLocks(self, level):
3966 if level == locking.LEVEL_NODE:
3967 self._LockInstancesNodes()
3969 def CheckPrereq(self):
3970 """Check prerequisites.
3972 This checks that the instance is in the cluster.
3975 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3976 assert self.instance is not None, \
3977 "Cannot retrieve locked instance %s" % self.op.instance_name
3979 def Exec(self, feedback_fn):
3980 """Deactivate the disks
3983 instance = self.instance
3984 _SafeShutdownInstanceDisks(self, instance)
3987 def _SafeShutdownInstanceDisks(lu, instance):
3988 """Shutdown block devices of an instance.
3990 This function checks that the instance is down before calling
3991 _ShutdownInstanceDisks.
3994 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
3995 _ShutdownInstanceDisks(lu, instance)
3998 def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3999 """Shutdown block devices of an instance.
4001 This does the shutdown on all nodes of the instance.
4003 If ignore_primary is false, errors on the primary node make the operation fail.
4008 for disk in instance.disks:
4009 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4010 lu.cfg.SetDiskID(top_disk, node)
4011 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4012 msg = result.fail_msg
4014 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4015 disk.iv_name, node, msg)
4016 if not ignore_primary or node != instance.primary_node:
4021 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4022 """Checks if a node has enough free memory.
4024 This function checks if a given node has the needed amount of free
4025 memory. In case the node has less memory or we cannot get the
4026 information from the node, this function raises an OpPrereqError
4029 @type lu: C{LogicalUnit}
4030 @param lu: a logical unit from which we get configuration data
4032 @param node: the node to check
4033 @type reason: C{str}
4034 @param reason: string to use in the error message
4035 @type requested: C{int}
4036 @param requested: the amount of memory in MiB to check for
4037 @type hypervisor_name: C{str}
4038 @param hypervisor_name: the hypervisor to ask for memory stats
4039 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4040 we cannot check the node
4043 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4044 nodeinfo[node].Raise("Can't get data from node %s" % node,
4045 prereq=True, ecode=errors.ECODE_ENVIRON)
4046 free_mem = nodeinfo[node].payload.get('memory_free', None)
4047 if not isinstance(free_mem, int):
4048 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4049 " was '%s'" % (node, free_mem),
4050 errors.ECODE_ENVIRON)
4051 if requested > free_mem:
4052 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4053 " needed %s MiB, available %s MiB" %
4054 (node, reason, requested, free_mem),
4058 def _CheckNodesFreeDisk(lu, nodenames, requested):
4059 """Checks if nodes have enough free disk space in the default VG.
4061 This function checks if all given nodes have the needed amount of
4062 free disk. In case any node has less disk or we cannot get the
4063 information from the node, this function raises an OpPrereqError
4066 @type lu: C{LogicalUnit}
4067 @param lu: a logical unit from which we get configuration data
4068 @type nodenames: C{list}
4069 @param nodenames: the list of node names to check
4070 @type requested: C{int}
4071 @param requested: the amount of disk in MiB to check for
4072 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4073 we cannot check the node
4076 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4077 lu.cfg.GetHypervisorType())
4078 for node in nodenames:
4079 info = nodeinfo[node]
4080 info.Raise("Cannot get current information from node %s" % node,
4081 prereq=True, ecode=errors.ECODE_ENVIRON)
4082 vg_free = info.payload.get("vg_free", None)
4083 if not isinstance(vg_free, int):
4084 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4085 " result was '%s'" % (node, vg_free),
4086 errors.ECODE_ENVIRON)
4087 if requested > vg_free:
4088 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4089 " required %d MiB, available %d MiB" %
4090 (node, requested, vg_free),
4094 class LUStartupInstance(LogicalUnit):
4095 """Starts an instance.
4098 HPATH = "instance-start"
4099 HTYPE = constants.HTYPE_INSTANCE
4100 _OP_REQP = ["instance_name", "force"]
4103 def ExpandNames(self):
4104 self._ExpandAndLockInstance()
4106 def BuildHooksEnv(self):
4109 This runs on master, primary and secondary nodes of the instance.
4113 "FORCE": self.op.force,
4115 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4116 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4119 def CheckPrereq(self):
4120 """Check prerequisites.
4122 This checks that the instance is in the cluster.
4125 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4126 assert self.instance is not None, \
4127 "Cannot retrieve locked instance %s" % self.op.instance_name
4130 self.beparams = getattr(self.op, "beparams", {})
4132 if not isinstance(self.beparams, dict):
4133 raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4134 " dict" % (type(self.beparams), ),
4136 # fill the beparams dict
4137 utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
4138 self.op.beparams = self.beparams
4141 self.hvparams = getattr(self.op, "hvparams", {})
4143 if not isinstance(self.hvparams, dict):
4144 raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4145 " dict" % (type(self.hvparams), ),
4148 # check hypervisor parameter syntax (locally)
4149 cluster = self.cfg.GetClusterInfo()
4150 utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
4151 filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
4153 filled_hvp.update(self.hvparams)
4154 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4155 hv_type.CheckParameterSyntax(filled_hvp)
4156 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4157 self.op.hvparams = self.hvparams
4159 _CheckNodeOnline(self, instance.primary_node)
4161 bep = self.cfg.GetClusterInfo().FillBE(instance)
4162 # check bridges existence
4163 _CheckInstanceBridgesExist(self, instance)
4165 remote_info = self.rpc.call_instance_info(instance.primary_node,
4167 instance.hypervisor)
4168 remote_info.Raise("Error checking node %s" % instance.primary_node,
4169 prereq=True, ecode=errors.ECODE_ENVIRON)
4170 if not remote_info.payload: # not running already
4171 _CheckNodeFreeMemory(self, instance.primary_node,
4172 "starting instance %s" % instance.name,
4173 bep[constants.BE_MEMORY], instance.hypervisor)
4175 def Exec(self, feedback_fn):
4176 """Start the instance.
4179 instance = self.instance
4180 force = self.op.force
4182 self.cfg.MarkInstanceUp(instance.name)
4184 node_current = instance.primary_node
4186 _StartInstanceDisks(self, instance, force)
4188 result = self.rpc.call_instance_start(node_current, instance,
4189 self.hvparams, self.beparams)
4190 msg = result.fail_msg
4192 _ShutdownInstanceDisks(self, instance)
4193 raise errors.OpExecError("Could not start instance: %s" % msg)
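# Editorial sketch (assumption, illustrating the hvparams handling in
# CheckPrereq above): objects.FillDict-style merging uses the cluster-level
# hypervisor parameters as the base and the per-opcode overrides on top; the
# parameter names below are hypothetical examples.
#
#   cluster_hv = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda1"}
#   op_hv = {"root_path": "/dev/xvda1"}
#   filled = dict(cluster_hv)
#   filled.update(op_hv)
#   # filled == {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/xvda1"}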
4196 class LURebootInstance(LogicalUnit):
4197 """Reboot an instance.
4200 HPATH = "instance-reboot"
4201 HTYPE = constants.HTYPE_INSTANCE
4202 _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
4205 def CheckArguments(self):
4206 """Check the arguments.
4209 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4210 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4212 def ExpandNames(self):
4213 if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
4214 constants.INSTANCE_REBOOT_HARD,
4215 constants.INSTANCE_REBOOT_FULL]:
4216 raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
4217 (constants.INSTANCE_REBOOT_SOFT,
4218 constants.INSTANCE_REBOOT_HARD,
4219 constants.INSTANCE_REBOOT_FULL))
4220 self._ExpandAndLockInstance()
4222 def BuildHooksEnv(self):
4225 This runs on master, primary and secondary nodes of the instance.
4229 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4230 "REBOOT_TYPE": self.op.reboot_type,
4231 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4233 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4234 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4237 def CheckPrereq(self):
4238 """Check prerequisites.
4240 This checks that the instance is in the cluster.
4243 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4244 assert self.instance is not None, \
4245 "Cannot retrieve locked instance %s" % self.op.instance_name
4247 _CheckNodeOnline(self, instance.primary_node)
4249 # check bridges existence
4250 _CheckInstanceBridgesExist(self, instance)
4252 def Exec(self, feedback_fn):
4253 """Reboot the instance.
4256 instance = self.instance
4257 ignore_secondaries = self.op.ignore_secondaries
4258 reboot_type = self.op.reboot_type
4260 node_current = instance.primary_node
4262 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4263 constants.INSTANCE_REBOOT_HARD]:
4264 for disk in instance.disks:
4265 self.cfg.SetDiskID(disk, node_current)
4266 result = self.rpc.call_instance_reboot(node_current, instance,
4268 self.shutdown_timeout)
4269 result.Raise("Could not reboot instance")
4271 result = self.rpc.call_instance_shutdown(node_current, instance,
4272 self.shutdown_timeout)
4273 result.Raise("Could not shutdown instance for full reboot")
4274 _ShutdownInstanceDisks(self, instance)
4275 _StartInstanceDisks(self, instance, ignore_secondaries)
4276 result = self.rpc.call_instance_start(node_current, instance, None, None)
4277 msg = result.fail_msg
4279 _ShutdownInstanceDisks(self, instance)
4280 raise errors.OpExecError("Could not start instance for"
4281 " full reboot: %s" % msg)
4283 self.cfg.MarkInstanceUp(instance.name)
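# Editorial note (summary of the Exec logic above, not original code): SOFT
# and HARD reboots are delegated to a single call_instance_reboot RPC, while
# FULL reboots are emulated as shutdown + disk deactivation + disk activation
# + start. A hypothetical full-reboot opcode would look roughly like:
#
#   op = opcodes.OpRebootInstance(instance_name="inst1.example.com",
#                                 reboot_type=constants.INSTANCE_REBOOT_FULL,
#                                 ignore_secondaries=False)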
4286 class LUShutdownInstance(LogicalUnit):
4287 """Shutdown an instance.
4290 HPATH = "instance-stop"
4291 HTYPE = constants.HTYPE_INSTANCE
4292 _OP_REQP = ["instance_name"]
4295 def CheckArguments(self):
4296 """Check the arguments.
4299 self.timeout = getattr(self.op, "timeout",
4300 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4302 def ExpandNames(self):
4303 self._ExpandAndLockInstance()
4305 def BuildHooksEnv(self):
4308 This runs on master, primary and secondary nodes of the instance.
4311 env = _BuildInstanceHookEnvByObject(self, self.instance)
4312 env["TIMEOUT"] = self.timeout
4313 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4316 def CheckPrereq(self):
4317 """Check prerequisites.
4319 This checks that the instance is in the cluster.
4322 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4323 assert self.instance is not None, \
4324 "Cannot retrieve locked instance %s" % self.op.instance_name
4325 _CheckNodeOnline(self, self.instance.primary_node)
4327 def Exec(self, feedback_fn):
4328 """Shutdown the instance.
4331 instance = self.instance
4332 node_current = instance.primary_node
4333 timeout = self.timeout
4334 self.cfg.MarkInstanceDown(instance.name)
4335 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4336 msg = result.fail_msg
4338 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4340 _ShutdownInstanceDisks(self, instance)
4343 class LUReinstallInstance(LogicalUnit):
4344 """Reinstall an instance.
4347 HPATH = "instance-reinstall"
4348 HTYPE = constants.HTYPE_INSTANCE
4349 _OP_REQP = ["instance_name"]
4352 def ExpandNames(self):
4353 self._ExpandAndLockInstance()
4355 def BuildHooksEnv(self):
4358 This runs on master, primary and secondary nodes of the instance.
4361 env = _BuildInstanceHookEnvByObject(self, self.instance)
4362 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4365 def CheckPrereq(self):
4366 """Check prerequisites.
4368 This checks that the instance is in the cluster and is not running.
4371 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4372 assert instance is not None, \
4373 "Cannot retrieve locked instance %s" % self.op.instance_name
4374 _CheckNodeOnline(self, instance.primary_node)
4376 if instance.disk_template == constants.DT_DISKLESS:
4377 raise errors.OpPrereqError("Instance '%s' has no disks" %
4378 self.op.instance_name,
4380 _CheckInstanceDown(self, instance, "cannot reinstall")
4382 self.op.os_type = getattr(self.op, "os_type", None)
4383 self.op.force_variant = getattr(self.op, "force_variant", False)
4384 if self.op.os_type is not None:
4386 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4387 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4389 self.instance = instance
4391 def Exec(self, feedback_fn):
4392 """Reinstall the instance.
4395 inst = self.instance
4397 if self.op.os_type is not None:
4398 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4399 inst.os = self.op.os_type
4400 self.cfg.Update(inst, feedback_fn)
4402 _StartInstanceDisks(self, inst, None)
4404 feedback_fn("Running the instance OS create scripts...")
4405 # FIXME: pass debug option from opcode to backend
4406 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4407 self.op.debug_level)
4408 result.Raise("Could not install OS for instance %s on node %s" %
4409 (inst.name, inst.primary_node))
4411 _ShutdownInstanceDisks(self, inst)
4414 class LURecreateInstanceDisks(LogicalUnit):
4415 """Recreate an instance's missing disks.
4418 HPATH = "instance-recreate-disks"
4419 HTYPE = constants.HTYPE_INSTANCE
4420 _OP_REQP = ["instance_name", "disks"]
4423 def CheckArguments(self):
4424 """Check the arguments.
4427 if not isinstance(self.op.disks, list):
4428 raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4429 for item in self.op.disks:
4430 if (not isinstance(item, int) or
4432 raise errors.OpPrereqError("Invalid disk specification '%s'" %
4433 str(item), errors.ECODE_INVAL)
4435 def ExpandNames(self):
4436 self._ExpandAndLockInstance()
4438 def BuildHooksEnv(self):
4441 This runs on master, primary and secondary nodes of the instance.
4444 env = _BuildInstanceHookEnvByObject(self, self.instance)
4445 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4448 def CheckPrereq(self):
4449 """Check prerequisites.
4451 This checks that the instance is in the cluster and is not running.
4454 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4455 assert instance is not None, \
4456 "Cannot retrieve locked instance %s" % self.op.instance_name
4457 _CheckNodeOnline(self, instance.primary_node)
4459 if instance.disk_template == constants.DT_DISKLESS:
4460 raise errors.OpPrereqError("Instance '%s' has no disks" %
4461 self.op.instance_name, errors.ECODE_INVAL)
4462 _CheckInstanceDown(self, instance, "cannot recreate disks")
4464 if not self.op.disks:
4465 self.op.disks = range(len(instance.disks))
4467 for idx in self.op.disks:
4468 if idx >= len(instance.disks):
4469 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4472 self.instance = instance
4474 def Exec(self, feedback_fn):
4475 """Recreate the disks.
4479 for idx, _ in enumerate(self.instance.disks):
4480 if idx not in self.op.disks: # disk idx has not been passed in
4484 _CreateDisks(self, self.instance, to_skip=to_skip)
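# Editorial example (hypothetical values): for an instance with three disks
# and self.op.disks == [1], the loop above leaves to_skip == [0, 2], so only
# disk index 1 is recreated by _CreateDisks.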
4487 class LURenameInstance(LogicalUnit):
4488 """Rename an instance.
4491 HPATH = "instance-rename"
4492 HTYPE = constants.HTYPE_INSTANCE
4493 _OP_REQP = ["instance_name", "new_name"]
4495 def BuildHooksEnv(self):
4498 This runs on master, primary and secondary nodes of the instance.
4501 env = _BuildInstanceHookEnvByObject(self, self.instance)
4502 env["INSTANCE_NEW_NAME"] = self.op.new_name
4503 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4506 def CheckPrereq(self):
4507 """Check prerequisites.
4509 This checks that the instance is in the cluster and is not running.
4512 self.op.instance_name = _ExpandInstanceName(self.cfg,
4513 self.op.instance_name)
4514 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4515 assert instance is not None
4516 _CheckNodeOnline(self, instance.primary_node)
4517 _CheckInstanceDown(self, instance, "cannot rename")
4518 self.instance = instance
4520 # new name verification
4521 name_info = utils.GetHostInfo(self.op.new_name)
4523 self.op.new_name = new_name = name_info.name
4524 instance_list = self.cfg.GetInstanceList()
4525 if new_name in instance_list:
4526 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4527 new_name, errors.ECODE_EXISTS)
4529 if not getattr(self.op, "ignore_ip", False):
4530 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4531 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4532 (name_info.ip, new_name),
4533 errors.ECODE_NOTUNIQUE)
4536 def Exec(self, feedback_fn):
4537 """Reinstall the instance.
4540 inst = self.instance
4541 old_name = inst.name
4543 if inst.disk_template == constants.DT_FILE:
4544 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4546 self.cfg.RenameInstance(inst.name, self.op.new_name)
4547 # Change the instance lock. This is definitely safe while we hold the BGL
4548 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4549 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4551 # re-read the instance from the configuration after rename
4552 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4554 if inst.disk_template == constants.DT_FILE:
4555 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4556 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4557 old_file_storage_dir,
4558 new_file_storage_dir)
4559 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4560 " (but the instance has been renamed in Ganeti)" %
4561 (inst.primary_node, old_file_storage_dir,
4562 new_file_storage_dir))
4564 _StartInstanceDisks(self, inst, None)
4566 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4567 old_name, self.op.debug_level)
4568 msg = result.fail_msg
4570 msg = ("Could not run OS rename script for instance %s on node %s"
4571 " (but the instance has been renamed in Ganeti): %s" %
4572 (inst.name, inst.primary_node, msg))
4573 self.proc.LogWarning(msg)
4575 _ShutdownInstanceDisks(self, inst)
4578 class LURemoveInstance(LogicalUnit):
4579 """Remove an instance.
4582 HPATH = "instance-remove"
4583 HTYPE = constants.HTYPE_INSTANCE
4584 _OP_REQP = ["instance_name", "ignore_failures"]
4587 def CheckArguments(self):
4588 """Check the arguments.
4591 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4592 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4594 def ExpandNames(self):
4595 self._ExpandAndLockInstance()
4596 self.needed_locks[locking.LEVEL_NODE] = []
4597 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4599 def DeclareLocks(self, level):
4600 if level == locking.LEVEL_NODE:
4601 self._LockInstancesNodes()
4603 def BuildHooksEnv(self):
4606 This runs on master, primary and secondary nodes of the instance.
4609 env = _BuildInstanceHookEnvByObject(self, self.instance)
4610 env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4611 nl = [self.cfg.GetMasterNode()]
4612 nl_post = list(self.instance.all_nodes) + nl
4613 return env, nl, nl_post
4615 def CheckPrereq(self):
4616 """Check prerequisites.
4618 This checks that the instance is in the cluster.
4621 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4622 assert self.instance is not None, \
4623 "Cannot retrieve locked instance %s" % self.op.instance_name
4625 def Exec(self, feedback_fn):
4626 """Remove the instance.
4629 instance = self.instance
4630 logging.info("Shutting down instance %s on node %s",
4631 instance.name, instance.primary_node)
4633 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4634 self.shutdown_timeout)
4635 msg = result.fail_msg
4637 if self.op.ignore_failures:
4638 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4640 raise errors.OpExecError("Could not shutdown instance %s on"
4642 (instance.name, instance.primary_node, msg))
4644 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4647 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4648 """Utility function to remove an instance.
4651 logging.info("Removing block devices for instance %s", instance.name)
4653 if not _RemoveDisks(lu, instance):
4654 if not ignore_failures:
4655 raise errors.OpExecError("Can't remove instance's disks")
4656 feedback_fn("Warning: can't remove instance's disks")
4658 logging.info("Removing instance %s out of cluster config", instance.name)
4660 lu.cfg.RemoveInstance(instance.name)
4662 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4663 "Instance lock removal conflict"
4665 # Remove lock for the instance
4666 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4669 class LUQueryInstances(NoHooksLU):
4670 """Logical unit for querying instances.
4673 # pylint: disable-msg=W0142
4674 _OP_REQP = ["output_fields", "names", "use_locking"]
4676 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4677 "serial_no", "ctime", "mtime", "uuid"]
4678 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4680 "disk_template", "ip", "mac", "bridge",
4681 "nic_mode", "nic_link",
4682 "sda_size", "sdb_size", "vcpus", "tags",
4683 "network_port", "beparams",
4684 r"(disk)\.(size)/([0-9]+)",
4685 r"(disk)\.(sizes)", "disk_usage",
4686 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4687 r"(nic)\.(bridge)/([0-9]+)",
4688 r"(nic)\.(macs|ips|modes|links|bridges)",
4689 r"(disk|nic)\.(count)",
4691 ] + _SIMPLE_FIELDS +
4693 for name in constants.HVS_PARAMETERS
4694 if name not in constants.HVC_GLOBALS] +
4696 for name in constants.BES_PARAMETERS])
4697 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
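# Editorial note: the regular expressions in _FIELDS_STATIC allow per-index
# queries; for example (hypothetical field strings) "disk.size/0" selects the
# size of the first disk, "nic.mac/1" the MAC of the second NIC, and
# "nic.bridges" the bridge list of all NICs.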
4700 def ExpandNames(self):
4701 _CheckOutputFields(static=self._FIELDS_STATIC,
4702 dynamic=self._FIELDS_DYNAMIC,
4703 selected=self.op.output_fields)
4705 self.needed_locks = {}
4706 self.share_locks[locking.LEVEL_INSTANCE] = 1
4707 self.share_locks[locking.LEVEL_NODE] = 1
4710 self.wanted = _GetWantedInstances(self, self.op.names)
4712 self.wanted = locking.ALL_SET
4714 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4715 self.do_locking = self.do_node_query and self.op.use_locking
4717 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4718 self.needed_locks[locking.LEVEL_NODE] = []
4719 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4721 def DeclareLocks(self, level):
4722 if level == locking.LEVEL_NODE and self.do_locking:
4723 self._LockInstancesNodes()
4725 def CheckPrereq(self):
4726 """Check prerequisites.
4731 def Exec(self, feedback_fn):
4732 """Computes the list of nodes and their attributes.
4735 # pylint: disable-msg=R0912
4736 # way too many branches here
4737 all_info = self.cfg.GetAllInstancesInfo()
4738 if self.wanted == locking.ALL_SET:
4739 # caller didn't specify instance names, so ordering is not important
4741 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4743 instance_names = all_info.keys()
4744 instance_names = utils.NiceSort(instance_names)
4746 # caller did specify names, so we must keep the ordering
4748 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4750 tgt_set = all_info.keys()
4751 missing = set(self.wanted).difference(tgt_set)
4753 raise errors.OpExecError("Some instances were removed before"
4754 " retrieving their data: %s" % missing)
4755 instance_names = self.wanted
4757 instance_list = [all_info[iname] for iname in instance_names]
4759 # begin data gathering
4761 nodes = frozenset([inst.primary_node for inst in instance_list])
4762 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4766 if self.do_node_query:
4768 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4770 result = node_data[name]
4772 # offline nodes will be in both lists
4773 off_nodes.append(name)
4775 bad_nodes.append(name)
4778 live_data.update(result.payload)
4779 # else no instance is alive
4781 live_data = dict([(name, {}) for name in instance_names])
4783 # end data gathering
4788 cluster = self.cfg.GetClusterInfo()
4789 for instance in instance_list:
4791 i_hv = cluster.FillHV(instance, skip_globals=True)
4792 i_be = cluster.FillBE(instance)
4793 i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4794 nic.nicparams) for nic in instance.nics]
4795 for field in self.op.output_fields:
4796 st_match = self._FIELDS_STATIC.Matches(field)
4797 if field in self._SIMPLE_FIELDS:
4798 val = getattr(instance, field)
4799 elif field == "pnode":
4800 val = instance.primary_node
4801 elif field == "snodes":
4802 val = list(instance.secondary_nodes)
4803 elif field == "admin_state":
4804 val = instance.admin_up
4805 elif field == "oper_state":
4806 if instance.primary_node in bad_nodes:
4809 val = bool(live_data.get(instance.name))
4810 elif field == "status":
4811 if instance.primary_node in off_nodes:
4812 val = "ERROR_nodeoffline"
4813 elif instance.primary_node in bad_nodes:
4814 val = "ERROR_nodedown"
4816 running = bool(live_data.get(instance.name))
4818 if instance.admin_up:
4823 if instance.admin_up:
4827 elif field == "oper_ram":
4828 if instance.primary_node in bad_nodes:
4830 elif instance.name in live_data:
4831 val = live_data[instance.name].get("memory", "?")
4834 elif field == "vcpus":
4835 val = i_be[constants.BE_VCPUS]
4836 elif field == "disk_template":
4837 val = instance.disk_template
4840 val = instance.nics[0].ip
4843 elif field == "nic_mode":
4845 val = i_nicp[0][constants.NIC_MODE]
4848 elif field == "nic_link":
4850 val = i_nicp[0][constants.NIC_LINK]
4853 elif field == "bridge":
4854 if (instance.nics and
4855 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4856 val = i_nicp[0][constants.NIC_LINK]
4859 elif field == "mac":
4861 val = instance.nics[0].mac
4864 elif field == "sda_size" or field == "sdb_size":
4865 idx = ord(field[2]) - ord('a')
4867 val = instance.FindDisk(idx).size
4868 except errors.OpPrereqError:
4870 elif field == "disk_usage": # total disk usage per node
4871 disk_sizes = [{'size': disk.size} for disk in instance.disks]
4872 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4873 elif field == "tags":
4874 val = list(instance.GetTags())
4875 elif field == "hvparams":
4877 elif (field.startswith(HVPREFIX) and
4878 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4879 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4880 val = i_hv.get(field[len(HVPREFIX):], None)
4881 elif field == "beparams":
4883 elif (field.startswith(BEPREFIX) and
4884 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4885 val = i_be.get(field[len(BEPREFIX):], None)
4886 elif st_match and st_match.groups():
4887 # matches a variable list
4888 st_groups = st_match.groups()
4889 if st_groups and st_groups[0] == "disk":
4890 if st_groups[1] == "count":
4891 val = len(instance.disks)
4892 elif st_groups[1] == "sizes":
4893 val = [disk.size for disk in instance.disks]
4894 elif st_groups[1] == "size":
4896 val = instance.FindDisk(st_groups[2]).size
4897 except errors.OpPrereqError:
4900 assert False, "Unhandled disk parameter"
4901 elif st_groups[0] == "nic":
4902 if st_groups[1] == "count":
4903 val = len(instance.nics)
4904 elif st_groups[1] == "macs":
4905 val = [nic.mac for nic in instance.nics]
4906 elif st_groups[1] == "ips":
4907 val = [nic.ip for nic in instance.nics]
4908 elif st_groups[1] == "modes":
4909 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4910 elif st_groups[1] == "links":
4911 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4912 elif st_groups[1] == "bridges":
4915 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4916 val.append(nicp[constants.NIC_LINK])
4921 nic_idx = int(st_groups[2])
4922 if nic_idx >= len(instance.nics):
4925 if st_groups[1] == "mac":
4926 val = instance.nics[nic_idx].mac
4927 elif st_groups[1] == "ip":
4928 val = instance.nics[nic_idx].ip
4929 elif st_groups[1] == "mode":
4930 val = i_nicp[nic_idx][constants.NIC_MODE]
4931 elif st_groups[1] == "link":
4932 val = i_nicp[nic_idx][constants.NIC_LINK]
4933 elif st_groups[1] == "bridge":
4934 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4935 if nic_mode == constants.NIC_MODE_BRIDGED:
4936 val = i_nicp[nic_idx][constants.NIC_LINK]
4940 assert False, "Unhandled NIC parameter"
4942 assert False, ("Declared but unhandled variable parameter '%s'" %
4945 assert False, "Declared but unhandled parameter '%s'" % field
4952 class LUFailoverInstance(LogicalUnit):
4953 """Failover an instance.
4956 HPATH = "instance-failover"
4957 HTYPE = constants.HTYPE_INSTANCE
4958 _OP_REQP = ["instance_name", "ignore_consistency"]
4961 def CheckArguments(self):
4962 """Check the arguments.
4965 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4966 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4968 def ExpandNames(self):
4969 self._ExpandAndLockInstance()
4970 self.needed_locks[locking.LEVEL_NODE] = []
4971 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4973 def DeclareLocks(self, level):
4974 if level == locking.LEVEL_NODE:
4975 self._LockInstancesNodes()
4977 def BuildHooksEnv(self):
4980 This runs on master, primary and secondary nodes of the instance.
4983 instance = self.instance
4984 source_node = instance.primary_node
4985 target_node = instance.secondary_nodes[0]
4987 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4988 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4989 "OLD_PRIMARY": source_node,
4990 "OLD_SECONDARY": target_node,
4991 "NEW_PRIMARY": target_node,
4992 "NEW_SECONDARY": source_node,
4994 env.update(_BuildInstanceHookEnvByObject(self, instance))
4995 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4997 nl_post.append(source_node)
4998 return env, nl, nl_post
5000 def CheckPrereq(self):
5001 """Check prerequisites.
5003 This checks that the instance is in the cluster.
5006 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5007 assert self.instance is not None, \
5008 "Cannot retrieve locked instance %s" % self.op.instance_name
5010 bep = self.cfg.GetClusterInfo().FillBE(instance)
5011 if instance.disk_template not in constants.DTS_NET_MIRROR:
5012 raise errors.OpPrereqError("Instance's disk layout is not"
5013 " network mirrored, cannot failover.",
5016 secondary_nodes = instance.secondary_nodes
5017 if not secondary_nodes:
5018 raise errors.ProgrammerError("no secondary node but using "
5019 "a mirrored disk template")
5021 target_node = secondary_nodes[0]
5022 _CheckNodeOnline(self, target_node)
5023 _CheckNodeNotDrained(self, target_node)
5024 if instance.admin_up:
5025 # check memory requirements on the secondary node
5026 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5027 instance.name, bep[constants.BE_MEMORY],
5028 instance.hypervisor)
5030 self.LogInfo("Not checking memory on the secondary node as"
5031 " instance will not be started")
5033 # check bridge existence
5034 _CheckInstanceBridgesExist(self, instance, node=target_node)
5036 def Exec(self, feedback_fn):
5037 """Failover an instance.
5039 The failover is done by shutting it down on its present node and
5040 starting it on the secondary.
5043 instance = self.instance
5045 source_node = instance.primary_node
5046 target_node = instance.secondary_nodes[0]
5048 if instance.admin_up:
5049 feedback_fn("* checking disk consistency between source and target")
5050 for dev in instance.disks:
5051 # for drbd, these are drbd over lvm
5052 if not _CheckDiskConsistency(self, dev, target_node, False):
5053 if not self.op.ignore_consistency:
5054 raise errors.OpExecError("Disk %s is degraded on target node,"
5055 " aborting failover." % dev.iv_name)
5057 feedback_fn("* not checking disk consistency as instance is not running")
5059 feedback_fn("* shutting down instance on source node")
5060 logging.info("Shutting down instance %s on node %s",
5061 instance.name, source_node)
5063 result = self.rpc.call_instance_shutdown(source_node, instance,
5064 self.shutdown_timeout)
5065 msg = result.fail_msg
5067 if self.op.ignore_consistency:
5068 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5069 " Proceeding anyway. Please make sure node"
5070 " %s is down. Error details: %s",
5071 instance.name, source_node, source_node, msg)
5073 raise errors.OpExecError("Could not shutdown instance %s on"
5075 (instance.name, source_node, msg))
5077 feedback_fn("* deactivating the instance's disks on source node")
5078 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5079 raise errors.OpExecError("Can't shut down the instance's disks.")
5081 instance.primary_node = target_node
5082 # distribute new instance config to the other nodes
5083 self.cfg.Update(instance, feedback_fn)
5085 # Only start the instance if it's marked as up
5086 if instance.admin_up:
5087 feedback_fn("* activating the instance's disks on target node")
5088 logging.info("Starting instance %s on node %s",
5089 instance.name, target_node)
5091 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5092 ignore_secondaries=True)
5094 _ShutdownInstanceDisks(self, instance)
5095 raise errors.OpExecError("Can't activate the instance's disks")
5097 feedback_fn("* starting the instance on the target node")
5098 result = self.rpc.call_instance_start(target_node, instance, None, None)
5099 msg = result.fail_msg
5101 _ShutdownInstanceDisks(self, instance)
5102 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5103 (instance.name, target_node, msg))
5106 class LUMigrateInstance(LogicalUnit):
5107 """Migrate an instance.
5109 This is migration without shutting the instance down, as opposed to
5110 failover, which requires a shutdown.
5113 HPATH = "instance-migrate"
5114 HTYPE = constants.HTYPE_INSTANCE
5115 _OP_REQP = ["instance_name", "live", "cleanup"]
5119 def ExpandNames(self):
5120 self._ExpandAndLockInstance()
5122 self.needed_locks[locking.LEVEL_NODE] = []
5123 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5125 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5126 self.op.live, self.op.cleanup)
5127 self.tasklets = [self._migrater]
5129 def DeclareLocks(self, level):
5130 if level == locking.LEVEL_NODE:
5131 self._LockInstancesNodes()
5133 def BuildHooksEnv(self):
5136 This runs on master, primary and secondary nodes of the instance.
5139 instance = self._migrater.instance
5140 source_node = instance.primary_node
5141 target_node = instance.secondary_nodes[0]
5142 env = _BuildInstanceHookEnvByObject(self, instance)
5143 env["MIGRATE_LIVE"] = self.op.live
5144 env["MIGRATE_CLEANUP"] = self.op.cleanup
5146 "OLD_PRIMARY": source_node,
5147 "OLD_SECONDARY": target_node,
5148 "NEW_PRIMARY": target_node,
5149 "NEW_SECONDARY": source_node,
5151 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5153 nl_post.append(source_node)
5154 return env, nl, nl_post
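# Editorial note: LUMigrateInstance intentionally defines no CheckPrereq/Exec
# of its own; both come from the TLMigrateInstance tasklet registered in
# ExpandNames via self.tasklets, so the actual migration logic lives in
# TLMigrateInstance below.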
5157 class LUMoveInstance(LogicalUnit):
5158 """Move an instance by data-copying.
5161 HPATH = "instance-move"
5162 HTYPE = constants.HTYPE_INSTANCE
5163 _OP_REQP = ["instance_name", "target_node"]
5166 def CheckArguments(self):
5167 """Check the arguments.
5170 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5171 constants.DEFAULT_SHUTDOWN_TIMEOUT)
5173 def ExpandNames(self):
5174 self._ExpandAndLockInstance()
5175 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5176 self.op.target_node = target_node
5177 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5178 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5180 def DeclareLocks(self, level):
5181 if level == locking.LEVEL_NODE:
5182 self._LockInstancesNodes(primary_only=True)
5184 def BuildHooksEnv(self):
5187 This runs on master, primary and secondary nodes of the instance.
5191 "TARGET_NODE": self.op.target_node,
5192 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5194 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5195 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5196 self.op.target_node]
5199 def CheckPrereq(self):
5200 """Check prerequisites.
5202 This checks that the instance is in the cluster.
5205 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5206 assert self.instance is not None, \
5207 "Cannot retrieve locked instance %s" % self.op.instance_name
5209 node = self.cfg.GetNodeInfo(self.op.target_node)
5210 assert node is not None, \
5211 "Cannot retrieve locked node %s" % self.op.target_node
5213 self.target_node = target_node = node.name
5215 if target_node == instance.primary_node:
5216 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5217 (instance.name, target_node),
5220 bep = self.cfg.GetClusterInfo().FillBE(instance)
5222 for idx, dsk in enumerate(instance.disks):
5223 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5224 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5225 " cannot copy" % idx, errors.ECODE_STATE)
5227 _CheckNodeOnline(self, target_node)
5228 _CheckNodeNotDrained(self, target_node)
5230 if instance.admin_up:
5231 # check memory requirements on the secondary node
5232 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5233 instance.name, bep[constants.BE_MEMORY],
5234 instance.hypervisor)
5236 self.LogInfo("Not checking memory on the secondary node as"
5237 " instance will not be started")
5239 # check bridge existence
5240 _CheckInstanceBridgesExist(self, instance, node=target_node)
5242 def Exec(self, feedback_fn):
5243 """Move an instance.
5245 The move is done by shutting it down on its present node, copying
5246 the data over (slow) and starting it on the new node.
5249 instance = self.instance
5251 source_node = instance.primary_node
5252 target_node = self.target_node
5254 self.LogInfo("Shutting down instance %s on source node %s",
5255 instance.name, source_node)
5257 result = self.rpc.call_instance_shutdown(source_node, instance,
5258 self.shutdown_timeout)
5259 msg = result.fail_msg
5261 if self.op.ignore_consistency:
5262 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5263 " Proceeding anyway. Please make sure node"
5264 " %s is down. Error details: %s",
5265 instance.name, source_node, source_node, msg)
5267 raise errors.OpExecError("Could not shutdown instance %s on"
5269 (instance.name, source_node, msg))
5271 # create the target disks
5273 _CreateDisks(self, instance, target_node=target_node)
5274 except errors.OpExecError:
5275 self.LogWarning("Device creation failed, reverting...")
5277 _RemoveDisks(self, instance, target_node=target_node)
5279 self.cfg.ReleaseDRBDMinors(instance.name)
5282 cluster_name = self.cfg.GetClusterInfo().cluster_name
5285 # activate, get path, copy the data over
5286 for idx, disk in enumerate(instance.disks):
5287 self.LogInfo("Copying data for disk %d", idx)
5288 result = self.rpc.call_blockdev_assemble(target_node, disk,
5289 instance.name, True)
5291 self.LogWarning("Can't assemble newly created disk %d: %s",
5292 idx, result.fail_msg)
5293 errs.append(result.fail_msg)
5295 dev_path = result.payload
5296 result = self.rpc.call_blockdev_export(source_node, disk,
5297 target_node, dev_path,
5300 self.LogWarning("Can't copy data over for disk %d: %s",
5301 idx, result.fail_msg)
5302 errs.append(result.fail_msg)
5306 self.LogWarning("Some disks failed to copy, aborting")
5308 _RemoveDisks(self, instance, target_node=target_node)
5310 self.cfg.ReleaseDRBDMinors(instance.name)
5311 raise errors.OpExecError("Errors during disk copy: %s" %
5314 instance.primary_node = target_node
5315 self.cfg.Update(instance, feedback_fn)
5317 self.LogInfo("Removing the disks on the original node")
5318 _RemoveDisks(self, instance, target_node=source_node)
5320 # Only start the instance if it's marked as up
5321 if instance.admin_up:
5322 self.LogInfo("Starting instance %s on node %s",
5323 instance.name, target_node)
5325 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5326 ignore_secondaries=True)
5328 _ShutdownInstanceDisks(self, instance)
5329 raise errors.OpExecError("Can't activate the instance's disks")
5331 result = self.rpc.call_instance_start(target_node, instance, None, None)
5332 msg = result.fail_msg
5334 _ShutdownInstanceDisks(self, instance)
5335 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5336 (instance.name, target_node, msg))
5339 class LUMigrateNode(LogicalUnit):
5340 """Migrate all instances from a node.
5343 HPATH = "node-migrate"
5344 HTYPE = constants.HTYPE_NODE
5345 _OP_REQP = ["node_name", "live"]
5348 def ExpandNames(self):
5349 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5351 self.needed_locks = {
5352 locking.LEVEL_NODE: [self.op.node_name],
5355 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5357 # Create tasklets for migrating instances for all instances on this node
5361 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5362 logging.debug("Migrating instance %s", inst.name)
5363 names.append(inst.name)
5365 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5367 self.tasklets = tasklets
5369 # Declare instance locks
5370 self.needed_locks[locking.LEVEL_INSTANCE] = names
5372 def DeclareLocks(self, level):
5373 if level == locking.LEVEL_NODE:
5374 self._LockInstancesNodes()
5376 def BuildHooksEnv(self):
5379 This runs on the master, the primary and all the secondaries.
5383 "NODE_NAME": self.op.node_name,
5386 nl = [self.cfg.GetMasterNode()]
5388 return (env, nl, nl)
5391 class TLMigrateInstance(Tasklet):
5392 def __init__(self, lu, instance_name, live, cleanup):
5393 """Initializes this class.
5396 Tasklet.__init__(self, lu)
5399 self.instance_name = instance_name
5401 self.cleanup = cleanup
5403 def CheckPrereq(self):
5404 """Check prerequisites.
5406 This checks that the instance is in the cluster.
5409 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5410 instance = self.cfg.GetInstanceInfo(instance_name)
5411 assert instance is not None
5413 if instance.disk_template != constants.DT_DRBD8:
5414 raise errors.OpPrereqError("Instance's disk layout is not"
5415 " drbd8, cannot migrate.", errors.ECODE_STATE)
5417 secondary_nodes = instance.secondary_nodes
5418 if not secondary_nodes:
5419 raise errors.ConfigurationError("No secondary node but using"
5420 " drbd8 disk template")
5422 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5424 target_node = secondary_nodes[0]
5425 # check memory requirements on the secondary node
5426 _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5427 instance.name, i_be[constants.BE_MEMORY],
5428 instance.hypervisor)
5430 # check bridge existence
5431 _CheckInstanceBridgesExist(self, instance, node=target_node)
5433 if not self.cleanup:
5434 _CheckNodeNotDrained(self, target_node)
5435 result = self.rpc.call_instance_migratable(instance.primary_node,
5437 result.Raise("Can't migrate, please use failover",
5438 prereq=True, ecode=errors.ECODE_STATE)
5440 self.instance = instance
5442 def _WaitUntilSync(self):
5443 """Poll with custom rpc for disk sync.
5445 This uses our own step-based rpc call.
5448 self.feedback_fn("* wait until resync is done")
5452 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5454 self.instance.disks)
5456 for node, nres in result.items():
5457 nres.Raise("Cannot resync disks on node %s" % node)
5458 node_done, node_percent = nres.payload
5459 all_done = all_done and node_done
5460 if node_percent is not None:
5461 min_percent = min(min_percent, node_percent)
5463 if min_percent < 100:
5464 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5467 def _EnsureSecondary(self, node):
5468 """Demote a node to secondary.
5471 self.feedback_fn("* switching node %s to secondary mode" % node)
5473 for dev in self.instance.disks:
5474 self.cfg.SetDiskID(dev, node)
5476 result = self.rpc.call_blockdev_close(node, self.instance.name,
5477 self.instance.disks)
5478 result.Raise("Cannot change disk to secondary on node %s" % node)
5480 def _GoStandalone(self):
5481 """Disconnect from the network.
5484 self.feedback_fn("* changing into standalone mode")
5485 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5486 self.instance.disks)
5487 for node, nres in result.items():
5488 nres.Raise("Cannot disconnect disks node %s" % node)
5490 def _GoReconnect(self, multimaster):
5491 """Reconnect to the network.
5497 msg = "single-master"
5498 self.feedback_fn("* changing disks into %s mode" % msg)
5499 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5500 self.instance.disks,
5501 self.instance.name, multimaster)
5502 for node, nres in result.items():
5503 nres.Raise("Cannot change disks config on node %s" % node)
5505 def _ExecCleanup(self):
5506 """Try to cleanup after a failed migration.
5508 The cleanup is done by:
5509 - check that the instance is running only on one node
5510 (and update the config if needed)
5511 - change disks on its secondary node to secondary
5512 - wait until disks are fully synchronized
5513 - disconnect from the network
5514 - change disks into single-master mode
5515 - wait again until disks are fully synchronized
5518 instance = self.instance
5519 target_node = self.target_node
5520 source_node = self.source_node
5522 # check running on only one node
5523 self.feedback_fn("* checking where the instance actually runs"
5524 " (if this hangs, the hypervisor might be in"
5526 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5527 for node, result in ins_l.items():
5528 result.Raise("Can't contact node %s" % node)
5530 runningon_source = instance.name in ins_l[source_node].payload
5531 runningon_target = instance.name in ins_l[target_node].payload
5533 if runningon_source and runningon_target:
5534 raise errors.OpExecError("Instance seems to be running on two nodes,"
5535 " or the hypervisor is confused. You will have"
5536 " to ensure manually that it runs only on one"
5537 " and restart this operation.")
5539 if not (runningon_source or runningon_target):
5540 raise errors.OpExecError("Instance does not seem to be running at all."
5541 " In this case, it's safer to repair by"
5542 " running 'gnt-instance stop' to ensure disk"
5543 " shutdown, and then restarting it.")
5545 if runningon_target:
5546 # the migration has actually succeeded, we need to update the config
5547 self.feedback_fn("* instance running on secondary node (%s),"
5548 " updating config" % target_node)
5549 instance.primary_node = target_node
5550 self.cfg.Update(instance, self.feedback_fn)
5551 demoted_node = source_node
5553 self.feedback_fn("* instance confirmed to be running on its"
5554 " primary node (%s)" % source_node)
5555 demoted_node = target_node
5557 self._EnsureSecondary(demoted_node)
5559 self._WaitUntilSync()
5560 except errors.OpExecError:
5561 # we ignore here errors, since if the device is standalone, it
5562 # won't be able to sync
5564 self._GoStandalone()
5565 self._GoReconnect(False)
5566 self._WaitUntilSync()
5568 self.feedback_fn("* done")
5570 def _RevertDiskStatus(self):
5571 """Try to revert the disk status after a failed migration.
5574 target_node = self.target_node
5576 self._EnsureSecondary(target_node)
5577 self._GoStandalone()
5578 self._GoReconnect(False)
5579 self._WaitUntilSync()
5580 except errors.OpExecError, err:
5581 self.lu.LogWarning("Migration failed and I can't reconnect the"
5582 " drives: error '%s'\n"
5583 "Please look and recover the instance status" %
5586 def _AbortMigration(self):
5587 """Call the hypervisor code to abort a started migration.
5590 instance = self.instance
5591 target_node = self.target_node
5592 migration_info = self.migration_info
5594 abort_result = self.rpc.call_finalize_migration(target_node,
5598 abort_msg = abort_result.fail_msg
5600 logging.error("Aborting migration failed on target node %s: %s",
5601 target_node, abort_msg)
5602 # Don't raise an exception here, as we still have to try to revert the
5603 # disk status, even if this step failed.
5605 def _ExecMigration(self):
5606 """Migrate an instance.
5608 The migration is done by:
5609 - change the disks into dual-master mode
5610 - wait until disks are fully synchronized again
5611 - migrate the instance
5612 - change disks on the new secondary node (the old primary) to secondary
5613 - wait until disks are fully synchronized
5614 - change disks into single-master mode
5617 instance = self.instance
5618 target_node = self.target_node
5619 source_node = self.source_node
5621 self.feedback_fn("* checking disk consistency between source and target")
5622 for dev in instance.disks:
5623 if not _CheckDiskConsistency(self, dev, target_node, False):
5624 raise errors.OpExecError("Disk %s is degraded or not fully"
5625 " synchronized on target node,"
5626 " aborting migrate." % dev.iv_name)
5628 # First get the migration information from the remote node
5629 result = self.rpc.call_migration_info(source_node, instance)
5630 msg = result.fail_msg
5632 log_err = ("Failed fetching source migration information from %s: %s" %
5634 logging.error(log_err)
5635 raise errors.OpExecError(log_err)
5637 self.migration_info = migration_info = result.payload
5639 # Then switch the disks to master/master mode
5640 self._EnsureSecondary(target_node)
5641 self._GoStandalone()
5642 self._GoReconnect(True)
5643 self._WaitUntilSync()
5645 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5646 result = self.rpc.call_accept_instance(target_node,
5649 self.nodes_ip[target_node])
5651 msg = result.fail_msg
5653 logging.error("Instance pre-migration failed, trying to revert"
5654 " disk status: %s", msg)
5655 self.feedback_fn("Pre-migration failed, aborting")
5656 self._AbortMigration()
5657 self._RevertDiskStatus()
5658 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5659 (instance.name, msg))
5661 self.feedback_fn("* migrating instance to %s" % target_node)
5663 result = self.rpc.call_instance_migrate(source_node, instance,
5664 self.nodes_ip[target_node],
5666 msg = result.fail_msg
5668 logging.error("Instance migration failed, trying to revert"
5669 " disk status: %s", msg)
5670 self.feedback_fn("Migration failed, aborting")
5671 self._AbortMigration()
5672 self._RevertDiskStatus()
5673 raise errors.OpExecError("Could not migrate instance %s: %s" %
5674 (instance.name, msg))
5677 instance.primary_node = target_node
5678 # distribute new instance config to the other nodes
5679 self.cfg.Update(instance, self.feedback_fn)
5681 result = self.rpc.call_finalize_migration(target_node,
5685 msg = result.fail_msg
5687 logging.error("Instance migration succeeded, but finalization failed:"
5689 raise errors.OpExecError("Could not finalize instance migration: %s" %
5692 self._EnsureSecondary(source_node)
5693 self._WaitUntilSync()
5694 self._GoStandalone()
5695 self._GoReconnect(False)
5696 self._WaitUntilSync()
5698 self.feedback_fn("* done")
5700 def Exec(self, feedback_fn):
5701 """Perform the migration.
5704 feedback_fn("Migrating instance %s" % self.instance.name)
5706 self.feedback_fn = feedback_fn
5708 self.source_node = self.instance.primary_node
5709 self.target_node = self.instance.secondary_nodes[0]
5710 self.all_nodes = [self.source_node, self.target_node]
5712 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5713 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5717 return self._ExecCleanup()
5719 return self._ExecMigration()
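# Editorial sketch (hypothetical values): for an instance whose primary node
# is node1 and whose DRBD secondary is node2, Exec() above ends up with:
#
#   self.source_node = "node1.example.com"
#   self.target_node = "node2.example.com"
#   self.all_nodes   = ["node1.example.com", "node2.example.com"]
#   self.nodes_ip    = {"node1.example.com": "192.0.2.1",
#                       "node2.example.com": "192.0.2.2"}
#
# before dispatching to _ExecCleanup() or _ExecMigration() depending on the
# cleanup flag.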
5722 def _CreateBlockDev(lu, node, instance, device, force_create,
5724 """Create a tree of block devices on a given node.
5726 If this device type has to be created on secondaries, create it and all its children.
5729 If not, just recurse to children keeping the same 'force' value.
5731 @param lu: the lu on whose behalf we execute
5732 @param node: the node on which to create the device
5733 @type instance: L{objects.Instance}
5734 @param instance: the instance which owns the device
5735 @type device: L{objects.Disk}
5736 @param device: the device to create
5737 @type force_create: boolean
5738 @param force_create: whether to force creation of this device; this
5739 will be changed to True whenever we find a device which has the
5740 CreateOnSecondary() attribute
5741 @param info: the extra 'metadata' we should attach to the device
5742 (this will be represented as a LVM tag)
5743 @type force_open: boolean
5744 @param force_open: this parameter will be passed to the
5745 L{backend.BlockdevCreate} function where it specifies
5746 whether we run on primary or not, and it affects both
5747 the child assembly and the device's own Open() execution
5750 if device.CreateOnSecondary():
5754 for child in device.children:
5755 _CreateBlockDev(lu, node, instance, child, force_create,
5758 if not force_create:
5761 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
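# Editorial sketch (hypothetical call, not original code): creating the disk
# tree of a DRBD8 device on one node could look roughly like
#
#   _CreateBlockDev(lu, node, instance, drbd8_disk,
#                   force_create=(node == instance.primary_node),
#                   info=_GetInstanceInfoText(instance),
#                   force_open=(node == instance.primary_node))
#
# The LV children are handled first; force_create is switched to True for
# device types whose CreateOnSecondary() returns true, so the children are
# created on secondary nodes as well.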
5764 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5765 """Create a single block device on a given node.
5767 This will not recurse over children of the device, so they must be created in advance.
5770 @param lu: the lu on whose behalf we execute
5771 @param node: the node on which to create the device
5772 @type instance: L{objects.Instance}
5773 @param instance: the instance which owns the device
5774 @type device: L{objects.Disk}
5775 @param device: the device to create
5776 @param info: the extra 'metadata' we should attach to the device
5777 (this will be represented as a LVM tag)
5778 @type force_open: boolean
5779 @param force_open: this parameter will be passed to the
5780 L{backend.BlockdevCreate} function where it specifies
5781 whether we run on primary or not, and it affects both
5782 the child assembly and the device's own Open() execution
5785 lu.cfg.SetDiskID(device, node)
5786 result = lu.rpc.call_blockdev_create(node, device, device.size,
5787 instance.name, force_open, info)
5788 result.Raise("Can't create block device %s on"
5789 " node %s for instance %s" % (device, node, instance.name))
5790 if device.physical_id is None:
5791 device.physical_id = result.payload
5794 def _GenerateUniqueNames(lu, exts):
5795 """Generate a suitable LV name.
5797 This will generate a logical volume name for the given instance.
5799 """
5800 results = []
5801 for val in exts:
5802 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5803 results.append("%s%s" % (new_id, val))
5804 return results
5807 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5808 p_minor, s_minor):
5809 """Generate a drbd8 device complete with its children.
5812 port = lu.cfg.AllocatePort()
5813 vgname = lu.cfg.GetVGName()
5814 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5815 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5816 logical_id=(vgname, names[0]))
5817 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5818 logical_id=(vgname, names[1]))
5819 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5820 logical_id=(primary, secondary, port,
5821 p_minor, s_minor,
5822 shared_secret),
5823 children=[dev_data, dev_meta],
5824 iv_name=iv_name)
5825 return drbd_dev
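# Illustrative sketch (not part of the original module): a hypothetical call
# for a 1024 MB disk; names and minors shaped as the callers below produce
# them.
#
#   branch = _GenerateDRBD8Branch(lu, "node1.example.com", "node2.example.com",
#                                 1024, ["<uuid>.disk0_data", "<uuid>.disk0_meta"],
#                                 "disk/0", 0, 1)
#   # branch is an LD_DRBD8 disk whose children are the 1024 MB data LV and
#   # the fixed 128 MB metadata LV generated above.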
5828 def _GenerateDiskTemplate(lu, template_name,
5829 instance_name, primary_node,
5830 secondary_nodes, disk_info,
5831 file_storage_dir, file_driver,
5832 base_index):
5833 """Generate the entire disk layout for a given template type.
5836 #TODO: compute space requirements
5838 vgname = lu.cfg.GetVGName()
5839 disk_count = len(disk_info)
5840 disks = []
5841 if template_name == constants.DT_DISKLESS:
5842 pass
5843 elif template_name == constants.DT_PLAIN:
5844 if len(secondary_nodes) != 0:
5845 raise errors.ProgrammerError("Wrong template configuration")
5847 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5848 for i in range(disk_count)])
5849 for idx, disk in enumerate(disk_info):
5850 disk_index = idx + base_index
5851 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5852 logical_id=(vgname, names[idx]),
5853 iv_name="disk/%d" % disk_index,
5854 mode=disk["mode"])
5855 disks.append(disk_dev)
5856 elif template_name == constants.DT_DRBD8:
5857 if len(secondary_nodes) != 1:
5858 raise errors.ProgrammerError("Wrong template configuration")
5859 remote_node = secondary_nodes[0]
5860 minors = lu.cfg.AllocateDRBDMinor(
5861 [primary_node, remote_node] * len(disk_info), instance_name)
5863 names = []
5864 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5865 for i in range(disk_count)]):
5866 names.append(lv_prefix + "_data")
5867 names.append(lv_prefix + "_meta")
5868 for idx, disk in enumerate(disk_info):
5869 disk_index = idx + base_index
5870 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5871 disk["size"], names[idx*2:idx*2+2],
5872 "disk/%d" % disk_index,
5873 minors[idx*2], minors[idx*2+1])
5874 disk_dev.mode = disk["mode"]
5875 disks.append(disk_dev)
5876 elif template_name == constants.DT_FILE:
5877 if len(secondary_nodes) != 0:
5878 raise errors.ProgrammerError("Wrong template configuration")
5880 _RequireFileStorage()
5882 for idx, disk in enumerate(disk_info):
5883 disk_index = idx + base_index
5884 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5885 iv_name="disk/%d" % disk_index,
5886 logical_id=(file_driver,
5887 "%s/disk%d" % (file_storage_dir,
5890 disks.append(disk_dev)
5892 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
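# Illustrative sketch (not part of the original module): a hypothetical
# invocation for a single-disk DRBD8 instance.
#
#   disks = _GenerateDiskTemplate(lu, constants.DT_DRBD8,
#                                 "inst1.example.com", "node1.example.com",
#                                 ["node2.example.com"],
#                                 [{"size": 1024, "mode": constants.DISK_RDWR}],
#                                 None, None, 0)
#   # -> a list with one LD_DRBD8 disk (iv_name "disk/0"), built through
#   #    _GenerateDRBD8Branch above; file_storage_dir/file_driver are unused
#   #    for this template.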
5896 def _GetInstanceInfoText(instance):
5897 """Compute that text that should be added to the disk's metadata.
5900 return "originstname+%s" % instance.name
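# Illustrative note (not in the original source): for an instance named
# "web1.example.com" this returns "originstname+web1.example.com"; the
# string is attached to each volume as an LVM tag so disks can be traced
# back to their owning instance.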
5903 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5904 """Create all disks for an instance.
5906 This abstracts away some work from AddInstance.
5908 @type lu: L{LogicalUnit}
5909 @param lu: the logical unit on whose behalf we execute
5910 @type instance: L{objects.Instance}
5911 @param instance: the instance whose disks we should create
5913 @param to_skip: list of indices to skip
5914 @type target_node: string
5915 @param target_node: if passed, overrides the target node for creation
5917 @return: the success of the creation
5920 info = _GetInstanceInfoText(instance)
5921 if target_node is None:
5922 pnode = instance.primary_node
5923 all_nodes = instance.all_nodes
5924 else:
5925 pnode = target_node
5926 all_nodes = [pnode]
5928 if instance.disk_template == constants.DT_FILE:
5929 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5930 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5932 result.Raise("Failed to create directory '%s' on"
5933 " node %s" % (file_storage_dir, pnode))
5935 # Note: this needs to be kept in sync with adding of disks in
5936 # LUSetInstanceParams
5937 for idx, device in enumerate(instance.disks):
5938 if to_skip and idx in to_skip:
5939 continue
5940 logging.info("Creating volume %s for instance %s",
5941 device.iv_name, instance.name)
5943 for node in all_nodes:
5944 f_create = node == pnode
5945 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
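# Illustrative note (not in the original source): f_create is True only for
# the primary node, so both force_create and force_open are True there and
# False on the other nodes, where creation is driven by the devices'
# CreateOnSecondary() logic inside _CreateBlockDev.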
5948 def _RemoveDisks(lu, instance, target_node=None):
5949 """Remove all disks for an instance.
5951 This abstracts away some work from `AddInstance()` and
5952 `RemoveInstance()`. Note that in case some of the devices couldn't
5953 be removed, the removal will continue with the other ones (compare
5954 with `_CreateDisks()`).
5956 @type lu: L{LogicalUnit}
5957 @param lu: the logical unit on whose behalf we execute
5958 @type instance: L{objects.Instance}
5959 @param instance: the instance whose disks we should remove
5960 @type target_node: string
5961 @param target_node: used to override the node on which to remove the disks
5963 @return: the success of the removal
5966 logging.info("Removing block devices for instance %s", instance.name)
5969 for device in instance.disks:
5970 if target_node:
5971 edata = [(target_node, device)]
5972 else:
5973 edata = device.ComputeNodeTree(instance.primary_node)
5974 for node, disk in edata:
5975 lu.cfg.SetDiskID(disk, node)
5976 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5977 if msg:
5978 lu.LogWarning("Could not remove block device %s on node %s,"
5979 " continuing anyway: %s", device.iv_name, node, msg)
5982 if instance.disk_template == constants.DT_FILE:
5983 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5984 if target_node:
5985 tgt = target_node
5986 else:
5987 tgt = instance.primary_node
5988 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5989 if result.fail_msg:
5990 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5991 file_storage_dir, instance.primary_node, result.fail_msg)
5997 def _ComputeDiskSize(disk_template, disks):
5998 """Compute disk size requirements in the volume group
6001 # Required free disk space as a function of disk and swap space
6002 req_size_dict = {
6003 constants.DT_DISKLESS: None,
6004 constants.DT_PLAIN: sum(d["size"] for d in disks),
6005 # 128 MB are added for drbd metadata for each disk
6006 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6007 constants.DT_FILE: None,
6008 }
6010 if disk_template not in req_size_dict:
6011 raise errors.ProgrammerError("Disk template '%s' size requirement"
6012 " is unknown" % disk_template)
6014 return req_size_dict[disk_template]
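# Illustrative worked example (not in the original source): two disks of
# 1024 MB and 2048 MB.
#   DT_PLAIN:    1024 + 2048 = 3072 MB of free VG space required
#   DT_DRBD8:    (1024 + 128) + (2048 + 128) = 3328 MB (128 MB of DRBD
#                metadata is added per disk)
#   DT_DISKLESS / DT_FILE: None (no volume-group space needed)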
6017 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6018 """Hypervisor parameter validation.
6020 This function abstract the hypervisor parameter validation to be
6021 used in both instance create and instance modify.
6023 @type lu: L{LogicalUnit}
6024 @param lu: the logical unit for which we check
6025 @type nodenames: list
6026 @param nodenames: the list of nodes on which we should check
6027 @type hvname: string
6028 @param hvname: the name of the hypervisor we should use
6029 @type hvparams: dict
6030 @param hvparams: the parameters which we need to check
6031 @raise errors.OpPrereqError: if the parameters are not valid
6034 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6035 hvname,
6036 hvparams)
6037 for node in nodenames:
6038 info = hvinfo[node]
6039 if info.offline:
6040 continue
6041 info.Raise("Hypervisor parameter validation failed on node %s" % node)
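# Illustrative sketch (not part of the original module): a hypothetical call
# from an LU, validating KVM parameters on the instance's nodes before
# applying them; names below are placeholders.
#
#   _CheckHVParams(self, [pnode.name] + self.secondaries,
#                  constants.HT_KVM, filled_hvp)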
6044 class LUCreateInstance(LogicalUnit):
6045 """Create an instance.
6048 HPATH = "instance-add"
6049 HTYPE = constants.HTYPE_INSTANCE
6050 _OP_REQP = ["instance_name", "disks",
6051 "mode", "start",
6052 "wait_for_sync", "ip_check", "nics",
6053 "hvparams", "beparams"]
6054 REQ_BGL = False
6056 def CheckArguments(self):
6060 # set optional parameters to none if they don't exist
6061 for attr in ["pnode", "snode", "iallocator", "hypervisor",
6062 "disk_template", "identify_defaults"]:
6063 if not hasattr(self.op, attr):
6064 setattr(self.op, attr, None)
6066 # do not require name_check to ease forward/backward compatibility
6068 if not hasattr(self.op, "name_check"):
6069 self.op.name_check = True
6070 if not hasattr(self.op, "no_install"):
6071 self.op.no_install = False
6072 if self.op.no_install and self.op.start:
6073 self.LogInfo("No-installation mode selected, disabling startup")
6074 self.op.start = False
6075 # validate/normalize the instance name
6076 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6077 if self.op.ip_check and not self.op.name_check:
6078 # TODO: make the ip check more flexible and not depend on the name check
6079 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6081 # check disk information: either all adopt, or no adopt
6082 has_adopt = has_no_adopt = False
6083 for disk in self.op.disks:
6084 if "adopt" in disk:
6085 has_adopt = True
6086 else:
6087 has_no_adopt = True
6088 if has_adopt and has_no_adopt:
6089 raise errors.OpPrereqError("Either all disks are adopted or none is",
6090 errors.ECODE_INVAL)
6091 if has_adopt:
6092 if self.op.disk_template != constants.DT_PLAIN:
6093 raise errors.OpPrereqError("Disk adoption is only supported for the"
6094 " 'plain' disk template",
6096 if self.op.iallocator is not None:
6097 raise errors.OpPrereqError("Disk adoption not allowed with an"
6098 " iallocator script", errors.ECODE_INVAL)
6099 if self.op.mode == constants.INSTANCE_IMPORT:
6100 raise errors.OpPrereqError("Disk adoption not allowed for"
6101 " instance import", errors.ECODE_INVAL)
6103 self.adopt_disks = has_adopt
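# Illustrative note (not in the original source): with disk adoption every
# disk entry names an existing LV, e.g. (hypothetical values)
#   disks=[{"size": 1024, "adopt": "existing-lv-1"},
#          {"size": 1024, "adopt": "existing-lv-2"}]
# The checks above require the 'plain' disk template, no iallocator and no
# import, and reject mixing adopted with non-adopted disks.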
6105 # verify creation mode
6106 if self.op.mode not in (constants.INSTANCE_CREATE,
6107 constants.INSTANCE_IMPORT):
6108 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6109 self.op.mode, errors.ECODE_INVAL)
6111 # instance name verification
6112 if self.op.name_check:
6113 self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6114 self.op.instance_name = self.hostname1.name
6115 # used in CheckPrereq for ip ping check
6116 self.check_ip = self.hostname1.ip
6117 else:
6118 self.check_ip = None
6120 # file storage checks
6121 if (self.op.file_driver and
6122 not self.op.file_driver in constants.FILE_DRIVER):
6123 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6124 self.op.file_driver, errors.ECODE_INVAL)
6126 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6127 raise errors.OpPrereqError("File storage directory path not absolute",
6128 errors.ECODE_INVAL)
6130 ### Node/iallocator related checks
6131 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6132 raise errors.OpPrereqError("One and only one of iallocator and primary"
6133 " node must be given",
6136 if self.op.mode == constants.INSTANCE_IMPORT:
6137 # On import force_variant must be True, because if we forced it at
6138 # initial install, our only chance when importing it back is that it
6139 # works again!
6140 self.op.force_variant = True
6142 if self.op.no_install:
6143 self.LogInfo("No-installation mode has no effect during import")
6145 else: # INSTANCE_CREATE
6146 if getattr(self.op, "os_type", None) is None:
6147 raise errors.OpPrereqError("No guest OS specified",
6148 errors.ECODE_INVAL)
6149 self.op.force_variant = getattr(self.op, "force_variant", False)
6150 if self.op.disk_template is None:
6151 raise errors.OpPrereqError("No disk template specified",
6152 errors.ECODE_INVAL)
6154 def ExpandNames(self):
6155 """ExpandNames for CreateInstance.
6157 Figure out the right locks for instance creation.
6160 self.needed_locks = {}
6162 instance_name = self.op.instance_name
6163 # this is just a preventive check, but someone might still add this
6164 # instance in the meantime, and creation will fail at lock-add time
6165 if instance_name in self.cfg.GetInstanceList():
6166 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6167 instance_name, errors.ECODE_EXISTS)
6169 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6171 if self.op.iallocator:
6172 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6174 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6175 nodelist = [self.op.pnode]
6176 if self.op.snode is not None:
6177 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6178 nodelist.append(self.op.snode)
6179 self.needed_locks[locking.LEVEL_NODE] = nodelist
6181 # in case of import lock the source node too
6182 if self.op.mode == constants.INSTANCE_IMPORT:
6183 src_node = getattr(self.op, "src_node", None)
6184 src_path = getattr(self.op, "src_path", None)
6186 if src_path is None:
6187 self.op.src_path = src_path = self.op.instance_name
6189 if src_node is None:
6190 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6191 self.op.src_node = None
6192 if os.path.isabs(src_path):
6193 raise errors.OpPrereqError("Importing an instance from an absolute"
6194 " path requires a source node option.",
6197 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6198 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6199 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6200 if not os.path.isabs(src_path):
6201 self.op.src_path = src_path = \
6202 utils.PathJoin(constants.EXPORT_DIR, src_path)
6204 def _RunAllocator(self):
6205 """Run the allocator based on input opcode.
6208 nics = [n.ToDict() for n in self.nics]
6209 ial = IAllocator(self.cfg, self.rpc,
6210 mode=constants.IALLOCATOR_MODE_ALLOC,
6211 name=self.op.instance_name,
6212 disk_template=self.op.disk_template,
6215 vcpus=self.be_full[constants.BE_VCPUS],
6216 mem_size=self.be_full[constants.BE_MEMORY],
6217 disks=self.disks,
6218 nics=nics,
6219 hypervisor=self.op.hypervisor,
6220 )
6222 ial.Run(self.op.iallocator)
6224 if not ial.success:
6225 raise errors.OpPrereqError("Can't compute nodes using"
6226 " iallocator '%s': %s" %
6227 (self.op.iallocator, ial.info),
6228 errors.ECODE_NORES)
6229 if len(ial.result) != ial.required_nodes:
6230 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6231 " of nodes (%s), required %s" %
6232 (self.op.iallocator, len(ial.result),
6233 ial.required_nodes), errors.ECODE_FAULT)
6234 self.op.pnode = ial.result[0]
6235 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6236 self.op.instance_name, self.op.iallocator,
6237 utils.CommaJoin(ial.result))
6238 if ial.required_nodes == 2:
6239 self.op.snode = ial.result[1]
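# Illustrative note (not in the original source): on success ial.result is a
# list of node names; result[0] becomes the primary node and, when the
# allocator was asked for two nodes (mirrored templates), result[1] becomes
# the secondary. Hypothetical value:
#   ial.result == ["node1.example.com", "node2.example.com"]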
6241 def BuildHooksEnv(self):
6242 """Build hooks env.
6244 This runs on master, primary and secondary nodes of the instance.
6246 """
6247 env = {
6248 "ADD_MODE": self.op.mode,
6249 }
6250 if self.op.mode == constants.INSTANCE_IMPORT:
6251 env["SRC_NODE"] = self.op.src_node
6252 env["SRC_PATH"] = self.op.src_path
6253 env["SRC_IMAGES"] = self.src_images
6255 env.update(_BuildInstanceHookEnv(
6256 name=self.op.instance_name,
6257 primary_node=self.op.pnode,
6258 secondary_nodes=self.secondaries,
6259 status=self.op.start,
6260 os_type=self.op.os_type,
6261 memory=self.be_full[constants.BE_MEMORY],
6262 vcpus=self.be_full[constants.BE_VCPUS],
6263 nics=_NICListToTuple(self, self.nics),
6264 disk_template=self.op.disk_template,
6265 disks=[(d["size"], d["mode"]) for d in self.disks],
6268 hypervisor_name=self.op.hypervisor,
6269 ))
6271 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6272 self.secondaries)
6273 return env, nl, nl
6275 def _ReadExportInfo(self):
6276 """Reads the export information from disk.
6278 It will override the opcode source node and path with the actual
6279 information, if these two were not specified before.
6281 @return: the export information
6284 assert self.op.mode == constants.INSTANCE_IMPORT
6286 src_node = self.op.src_node
6287 src_path = self.op.src_path
6289 if src_node is None:
6290 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6291 exp_list = self.rpc.call_export_list(locked_nodes)
6292 found = False
6293 for node in exp_list:
6294 if exp_list[node].fail_msg:
6295 continue
6296 if src_path in exp_list[node].payload:
6297 found = True
6298 self.op.src_node = src_node = node
6299 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6300 src_path)
6301 break
6302 if not found:
6303 raise errors.OpPrereqError("No export found for relative path %s" %
6304 src_path, errors.ECODE_INVAL)
6306 _CheckNodeOnline(self, src_node)
6307 result = self.rpc.call_export_info(src_node, src_path)
6308 result.Raise("No export or invalid export found in dir %s" % src_path)
6310 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6311 if not export_info.has_section(constants.INISECT_EXP):
6312 raise errors.ProgrammerError("Corrupted export config",
6313 errors.ECODE_ENVIRON)
6315 ei_version = export_info.get(constants.INISECT_EXP, "version")
6316 if (int(ei_version) != constants.EXPORT_VERSION):
6317 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6318 (ei_version, constants.EXPORT_VERSION),
6319 errors.ECODE_ENVIRON)
6320 return export_info
6322 def _ReadExportParams(self, einfo):
6323 """Use export parameters as defaults.
6325 In case the opcode doesn't specify (as in override) some instance
6326 parameters, then try to use them from the export information, if
6327 that declares them.
6329 """
6330 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6332 if self.op.disk_template is None:
6333 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6334 self.op.disk_template = einfo.get(constants.INISECT_INS,
6335 "disk_template")
6336 else:
6337 raise errors.OpPrereqError("No disk template specified and the export"
6338 " is missing the disk_template information",
6339 errors.ECODE_INVAL)
6341 if not self.op.disks:
6342 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6343 disks = []
6344 # TODO: import the disk iv_name too
6345 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6346 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6347 disks.append({"size": disk_sz})
6348 self.op.disks = disks
6349 else:
6350 raise errors.OpPrereqError("No disk info specified and the export"
6351 " is missing the disk information",
6352 errors.ECODE_INVAL)
6354 if (not self.op.nics and
6355 einfo.has_option(constants.INISECT_INS, "nic_count")):
6356 nics = []
6357 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6358 ndict = {}
6359 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6360 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6361 ndict[name] = v
6362 nics.append(ndict)
6363 self.op.nics = nics
6365 if (self.op.hypervisor is None and
6366 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6367 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6368 if einfo.has_section(constants.INISECT_HYP):
6369 # use the export parameters but do not override the ones
6370 # specified by the user
6371 for name, value in einfo.items(constants.INISECT_HYP):
6372 if name not in self.op.hvparams:
6373 self.op.hvparams[name] = value
6375 if einfo.has_section(constants.INISECT_BEP):
6376 # use the parameters, without overriding
6377 for name, value in einfo.items(constants.INISECT_BEP):
6378 if name not in self.op.beparams:
6379 self.op.beparams[name] = value
6380 else:
6381 # try to read the parameters old style, from the main section
6382 for name in constants.BES_PARAMETERS:
6383 if (name not in self.op.beparams and
6384 einfo.has_option(constants.INISECT_INS, name)):
6385 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6387 def _RevertToDefaults(self, cluster):
6388 """Revert the instance parameters to the default values.
6392 hv_defs = cluster.GetHVDefaults(self.op.hypervisor, self.op.os_type)
6393 for name in self.op.hvparams.keys():
6394 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6395 del self.op.hvparams[name]
6397 be_defs = cluster.beparams.get(constants.PP_DEFAULT, {})
6398 for name in self.op.beparams.keys():
6399 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6400 del self.op.beparams[name]
6402 nic_defs = cluster.nicparams.get(constants.PP_DEFAULT, {})
6403 for nic in self.op.nics:
6404 for name in constants.NICS_PARAMETERS:
6405 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6406 del nic[name]
6408 def CheckPrereq(self):
6409 """Check prerequisites.
6412 if self.op.mode == constants.INSTANCE_IMPORT:
6413 export_info = self._ReadExportInfo()
6414 self._ReadExportParams(export_info)
6416 _CheckDiskTemplate(self.op.disk_template)
6418 if (not self.cfg.GetVGName() and
6419 self.op.disk_template not in constants.DTS_NOT_LVM):
6420 raise errors.OpPrereqError("Cluster does not support lvm-based"
6421 " instances", errors.ECODE_STATE)
6423 if self.op.hypervisor is None:
6424 self.op.hypervisor = self.cfg.GetHypervisorType()
6426 cluster = self.cfg.GetClusterInfo()
6427 enabled_hvs = cluster.enabled_hypervisors
6428 if self.op.hypervisor not in enabled_hvs:
6429 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6430 " cluster (%s)" % (self.op.hypervisor,
6431 ",".join(enabled_hvs)),
6434 # check hypervisor parameter syntax (locally)
6435 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6436 filled_hvp = objects.FillDict(cluster.GetHVDefaults(self.op.hypervisor,
6439 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6440 hv_type.CheckParameterSyntax(filled_hvp)
6441 self.hv_full = filled_hvp
6442 # check that we don't specify global parameters on an instance
6443 _CheckGlobalHvParams(self.op.hvparams)
6445 # fill and remember the beparams dict
6446 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6447 self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6450 # now that hvp/bep are in final format, let's reset to defaults,
6451 # if told to do so
6452 if self.op.identify_defaults:
6453 self._RevertToDefaults(cluster)
6455 # NIC buildup
6456 self.nics = []
6457 for idx, nic in enumerate(self.op.nics):
6458 nic_mode_req = nic.get("mode", None)
6459 nic_mode = nic_mode_req
6460 if nic_mode is None:
6461 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6463 # in routed mode, for the first nic, the default ip is 'auto'
6464 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6465 default_ip_mode = constants.VALUE_AUTO
6467 default_ip_mode = constants.VALUE_NONE
6469 # ip validity checks
6470 ip = nic.get("ip", default_ip_mode)
6471 if ip is None or ip.lower() == constants.VALUE_NONE:
6472 nic_ip = None
6473 elif ip.lower() == constants.VALUE_AUTO:
6474 if not self.op.name_check:
6475 raise errors.OpPrereqError("IP address set to auto but name checks"
6476 " have been skipped. Aborting.",
6478 nic_ip = self.hostname1.ip
6480 if not utils.IsValidIP(ip):
6481 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6482 " like a valid IP" % ip,
6486 # TODO: check the ip address for uniqueness
6487 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6488 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6489 errors.ECODE_INVAL)
6491 # MAC address verification
6492 mac = nic.get("mac", constants.VALUE_AUTO)
6493 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6494 mac = utils.NormalizeAndValidateMac(mac)
6496 try:
6497 self.cfg.ReserveMAC(mac, self.proc.GetECId())
6498 except errors.ReservationError:
6499 raise errors.OpPrereqError("MAC address %s already in use"
6500 " in cluster" % mac,
6501 errors.ECODE_NOTUNIQUE)
6503 # bridge verification
6504 bridge = nic.get("bridge", None)
6505 link = nic.get("link", None)
6506 if bridge and link:
6507 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6508 " at the same time", errors.ECODE_INVAL)
6509 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6510 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6511 errors.ECODE_INVAL)
6512 elif bridge:
6513 link = bridge
6515 nicparams = {}
6516 if nic_mode_req:
6517 nicparams[constants.NIC_MODE] = nic_mode_req
6518 if link:
6519 nicparams[constants.NIC_LINK] = link
6521 check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6522 nicparams)
6523 objects.NIC.CheckParameterSyntax(check_params)
6524 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6526 # disk checks/pre-build
6527 self.disks = []
6528 for disk in self.op.disks:
6529 mode = disk.get("mode", constants.DISK_RDWR)
6530 if mode not in constants.DISK_ACCESS_SET:
6531 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6532 mode, errors.ECODE_INVAL)
6533 size = disk.get("size", None)
6534 if size is None:
6535 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6536 try:
6537 size = int(size)
6538 except (TypeError, ValueError):
6539 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6540 errors.ECODE_INVAL)
6541 new_disk = {"size": size, "mode": mode}
6542 if "adopt" in disk:
6543 new_disk["adopt"] = disk["adopt"]
6544 self.disks.append(new_disk)
6546 if self.op.mode == constants.INSTANCE_IMPORT:
6548 # Check that the new instance doesn't have fewer disks than the export
6549 instance_disks = len(self.disks)
6550 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6551 if instance_disks < export_disks:
6552 raise errors.OpPrereqError("Not enough disks to import."
6553 " (instance: %d, export: %d)" %
6554 (instance_disks, export_disks),
6555 errors.ECODE_INVAL)
6557 disk_images = []
6558 for idx in range(export_disks):
6559 option = 'disk%d_dump' % idx
6560 if export_info.has_option(constants.INISECT_INS, option):
6561 # FIXME: are the old os-es, disk sizes, etc. useful?
6562 export_name = export_info.get(constants.INISECT_INS, option)
6563 image = utils.PathJoin(self.op.src_path, export_name)
6564 disk_images.append(image)
6565 else:
6566 disk_images.append(False)
6568 self.src_images = disk_images
6570 old_name = export_info.get(constants.INISECT_INS, 'name')
6571 try:
6572 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6573 except (TypeError, ValueError), err:
6574 raise errors.OpPrereqError("Invalid export file, nic_count is not"
6575 " an integer: %s" % str(err),
6576 errors.ECODE_INVAL)
6577 if self.op.instance_name == old_name:
6578 for idx, nic in enumerate(self.nics):
6579 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6580 nic_mac_ini = 'nic%d_mac' % idx
6581 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6583 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6585 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6586 if self.op.ip_check:
6587 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6588 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6589 (self.check_ip, self.op.instance_name),
6590 errors.ECODE_NOTUNIQUE)
6592 #### mac address generation
6593 # By generating here the mac address both the allocator and the hooks get
6594 # the real final mac address rather than the 'auto' or 'generate' value.
6595 # There is a race condition between the generation and the instance object
6596 # creation, which means that we know the mac is valid now, but we're not
6597 # sure it will be when we actually add the instance. If things go bad
6598 # adding the instance will abort because of a duplicate mac, and the
6599 # creation job will fail.
6600 for nic in self.nics:
6601 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6602 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6606 if self.op.iallocator is not None:
6607 self._RunAllocator()
6609 #### node related checks
6611 # check primary node
6612 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6613 assert self.pnode is not None, \
6614 "Cannot retrieve locked node %s" % self.op.pnode
6616 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6617 pnode.name, errors.ECODE_STATE)
6619 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6620 pnode.name, errors.ECODE_STATE)
6622 self.secondaries = []
6624 # mirror node verification
6625 if self.op.disk_template in constants.DTS_NET_MIRROR:
6626 if self.op.snode is None:
6627 raise errors.OpPrereqError("The networked disk templates need"
6628 " a mirror node", errors.ECODE_INVAL)
6629 if self.op.snode == pnode.name:
6630 raise errors.OpPrereqError("The secondary node cannot be the"
6631 " primary node.", errors.ECODE_INVAL)
6632 _CheckNodeOnline(self, self.op.snode)
6633 _CheckNodeNotDrained(self, self.op.snode)
6634 self.secondaries.append(self.op.snode)
6636 nodenames = [pnode.name] + self.secondaries
6638 req_size = _ComputeDiskSize(self.op.disk_template,
6639 self.disks)
6641 # Check lv size requirements, if not adopting
6642 if req_size is not None and not self.adopt_disks:
6643 _CheckNodesFreeDisk(self, nodenames, req_size)
6645 if self.adopt_disks: # instead, we must check the adoption data
6646 all_lvs = set([i["adopt"] for i in self.disks])
6647 if len(all_lvs) != len(self.disks):
6648 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6649 errors.ECODE_INVAL)
6650 for lv_name in all_lvs:
6651 try:
6652 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6653 except errors.ReservationError:
6654 raise errors.OpPrereqError("LV named %s used by another instance" %
6655 lv_name, errors.ECODE_NOTUNIQUE)
6657 node_lvs = self.rpc.call_lv_list([pnode.name],
6658 self.cfg.GetVGName())[pnode.name]
6659 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6660 node_lvs = node_lvs.payload
6661 delta = all_lvs.difference(node_lvs.keys())
6662 if delta:
6663 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6664 utils.CommaJoin(delta),
6665 errors.ECODE_INVAL)
6666 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6667 if online_lvs:
6668 raise errors.OpPrereqError("Online logical volumes found, cannot"
6669 " adopt: %s" % utils.CommaJoin(online_lvs),
6670 errors.ECODE_STATE)
6671 # update the size of disk based on what is found
6672 for dsk in self.disks:
6673 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6675 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6677 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6679 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6681 # memory check on primary node
6682 if self.op.start:
6683 _CheckNodeFreeMemory(self, self.pnode.name,
6684 "creating instance %s" % self.op.instance_name,
6685 self.be_full[constants.BE_MEMORY],
6686 self.op.hypervisor)
6688 self.dry_run_result = list(nodenames)
6690 def Exec(self, feedback_fn):
6691 """Create and add the instance to the cluster.
6694 instance = self.op.instance_name
6695 pnode_name = self.pnode.name
6697 ht_kind = self.op.hypervisor
6698 if ht_kind in constants.HTS_REQ_PORT:
6699 network_port = self.cfg.AllocatePort()
6700 else:
6701 network_port = None
6703 if constants.ENABLE_FILE_STORAGE:
6704 # this is needed because os.path.join does not accept None arguments
6705 if self.op.file_storage_dir is None:
6706 string_file_storage_dir = ""
6708 string_file_storage_dir = self.op.file_storage_dir
6710 # build the full file storage dir path
6711 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6712 string_file_storage_dir, instance)
6714 file_storage_dir = ""
6716 disks = _GenerateDiskTemplate(self,
6717 self.op.disk_template,
6718 instance, pnode_name,
6719 self.secondaries,
6720 self.disks,
6721 file_storage_dir,
6722 self.op.file_driver,
6723 0)
6725 iobj = objects.Instance(name=instance, os=self.op.os_type,
6726 primary_node=pnode_name,
6727 nics=self.nics, disks=disks,
6728 disk_template=self.op.disk_template,
6729 admin_up=False,
6730 network_port=network_port,
6731 beparams=self.op.beparams,
6732 hvparams=self.op.hvparams,
6733 hypervisor=self.op.hypervisor,
6734 )
6736 if self.adopt_disks:
6737 # rename LVs to the newly-generated names; we need to construct
6738 # 'fake' LV disks with the old data, plus the new unique_id
6739 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6740 rename_to = []
6741 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
6742 rename_to.append(t_dsk.logical_id)
6743 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6744 self.cfg.SetDiskID(t_dsk, pnode_name)
6745 result = self.rpc.call_blockdev_rename(pnode_name,
6746 zip(tmp_disks, rename_to))
6747 result.Raise("Failed to rename adopted LVs")
6749 feedback_fn("* creating instance disks...")
6750 try:
6751 _CreateDisks(self, iobj)
6752 except errors.OpExecError:
6753 self.LogWarning("Device creation failed, reverting...")
6754 try:
6755 _RemoveDisks(self, iobj)
6756 finally:
6757 self.cfg.ReleaseDRBDMinors(instance)
6758 raise
6760 feedback_fn("adding instance %s to cluster config" % instance)
6762 self.cfg.AddInstance(iobj, self.proc.GetECId())
6764 # Declare that we don't want to remove the instance lock anymore, as we've
6765 # added the instance to the config
6766 del self.remove_locks[locking.LEVEL_INSTANCE]
6767 # Unlock all the nodes
6768 if self.op.mode == constants.INSTANCE_IMPORT:
6769 nodes_keep = [self.op.src_node]
6770 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6771 if node != self.op.src_node]
6772 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6773 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6774 else:
6775 self.context.glm.release(locking.LEVEL_NODE)
6776 del self.acquired_locks[locking.LEVEL_NODE]
6778 if self.op.wait_for_sync:
6779 disk_abort = not _WaitForSync(self, iobj)
6780 elif iobj.disk_template in constants.DTS_NET_MIRROR:
6781 # make sure the disks are not degraded (still sync-ing is ok)
6782 time.sleep(15)
6783 feedback_fn("* checking mirrors status")
6784 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6785 else:
6786 disk_abort = False
6788 if disk_abort:
6789 _RemoveDisks(self, iobj)
6790 self.cfg.RemoveInstance(iobj.name)
6791 # Make sure the instance lock gets removed
6792 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6793 raise errors.OpExecError("There are some degraded disks for"
6794 " this instance")
6796 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6797 if self.op.mode == constants.INSTANCE_CREATE:
6798 if not self.op.no_install:
6799 feedback_fn("* running the instance OS create scripts...")
6800 # FIXME: pass debug option from opcode to backend
6801 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6802 self.op.debug_level)
6803 result.Raise("Could not add os for instance %s"
6804 " on node %s" % (instance, pnode_name))
6806 elif self.op.mode == constants.INSTANCE_IMPORT:
6807 feedback_fn("* running the instance OS import scripts...")
6808 src_node = self.op.src_node
6809 src_images = self.src_images
6810 cluster_name = self.cfg.GetClusterName()
6811 # FIXME: pass debug option from opcode to backend
6812 import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6813 src_node, src_images,
6814 cluster_name,
6815 self.op.debug_level)
6816 msg = import_result.fail_msg
6817 if msg:
6818 self.LogWarning("Error while importing the disk images for instance"
6819 " %s on node %s: %s" % (instance, pnode_name, msg))
6820 else:
6821 # also checked in the prereq part
6822 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6823 % self.op.mode)
6825 if self.op.start:
6826 iobj.admin_up = True
6827 self.cfg.Update(iobj, feedback_fn)
6828 logging.info("Starting instance %s on node %s", instance, pnode_name)
6829 feedback_fn("* starting instance...")
6830 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6831 result.Raise("Could not start instance")
6833 return list(iobj.all_nodes)
6836 class LUConnectConsole(NoHooksLU):
6837 """Connect to an instance's console.
6839 This is somewhat special in that it returns the command line that
6840 you need to run on the master node in order to connect to the
6841 console.
6843 """
6844 _OP_REQP = ["instance_name"]
6845 REQ_BGL = False
6847 def ExpandNames(self):
6848 self._ExpandAndLockInstance()
6850 def CheckPrereq(self):
6851 """Check prerequisites.
6853 This checks that the instance is in the cluster.
6856 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6857 assert self.instance is not None, \
6858 "Cannot retrieve locked instance %s" % self.op.instance_name
6859 _CheckNodeOnline(self, self.instance.primary_node)
6861 def Exec(self, feedback_fn):
6862 """Connect to the console of an instance
6865 instance = self.instance
6866 node = instance.primary_node
6868 node_insts = self.rpc.call_instance_list([node],
6869 [instance.hypervisor])[node]
6870 node_insts.Raise("Can't get node information from %s" % node)
6872 if instance.name not in node_insts.payload:
6873 raise errors.OpExecError("Instance %s is not running." % instance.name)
6875 logging.debug("Connecting to console of %s on %s", instance.name, node)
6877 hyper = hypervisor.GetHypervisor(instance.hypervisor)
6878 cluster = self.cfg.GetClusterInfo()
6879 # beparams and hvparams are passed separately, to avoid editing the
6880 # instance and then saving the defaults in the instance itself.
6881 hvparams = cluster.FillHV(instance)
6882 beparams = cluster.FillBE(instance)
6883 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6886 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
6889 class LUReplaceDisks(LogicalUnit):
6890 """Replace the disks of an instance.
6893 HPATH = "mirrors-replace"
6894 HTYPE = constants.HTYPE_INSTANCE
6895 _OP_REQP = ["instance_name", "mode", "disks"]
6896 REQ_BGL = False
6898 def CheckArguments(self):
6899 if not hasattr(self.op, "remote_node"):
6900 self.op.remote_node = None
6901 if not hasattr(self.op, "iallocator"):
6902 self.op.iallocator = None
6903 if not hasattr(self.op, "early_release"):
6904 self.op.early_release = False
6906 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6907 self.op.iallocator)
6909 def ExpandNames(self):
6910 self._ExpandAndLockInstance()
6912 if self.op.iallocator is not None:
6913 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6915 elif self.op.remote_node is not None:
6916 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6917 self.op.remote_node = remote_node
6919 # Warning: do not remove the locking of the new secondary here
6920 # unless DRBD8.AddChildren is changed to work in parallel;
6921 # currently it doesn't since parallel invocations of
6922 # FindUnusedMinor will conflict
6923 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6924 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6926 else:
6927 self.needed_locks[locking.LEVEL_NODE] = []
6928 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6930 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6931 self.op.iallocator, self.op.remote_node,
6932 self.op.disks, False, self.op.early_release)
6934 self.tasklets = [self.replacer]
6936 def DeclareLocks(self, level):
6937 # If we're not already locking all nodes in the set we have to declare the
6938 # instance's primary/secondary nodes.
6939 if (level == locking.LEVEL_NODE and
6940 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6941 self._LockInstancesNodes()
6943 def BuildHooksEnv(self):
6944 """Build hooks env.
6946 This runs on the master, the primary and all the secondaries.
6948 """
6949 instance = self.replacer.instance
6950 env = {
6951 "MODE": self.op.mode,
6952 "NEW_SECONDARY": self.op.remote_node,
6953 "OLD_SECONDARY": instance.secondary_nodes[0],
6954 }
6955 env.update(_BuildInstanceHookEnvByObject(self, instance))
6956 nl = [
6957 self.cfg.GetMasterNode(),
6958 instance.primary_node,
6959 ]
6960 if self.op.remote_node is not None:
6961 nl.append(self.op.remote_node)
6962 return env, nl, nl
6965 class LUEvacuateNode(LogicalUnit):
6966 """Relocate the secondary instances from a node.
6969 HPATH = "node-evacuate"
6970 HTYPE = constants.HTYPE_NODE
6971 _OP_REQP = ["node_name"]
6972 REQ_BGL = False
6974 def CheckArguments(self):
6975 if not hasattr(self.op, "remote_node"):
6976 self.op.remote_node = None
6977 if not hasattr(self.op, "iallocator"):
6978 self.op.iallocator = None
6979 if not hasattr(self.op, "early_release"):
6980 self.op.early_release = False
6982 TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6983 self.op.remote_node,
6984 self.op.iallocator)
6986 def ExpandNames(self):
6987 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6989 self.needed_locks = {}
6991 # Declare node locks
6992 if self.op.iallocator is not None:
6993 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6995 elif self.op.remote_node is not None:
6996 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6998 # Warning: do not remove the locking of the new secondary here
6999 # unless DRBD8.AddChildren is changed to work in parallel;
7000 # currently it doesn't since parallel invocations of
7001 # FindUnusedMinor will conflict
7002 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
7003 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7005 else:
7006 raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
7008 # Create tasklets for replacing disks for all secondary instances on this
7009 # node
7010 names = []
7011 tasklets = []
7013 for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
7014 logging.debug("Replacing disks for instance %s", inst.name)
7015 names.append(inst.name)
7017 replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
7018 self.op.iallocator, self.op.remote_node, [],
7019 True, self.op.early_release)
7020 tasklets.append(replacer)
7022 self.tasklets = tasklets
7023 self.instance_names = names
7025 # Declare instance locks
7026 self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
7028 def DeclareLocks(self, level):
7029 # If we're not already locking all nodes in the set we have to declare the
7030 # instance's primary/secondary nodes.
7031 if (level == locking.LEVEL_NODE and
7032 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7033 self._LockInstancesNodes()
7035 def BuildHooksEnv(self):
7036 """Build hooks env.
7038 This runs on the master, the primary and all the secondaries.
7040 """
7041 env = {
7042 "NODE_NAME": self.op.node_name,
7043 }
7045 nl = [self.cfg.GetMasterNode()]
7047 if self.op.remote_node is not None:
7048 env["NEW_SECONDARY"] = self.op.remote_node
7049 nl.append(self.op.remote_node)
7051 return (env, nl, nl)
7054 class TLReplaceDisks(Tasklet):
7055 """Replaces disks for an instance.
7057 Note: Locking is not within the scope of this class.
7060 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7061 disks, delay_iallocator, early_release):
7062 """Initializes this class.
7065 Tasklet.__init__(self, lu)
7067 # Parameters
7068 self.instance_name = instance_name
7069 self.mode = mode
7070 self.iallocator_name = iallocator_name
7071 self.remote_node = remote_node
7072 self.disks = disks
7073 self.delay_iallocator = delay_iallocator
7074 self.early_release = early_release
7076 # Runtime data
7077 self.instance = None
7078 self.new_node = None
7079 self.target_node = None
7080 self.other_node = None
7081 self.remote_node_info = None
7082 self.node_secondary_ip = None
7084 @staticmethod
7085 def CheckArguments(mode, remote_node, iallocator):
7086 """Helper function for users of this class.
7089 # check for valid parameter combination
7090 if mode == constants.REPLACE_DISK_CHG:
7091 if remote_node is None and iallocator is None:
7092 raise errors.OpPrereqError("When changing the secondary either an"
7093 " iallocator script must be used or the"
7094 " new node given", errors.ECODE_INVAL)
7096 if remote_node is not None and iallocator is not None:
7097 raise errors.OpPrereqError("Give either the iallocator or the new"
7098 " secondary, not both", errors.ECODE_INVAL)
7100 elif remote_node is not None or iallocator is not None:
7101 # Not replacing the secondary
7102 raise errors.OpPrereqError("The iallocator and new node options can"
7103 " only be used when changing the"
7104 " secondary node", errors.ECODE_INVAL)
7106 @staticmethod
7107 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7108 """Compute a new secondary node using an IAllocator.
7111 ial = IAllocator(lu.cfg, lu.rpc,
7112 mode=constants.IALLOCATOR_MODE_RELOC,
7113 name=instance_name,
7114 relocate_from=relocate_from)
7116 ial.Run(iallocator_name)
7118 if not ial.success:
7119 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7120 " %s" % (iallocator_name, ial.info),
7121 errors.ECODE_NORES)
7123 if len(ial.result) != ial.required_nodes:
7124 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7125 " of nodes (%s), required %s" %
7126 (iallocator_name,
7127 len(ial.result), ial.required_nodes),
7128 errors.ECODE_FAULT)
7130 remote_node_name = ial.result[0]
7132 lu.LogInfo("Selected new secondary for instance '%s': %s",
7133 instance_name, remote_node_name)
7135 return remote_node_name
7137 def _FindFaultyDisks(self, node_name):
7138 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7139 node_name, True)
7141 def CheckPrereq(self):
7142 """Check prerequisites.
7144 This checks that the instance is in the cluster.
7147 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7148 assert instance is not None, \
7149 "Cannot retrieve locked instance %s" % self.instance_name
7151 if instance.disk_template != constants.DT_DRBD8:
7152 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7153 " instances", errors.ECODE_INVAL)
7155 if len(instance.secondary_nodes) != 1:
7156 raise errors.OpPrereqError("The instance has a strange layout,"
7157 " expected one secondary but found %d" %
7158 len(instance.secondary_nodes),
7159 errors.ECODE_STATE)
7161 if not self.delay_iallocator:
7162 self._CheckPrereq2()
7164 def _CheckPrereq2(self):
7165 """Check prerequisites, second part.
7167 This function should always be part of CheckPrereq. It was separated and is
7168 now called from Exec because during node evacuation iallocator was only
7169 called with an unmodified cluster model, not taking planned changes into
7170 account.
7172 """
7173 instance = self.instance
7174 secondary_node = instance.secondary_nodes[0]
7176 if self.iallocator_name is None:
7177 remote_node = self.remote_node
7178 else:
7179 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7180 instance.name, instance.secondary_nodes)
7182 if remote_node is not None:
7183 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7184 assert self.remote_node_info is not None, \
7185 "Cannot retrieve locked node %s" % remote_node
7186 else:
7187 self.remote_node_info = None
7189 if remote_node == self.instance.primary_node:
7190 raise errors.OpPrereqError("The specified node is the primary node of"
7191 " the instance.", errors.ECODE_INVAL)
7193 if remote_node == secondary_node:
7194 raise errors.OpPrereqError("The specified node is already the"
7195 " secondary node of the instance.",
7198 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7199 constants.REPLACE_DISK_CHG):
7200 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7201 errors.ECODE_INVAL)
7203 if self.mode == constants.REPLACE_DISK_AUTO:
7204 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7205 faulty_secondary = self._FindFaultyDisks(secondary_node)
7207 if faulty_primary and faulty_secondary:
7208 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7209 " one node and can not be repaired"
7210 " automatically" % self.instance_name,
7214 self.disks = faulty_primary
7215 self.target_node = instance.primary_node
7216 self.other_node = secondary_node
7217 check_nodes = [self.target_node, self.other_node]
7218 elif faulty_secondary:
7219 self.disks = faulty_secondary
7220 self.target_node = secondary_node
7221 self.other_node = instance.primary_node
7222 check_nodes = [self.target_node, self.other_node]
7223 else:
7224 self.disks = []
7225 check_nodes = []
7227 else:
7228 # Non-automatic modes
7229 if self.mode == constants.REPLACE_DISK_PRI:
7230 self.target_node = instance.primary_node
7231 self.other_node = secondary_node
7232 check_nodes = [self.target_node, self.other_node]
7234 elif self.mode == constants.REPLACE_DISK_SEC:
7235 self.target_node = secondary_node
7236 self.other_node = instance.primary_node
7237 check_nodes = [self.target_node, self.other_node]
7239 elif self.mode == constants.REPLACE_DISK_CHG:
7240 self.new_node = remote_node
7241 self.other_node = instance.primary_node
7242 self.target_node = secondary_node
7243 check_nodes = [self.new_node, self.other_node]
7245 _CheckNodeNotDrained(self.lu, remote_node)
7247 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7248 assert old_node_info is not None
7249 if old_node_info.offline and not self.early_release:
7250 # doesn't make sense to delay the release
7251 self.early_release = True
7252 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7253 " early-release mode", secondary_node)
7255 else:
7256 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7257 self.mode)
7259 # If not specified all disks should be replaced
7260 if not self.disks:
7261 self.disks = range(len(self.instance.disks))
7263 for node in check_nodes:
7264 _CheckNodeOnline(self.lu, node)
7266 # Check whether disks are valid
7267 for disk_idx in self.disks:
7268 instance.FindDisk(disk_idx)
7270 # Get secondary node IP addresses
7271 node_2nd_ip = {}
7273 for node_name in [self.target_node, self.other_node, self.new_node]:
7274 if node_name is not None:
7275 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7277 self.node_secondary_ip = node_2nd_ip
7279 def Exec(self, feedback_fn):
7280 """Execute disk replacement.
7282 This dispatches the disk replacement to the appropriate handler.
7285 if self.delay_iallocator:
7286 self._CheckPrereq2()
7288 if not self.disks:
7289 feedback_fn("No disks need replacement")
7290 return
7292 feedback_fn("Replacing disk(s) %s for %s" %
7293 (utils.CommaJoin(self.disks), self.instance.name))
7295 activate_disks = (not self.instance.admin_up)
7297 # Activate the instance disks if we're replacing them on a down instance
7298 if activate_disks:
7299 _StartInstanceDisks(self.lu, self.instance, True)
7301 try:
7302 # Should we replace the secondary node?
7303 if self.new_node is not None:
7304 fn = self._ExecDrbd8Secondary
7305 else:
7306 fn = self._ExecDrbd8DiskOnly
7308 return fn(feedback_fn)
7310 finally:
7311 # Deactivate the instance disks if we're replacing them on a
7312 # down instance
7313 if activate_disks:
7314 _SafeShutdownInstanceDisks(self.lu, self.instance)
7316 def _CheckVolumeGroup(self, nodes):
7317 self.lu.LogInfo("Checking volume groups")
7319 vgname = self.cfg.GetVGName()
7321 # Make sure volume group exists on all involved nodes
7322 results = self.rpc.call_vg_list(nodes)
7323 if not results:
7324 raise errors.OpExecError("Can't list volume groups on the nodes")
7326 for node in nodes:
7327 res = results[node]
7328 res.Raise("Error checking node %s" % node)
7329 if vgname not in res.payload:
7330 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7331 (vgname, node))
7333 def _CheckDisksExistence(self, nodes):
7334 # Check disk existence
7335 for idx, dev in enumerate(self.instance.disks):
7336 if idx not in self.disks:
7337 continue
7339 for node in nodes:
7340 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7341 self.cfg.SetDiskID(dev, node)
7343 result = self.rpc.call_blockdev_find(node, dev)
7345 msg = result.fail_msg
7346 if msg or not result.payload:
7347 if not msg:
7348 msg = "disk not found"
7349 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7350 (idx, node, msg))
7352 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7353 for idx, dev in enumerate(self.instance.disks):
7354 if idx not in self.disks:
7355 continue
7357 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7358 (idx, node_name))
7360 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7361 ldisk=ldisk):
7362 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7363 " replace disks for instance %s" %
7364 (node_name, self.instance.name))
7366 def _CreateNewStorage(self, node_name):
7367 vgname = self.cfg.GetVGName()
7368 iv_names = {}
7370 for idx, dev in enumerate(self.instance.disks):
7371 if idx not in self.disks:
7372 continue
7374 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7376 self.cfg.SetDiskID(dev, node_name)
7378 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7379 names = _GenerateUniqueNames(self.lu, lv_names)
7381 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7382 logical_id=(vgname, names[0]))
7383 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7384 logical_id=(vgname, names[1]))
7386 new_lvs = [lv_data, lv_meta]
7387 old_lvs = dev.children
7388 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7390 # we pass force_create=True to force the LVM creation
7391 for new_lv in new_lvs:
7392 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7393 _GetInstanceInfoText(self.instance), False)
7395 return iv_names
7397 def _CheckDevices(self, node_name, iv_names):
7398 for name, (dev, _, _) in iv_names.iteritems():
7399 self.cfg.SetDiskID(dev, node_name)
7401 result = self.rpc.call_blockdev_find(node_name, dev)
7403 msg = result.fail_msg
7404 if msg or not result.payload:
7405 if not msg:
7406 msg = "disk not found"
7407 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7408 (name, msg))
7410 if result.payload.is_degraded:
7411 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7413 def _RemoveOldStorage(self, node_name, iv_names):
7414 for name, (_, old_lvs, _) in iv_names.iteritems():
7415 self.lu.LogInfo("Remove logical volumes for %s" % name)
7417 for lv in old_lvs:
7418 self.cfg.SetDiskID(lv, node_name)
7420 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7421 if msg:
7422 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7423 hint="remove unused LVs manually")
7425 def _ReleaseNodeLock(self, node_name):
7426 """Releases the lock for a given node."""
7427 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7429 def _ExecDrbd8DiskOnly(self, feedback_fn):
7430 """Replace a disk on the primary or secondary for DRBD 8.
7432 The algorithm for replace is quite complicated:
7434 1. for each disk to be replaced:
7436 1. create new LVs on the target node with unique names
7437 1. detach old LVs from the drbd device
7438 1. rename old LVs to name_replaced.<time_t>
7439 1. rename new LVs to old LVs
7440 1. attach the new LVs (with the old names now) to the drbd device
7442 1. wait for sync across all devices
7444 1. for each modified disk:
7446 1. remove old LVs (which have the name name_replaces.<time_t>)
7448 Failures are not very well handled.
7450 """
7451 steps_total = 6
7453 # Step: check device activation
7454 self.lu.LogStep(1, steps_total, "Check device existence")
7455 self._CheckDisksExistence([self.other_node, self.target_node])
7456 self._CheckVolumeGroup([self.target_node, self.other_node])
7458 # Step: check other node consistency
7459 self.lu.LogStep(2, steps_total, "Check peer consistency")
7460 self._CheckDisksConsistency(self.other_node,
7461 self.other_node == self.instance.primary_node,
7462 False)
7464 # Step: create new storage
7465 self.lu.LogStep(3, steps_total, "Allocate new storage")
7466 iv_names = self._CreateNewStorage(self.target_node)
7468 # Step: for each lv, detach+rename*2+attach
7469 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7470 for dev, old_lvs, new_lvs in iv_names.itervalues():
7471 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7473 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7474 old_lvs)
7475 result.Raise("Can't detach drbd from local storage on node"
7476 " %s for device %s" % (self.target_node, dev.iv_name))
7478 #cfg.Update(instance)
7480 # ok, we created the new LVs, so now we know we have the needed
7481 # storage; as such, we proceed on the target node to rename
7482 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7483 # using the assumption that logical_id == physical_id (which in
7484 # turn is the unique_id on that node)
7486 # FIXME(iustin): use a better name for the replaced LVs
7487 temp_suffix = int(time.time())
7488 ren_fn = lambda d, suff: (d.physical_id[0],
7489 d.physical_id[1] + "_replaced-%s" % suff)
7491 # Build the rename list based on what LVs exist on the node
7492 rename_old_to_new = []
7493 for to_ren in old_lvs:
7494 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7495 if not result.fail_msg and result.payload:
7497 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7499 self.lu.LogInfo("Renaming the old LVs on the target node")
7500 result = self.rpc.call_blockdev_rename(self.target_node,
7501 rename_old_to_new)
7502 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7504 # Now we rename the new LVs to the old LVs
7505 self.lu.LogInfo("Renaming the new LVs on the target node")
7506 rename_new_to_old = [(new, old.physical_id)
7507 for old, new in zip(old_lvs, new_lvs)]
7508 result = self.rpc.call_blockdev_rename(self.target_node,
7509 rename_new_to_old)
7510 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7512 for old, new in zip(old_lvs, new_lvs):
7513 new.logical_id = old.logical_id
7514 self.cfg.SetDiskID(new, self.target_node)
7516 for disk in old_lvs:
7517 disk.logical_id = ren_fn(disk, temp_suffix)
7518 self.cfg.SetDiskID(disk, self.target_node)
7520 # Now that the new lvs have the old name, we can add them to the device
7521 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7522 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7523 new_lvs)
7524 msg = result.fail_msg
7525 if msg:
7526 for new_lv in new_lvs:
7527 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7528 new_lv).fail_msg
7529 if msg2:
7530 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7531 hint=("cleanup manually the unused logical"
7532 " volumes"))
7533 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7535 dev.children = new_lvs
7537 self.cfg.Update(self.instance, feedback_fn)
7539 cstep = 5
7540 if self.early_release:
7541 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7542 cstep += 1
7543 self._RemoveOldStorage(self.target_node, iv_names)
7544 # WARNING: we release both node locks here, do not do other RPCs
7545 # than WaitForSync to the primary node
7546 self._ReleaseNodeLock([self.target_node, self.other_node])
7549 # This can fail as the old devices are degraded and _WaitForSync
7550 # does a combined result over all disks, so we don't check its return value
7551 self.lu.LogStep(cstep, steps_total, "Sync devices")
7552 cstep += 1
7553 _WaitForSync(self.lu, self.instance)
7555 # Check all devices manually
7556 self._CheckDevices(self.instance.primary_node, iv_names)
7558 # Step: remove old storage
7559 if not self.early_release:
7560 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7561 cstep += 1
7562 self._RemoveOldStorage(self.target_node, iv_names)
7564 def _ExecDrbd8Secondary(self, feedback_fn):
7565 """Replace the secondary node for DRBD 8.
7567 The algorithm for replace is quite complicated:
7568 - for all disks of the instance:
7569 - create new LVs on the new node with same names
7570 - shutdown the drbd device on the old secondary
7571 - disconnect the drbd network on the primary
7572 - create the drbd device on the new secondary
7573 - network attach the drbd on the primary, using an artifice:
7574 the drbd code for Attach() will connect to the network if it
7575 finds a device which is connected to the good local disks but not network enabled
7577 - wait for sync across all devices
7578 - remove all disks from the old secondary
7580 Failures are not very well handled.
7585 # Step: check device activation
7586 self.lu.LogStep(1, steps_total, "Check device existence")
7587 self._CheckDisksExistence([self.instance.primary_node])
7588 self._CheckVolumeGroup([self.instance.primary_node])
7590 # Step: check other node consistency
7591 self.lu.LogStep(2, steps_total, "Check peer consistency")
7592 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7594 # Step: create new storage
7595 self.lu.LogStep(3, steps_total, "Allocate new storage")
7596 for idx, dev in enumerate(self.instance.disks):
7597 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7598 (self.new_node, idx))
7599 # we pass force_create=True to force LVM creation
7600 for new_lv in dev.children:
7601 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7602 _GetInstanceInfoText(self.instance), False)
7604 # Step 4: drbd minors and drbd setup changes
7605 # after this, we must manually remove the drbd minors on both the
7606 # error and the success paths
7607 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7608 minors = self.cfg.AllocateDRBDMinor([self.new_node
7609 for dev in self.instance.disks],
7611 logging.debug("Allocated minors %r", minors)
7614 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7615 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7616 (self.new_node, idx))
7617 # create new devices on new_node; note that we create two IDs:
7618 # one without port, so the drbd will be activated without
7619 # networking information on the new node at this stage, and one
7620 # with network, for the latter activation in step 4
7621 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7622 if self.instance.primary_node == o_node1:
7625 assert self.instance.primary_node == o_node2, "Three-node instance?"
7628 new_alone_id = (self.instance.primary_node, self.new_node, None,
7629 p_minor, new_minor, o_secret)
7630 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7631 p_minor, new_minor, o_secret)
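# Illustrative example (hypothetical values): with primary node
# "node1", new secondary "node3", DRBD port 11000, p_minor 0,
# new_minor 1 and shared secret "s3cr3t" this yields
#   new_alone_id = ("node1", "node3", None, 0, 1, "s3cr3t")
#   new_net_id   = ("node1", "node3", 11000, 0, 1, "s3cr3t")
# i.e. the two IDs differ only in the port field.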
7633 iv_names[idx] = (dev, dev.children, new_net_id)
7634 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7636 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7637 logical_id=new_alone_id,
7638 children=dev.children,
7641 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7642 _GetInstanceInfoText(self.instance), False)
7643 except errors.GenericError:
7644 self.cfg.ReleaseDRBDMinors(self.instance.name)
7647 # We have new devices, shutdown the drbd on the old secondary
7648 for idx, dev in enumerate(self.instance.disks):
7649 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7650 self.cfg.SetDiskID(dev, self.target_node)
7651 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7653 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7654 "node: %s" % (idx, msg),
7655 hint=("Please cleanup this device manually as"
7656 " soon as possible"))
7658 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7659 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7660 self.node_secondary_ip,
7661 self.instance.disks)\
7662 [self.instance.primary_node]
7664 msg = result.fail_msg
7666 # detaches didn't succeed (unlikely)
7667 self.cfg.ReleaseDRBDMinors(self.instance.name)
7668 raise errors.OpExecError("Can't detach the disks from the network on"
7669 " old node: %s" % (msg,))
7671 # if we managed to detach at least one, we update all the disks of
7672 # the instance to point to the new secondary
7673 self.lu.LogInfo("Updating instance configuration")
7674 for dev, _, new_logical_id in iv_names.itervalues():
7675 dev.logical_id = new_logical_id
7676 self.cfg.SetDiskID(dev, self.instance.primary_node)
7678 self.cfg.Update(self.instance, feedback_fn)
7680 # and now perform the drbd attach
7681 self.lu.LogInfo("Attaching primary drbds to new secondary"
7682 " (standalone => connected)")
7683 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7685 self.node_secondary_ip,
7686 self.instance.disks,
7689 for to_node, to_result in result.items():
7690 msg = to_result.fail_msg
7692 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7694 hint=("please do a gnt-instance info to see the"
7695 " status of disks"))
7697 if self.early_release:
7698 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7700 self._RemoveOldStorage(self.target_node, iv_names)
7701 # WARNING: we release all node locks here, do not do other RPCs
7702 # than WaitForSync to the primary node
7703 self._ReleaseNodeLock([self.instance.primary_node,
7708 # This can fail as the old devices are degraded and _WaitForSync
7709 # does a combined result over all disks, so we don't check its return value
7710 self.lu.LogStep(cstep, steps_total, "Sync devices")
7712 _WaitForSync(self.lu, self.instance)
7714 # Check all devices manually
7715 self._CheckDevices(self.instance.primary_node, iv_names)
7717 # Step: remove old storage
7718 if not self.early_release:
7719 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7720 self._RemoveOldStorage(self.target_node, iv_names)
7723 class LURepairNodeStorage(NoHooksLU):
7724 """Repairs the volume group on a node.
7727 _OP_REQP = ["node_name"]
7730 def CheckArguments(self):
7731 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7733 _CheckStorageType(self.op.storage_type)
7735 def ExpandNames(self):
7736 self.needed_locks = {
7737 locking.LEVEL_NODE: [self.op.node_name],
7740 def _CheckFaultyDisks(self, instance, node_name):
7741 """Ensure faulty disks abort the opcode or at least warn."""
7743 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7745 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7746 " node '%s'" % (instance.name, node_name),
7748 except errors.OpPrereqError, err:
7749 if self.op.ignore_consistency:
7750 self.proc.LogWarning(str(err.args[0]))
7754 def CheckPrereq(self):
7755 """Check prerequisites.
7758 storage_type = self.op.storage_type
7760 if (constants.SO_FIX_CONSISTENCY not in
7761 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7762 raise errors.OpPrereqError("Storage units of type '%s' can not be"
7763 " repaired" % storage_type,
7766 # Check whether any instance on this node has faulty disks
7767 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7768 if not inst.admin_up:
7770 check_nodes = set(inst.all_nodes)
7771 check_nodes.discard(self.op.node_name)
7772 for inst_node_name in check_nodes:
7773 self._CheckFaultyDisks(inst, inst_node_name)
7775 def Exec(self, feedback_fn):
7776 feedback_fn("Repairing storage unit '%s' on %s ..." %
7777 (self.op.name, self.op.node_name))
7779 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7780 result = self.rpc.call_storage_execute(self.op.node_name,
7781 self.op.storage_type, st_args,
7783 constants.SO_FIX_CONSISTENCY)
7784 result.Raise("Failed to repair storage unit '%s' on %s" %
7785 (self.op.name, self.op.node_name))
7788 class LUNodeEvacuationStrategy(NoHooksLU):
7789 """Computes the node evacuation strategy.
7792 _OP_REQP = ["nodes"]
7795 def CheckArguments(self):
7796 if not hasattr(self.op, "remote_node"):
7797 self.op.remote_node = None
7798 if not hasattr(self.op, "iallocator"):
7799 self.op.iallocator = None
7800 if self.op.remote_node is not None and self.op.iallocator is not None:
7801 raise errors.OpPrereqError("Give either the iallocator or the new"
7802 " secondary, not both", errors.ECODE_INVAL)
7804 def ExpandNames(self):
7805 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7806 self.needed_locks = locks = {}
7807 if self.op.remote_node is None:
7808 locks[locking.LEVEL_NODE] = locking.ALL_SET
7810 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7811 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7813 def CheckPrereq(self):
7816 def Exec(self, feedback_fn):
7817 if self.op.remote_node is not None:
7819 for node in self.op.nodes:
7820 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7823 if i.primary_node == self.op.remote_node:
7824 raise errors.OpPrereqError("Node %s is the primary node of"
7825 " instance %s, cannot use it as"
7827 (self.op.remote_node, i.name),
7829 result.append([i.name, self.op.remote_node])
7831 ial = IAllocator(self.cfg, self.rpc,
7832 mode=constants.IALLOCATOR_MODE_MEVAC,
7833 evac_nodes=self.op.nodes)
7834 ial.Run(self.op.iallocator, validate=True)
7836 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
7842 class LUGrowDisk(LogicalUnit):
7843 """Grow a disk of an instance.
7847 HTYPE = constants.HTYPE_INSTANCE
7848 _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7851 def ExpandNames(self):
7852 self._ExpandAndLockInstance()
7853 self.needed_locks[locking.LEVEL_NODE] = []
7854 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7856 def DeclareLocks(self, level):
7857 if level == locking.LEVEL_NODE:
7858 self._LockInstancesNodes()
7860 def BuildHooksEnv(self):
7863 This runs on the master, the primary and all the secondaries.
7867 "DISK": self.op.disk,
7868 "AMOUNT": self.op.amount,
7870 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7871 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7874 def CheckPrereq(self):
7875 """Check prerequisites.
7877 This checks that the instance is in the cluster.
7880 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7881 assert instance is not None, \
7882 "Cannot retrieve locked instance %s" % self.op.instance_name
7883 nodenames = list(instance.all_nodes)
7884 for node in nodenames:
7885 _CheckNodeOnline(self, node)
7888 self.instance = instance
7890 if instance.disk_template not in constants.DTS_GROWABLE:
7891 raise errors.OpPrereqError("Instance's disk layout does not support"
7892 " growing.", errors.ECODE_INVAL)
7894 self.disk = instance.FindDisk(self.op.disk)
7896 if instance.disk_template != constants.DT_FILE:
7897 # TODO: check the free disk space for file, when that feature will be implemented
7899 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
7901 def Exec(self, feedback_fn):
7902 """Execute disk grow.
7905 instance = self.instance
7907 for node in instance.all_nodes:
7908 self.cfg.SetDiskID(disk, node)
7909 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7910 result.Raise("Grow request failed to node %s" % node)
7912 # TODO: Rewrite code to work properly
7913 # DRBD goes into sync mode for a short amount of time after executing the
7914 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7915 # calling "resize" in sync mode fails. Sleeping for a short amount of
7916 # time is a work-around.
7919 disk.RecordGrow(self.op.amount)
7920 self.cfg.Update(instance, feedback_fn)
7921 if self.op.wait_for_sync:
7922 disk_abort = not _WaitForSync(self, instance)
7924 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7925 " status.\nPlease check the instance.")
7928 class LUQueryInstanceData(NoHooksLU):
7929 """Query runtime instance data.
7932 _OP_REQP = ["instances", "static"]
7935 def ExpandNames(self):
7936 self.needed_locks = {}
7937 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7939 if not isinstance(self.op.instances, list):
7940 raise errors.OpPrereqError("Invalid argument type 'instances'",
7943 if self.op.instances:
7944 self.wanted_names = []
7945 for name in self.op.instances:
7946 full_name = _ExpandInstanceName(self.cfg, name)
7947 self.wanted_names.append(full_name)
7948 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7950 self.wanted_names = None
7951 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7953 self.needed_locks[locking.LEVEL_NODE] = []
7954 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7956 def DeclareLocks(self, level):
7957 if level == locking.LEVEL_NODE:
7958 self._LockInstancesNodes()
7960 def CheckPrereq(self):
7961 """Check prerequisites.
7963 This only checks the optional instance list against the existing names.
7966 if self.wanted_names is None:
7967 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7969 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7970 in self.wanted_names]
7973 def _ComputeBlockdevStatus(self, node, instance_name, dev):
7974 """Returns the status of a block device
7977 if self.op.static or not node:
7980 self.cfg.SetDiskID(dev, node)
7982 result = self.rpc.call_blockdev_find(node, dev)
7986 result.Raise("Can't compute disk status for %s" % instance_name)
7988 status = result.payload
7992 return (status.dev_path, status.major, status.minor,
7993 status.sync_percent, status.estimated_time,
7994 status.is_degraded, status.ldisk_status)
7996 def _ComputeDiskStatus(self, instance, snode, dev):
7997 """Compute block device status.
8000 if dev.dev_type in constants.LDS_DRBD:
8001 # we change the snode then (otherwise we use the one passed in)
8002 if dev.logical_id[0] == instance.primary_node:
8003 snode = dev.logical_id[1]
8005 snode = dev.logical_id[0]
8007 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8009 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8012 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8013 for child in dev.children]
8018 "iv_name": dev.iv_name,
8019 "dev_type": dev.dev_type,
8020 "logical_id": dev.logical_id,
8021 "physical_id": dev.physical_id,
8022 "pstatus": dev_pstatus,
8023 "sstatus": dev_sstatus,
8024 "children": dev_children,
8031 def Exec(self, feedback_fn):
8032 """Gather and return data"""
8035 cluster = self.cfg.GetClusterInfo()
8037 for instance in self.wanted_instances:
8038 if not self.op.static:
8039 remote_info = self.rpc.call_instance_info(instance.primary_node,
8041 instance.hypervisor)
8042 remote_info.Raise("Error checking node %s" % instance.primary_node)
8043 remote_info = remote_info.payload
8044 if remote_info and "state" in remote_info:
8047 remote_state = "down"
8050 if instance.admin_up:
8053 config_state = "down"
8055 disks = [self._ComputeDiskStatus(instance, None, device)
8056 for device in instance.disks]
8059 "name": instance.name,
8060 "config_state": config_state,
8061 "run_state": remote_state,
8062 "pnode": instance.primary_node,
8063 "snodes": instance.secondary_nodes,
8065 # this happens to be the same format used for hooks
8066 "nics": _NICListToTuple(self, instance.nics),
8068 "hypervisor": instance.hypervisor,
8069 "network_port": instance.network_port,
8070 "hv_instance": instance.hvparams,
8071 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8072 "be_instance": instance.beparams,
8073 "be_actual": cluster.FillBE(instance),
8074 "serial_no": instance.serial_no,
8075 "mtime": instance.mtime,
8076 "ctime": instance.ctime,
8077 "uuid": instance.uuid,
8080 result[instance.name] = idict
8085 class LUSetInstanceParams(LogicalUnit):
8086 """Modifies an instances's parameters.
8089 HPATH = "instance-modify"
8090 HTYPE = constants.HTYPE_INSTANCE
8091 _OP_REQP = ["instance_name"]
8094 def CheckArguments(self):
8095 if not hasattr(self.op, 'nics'):
8097 if not hasattr(self.op, 'disks'):
8099 if not hasattr(self.op, 'beparams'):
8100 self.op.beparams = {}
8101 if not hasattr(self.op, 'hvparams'):
8102 self.op.hvparams = {}
8103 if not hasattr(self.op, "disk_template"):
8104 self.op.disk_template = None
8105 if not hasattr(self.op, "remote_node"):
8106 self.op.remote_node = None
8107 if not hasattr(self.op, "os_name"):
8108 self.op.os_name = None
8109 if not hasattr(self.op, "force_variant"):
8110 self.op.force_variant = False
8111 self.op.force = getattr(self.op, "force", False)
8112 if not (self.op.nics or self.op.disks or self.op.disk_template or
8113 self.op.hvparams or self.op.beparams or self.op.os_name):
8114 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8116 if self.op.hvparams:
8117 _CheckGlobalHvParams(self.op.hvparams)
8121 for disk_op, disk_dict in self.op.disks:
8122 if disk_op == constants.DDM_REMOVE:
8125 elif disk_op == constants.DDM_ADD:
8128 if not isinstance(disk_op, int):
8129 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8130 if not isinstance(disk_dict, dict):
8131 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8132 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8134 if disk_op == constants.DDM_ADD:
8135 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8136 if mode not in constants.DISK_ACCESS_SET:
8137 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8139 size = disk_dict.get('size', None)
8141 raise errors.OpPrereqError("Required disk parameter size missing",
8145 except (TypeError, ValueError), err:
8146 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8147 str(err), errors.ECODE_INVAL)
8148 disk_dict['size'] = size
8150 # modification of disk
8151 if 'size' in disk_dict:
8152 raise errors.OpPrereqError("Disk size change not possible, use"
8153 " grow-disk", errors.ECODE_INVAL)
8155 if disk_addremove > 1:
8156 raise errors.OpPrereqError("Only one disk add or remove operation"
8157 " supported at a time", errors.ECODE_INVAL)
8159 if self.op.disks and self.op.disk_template is not None:
8160 raise errors.OpPrereqError("Disk template conversion and other disk"
8161 " changes not supported at the same time",
8164 if self.op.disk_template:
8165 _CheckDiskTemplate(self.op.disk_template)
8166 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8167 self.op.remote_node is None):
8168 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8169 " one requires specifying a secondary node",
8174 for nic_op, nic_dict in self.op.nics:
8175 if nic_op == constants.DDM_REMOVE:
8178 elif nic_op == constants.DDM_ADD:
8181 if not isinstance(nic_op, int):
8182 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8183 if not isinstance(nic_dict, dict):
8184 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8185 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8187 # nic_dict should be a dict
8188 nic_ip = nic_dict.get('ip', None)
8189 if nic_ip is not None:
8190 if nic_ip.lower() == constants.VALUE_NONE:
8191 nic_dict['ip'] = None
8193 if not utils.IsValidIP(nic_ip):
8194 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8197 nic_bridge = nic_dict.get('bridge', None)
8198 nic_link = nic_dict.get('link', None)
8199 if nic_bridge and nic_link:
8200 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8201 " at the same time", errors.ECODE_INVAL)
8202 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8203 nic_dict['bridge'] = None
8204 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8205 nic_dict['link'] = None
8207 if nic_op == constants.DDM_ADD:
8208 nic_mac = nic_dict.get('mac', None)
8210 nic_dict['mac'] = constants.VALUE_AUTO
8212 if 'mac' in nic_dict:
8213 nic_mac = nic_dict['mac']
8214 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8215 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8217 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8218 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8219 " modifying an existing nic",
8222 if nic_addremove > 1:
8223 raise errors.OpPrereqError("Only one NIC add or remove operation"
8224 " supported at a time", errors.ECODE_INVAL)
8226 def ExpandNames(self):
8227 self._ExpandAndLockInstance()
8228 self.needed_locks[locking.LEVEL_NODE] = []
8229 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8231 def DeclareLocks(self, level):
8232 if level == locking.LEVEL_NODE:
8233 self._LockInstancesNodes()
8234 if self.op.disk_template and self.op.remote_node:
8235 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8236 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8238 def BuildHooksEnv(self):
8241 This runs on the master, primary and secondaries.
8245 if constants.BE_MEMORY in self.be_new:
8246 args['memory'] = self.be_new[constants.BE_MEMORY]
8247 if constants.BE_VCPUS in self.be_new:
8248 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8249 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8250 # information at all.
8253 nic_override = dict(self.op.nics)
8254 c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
8255 for idx, nic in enumerate(self.instance.nics):
8256 if idx in nic_override:
8257 this_nic_override = nic_override[idx]
8259 this_nic_override = {}
8260 if 'ip' in this_nic_override:
8261 ip = this_nic_override['ip']
8264 if 'mac' in this_nic_override:
8265 mac = this_nic_override['mac']
8268 if idx in self.nic_pnew:
8269 nicparams = self.nic_pnew[idx]
8271 nicparams = objects.FillDict(c_nicparams, nic.nicparams)
8272 mode = nicparams[constants.NIC_MODE]
8273 link = nicparams[constants.NIC_LINK]
8274 args['nics'].append((ip, mac, mode, link))
8275 if constants.DDM_ADD in nic_override:
8276 ip = nic_override[constants.DDM_ADD].get('ip', None)
8277 mac = nic_override[constants.DDM_ADD]['mac']
8278 nicparams = self.nic_pnew[constants.DDM_ADD]
8279 mode = nicparams[constants.NIC_MODE]
8280 link = nicparams[constants.NIC_LINK]
8281 args['nics'].append((ip, mac, mode, link))
8282 elif constants.DDM_REMOVE in nic_override:
8283 del args['nics'][-1]
8285 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8286 if self.op.disk_template:
8287 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8288 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8292 def _GetUpdatedParams(old_params, update_dict,
8293 default_values, parameter_types):
8294 """Return the new params dict for the given params.
8296 @type old_params: dict
8297 @param old_params: old parameters
8298 @type update_dict: dict
8299 @param update_dict: dict containing new parameter values,
8300 or constants.VALUE_DEFAULT to reset the
8301 parameter to its default value
8302 @type default_values: dict
8303 @param default_values: default values for the filled parameters
8304 @type parameter_types: dict
8305 @param parameter_types: dict mapping target dict keys to types
8306 in constants.ENFORCEABLE_TYPES
8307 @rtype: (dict, dict)
8308 @return: (new_parameters, filled_parameters)
8311 params_copy = copy.deepcopy(old_params)
8312 for key, val in update_dict.iteritems():
8313 if val == constants.VALUE_DEFAULT:
8315 del params_copy[key]
8319 params_copy[key] = val
8320 utils.ForceDictType(params_copy, parameter_types)
8321 params_filled = objects.FillDict(default_values, params_copy)
8322 return (params_copy, params_filled)
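# Illustrative example (hypothetical values): with
#   old_params     = {"vcpus": 2, "memory": 512}
#   update_dict    = {"memory": constants.VALUE_DEFAULT, "vcpus": 4}
#   default_values = {"vcpus": 1, "memory": 128, "auto_balance": True}
# the method returns
#   ({"vcpus": 4}, {"vcpus": 4, "memory": 128, "auto_balance": True})
# i.e. "memory" is reset to its default and dropped from the
# instance-level dict, while "vcpus" is overridden in both results.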
8324 def CheckPrereq(self):
8325 """Check prerequisites.
8327 This only checks the instance list against the existing names.
8330 self.force = self.op.force
8332 # checking the new params on the primary/secondary nodes
8334 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8335 cluster = self.cluster = self.cfg.GetClusterInfo()
8336 assert self.instance is not None, \
8337 "Cannot retrieve locked instance %s" % self.op.instance_name
8338 pnode = instance.primary_node
8339 nodelist = list(instance.all_nodes)
8341 if self.op.disk_template:
8342 if instance.disk_template == self.op.disk_template:
8343 raise errors.OpPrereqError("Instance already has disk template %s" %
8344 instance.disk_template, errors.ECODE_INVAL)
8346 if (instance.disk_template,
8347 self.op.disk_template) not in self._DISK_CONVERSIONS:
8348 raise errors.OpPrereqError("Unsupported disk template conversion from"
8349 " %s to %s" % (instance.disk_template,
8350 self.op.disk_template),
8352 if self.op.disk_template in constants.DTS_NET_MIRROR:
8353 _CheckNodeOnline(self, self.op.remote_node)
8354 _CheckNodeNotDrained(self, self.op.remote_node)
8355 disks = [{"size": d.size} for d in instance.disks]
8356 required = _ComputeDiskSize(self.op.disk_template, disks)
8357 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8358 _CheckInstanceDown(self, instance, "cannot change disk template")
8360 # hvparams processing
8361 if self.op.hvparams:
8362 i_hvdict, hv_new = self._GetUpdatedParams(
8363 instance.hvparams, self.op.hvparams,
8364 cluster.hvparams[instance.hypervisor],
8365 constants.HVS_PARAMETER_TYPES)
8367 hypervisor.GetHypervisor(
8368 instance.hypervisor).CheckParameterSyntax(hv_new)
8369 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8370 self.hv_new = hv_new # the new actual values
8371 self.hv_inst = i_hvdict # the new dict (without defaults)
8373 self.hv_new = self.hv_inst = {}
8375 # beparams processing
8376 if self.op.beparams:
8377 i_bedict, be_new = self._GetUpdatedParams(
8378 instance.beparams, self.op.beparams,
8379 cluster.beparams[constants.PP_DEFAULT],
8380 constants.BES_PARAMETER_TYPES)
8381 self.be_new = be_new # the new actual values
8382 self.be_inst = i_bedict # the new dict (without defaults)
8384 self.be_new = self.be_inst = {}
8388 if constants.BE_MEMORY in self.op.beparams and not self.force:
8389 mem_check_list = [pnode]
8390 if be_new[constants.BE_AUTO_BALANCE]:
8391 # either we changed auto_balance to yes or it was from before
8392 mem_check_list.extend(instance.secondary_nodes)
8393 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8394 instance.hypervisor)
8395 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8396 instance.hypervisor)
8397 pninfo = nodeinfo[pnode]
8398 msg = pninfo.fail_msg
8400 # Assume the primary node is unreachable and go ahead
8401 self.warn.append("Can't get info from primary node %s: %s" %
8403 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8404 self.warn.append("Node data from primary node %s doesn't contain"
8405 " free memory information" % pnode)
8406 elif instance_info.fail_msg:
8407 self.warn.append("Can't get instance runtime information: %s" %
8408 instance_info.fail_msg)
8410 if instance_info.payload:
8411 current_mem = int(instance_info.payload['memory'])
8413 # Assume instance not running
8414 # (there is a slight race condition here, but it's not very probable,
8415 # and we have no other way to check)
8417 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8418 pninfo.payload['memory_free'])
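# Illustrative example (hypothetical values): raising BE_MEMORY to
# 2048 MB while the instance currently uses 1024 MB and the primary
# node reports 512 MB free gives miss_mem = 2048 - 1024 - 512 = 512,
# so the change is refused unless the force flag is set.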
8420 raise errors.OpPrereqError("This change will prevent the instance"
8421 " from starting, due to %d MB of memory"
8422 " missing on its primary node" % miss_mem,
8425 if be_new[constants.BE_AUTO_BALANCE]:
8426 for node, nres in nodeinfo.items():
8427 if node not in instance.secondary_nodes:
8431 self.warn.append("Can't get info from secondary node %s: %s" %
8433 elif not isinstance(nres.payload.get('memory_free', None), int):
8434 self.warn.append("Secondary node %s didn't return free"
8435 " memory information" % node)
8436 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8437 self.warn.append("Not enough memory to failover instance to"
8438 " secondary node %s" % node)
8443 for nic_op, nic_dict in self.op.nics:
8444 if nic_op == constants.DDM_REMOVE:
8445 if not instance.nics:
8446 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8449 if nic_op != constants.DDM_ADD:
8451 if not instance.nics:
8452 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8453 " no NICs" % nic_op,
8455 if nic_op < 0 or nic_op >= len(instance.nics):
8456 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8458 (nic_op, len(instance.nics) - 1),
8460 old_nic_params = instance.nics[nic_op].nicparams
8461 old_nic_ip = instance.nics[nic_op].ip
8466 update_params_dict = dict([(key, nic_dict[key])
8467 for key in constants.NICS_PARAMETERS
8468 if key in nic_dict])
8470 if 'bridge' in nic_dict:
8471 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8473 new_nic_params, new_filled_nic_params = \
8474 self._GetUpdatedParams(old_nic_params, update_params_dict,
8475 cluster.nicparams[constants.PP_DEFAULT],
8476 constants.NICS_PARAMETER_TYPES)
8477 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8478 self.nic_pinst[nic_op] = new_nic_params
8479 self.nic_pnew[nic_op] = new_filled_nic_params
8480 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8482 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8483 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8484 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8486 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8488 self.warn.append(msg)
8490 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8491 if new_nic_mode == constants.NIC_MODE_ROUTED:
8492 if 'ip' in nic_dict:
8493 nic_ip = nic_dict['ip']
8497 raise errors.OpPrereqError('Cannot set the nic ip to None'
8498 ' on a routed nic', errors.ECODE_INVAL)
8499 if 'mac' in nic_dict:
8500 nic_mac = nic_dict['mac']
8502 raise errors.OpPrereqError('Cannot set the nic mac to None',
8504 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8505 # otherwise generate the mac
8506 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8508 # or validate/reserve the current one
8510 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8511 except errors.ReservationError:
8512 raise errors.OpPrereqError("MAC address %s already in use"
8513 " in cluster" % nic_mac,
8514 errors.ECODE_NOTUNIQUE)
8517 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8518 raise errors.OpPrereqError("Disk operations not supported for"
8519 " diskless instances",
8521 for disk_op, _ in self.op.disks:
8522 if disk_op == constants.DDM_REMOVE:
8523 if len(instance.disks) == 1:
8524 raise errors.OpPrereqError("Cannot remove the last disk of"
8525 " an instance", errors.ECODE_INVAL)
8526 _CheckInstanceDown(self, instance, "cannot remove disks")
8528 if (disk_op == constants.DDM_ADD and
8529 len(instance.disks) >= constants.MAX_DISKS):
8530 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8531 " add more" % constants.MAX_DISKS,
8533 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8535 if disk_op < 0 or disk_op >= len(instance.disks):
8536 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8538 (disk_op, len(instance.disks)),
8542 if self.op.os_name and not self.op.force:
8543 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8544 self.op.force_variant)
8548 def _ConvertPlainToDrbd(self, feedback_fn):
8549 """Converts an instance from plain to drbd.
8552 feedback_fn("Converting template to drbd")
8553 instance = self.instance
8554 pnode = instance.primary_node
8555 snode = self.op.remote_node
8557 # create a fake disk info for _GenerateDiskTemplate
8558 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8559 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8560 instance.name, pnode, [snode],
8561 disk_info, None, None, 0)
8562 info = _GetInstanceInfoText(instance)
8563 feedback_fn("Creating aditional volumes...")
8564 # first, create the missing data and meta devices
8565 for disk in new_disks:
8566 # unfortunately this is... not too nice
8567 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8569 for child in disk.children:
8570 _CreateSingleBlockDev(self, snode, instance, child, info, True)
8571 # at this stage, all new LVs have been created, we can rename the old ones
8573 feedback_fn("Renaming original volumes...")
8574 rename_list = [(o, n.children[0].logical_id)
8575 for (o, n) in zip(instance.disks, new_disks)]
8576 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8577 result.Raise("Failed to rename original LVs")
8579 feedback_fn("Initializing DRBD devices...")
8580 # all child devices are in place, we can now create the DRBD devices
8581 for disk in new_disks:
8582 for node in [pnode, snode]:
8583 f_create = node == pnode
8584 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8586 # at this point, the instance has been modified
8587 instance.disk_template = constants.DT_DRBD8
8588 instance.disks = new_disks
8589 self.cfg.Update(instance, feedback_fn)
8591 # disks are created, waiting for sync
8592 disk_abort = not _WaitForSync(self, instance)
8594 raise errors.OpExecError("There are some degraded disks for"
8595 " this instance, please cleanup manually")
8597 def _ConvertDrbdToPlain(self, feedback_fn):
8598 """Converts an instance from drbd to plain.
8601 instance = self.instance
8602 assert len(instance.secondary_nodes) == 1
8603 pnode = instance.primary_node
8604 snode = instance.secondary_nodes[0]
8605 feedback_fn("Converting template to plain")
8607 old_disks = instance.disks
8608 new_disks = [d.children[0] for d in old_disks]
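# note: a drbd8 disk has two LV children, the data volume
# (children[0]) and the metadata volume (children[1]); converting to
# plain keeps only the data volume, the metadata LVs are removed
# further down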
8610 # copy over size and mode
8611 for parent, child in zip(old_disks, new_disks):
8612 child.size = parent.size
8613 child.mode = parent.mode
8615 # update instance structure
8616 instance.disks = new_disks
8617 instance.disk_template = constants.DT_PLAIN
8618 self.cfg.Update(instance, feedback_fn)
8620 feedback_fn("Removing volumes on the secondary node...")
8621 for disk in old_disks:
8622 self.cfg.SetDiskID(disk, snode)
8623 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8625 self.LogWarning("Could not remove block device %s on node %s,"
8626 " continuing anyway: %s", disk.iv_name, snode, msg)
8628 feedback_fn("Removing unneeded volumes on the primary node...")
8629 for idx, disk in enumerate(old_disks):
8630 meta = disk.children[1]
8631 self.cfg.SetDiskID(meta, pnode)
8632 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8634 self.LogWarning("Could not remove metadata for disk %d on node %s,"
8635 " continuing anyway: %s", idx, pnode, msg)
8638 def Exec(self, feedback_fn):
8639 """Modifies an instance.
8641 All parameters take effect only at the next restart of the instance.
8644 # Process here the warnings from CheckPrereq, as we don't have a
8645 # feedback_fn there.
8646 for warn in self.warn:
8647 feedback_fn("WARNING: %s" % warn)
8650 instance = self.instance
8652 for disk_op, disk_dict in self.op.disks:
8653 if disk_op == constants.DDM_REMOVE:
8654 # remove the last disk
8655 device = instance.disks.pop()
8656 device_idx = len(instance.disks)
8657 for node, disk in device.ComputeNodeTree(instance.primary_node):
8658 self.cfg.SetDiskID(disk, node)
8659 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8661 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8662 " continuing anyway", device_idx, node, msg)
8663 result.append(("disk/%d" % device_idx, "remove"))
8664 elif disk_op == constants.DDM_ADD:
8666 if instance.disk_template == constants.DT_FILE:
8667 file_driver, file_path = instance.disks[0].logical_id
8668 file_path = os.path.dirname(file_path)
8670 file_driver = file_path = None
8671 disk_idx_base = len(instance.disks)
8672 new_disk = _GenerateDiskTemplate(self,
8673 instance.disk_template,
8674 instance.name, instance.primary_node,
8675 instance.secondary_nodes,
8680 instance.disks.append(new_disk)
8681 info = _GetInstanceInfoText(instance)
8683 logging.info("Creating volume %s for instance %s",
8684 new_disk.iv_name, instance.name)
8685 # Note: this needs to be kept in sync with _CreateDisks
8687 for node in instance.all_nodes:
8688 f_create = node == instance.primary_node
8690 _CreateBlockDev(self, node, instance, new_disk,
8691 f_create, info, f_create)
8692 except errors.OpExecError, err:
8693 self.LogWarning("Failed to create volume %s (%s) on"
8695 new_disk.iv_name, new_disk, node, err)
8696 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8697 (new_disk.size, new_disk.mode)))
8699 # change a given disk
8700 instance.disks[disk_op].mode = disk_dict['mode']
8701 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8703 if self.op.disk_template:
8704 r_shut = _ShutdownInstanceDisks(self, instance)
8706 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8707 " proceed with disk template conversion")
8708 mode = (instance.disk_template, self.op.disk_template)
8710 self._DISK_CONVERSIONS[mode](self, feedback_fn)
8712 self.cfg.ReleaseDRBDMinors(instance.name)
8714 result.append(("disk_template", self.op.disk_template))
8717 for nic_op, nic_dict in self.op.nics:
8718 if nic_op == constants.DDM_REMOVE:
8719 # remove the last nic
8720 del instance.nics[-1]
8721 result.append(("nic.%d" % len(instance.nics), "remove"))
8722 elif nic_op == constants.DDM_ADD:
8723 # mac and bridge should be set by now
8724 mac = nic_dict['mac']
8725 ip = nic_dict.get('ip', None)
8726 nicparams = self.nic_pinst[constants.DDM_ADD]
8727 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8728 instance.nics.append(new_nic)
8729 result.append(("nic.%d" % (len(instance.nics) - 1),
8730 "add:mac=%s,ip=%s,mode=%s,link=%s" %
8731 (new_nic.mac, new_nic.ip,
8732 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8733 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8736 for key in 'mac', 'ip':
8738 setattr(instance.nics[nic_op], key, nic_dict[key])
8739 if nic_op in self.nic_pinst:
8740 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8741 for key, val in nic_dict.iteritems():
8742 result.append(("nic.%s/%d" % (key, nic_op), val))
8745 if self.op.hvparams:
8746 instance.hvparams = self.hv_inst
8747 for key, val in self.op.hvparams.iteritems():
8748 result.append(("hv/%s" % key, val))
8751 if self.op.beparams:
8752 instance.beparams = self.be_inst
8753 for key, val in self.op.beparams.iteritems():
8754 result.append(("be/%s" % key, val))
8758 instance.os = self.op.os_name
8760 self.cfg.Update(instance, feedback_fn)
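# mapping of (current disk template, requested disk template) pairs to
# the conversion helpers defined above; conversions not listed here are
# rejected in CheckPrereq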
8764 _DISK_CONVERSIONS = {
8765 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8766 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8769 class LUQueryExports(NoHooksLU):
8770 """Query the exports list
8773 _OP_REQP = ['nodes']
8776 def ExpandNames(self):
8777 self.needed_locks = {}
8778 self.share_locks[locking.LEVEL_NODE] = 1
8779 if not self.op.nodes:
8780 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8782 self.needed_locks[locking.LEVEL_NODE] = \
8783 _GetWantedNodes(self, self.op.nodes)
8785 def CheckPrereq(self):
8786 """Check prerequisites.
8789 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8791 def Exec(self, feedback_fn):
8792 """Compute the list of all the exported system images.
8795 @return: a dictionary with the structure node->(export-list)
8796 where export-list is a list of the instances exported on
8800 rpcresult = self.rpc.call_export_list(self.nodes)
8802 for node in rpcresult:
8803 if rpcresult[node].fail_msg:
8804 result[node] = False
8806 result[node] = rpcresult[node].payload
8811 class LUExportInstance(LogicalUnit):
8812 """Export an instance to an image in the cluster.
8815 HPATH = "instance-export"
8816 HTYPE = constants.HTYPE_INSTANCE
8817 _OP_REQP = ["instance_name", "target_node", "shutdown"]
8820 def CheckArguments(self):
8821 """Check the arguments.
8824 _CheckBooleanOpField(self.op, "remove_instance")
8825 _CheckBooleanOpField(self.op, "ignore_remove_failures")
8827 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
8828 constants.DEFAULT_SHUTDOWN_TIMEOUT)
8829 self.remove_instance = getattr(self.op, "remove_instance", False)
8830 self.ignore_remove_failures = getattr(self.op, "ignore_remove_failures",
8833 if self.remove_instance and not self.op.shutdown:
8834 raise errors.OpPrereqError("Can not remove instance without shutting it"
8837 def ExpandNames(self):
8838 self._ExpandAndLockInstance()
8840 # FIXME: lock only instance primary and destination node
8842 # Sad but true, for now we have to lock all nodes, as we don't know where
8843 # the previous export might be, and in this LU we search for it and
8844 # remove it from its current node. In the future we could fix this by:
8845 # - making a tasklet to search (share-lock all), then create the new one,
8846 # then one to remove, after
8847 # - removing the removal operation altogether
8848 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8850 def DeclareLocks(self, level):
8851 """Last minute lock declaration."""
8852 # All nodes are locked anyway, so nothing to do here.
8854 def BuildHooksEnv(self):
8857 This will run on the master, primary node and target node.
8861 "EXPORT_NODE": self.op.target_node,
8862 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
8863 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
8864 # TODO: Generic function for boolean env variables
8865 "REMOVE_INSTANCE": str(bool(self.remove_instance)),
8867 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8868 nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
8869 self.op.target_node]
8872 def CheckPrereq(self):
8873 """Check prerequisites.
8875 This checks that the instance and node names are valid.
8878 instance_name = self.op.instance_name
8879 self.instance = self.cfg.GetInstanceInfo(instance_name)
8880 assert self.instance is not None, \
8881 "Cannot retrieve locked instance %s" % self.op.instance_name
8882 _CheckNodeOnline(self, self.instance.primary_node)
8884 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8885 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
8886 assert self.dst_node is not None
8888 _CheckNodeOnline(self, self.dst_node.name)
8889 _CheckNodeNotDrained(self, self.dst_node.name)
8891 # instance disk type verification
8892 # TODO: Implement export support for file-based disks
8893 for disk in self.instance.disks:
8894 if disk.dev_type == constants.LD_FILE:
8895 raise errors.OpPrereqError("Export not supported for instances with"
8896 " file-based disks", errors.ECODE_INVAL)
8898 def Exec(self, feedback_fn):
8899 """Export an instance to an image in the cluster.
8902 instance = self.instance
8903 dst_node = self.dst_node
8904 src_node = instance.primary_node
8906 if self.op.shutdown:
8907 # shutdown the instance, but not the disks
8908 feedback_fn("Shutting down instance %s" % instance.name)
8909 result = self.rpc.call_instance_shutdown(src_node, instance,
8910 self.shutdown_timeout)
8911 # TODO: Maybe ignore failures if ignore_remove_failures is set
8912 result.Raise("Could not shutdown instance %s on"
8913 " node %s" % (instance.name, src_node))
8915 vgname = self.cfg.GetVGName()
8919 # set the disks ID correctly since call_instance_start needs the
8920 # correct drbd minor to create the symlinks
8921 for disk in instance.disks:
8922 self.cfg.SetDiskID(disk, src_node)
8924 activate_disks = (not instance.admin_up)
8927 # Activate the instance disks if we're exporting a stopped instance
8928 feedback_fn("Activating disks for %s" % instance.name)
8929 _StartInstanceDisks(self, instance, None)
8935 for idx, disk in enumerate(instance.disks):
8936 feedback_fn("Creating a snapshot of disk/%s on node %s" %
8939 # result.payload will be a snapshot of an lvm leaf of the one we
8941 result = self.rpc.call_blockdev_snapshot(src_node, disk)
8942 msg = result.fail_msg
8944 self.LogWarning("Could not snapshot disk/%s on node %s: %s",
8946 snap_disks.append(False)
8948 disk_id = (vgname, result.payload)
8949 new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
8950 logical_id=disk_id, physical_id=disk_id,
8951 iv_name=disk.iv_name)
8952 snap_disks.append(new_dev)
8955 if self.op.shutdown and instance.admin_up and not self.remove_instance:
8956 feedback_fn("Starting instance %s" % instance.name)
8957 result = self.rpc.call_instance_start(src_node, instance, None, None)
8958 msg = result.fail_msg
8960 _ShutdownInstanceDisks(self, instance)
8961 raise errors.OpExecError("Could not start instance: %s" % msg)
8963 # TODO: check for size
8965 cluster_name = self.cfg.GetClusterName()
8966 for idx, dev in enumerate(snap_disks):
8967 feedback_fn("Exporting snapshot %s from %s to %s" %
8968 (idx, src_node, dst_node.name))
8970 # FIXME: pass debug from opcode to backend
8971 result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
8972 instance, cluster_name,
8973 idx, self.op.debug_level)
8974 msg = result.fail_msg
8976 self.LogWarning("Could not export disk/%s from node %s to"
8977 " node %s: %s", idx, src_node, dst_node.name, msg)
8978 dresults.append(False)
8980 dresults.append(True)
8981 msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
8983 self.LogWarning("Could not remove snapshot for disk/%d from node"
8984 " %s: %s", idx, src_node, msg)
8986 dresults.append(False)
8988 feedback_fn("Finalizing export on %s" % dst_node.name)
8989 result = self.rpc.call_finalize_export(dst_node.name, instance,
8992 msg = result.fail_msg
8994 self.LogWarning("Could not finalize export for instance %s"
8995 " on node %s: %s", instance.name, dst_node.name, msg)
9000 feedback_fn("Deactivating disks for %s" % instance.name)
9001 _ShutdownInstanceDisks(self, instance)
9003 # Remove instance if requested
9004 if self.remove_instance:
9005 feedback_fn("Removing instance %s" % instance.name)
9006 _RemoveInstance(self, feedback_fn, instance, self.ignore_remove_failures)
9008 nodelist = self.cfg.GetNodeList()
9009 nodelist.remove(dst_node.name)
9011 # on one-node clusters nodelist will be empty after the removal
9012 # if we proceed, the backup would be removed because OpQueryExports
9013 # substitutes an empty list with the full cluster node list.
9014 iname = instance.name
9016 feedback_fn("Removing old exports for instance %s" % iname)
9017 exportlist = self.rpc.call_export_list(nodelist)
9018 for node in exportlist:
9019 if exportlist[node].fail_msg:
9021 if iname in exportlist[node].payload:
9022 msg = self.rpc.call_export_remove(node, iname).fail_msg
9024 self.LogWarning("Could not remove older export for instance %s"
9025 " on node %s: %s", iname, node, msg)
9027 return fin_resu, dresults
9030 class LURemoveExport(NoHooksLU):
9031 """Remove exports related to the named instance.
9034 _OP_REQP = ["instance_name"]
9037 def ExpandNames(self):
9038 self.needed_locks = {}
9039 # We need all nodes to be locked in order for RemoveExport to work, but we
9040 # don't need to lock the instance itself, as nothing will happen to it (and
9041 # we can remove exports also for a removed instance)
9042 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9044 def CheckPrereq(self):
9045 """Check prerequisites.
9049 def Exec(self, feedback_fn):
9050 """Remove any export.
9053 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9054 # If the instance was not found we'll try with the name that was passed in.
9055 # This will only work if it was an FQDN, though.
9057 if not instance_name:
9059 instance_name = self.op.instance_name
9061 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9062 exportlist = self.rpc.call_export_list(locked_nodes)
9064 for node in exportlist:
9065 msg = exportlist[node].fail_msg
9067 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9069 if instance_name in exportlist[node].payload:
9071 result = self.rpc.call_export_remove(node, instance_name)
9072 msg = result.fail_msg
9074 logging.error("Could not remove export for instance %s"
9075 " on node %s: %s", instance_name, node, msg)
9077 if fqdn_warn and not found:
9078 feedback_fn("Export not found. If trying to remove an export belonging"
9079 " to a deleted instance please use its Fully Qualified"
9083 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9086 This is an abstract class which is the parent of all the other tags LUs.
9090 def ExpandNames(self):
9091 self.needed_locks = {}
9092 if self.op.kind == constants.TAG_NODE:
9093 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9094 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9095 elif self.op.kind == constants.TAG_INSTANCE:
9096 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9097 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9099 def CheckPrereq(self):
9100 """Check prerequisites.
9103 if self.op.kind == constants.TAG_CLUSTER:
9104 self.target = self.cfg.GetClusterInfo()
9105 elif self.op.kind == constants.TAG_NODE:
9106 self.target = self.cfg.GetNodeInfo(self.op.name)
9107 elif self.op.kind == constants.TAG_INSTANCE:
9108 self.target = self.cfg.GetInstanceInfo(self.op.name)
9110 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9111 str(self.op.kind), errors.ECODE_INVAL)
9114 class LUGetTags(TagsLU):
9115 """Returns the tags of a given object.
9118 _OP_REQP = ["kind", "name"]
9121 def Exec(self, feedback_fn):
9122 """Returns the tag list.
9125 return list(self.target.GetTags())
9128 class LUSearchTags(NoHooksLU):
9129 """Searches the tags for a given pattern.
9132 _OP_REQP = ["pattern"]
9135 def ExpandNames(self):
9136 self.needed_locks = {}
9138 def CheckPrereq(self):
9139 """Check prerequisites.
9141 This checks the pattern passed for validity by compiling it.
9145 self.re = re.compile(self.op.pattern)
9146 except re.error, err:
9147 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9148 (self.op.pattern, err), errors.ECODE_INVAL)
9150 def Exec(self, feedback_fn):
9151 """Returns the tag list.
9155 tgts = [("/cluster", cfg.GetClusterInfo())]
9156 ilist = cfg.GetAllInstancesInfo().values()
9157 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9158 nlist = cfg.GetAllNodesInfo().values()
9159 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9161 for path, target in tgts:
9162 for tag in target.GetTags():
9163 if self.re.search(tag):
9164 results.append((path, tag))
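# Illustrative example (hypothetical values): searching for the
# pattern "^db" on a cluster where instance "inst1" carries the tag
# "db-primary" would include ("/instances/inst1", "db-primary") in the
# results.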
9168 class LUAddTags(TagsLU):
9169 """Sets a tag on a given object.
9172 _OP_REQP = ["kind", "name", "tags"]
9175 def CheckPrereq(self):
9176 """Check prerequisites.
9178 This checks the type and length of the tag name and value.
9181 TagsLU.CheckPrereq(self)
9182 for tag in self.op.tags:
9183 objects.TaggableObject.ValidateTag(tag)
9185 def Exec(self, feedback_fn):
9190 for tag in self.op.tags:
9191 self.target.AddTag(tag)
9192 except errors.TagError, err:
9193 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9194 self.cfg.Update(self.target, feedback_fn)
9197 class LUDelTags(TagsLU):
9198 """Delete a list of tags from a given object.
9201 _OP_REQP = ["kind", "name", "tags"]
9204 def CheckPrereq(self):
9205 """Check prerequisites.
9207 This checks that we have the given tag.
9210 TagsLU.CheckPrereq(self)
9211 for tag in self.op.tags:
9212 objects.TaggableObject.ValidateTag(tag)
9213 del_tags = frozenset(self.op.tags)
9214 cur_tags = self.target.GetTags()
9215 if not del_tags <= cur_tags:
9216 diff_tags = del_tags - cur_tags
9217 diff_names = ["'%s'" % tag for tag in diff_tags]
9219 raise errors.OpPrereqError("Tag(s) %s not found" %
9220 (",".join(diff_names)), errors.ECODE_NOENT)
9222 def Exec(self, feedback_fn):
9223 """Remove the tag from the object.
9226 for tag in self.op.tags:
9227 self.target.RemoveTag(tag)
9228 self.cfg.Update(self.target, feedback_fn)
9231 class LUTestDelay(NoHooksLU):
9232 """Sleep for a specified amount of time.
9234 This LU sleeps on the master and/or nodes for a specified amount of time.
9238 _OP_REQP = ["duration", "on_master", "on_nodes"]
9241 def ExpandNames(self):
9242 """Expand names and set required locks.
9244 This expands the node list, if any.
9247 self.needed_locks = {}
9248 if self.op.on_nodes:
9249 # _GetWantedNodes can be used here, but is not always appropriate to use
9250 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9252 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9253 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9255 def CheckPrereq(self):
9256 """Check prerequisites.
9260 def Exec(self, feedback_fn):
9261 """Do the actual sleep.
9264 if self.op.on_master:
9265 if not utils.TestDelay(self.op.duration):
9266 raise errors.OpExecError("Error during master delay test")
9267 if self.op.on_nodes:
9268 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9269 for node, node_result in result.items():
9270 node_result.Raise("Failure during rpc call to node %s" % node)
9273 class IAllocator(object):
9274 """IAllocator framework.
9276 An IAllocator instance has four sets of attributes:
9277 - cfg that is needed to query the cluster
9278 - input data (all members of the _KEYS class attribute are required)
9279 - four buffer attributes (in|out_data|text), that represent the
9280 input (to the external script) in text and data structure format,
9281 and the output from it, again in two formats
9282 - the result variables from the script (success, info, nodes) for
9286 # pylint: disable-msg=R0902
9287 # lots of instance attributes
9289 "name", "mem_size", "disks", "disk_template",
9290 "os", "tags", "nics", "vcpus", "hypervisor",
9293 "name", "relocate_from",
9299 def __init__(self, cfg, rpc, mode, **kwargs):
9302 # init buffer variables
9303 self.in_text = self.out_text = self.in_data = self.out_data = None
9304 # init all input fields so that pylint is happy
9306 self.mem_size = self.disks = self.disk_template = None
9307 self.os = self.tags = self.nics = self.vcpus = None
9308 self.hypervisor = None
9309 self.relocate_from = None
9311 self.evac_nodes = None
9313 self.required_nodes = None
9314 # init result fields
9315 self.success = self.info = self.result = None
9316 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9317 keyset = self._ALLO_KEYS
9318 fn = self._AddNewInstance
9319 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9320 keyset = self._RELO_KEYS
9321 fn = self._AddRelocateInstance
9322 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9323 keyset = self._EVAC_KEYS
9324 fn = self._AddEvacuateNodes
9326 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9327 " IAllocator" % self.mode)
9329 if key not in keyset:
9330 raise errors.ProgrammerError("Invalid input parameter '%s' to"
9331 " IAllocator" % key)
9332 setattr(self, key, kwargs[key])
9335 if key not in kwargs:
9336 raise errors.ProgrammerError("Missing input parameter '%s' to"
9337 " IAllocator" % key)
9338 self._BuildInputData(fn)
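# Illustrative usage (hypothetical instance/node names): a relocation
# request could be built and run as
#   ial = IAllocator(self.cfg, self.rpc,
#                    mode=constants.IALLOCATOR_MODE_RELOC,
#                    name="inst1.example.com",
#                    relocate_from=["node2.example.com"])
#   ial.Run("hail")  # name of the installed allocator script
# after which ial.success, ial.info and ial.result hold the outcome.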
9340 def _ComputeClusterData(self):
9341 """Compute the generic allocator input data.
9343 This is the data that is independent of the actual operation.
9347 cluster_info = cfg.GetClusterInfo()
9350 "version": constants.IALLOCATOR_VERSION,
9351 "cluster_name": cfg.GetClusterName(),
9352 "cluster_tags": list(cluster_info.GetTags()),
9353 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9354 # we don't have job IDs
9356 iinfo = cfg.GetAllInstancesInfo().values()
9357 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9361 node_list = cfg.GetNodeList()
9363 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9364 hypervisor_name = self.hypervisor
9365 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9366 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9367 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9368 hypervisor_name = cluster_info.enabled_hypervisors[0]
9370 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9373 self.rpc.call_all_instances_info(node_list,
9374 cluster_info.enabled_hypervisors)
9375 for nname, nresult in node_data.items():
9376 # first fill in static (config-based) values
9377 ninfo = cfg.GetNodeInfo(nname)
9379 "tags": list(ninfo.GetTags()),
9380 "primary_ip": ninfo.primary_ip,
9381 "secondary_ip": ninfo.secondary_ip,
9382 "offline": ninfo.offline,
9383 "drained": ninfo.drained,
9384 "master_candidate": ninfo.master_candidate,
9387 if not (ninfo.offline or ninfo.drained):
9388 nresult.Raise("Can't get data for node %s" % nname)
9389 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9391 remote_info = nresult.payload
9393 for attr in ['memory_total', 'memory_free', 'memory_dom0',
9394 'vg_size', 'vg_free', 'cpu_total']:
9395 if attr not in remote_info:
9396 raise errors.OpExecError("Node '%s' didn't return attribute"
9397 " '%s'" % (nname, attr))
9398 if not isinstance(remote_info[attr], int):
9399 raise errors.OpExecError("Node '%s' returned invalid value"
9401 (nname, attr, remote_info[attr]))
9402 # compute memory used by primary instances
9403 i_p_mem = i_p_up_mem = 0
9404 for iinfo, beinfo in i_list:
9405 if iinfo.primary_node == nname:
9406 i_p_mem += beinfo[constants.BE_MEMORY]
9407 if iinfo.name not in node_iinfo[nname].payload:
9410 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9411 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9412 remote_info['memory_free'] -= max(0, i_mem_diff)
9415 i_p_up_mem += beinfo[constants.BE_MEMORY]
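# Illustrative example (hypothetical values): a primary instance with
# BE_MEMORY 1024 that currently reports only 512 MB in use causes the
# 512 MB difference to be subtracted from the node's free memory, so
# the allocator plans as if every instance could grow back to its full
# configured memory.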
9417 # compute memory used by instances
9419 "total_memory": remote_info['memory_total'],
9420 "reserved_memory": remote_info['memory_dom0'],
9421 "free_memory": remote_info['memory_free'],
9422 "total_disk": remote_info['vg_size'],
9423 "free_disk": remote_info['vg_free'],
9424 "total_cpus": remote_info['cpu_total'],
9425 "i_pri_memory": i_p_mem,
9426 "i_pri_up_memory": i_p_up_mem,
9430 node_results[nname] = pnr
9431 data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data
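
  # Example (illustrative sketch; values are made up, only the keys built
  # above are real): after _ComputeClusterData, self.in_data has roughly
  # this shape
  #
  #   {"version": constants.IALLOCATOR_VERSION,
  #    "cluster_name": "cluster.example.com",
  #    "cluster_tags": [],
  #    "enabled_hypervisors": ["xen-pvm"],
  #    "nodes": {"node1.example.com": {"total_memory": 4096, ...}},
  #    "instances": {"inst1.example.com": {"memory": 512, ...}}}
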
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request
  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {"evac_nodes": self.evac_nodes}
    return request
  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
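
  # Example (illustrative sketch): self.in_text is the serialized form of
  # self.in_data with the mode-specific request merged in, roughly
  #
  #   {"version": constants.IALLOCATOR_VERSION, "cluster_name": ...,
  #    "cluster_tags": [...], "enabled_hypervisors": [...],
  #    "nodes": {...}, "instances": {...},
  #    "request": {"type": self.mode, ...}}
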
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
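
  # Example (illustrative sketch; assumes an external iallocator script such
  # as "hail" is installed): callers typically run the allocator and then
  # inspect the attributes set by _ValidateResult, e.g.
  #
  #   ial.Run("hail")
  #   if not ial.success:
  #     raise errors.OpPrereqError("Can't compute nodes: %s" % ial.info,
  #                                errors.ECODE_NORES)
  #   target_nodes = ial.result
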
  def _ValidateResult(self):
    """Process the allocator results.

    This will process the allocator output and, if successful, save the
    result in self.out_data and the other instance attributes.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
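
  # Example (illustrative sketch; node names are made up): a well-formed
  # allocator reply therefore looks like
  #
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node1.example.com", "node2.example.com"]}
  #
  # A reply missing one of the three keys, or whose "result" is not a list,
  # is rejected by the checks above.
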
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the test direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode '%s' in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text