4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
import time
import re
import logging
import OpenSSL

from ganeti import ssh
39 from ganeti import utils
40 from ganeti import errors
41 from ganeti import hypervisor
42 from ganeti import locking
43 from ganeti import constants
44 from ganeti import objects
45 from ganeti import serializer
46 from ganeti import ssconf
47 from ganeti import uidpool
48 from ganeti import compat
51 class LogicalUnit(object):
52 """Logical Unit base class.
54 Subclasses must follow these rules:
55 - implement ExpandNames
56 - implement CheckPrereq (except when tasklets are used)
57 - implement Exec (except when tasklets are used)
58 - implement BuildHooksEnv
59 - redefine HPATH and HTYPE
60 - optionally redefine their run requirements:
61 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
63 Note that all commands require root permissions.
65 @ivar dry_run_result: the value (if any) that will be returned to the caller
66 in dry-run mode (signalled by opcode dry_run parameter)
74 def __init__(self, processor, op, context, rpc):
75 """Constructor for LogicalUnit.
This needs to be overridden in derived classes in order to check op validity.
83 self.cfg = context.cfg
84 self.context = context
86 # Dicts used to declare locking needs to mcpu
87 self.needed_locks = None
88 self.acquired_locks = {}
89 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
91 self.remove_locks = {}
92 # Used to force good behavior when calling helper functions
93 self.recalculate_locks = {}
96 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
97 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
98 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
100 self.dry_run_result = None
101 # support for generic debug attribute
102 if (not hasattr(self.op, "debug_level") or
103 not isinstance(self.op.debug_level, int)):
104 self.op.debug_level = 0
109 for attr_name in self._OP_REQP:
110 attr_val = getattr(op, attr_name, None)
if attr_val is None:
raise errors.OpPrereqError("Required parameter '%s' missing" %
113 attr_name, errors.ECODE_INVAL)
115 self.CheckArguments()
118 """Returns the SshRunner object
122 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
125 ssh = property(fget=__GetSSH)
127 def CheckArguments(self):
128 """Check syntactic validity for the opcode arguments.
This method is for doing a simple syntactic check and ensuring the
validity of opcode parameters, without any cluster-related
checks. While the same can be accomplished in ExpandNames and/or
CheckPrereq, doing these separately is better because:
- ExpandNames is left as purely a lock-related function
- CheckPrereq is run after we have acquired locks (and possibly waited for them)
139 The function is allowed to change the self.op attribute so that
later methods no longer need to worry about missing parameters.
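
A hedged sketch of a possible implementation, normalizing an optional
flag so that later code can rely on it (the "force" parameter here is
purely illustrative)::

  def CheckArguments(self):
    force = getattr(self.op, "force", False)
    if not isinstance(force, bool):
      raise errors.OpPrereqError("Invalid 'force' parameter",
                                 errors.ECODE_INVAL)
    self.op.force = force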
145 def ExpandNames(self):
146 """Expand names for this LU.
148 This method is called before starting to execute the opcode, and it should
149 update all the parameters of the opcode to their canonical form (e.g. a
150 short node name must be fully expanded after this method has successfully
completed). This way locking, hooks, logging, etc. can work correctly.
153 LUs which implement this method must also populate the self.needed_locks
154 member, as a dict with lock levels as keys, and a list of needed lock names
157 - use an empty dict if you don't need any lock
158 - if you don't need any lock at a particular level omit that level
159 - don't put anything for the BGL level
160 - if you want all locks at a level use locking.ALL_SET as a value
162 If you need to share locks (rather than acquire them exclusively) at one
163 level you can modify self.share_locks, setting a true value (usually 1) for
164 that level. By default locks are not shared.
166 This function can also define a list of tasklets, which then will be
167 executed in order instead of the usual LU-level CheckPrereq and Exec
168 functions, if those are not defined by the LU.
172 # Acquire all nodes and one instance
173 self.needed_locks = {
174 locking.LEVEL_NODE: locking.ALL_SET,
175 locking.LEVEL_INSTANCE: ['instance1.example.tld'],
177 # Acquire just two nodes
178 self.needed_locks = {
179 locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
182 self.needed_locks = {} # No, you can't leave it to the default value None
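# Acquire all node locks in shared mode; whether sharing is safe is a
# per-LU decision, this only illustrates the mechanism
self.needed_locks = {
  locking.LEVEL_NODE: locking.ALL_SET,
}
self.share_locks[locking.LEVEL_NODE] = 1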
185 # The implementation of this method is mandatory only if the new LU is
186 # concurrent, so that old LUs don't need to be changed all at the same
189 self.needed_locks = {} # Exclusive LUs don't need locks.
191 raise NotImplementedError
193 def DeclareLocks(self, level):
194 """Declare LU locking needs for a level
196 While most LUs can just declare their locking needs at ExpandNames time,
197 sometimes there's the need to calculate some locks after having acquired
198 the ones before. This function is called just before acquiring locks at a
199 particular level, but after acquiring the ones at lower levels, and permits
200 such calculations. It can be used to modify self.needed_locks, and by
201 default it does nothing.
203 This function is only called if you have something already set in
204 self.needed_locks for the level.
206 @param level: Locking level which is going to be locked
207 @type level: member of ganeti.locking.LEVELS
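
A typical sketch (hedged; many LUs follow roughly this pattern)::

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()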
211 def CheckPrereq(self):
212 """Check prerequisites for this LU.
214 This method should check that the prerequisites for the execution
215 of this LU are fulfilled. It can do internode communication, but
it should be idempotent - no cluster or system changes are allowed.
219 The method should raise errors.OpPrereqError in case something is
220 not fulfilled. Its return value is ignored.
222 This method should also update all the parameters of the opcode to
223 their canonical form if it hasn't been done by ExpandNames before.
226 if self.tasklets is not None:
227 for (idx, tl) in enumerate(self.tasklets):
228 logging.debug("Checking prerequisites for tasklet %s/%s",
229 idx + 1, len(self.tasklets))
232 raise NotImplementedError
234 def Exec(self, feedback_fn):
237 This method should implement the actual work. It should raise
238 errors.OpExecError for failures that are somewhat dealt with in
242 if self.tasklets is not None:
243 for (idx, tl) in enumerate(self.tasklets):
244 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
247 raise NotImplementedError
249 def BuildHooksEnv(self):
250 """Build hooks environment for this LU.
This method should return a three-element tuple consisting of: a dict
253 containing the environment that will be used for running the
254 specific hook for this LU, a list of node names on which the hook
255 should run before the execution, and a list of node names on which
256 the hook should run after the execution.
The keys of the dict must not be prefixed with 'GANETI_', as this
is handled by the hooks runner. Also note additional keys will be
260 added by the hooks runner. If the LU doesn't define any
261 environment, an empty dict (and not None) should be returned.
If the hook should run on no nodes, an empty list (and not None) should be returned.
Note that if the HPATH for a LU class is None, this function will not be called.
269 raise NotImplementedError
271 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
272 """Notify the LU about the results of its hooks.
274 This method is called every time a hooks phase is executed, and notifies
275 the Logical Unit about the hooks' result. The LU can then use it to alter
276 its result based on the hooks. By default the method does nothing and the
277 previous result is passed back unchanged but any LU can define it if it
278 wants to use the local cluster hook-scripts somehow.
280 @param phase: one of L{constants.HOOKS_PHASE_POST} or
281 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
282 @param hook_results: the results of the multi-node hooks rpc call
@param feedback_fn: function used to send feedback back to the caller
284 @param lu_result: the previous Exec result this LU had, or None
286 @return: the new Exec result, based on the previous result
# API must be kept, thus we ignore the "unused argument" and "method
# could be a function" warnings
292 # pylint: disable-msg=W0613,R0201
295 def _ExpandAndLockInstance(self):
296 """Helper function to expand and lock an instance.
298 Many LUs that work on an instance take its name in self.op.instance_name
299 and need to expand it and then declare the expanded name for locking. This
300 function does it, and then updates self.op.instance_name to the expanded
301 name. It also initializes needed_locks as a dict, if this hasn't been done
305 if self.needed_locks is None:
306 self.needed_locks = {}
308 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
309 "_ExpandAndLockInstance called with instance-level locks set"
310 self.op.instance_name = _ExpandInstanceName(self.cfg,
311 self.op.instance_name)
312 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
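
# A hedged usage sketch: an instance-level LU typically calls this from its
# ExpandNames and then recalculates node locks in DeclareLocks, e.g.:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
# with DeclareLocks then calling self._LockInstancesNodes() as described
# below.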
314 def _LockInstancesNodes(self, primary_only=False):
315 """Helper function to declare instances' nodes for locking.
317 This function should be called after locking one or more instances to lock
318 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
319 with all primary or secondary nodes for instances already locked and
320 present in self.needed_locks[locking.LEVEL_INSTANCE].
322 It should be called from DeclareLocks, and for safety only works if
323 self.recalculate_locks[locking.LEVEL_NODE] is set.
In the future it may grow parameters to just lock some instances' nodes, or
to just lock primary or secondary nodes, if needed.
It should be called in DeclareLocks in a way similar to::
330 if level == locking.LEVEL_NODE:
331 self._LockInstancesNodes()
333 @type primary_only: boolean
334 @param primary_only: only lock primary nodes of locked instances
337 assert locking.LEVEL_NODE in self.recalculate_locks, \
338 "_LockInstancesNodes helper function called with no nodes to recalculate"
# TODO: check if we've really been called with the instance locks held
342 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
343 # future we might want to have different behaviors depending on the value
344 # of self.recalculate_locks[locking.LEVEL_NODE]
wanted_nodes = []
for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
instance = self.context.cfg.GetInstanceInfo(instance_name)
wanted_nodes.append(instance.primary_node)
if not primary_only:
wanted_nodes.extend(instance.secondary_nodes)
352 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
353 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
354 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
355 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
357 del self.recalculate_locks[locking.LEVEL_NODE]
360 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
361 """Simple LU which runs no hooks.
363 This LU is intended as a parent for other LogicalUnits which will
364 run no hooks, in order to reduce duplicate code.
370 def BuildHooksEnv(self):
371 """Empty BuildHooksEnv for NoHooksLu.
373 This just raises an error.
376 assert False, "BuildHooksEnv called for NoHooksLUs"
380 """Tasklet base class.
382 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
383 they can mix legacy code with tasklets. Locking needs to be done in the LU,
384 tasklets know nothing about locks.
386 Subclasses must follow these rules:
387 - Implement CheckPrereq
391 def __init__(self, lu):
398 def CheckPrereq(self):
399 """Check prerequisites for this tasklets.
401 This method should check whether the prerequisites for the execution of
402 this tasklet are fulfilled. It can do internode communication, but it
403 should be idempotent - no cluster or system changes are allowed.
405 The method should raise errors.OpPrereqError in case something is not
406 fulfilled. Its return value is ignored.
408 This method should also update all parameters to their canonical form if it
409 hasn't been done before.
412 raise NotImplementedError
414 def Exec(self, feedback_fn):
415 """Execute the tasklet.
417 This method should implement the actual work. It should raise
418 errors.OpExecError for failures that are somewhat dealt with in code, or
422 raise NotImplementedError
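
# A minimal, illustrative tasklet sketch (the class name is hypothetical):
#
#   class _NoopTasklet(Tasklet):
#     def CheckPrereq(self):
#       pass
#
#     def Exec(self, feedback_fn):
#       feedback_fn("nothing to do")
#
# An LU would then set self.tasklets = [_NoopTasklet(self)] in its
# ExpandNames, and the base LogicalUnit CheckPrereq/Exec would run it.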
425 def _GetWantedNodes(lu, nodes):
426 """Returns list of checked and expanded node names.
428 @type lu: L{LogicalUnit}
429 @param lu: the logical unit on whose behalf we execute
431 @param nodes: list of node names or None for all nodes
433 @return: the list of nodes, sorted
434 @raise errors.ProgrammerError: if the nodes parameter is wrong type
437 if not isinstance(nodes, list):
438 raise errors.OpPrereqError("Invalid argument type 'nodes'",
442 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
443 " non-empty list of nodes whose name is to be expanded.")
445 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
446 return utils.NiceSort(wanted)
449 def _GetWantedInstances(lu, instances):
450 """Returns list of checked and expanded instance names.
452 @type lu: L{LogicalUnit}
453 @param lu: the logical unit on whose behalf we execute
454 @type instances: list
455 @param instances: list of instance names or None for all instances
457 @return: the list of instances, sorted
458 @raise errors.OpPrereqError: if the instances parameter is wrong type
459 @raise errors.OpPrereqError: if any of the passed instances is not found
462 if not isinstance(instances, list):
463 raise errors.OpPrereqError("Invalid argument type 'instances'",
467 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
469 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
473 def _CheckOutputFields(static, dynamic, selected):
474 """Checks whether all selected fields are valid.
476 @type static: L{utils.FieldSet}
477 @param static: static fields set
478 @type dynamic: L{utils.FieldSet}
479 @param dynamic: dynamic fields set
486 delta = f.NonMatching(selected)
488 raise errors.OpPrereqError("Unknown output fields selected: %s"
489 % ",".join(delta), errors.ECODE_INVAL)
492 def _CheckBooleanOpField(op, name):
493 """Validates boolean opcode parameters.
495 This will ensure that an opcode parameter is either a boolean value,
496 or None (but that it always exists).
499 val = getattr(op, name, None)
500 if not (val is None or isinstance(val, bool)):
501 raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
502 (name, str(val)), errors.ECODE_INVAL)
503 setattr(op, name, val)
506 def _CheckGlobalHvParams(params):
507 """Validates that given hypervisor params are not global ones.
509 This will ensure that instances don't get customised versions of
513 used_globals = constants.HVC_GLOBALS.intersection(params)
515 msg = ("The following hypervisor parameters are global and cannot"
516 " be customized at instance level, please modify them at"
517 " cluster level: %s" % utils.CommaJoin(used_globals))
518 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
521 def _CheckNodeOnline(lu, node):
522 """Ensure that a given node is online.
524 @param lu: the LU on behalf of which we make the check
525 @param node: the node to check
526 @raise errors.OpPrereqError: if the node is offline
529 if lu.cfg.GetNodeInfo(node).offline:
530 raise errors.OpPrereqError("Can't use offline node %s" % node,
534 def _CheckNodeNotDrained(lu, node):
535 """Ensure that a given node is not drained.
537 @param lu: the LU on behalf of which we make the check
538 @param node: the node to check
539 @raise errors.OpPrereqError: if the node is drained
542 if lu.cfg.GetNodeInfo(node).drained:
543 raise errors.OpPrereqError("Can't use drained node %s" % node,
547 def _CheckNodeHasOS(lu, node, os_name, force_variant):
548 """Ensure that a node supports a given OS.
550 @param lu: the LU on behalf of which we make the check
551 @param node: the node to check
552 @param os_name: the OS to query about
553 @param force_variant: whether to ignore variant errors
554 @raise errors.OpPrereqError: if the node is not supporting the OS
557 result = lu.rpc.call_os_get(node, os_name)
result.Raise("OS '%s' not in supported OS list for node %s" %
(os_name, node),
prereq=True, ecode=errors.ECODE_INVAL)
561 if not force_variant:
562 _CheckOSVariant(result.payload, os_name)
565 def _RequireFileStorage():
566 """Checks that file storage is enabled.
568 @raise errors.OpPrereqError: when file storage is disabled
571 if not constants.ENABLE_FILE_STORAGE:
572 raise errors.OpPrereqError("File storage disabled at configure time",
576 def _CheckDiskTemplate(template):
577 """Ensure a given disk template is valid.
580 if template not in constants.DISK_TEMPLATES:
581 msg = ("Invalid disk template name '%s', valid templates are: %s" %
582 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
583 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
584 if template == constants.DT_FILE:
585 _RequireFileStorage()
588 def _CheckStorageType(storage_type):
589 """Ensure a given storage type is valid.
592 if storage_type not in constants.VALID_STORAGE_TYPES:
593 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
595 if storage_type == constants.ST_FILE:
596 _RequireFileStorage()
600 def _CheckInstanceDown(lu, instance, reason):
601 """Ensure that an instance is not running."""
602 if instance.admin_up:
603 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
604 (instance.name, reason), errors.ECODE_STATE)
606 pnode = instance.primary_node
607 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
608 ins_l.Raise("Can't contact node %s for instance information" % pnode,
609 prereq=True, ecode=errors.ECODE_ENVIRON)
611 if instance.name in ins_l.payload:
612 raise errors.OpPrereqError("Instance %s is running, %s" %
613 (instance.name, reason), errors.ECODE_STATE)
616 def _ExpandItemName(fn, name, kind):
617 """Expand an item name.
619 @param fn: the function to use for expansion
620 @param name: requested item name
621 @param kind: text description ('Node' or 'Instance')
622 @return: the resolved (full) name
623 @raise errors.OpPrereqError: if the item is not found
627 if full_name is None:
628 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
633 def _ExpandNodeName(cfg, name):
634 """Wrapper over L{_ExpandItemName} for nodes."""
635 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
638 def _ExpandInstanceName(cfg, name):
639 """Wrapper over L{_ExpandItemName} for instance."""
640 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
643 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
644 memory, vcpus, nics, disk_template, disks,
645 bep, hvp, hypervisor_name):
646 """Builds instance related env variables for hooks
648 This builds the hook environment from individual variables.
651 @param name: the name of the instance
652 @type primary_node: string
653 @param primary_node: the name of the instance's primary node
654 @type secondary_nodes: list
655 @param secondary_nodes: list of secondary nodes as strings
656 @type os_type: string
657 @param os_type: the name of the instance's OS
658 @type status: boolean
659 @param status: the should_run status of the instance
661 @param memory: the memory size of the instance
663 @param vcpus: the count of VCPUs the instance has
665 @param nics: list of tuples (ip, mac, mode, link) representing
666 the NICs the instance has
667 @type disk_template: string
668 @param disk_template: the disk template of the instance
670 @param disks: the list of (size, mode) pairs
672 @param bep: the backend parameters for the instance
674 @param hvp: the hypervisor parameters for the instance
675 @type hypervisor_name: string
676 @param hypervisor_name: the hypervisor for the instance
678 @return: the hook environment for this instance
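
As an illustration: a single-NIC, single-disk instance yields keys such as
INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_NIC_COUNT, INSTANCE_NIC0_IP/_MAC/
_MODE/_LINK, INSTANCE_DISK_COUNT and INSTANCE_DISK0_SIZE/_MODE, plus one
INSTANCE_BE_*/INSTANCE_HV_* entry per backend/hypervisor parameter; the
GANETI_ prefix is added later by the hooks runner.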
687 "INSTANCE_NAME": name,
688 "INSTANCE_PRIMARY": primary_node,
689 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
690 "INSTANCE_OS_TYPE": os_type,
691 "INSTANCE_STATUS": str_status,
692 "INSTANCE_MEMORY": memory,
693 "INSTANCE_VCPUS": vcpus,
694 "INSTANCE_DISK_TEMPLATE": disk_template,
695 "INSTANCE_HYPERVISOR": hypervisor_name,
699 nic_count = len(nics)
700 for idx, (ip, mac, mode, link) in enumerate(nics):
703 env["INSTANCE_NIC%d_IP" % idx] = ip
704 env["INSTANCE_NIC%d_MAC" % idx] = mac
705 env["INSTANCE_NIC%d_MODE" % idx] = mode
706 env["INSTANCE_NIC%d_LINK" % idx] = link
707 if mode == constants.NIC_MODE_BRIDGED:
708 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
712 env["INSTANCE_NIC_COUNT"] = nic_count
715 disk_count = len(disks)
716 for idx, (size, mode) in enumerate(disks):
717 env["INSTANCE_DISK%d_SIZE" % idx] = size
718 env["INSTANCE_DISK%d_MODE" % idx] = mode
722 env["INSTANCE_DISK_COUNT"] = disk_count
724 for source, kind in [(bep, "BE"), (hvp, "HV")]:
725 for key, value in source.items():
726 env["INSTANCE_%s_%s" % (kind, key)] = value
731 def _NICListToTuple(lu, nics):
732 """Build a list of nic information tuples.
734 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
735 value in LUQueryInstanceData.
737 @type lu: L{LogicalUnit}
738 @param lu: the logical unit on whose behalf we execute
739 @type nics: list of L{objects.NIC}
740 @param nics: list of nics to convert to hooks tuples
744 c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
748 filled_params = objects.FillDict(c_nicparams, nic.nicparams)
749 mode = filled_params[constants.NIC_MODE]
750 link = filled_params[constants.NIC_LINK]
751 hooks_nics.append((ip, mac, mode, link))
755 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
756 """Builds instance related env variables for hooks from an object.
758 @type lu: L{LogicalUnit}
759 @param lu: the logical unit on whose behalf we execute
760 @type instance: L{objects.Instance}
761 @param instance: the instance for which we should build the
764 @param override: dictionary with key/values that will override
767 @return: the hook environment dictionary
770 cluster = lu.cfg.GetClusterInfo()
771 bep = cluster.FillBE(instance)
772 hvp = cluster.FillHV(instance)
774 'name': instance.name,
775 'primary_node': instance.primary_node,
776 'secondary_nodes': instance.secondary_nodes,
777 'os_type': instance.os,
778 'status': instance.admin_up,
779 'memory': bep[constants.BE_MEMORY],
780 'vcpus': bep[constants.BE_VCPUS],
781 'nics': _NICListToTuple(lu, instance.nics),
782 'disk_template': instance.disk_template,
783 'disks': [(disk.size, disk.mode) for disk in instance.disks],
786 'hypervisor_name': instance.hypervisor,
789 args.update(override)
790 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
793 def _AdjustCandidatePool(lu, exceptions):
794 """Adjust the candidate pool after node operations.
797 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
799 lu.LogInfo("Promoted nodes to master candidate role: %s",
800 utils.CommaJoin(node.name for node in mod_list))
801 for name in mod_list:
802 lu.context.ReaddNode(name)
803 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
805 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
809 def _DecideSelfPromotion(lu, exceptions=None):
810 """Decide whether I should promote myself as a master candidate.
813 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
814 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
# the new node will increase mc_max by one, so:
816 mc_should = min(mc_should + 1, cp_size)
817 return mc_now < mc_should
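
# Worked example: with candidate_pool_size=10, 4 current candidates and a
# desired count of 4, the new node bumps the target to min(4 + 1, 10) = 5,
# so 4 < 5 and the node decides to promote itself.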
820 def _CheckNicsBridgesExist(lu, target_nics, target_node,
821 profile=constants.PP_DEFAULT):
822 """Check that the brigdes needed by a list of nics exist.
825 c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
826 paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
827 for nic in target_nics]
828 brlist = [params[constants.NIC_LINK] for params in paramslist
829 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
831 result = lu.rpc.call_bridges_exist(target_node, brlist)
832 result.Raise("Error checking bridges on destination node '%s'" %
833 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
836 def _CheckInstanceBridgesExist(lu, instance, node=None):
837 """Check that the brigdes needed by an instance exist.
841 node = instance.primary_node
842 _CheckNicsBridgesExist(lu, instance.nics, node)
845 def _CheckOSVariant(os_obj, name):
846 """Check whether an OS name conforms to the os variants specification.
848 @type os_obj: L{objects.OS}
849 @param os_obj: OS object to check
851 @param name: OS name passed by the user, to check for validity
854 if not os_obj.supported_variants:
857 variant = name.split("+", 1)[1]
859 raise errors.OpPrereqError("OS name must include a variant",
862 if variant not in os_obj.supported_variants:
863 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
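
# For example: for an OS that declares variants, a name such as
# "debootstrap+default" selects the "default" variant, while a bare
# "debootstrap" is rejected with "OS name must include a variant".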
866 def _GetNodeInstancesInner(cfg, fn):
867 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
870 def _GetNodeInstances(cfg, node_name):
871 """Returns a list of all primary and secondary instances on a node.
875 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
878 def _GetNodePrimaryInstances(cfg, node_name):
879 """Returns primary instances on a node.
882 return _GetNodeInstancesInner(cfg,
883 lambda inst: node_name == inst.primary_node)
886 def _GetNodeSecondaryInstances(cfg, node_name):
887 """Returns secondary instances on a node.
890 return _GetNodeInstancesInner(cfg,
891 lambda inst: node_name in inst.secondary_nodes)
894 def _GetStorageTypeArgs(cfg, storage_type):
895 """Returns the arguments for a storage type.
898 # Special case for file storage
899 if storage_type == constants.ST_FILE:
900 # storage.FileStorage wants a list of storage directories
901 return [[cfg.GetFileStorageDir()]]
906 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
909 for dev in instance.disks:
910 cfg.SetDiskID(dev, node_name)
912 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
913 result.Raise("Failed to get disk status from node %s" % node_name,
914 prereq=prereq, ecode=errors.ECODE_ENVIRON)
916 for idx, bdev_status in enumerate(result.payload):
917 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
923 def _FormatTimestamp(secs):
924 """Formats a Unix timestamp with the local timezone.
927 return time.strftime("%F %T %Z", time.gmtime(secs))
930 class LUPostInitCluster(LogicalUnit):
931 """Logical unit for running hooks after cluster initialization.
934 HPATH = "cluster-init"
935 HTYPE = constants.HTYPE_CLUSTER
938 def BuildHooksEnv(self):
942 env = {"OP_TARGET": self.cfg.GetClusterName()}
943 mn = self.cfg.GetMasterNode()
946 def CheckPrereq(self):
947 """No prerequisites to check.
952 def Exec(self, feedback_fn):
959 class LUDestroyCluster(LogicalUnit):
960 """Logical unit for destroying the cluster.
963 HPATH = "cluster-destroy"
964 HTYPE = constants.HTYPE_CLUSTER
967 def BuildHooksEnv(self):
971 env = {"OP_TARGET": self.cfg.GetClusterName()}
974 def CheckPrereq(self):
975 """Check prerequisites.
977 This checks whether the cluster is empty.
979 Any errors are signaled by raising errors.OpPrereqError.
982 master = self.cfg.GetMasterNode()
984 nodelist = self.cfg.GetNodeList()
985 if len(nodelist) != 1 or nodelist[0] != master:
986 raise errors.OpPrereqError("There are still %d node(s) in"
987 " this cluster." % (len(nodelist) - 1),
989 instancelist = self.cfg.GetInstanceList()
991 raise errors.OpPrereqError("There are still %d instance(s) in"
992 " this cluster." % len(instancelist),
995 def Exec(self, feedback_fn):
996 """Destroys the cluster.
999 master = self.cfg.GetMasterNode()
1000 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1002 # Run post hooks on master node before it's removed
1003 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1005 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1007 # pylint: disable-msg=W0702
1008 self.LogWarning("Errors occurred running hooks on %s" % master)
1010 result = self.rpc.call_node_stop_master(master, False)
1011 result.Raise("Could not disable the master role")
1013 if modify_ssh_setup:
1014 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1015 utils.CreateBackup(priv_key)
1016 utils.CreateBackup(pub_key)
1021 def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
1022 warn_days=constants.SSL_CERT_EXPIRATION_WARN,
1023 error_days=constants.SSL_CERT_EXPIRATION_ERROR):
1024 """Verifies certificate details for LUVerifyCluster.
1028 msg = "Certificate %s is expired" % filename
1030 if not_before is not None and not_after is not None:
1031 msg += (" (valid from %s to %s)" %
1032 (_FormatTimestamp(not_before),
1033 _FormatTimestamp(not_after)))
1034 elif not_before is not None:
1035 msg += " (valid from %s)" % _FormatTimestamp(not_before)
1036 elif not_after is not None:
1037 msg += " (valid until %s)" % _FormatTimestamp(not_after)
1039 return (LUVerifyCluster.ETYPE_ERROR, msg)
1041 elif not_before is not None and not_before > now:
1042 return (LUVerifyCluster.ETYPE_WARNING,
1043 "Certificate %s not yet valid (valid from %s)" %
1044 (filename, _FormatTimestamp(not_before)))
1046 elif not_after is not None:
1047 remaining_days = int((not_after - now) / (24 * 3600))
1049 msg = ("Certificate %s expires in %d days" % (filename, remaining_days))
1051 if remaining_days <= error_days:
1052 return (LUVerifyCluster.ETYPE_ERROR, msg)
1054 if remaining_days <= warn_days:
1055 return (LUVerifyCluster.ETYPE_WARNING, msg)
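
# To illustrate the thresholds: if warn_days were 30 and error_days 7, a
# certificate expiring in 10 days would be reported as a WARNING, one
# expiring in 5 days as an ERROR, and an already-expired certificate is
# always an ERROR.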
1060 def _VerifyCertificate(filename):
1061 """Verifies a certificate for LUVerifyCluster.
1063 @type filename: string
1064 @param filename: Path to PEM file
1068 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1069 utils.ReadFile(filename))
1070 except Exception, err: # pylint: disable-msg=W0703
1071 return (LUVerifyCluster.ETYPE_ERROR,
1072 "Failed to load X509 certificate %s: %s" % (filename, err))
1074 # Depending on the pyOpenSSL version, this can just return (None, None)
1075 (not_before, not_after) = utils.GetX509CertValidity(cert)
1077 return _VerifyCertificateInner(filename, cert.has_expired(),
1078 not_before, not_after, time.time())
1081 class LUVerifyCluster(LogicalUnit):
1082 """Verifies the cluster status.
1085 HPATH = "cluster-verify"
1086 HTYPE = constants.HTYPE_CLUSTER
1087 _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1090 TCLUSTER = "cluster"
1092 TINSTANCE = "instance"
1094 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1095 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1096 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1097 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1098 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1099 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1101 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1102 ENODEDRBD = (TNODE, "ENODEDRBD")
1103 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1104 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1105 ENODEHV = (TNODE, "ENODEHV")
1106 ENODELVM = (TNODE, "ENODELVM")
1107 ENODEN1 = (TNODE, "ENODEN1")
1108 ENODENET = (TNODE, "ENODENET")
1109 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1110 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1111 ENODERPC = (TNODE, "ENODERPC")
1112 ENODESSH = (TNODE, "ENODESSH")
1113 ENODEVERSION = (TNODE, "ENODEVERSION")
1114 ENODESETUP = (TNODE, "ENODESETUP")
1115 ENODETIME = (TNODE, "ENODETIME")
1117 ETYPE_FIELD = "code"
1118 ETYPE_ERROR = "ERROR"
1119 ETYPE_WARNING = "WARNING"
1121 class NodeImage(object):
1122 """A class representing the logical and physical status of a node.
1124 @ivar volumes: a structure as returned from
1125 L{ganeti.backend.GetVolumeList} (runtime)
1126 @ivar instances: a list of running instances (runtime)
1127 @ivar pinst: list of configured primary instances (config)
1128 @ivar sinst: list of configured secondary instances (config)
@ivar sbp: dictionary of {primary-node: list of instances} of all peers
1130 of this node (config)
1131 @ivar mfree: free memory, as reported by hypervisor (runtime)
1132 @ivar dfree: free disk, as reported by the node (runtime)
1133 @ivar offline: the offline status (config)
1134 @type rpc_fail: boolean
@ivar rpc_fail: whether the RPC verify call failed (overall,
1136 not whether the individual keys were correct) (runtime)
1137 @type lvm_fail: boolean
1138 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1139 @type hyp_fail: boolean
1140 @ivar hyp_fail: whether the RPC call didn't return the instance list
1141 @type ghost: boolean
1142 @ivar ghost: whether this is a known node or not (config)
1145 def __init__(self, offline=False):
1153 self.offline = offline
1154 self.rpc_fail = False
1155 self.lvm_fail = False
1156 self.hyp_fail = False
1159 def ExpandNames(self):
1160 self.needed_locks = {
1161 locking.LEVEL_NODE: locking.ALL_SET,
1162 locking.LEVEL_INSTANCE: locking.ALL_SET,
1164 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1166 def _Error(self, ecode, item, msg, *args, **kwargs):
1167 """Format an error message.
1169 Based on the opcode's error_codes parameter, either format a
1170 parseable error code, or a simpler error string.
1172 This must be called only from Exec and functions called from Exec.
1175 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1177 # first complete the msg
1180 # then format the whole message
1181 if self.op.error_codes:
1182 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1188 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1189 # and finally report it via the feedback_fn
1190 self._feedback_fn(" - %s" % msg)
1192 def _ErrorIf(self, cond, *args, **kwargs):
1193 """Log an error message if the passed condition is True.
1196 cond = bool(cond) or self.op.debug_simulate_errors
1198 self._Error(*args, **kwargs)
1199 # do not mark the operation as failed for WARN cases only
1200 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1201 self.bad = self.bad or cond
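
# Roughly, with self.op.error_codes set a reported line looks like
#   - ERROR:ENODELVM:node:node1.example.com:unable to check volume groups
# while the default, human-readable format is closer to
#   - ERROR: node node1.example.com: unable to check volume groups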
1203 def _VerifyNode(self, ninfo, nresult):
1204 """Run multiple tests against a node.
1208 - compares ganeti version
1209 - checks vg existence and size > 20G
1210 - checks config file checksum
1211 - checks ssh to other nodes
1213 @type ninfo: L{objects.Node}
1214 @param ninfo: the node to check
1215 @param nresult: the results from the node
1217 @return: whether overall this call was successful (and we can expect
reasonable values in the response)
1222 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1224 # main result, nresult should be a non-empty dict
1225 test = not nresult or not isinstance(nresult, dict)
1226 _ErrorIf(test, self.ENODERPC, node,
1227 "unable to verify node: no data returned")
1231 # compares ganeti version
1232 local_version = constants.PROTOCOL_VERSION
1233 remote_version = nresult.get("version", None)
1234 test = not (remote_version and
1235 isinstance(remote_version, (list, tuple)) and
1236 len(remote_version) == 2)
1237 _ErrorIf(test, self.ENODERPC, node,
1238 "connection to node returned invalid data")
1242 test = local_version != remote_version[0]
1243 _ErrorIf(test, self.ENODEVERSION, node,
1244 "incompatible protocol versions: master %s,"
1245 " node %s", local_version, remote_version[0])
1249 # node seems compatible, we can actually try to look into its results
1251 # full package version
1252 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1253 self.ENODEVERSION, node,
1254 "software version mismatch: master %s, node %s",
1255 constants.RELEASE_VERSION, remote_version[1],
1256 code=self.ETYPE_WARNING)
1258 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1259 if isinstance(hyp_result, dict):
1260 for hv_name, hv_result in hyp_result.iteritems():
1261 test = hv_result is not None
1262 _ErrorIf(test, self.ENODEHV, node,
1263 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1266 test = nresult.get(constants.NV_NODESETUP,
1267 ["Missing NODESETUP results"])
1268 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1273 def _VerifyNodeTime(self, ninfo, nresult,
1274 nvinfo_starttime, nvinfo_endtime):
1275 """Check the node time.
1277 @type ninfo: L{objects.Node}
1278 @param ninfo: the node to check
1279 @param nresult: the remote results for the node
1280 @param nvinfo_starttime: the start time of the RPC call
1281 @param nvinfo_endtime: the end time of the RPC call
1285 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1287 ntime = nresult.get(constants.NV_TIME, None)
1289 ntime_merged = utils.MergeTime(ntime)
1290 except (ValueError, TypeError):
1291 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1294 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1295 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1296 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1297 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1301 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1302 "Node time diverges by at least %s from master node time",
1305 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1306 """Check the node time.
1308 @type ninfo: L{objects.Node}
1309 @param ninfo: the node to check
1310 @param nresult: the remote results for the node
1311 @param vg_name: the configured VG name
1318 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1320 # checks vg existence and size > 20G
1321 vglist = nresult.get(constants.NV_VGLIST, None)
1323 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1325 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1326 constants.MIN_VG_SIZE)
1327 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1330 pvlist = nresult.get(constants.NV_PVLIST, None)
1331 test = pvlist is None
1332 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1334 # check that ':' is not present in PV names, since it's a
1335 # special character for lvcreate (denotes the range of PEs to
1337 for _, pvname, owner_vg in pvlist:
1338 test = ":" in pvname
1339 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1340 " '%s' of VG '%s'", pvname, owner_vg)
1342 def _VerifyNodeNetwork(self, ninfo, nresult):
1343 """Check the node time.
1345 @type ninfo: L{objects.Node}
1346 @param ninfo: the node to check
1347 @param nresult: the remote results for the node
1351 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1353 test = constants.NV_NODELIST not in nresult
1354 _ErrorIf(test, self.ENODESSH, node,
1355 "node hasn't returned node ssh connectivity data")
1357 if nresult[constants.NV_NODELIST]:
1358 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1359 _ErrorIf(True, self.ENODESSH, node,
1360 "ssh communication with node '%s': %s", a_node, a_msg)
1362 test = constants.NV_NODENETTEST not in nresult
1363 _ErrorIf(test, self.ENODENET, node,
1364 "node hasn't returned node tcp connectivity data")
1366 if nresult[constants.NV_NODENETTEST]:
1367 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1369 _ErrorIf(True, self.ENODENET, node,
1370 "tcp communication with node '%s': %s",
1371 anode, nresult[constants.NV_NODENETTEST][anode])
1373 def _VerifyInstance(self, instance, instanceconfig, node_image):
1374 """Verify an instance.
1376 This function checks to see if the required block devices are
1377 available on the instance's node.
1380 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1381 node_current = instanceconfig.primary_node
1383 node_vol_should = {}
1384 instanceconfig.MapLVsByNode(node_vol_should)
1386 for node in node_vol_should:
1387 n_img = node_image[node]
1388 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1389 # ignore missing volumes on offline or broken nodes
1391 for volume in node_vol_should[node]:
1392 test = volume not in n_img.volumes
1393 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1394 "volume %s missing on node %s", volume, node)
1396 if instanceconfig.admin_up:
1397 pri_img = node_image[node_current]
1398 test = instance not in pri_img.instances and not pri_img.offline
1399 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1400 "instance not running on its primary node %s",
1403 for node, n_img in node_image.items():
1404 if (not node == node_current):
1405 test = instance in n_img.instances
1406 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1407 "instance should not run on node %s", node)
1409 def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1410 """Verify if there are any unknown volumes in the cluster.
1412 The .os, .swap and backup volumes are ignored. All other volumes are
1413 reported as unknown.
1416 for node, n_img in node_image.items():
1417 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1418 # skip non-healthy nodes
1420 for volume in n_img.volumes:
1421 test = (node not in node_vol_should or
1422 volume not in node_vol_should[node])
1423 self._ErrorIf(test, self.ENODEORPHANLV, node,
1424 "volume %s is unknown", volume)
1426 def _VerifyOrphanInstances(self, instancelist, node_image):
1427 """Verify the list of running instances.
1429 This checks what instances are running but unknown to the cluster.
1432 for node, n_img in node_image.items():
1433 for o_inst in n_img.instances:
1434 test = o_inst not in instancelist
1435 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1436 "instance %s on node %s should not exist", o_inst, node)
1438 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1439 """Verify N+1 Memory Resilience.
1441 Check that if one single node dies we can still start all the
1442 instances it was primary for.
1445 for node, n_img in node_image.items():
1446 # This code checks that every node which is now listed as
1447 # secondary has enough memory to host all instances it is
1448 # supposed to should a single other node in the cluster fail.
1449 # FIXME: not ready for failover to an arbitrary node
1450 # FIXME: does not support file-backed instances
1451 # WARNING: we currently take into account down instances as well
1452 # as up ones, considering that even if they're down someone
1453 # might want to start them even in the event of a node failure.
1454 for prinode, instances in n_img.sbp.items():
1456 for instance in instances:
1457 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1458 if bep[constants.BE_AUTO_BALANCE]:
1459 needed_mem += bep[constants.BE_MEMORY]
1460 test = n_img.mfree < needed_mem
1461 self._ErrorIf(test, self.ENODEN1, node,
1462 "not enough memory on to accommodate"
1463 " failovers should peer node %s fail", prinode)
1465 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1467 """Verifies and computes the node required file checksums.
1469 @type ninfo: L{objects.Node}
1470 @param ninfo: the node to check
1471 @param nresult: the remote results for the node
1472 @param file_list: required list of files
1473 @param local_cksum: dictionary of local files and their checksums
1474 @param master_files: list of files that only masters should have
1478 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1480 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1481 test = not isinstance(remote_cksum, dict)
1482 _ErrorIf(test, self.ENODEFILECHECK, node,
1483 "node hasn't returned file checksum data")
1487 for file_name in file_list:
1488 node_is_mc = ninfo.master_candidate
1489 must_have = (file_name not in master_files) or node_is_mc
1491 test1 = file_name not in remote_cksum
1493 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1495 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1496 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1497 "file '%s' missing", file_name)
1498 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1499 "file '%s' has wrong checksum", file_name)
1500 # not candidate and this is not a must-have file
1501 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1502 "file '%s' should not exist on non master"
1503 " candidates (and the file is outdated)", file_name)
1504 # all good, except non-master/non-must have combination
1505 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1506 "file '%s' should not exist"
1507 " on non master candidates", file_name)
1509 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1510 """Verifies and the node DRBD status.
1512 @type ninfo: L{objects.Node}
1513 @param ninfo: the node to check
1514 @param nresult: the remote results for the node
1515 @param instanceinfo: the dict of instances
1516 @param drbd_map: the DRBD map as returned by
1517 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1521 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
# compute the DRBD minors
node_drbd = {}
for minor, instance in drbd_map[node].items():
1526 test = instance not in instanceinfo
1527 _ErrorIf(test, self.ECLUSTERCFG, None,
1528 "ghost instance '%s' in temporary DRBD map", instance)
1529 # ghost instance should not be running, but otherwise we
1530 # don't give double warnings (both ghost instance and
1531 # unallocated minor in use)
1533 node_drbd[minor] = (instance, False)
1535 instance = instanceinfo[instance]
1536 node_drbd[minor] = (instance.name, instance.admin_up)
1538 # and now check them
1539 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1540 test = not isinstance(used_minors, (tuple, list))
1541 _ErrorIf(test, self.ENODEDRBD, node,
1542 "cannot parse drbd status file: %s", str(used_minors))
1544 # we cannot check drbd status
1547 for minor, (iname, must_exist) in node_drbd.items():
1548 test = minor not in used_minors and must_exist
1549 _ErrorIf(test, self.ENODEDRBD, node,
1550 "drbd minor %d of instance %s is not active", minor, iname)
1551 for minor in used_minors:
1552 test = minor not in node_drbd
1553 _ErrorIf(test, self.ENODEDRBD, node,
1554 "unallocated drbd minor %d is in use", minor)
1556 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1557 """Verifies and updates the node volume data.
1559 This function will update a L{NodeImage}'s internal structures
1560 with data from the remote call.
1562 @type ninfo: L{objects.Node}
1563 @param ninfo: the node to check
1564 @param nresult: the remote results for the node
1565 @param nimg: the node image object
1566 @param vg_name: the configured VG name
1570 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1572 nimg.lvm_fail = True
1573 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1576 elif isinstance(lvdata, basestring):
1577 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1578 utils.SafeEncode(lvdata))
1579 elif not isinstance(lvdata, dict):
1580 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1582 nimg.volumes = lvdata
1583 nimg.lvm_fail = False
1585 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1586 """Verifies and updates the node instance list.
1588 If the listing was successful, then updates this node's instance
1589 list. Otherwise, it marks the RPC call as failed for the instance
1592 @type ninfo: L{objects.Node}
1593 @param ninfo: the node to check
1594 @param nresult: the remote results for the node
1595 @param nimg: the node image object
1598 idata = nresult.get(constants.NV_INSTANCELIST, None)
1599 test = not isinstance(idata, list)
1600 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1601 " (instancelist): %s", utils.SafeEncode(str(idata)))
1603 nimg.hyp_fail = True
1605 nimg.instances = idata
1607 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1608 """Verifies and computes a node information map
1610 @type ninfo: L{objects.Node}
1611 @param ninfo: the node to check
1612 @param nresult: the remote results for the node
1613 @param nimg: the node image object
1614 @param vg_name: the configured VG name
1618 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1620 # try to read free memory (from the hypervisor)
1621 hv_info = nresult.get(constants.NV_HVINFO, None)
1622 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1623 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1626 nimg.mfree = int(hv_info["memory_free"])
1627 except (ValueError, TypeError):
1628 _ErrorIf(True, self.ENODERPC, node,
1629 "node returned invalid nodeinfo, check hypervisor")
1631 # FIXME: devise a free space model for file based instances as well
1632 if vg_name is not None:
1633 test = (constants.NV_VGLIST not in nresult or
1634 vg_name not in nresult[constants.NV_VGLIST])
1635 _ErrorIf(test, self.ENODELVM, node,
1636 "node didn't return data for the volume group '%s'"
1637 " - it is either missing or broken", vg_name)
1640 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1641 except (ValueError, TypeError):
1642 _ErrorIf(True, self.ENODERPC, node,
1643 "node returned invalid LVM info, check LVM status")
1645 def CheckPrereq(self):
1646 """Check prerequisites.
1648 Transform the list of checks we're going to skip into a set and check that
1649 all its members are valid.
1652 self.skip_set = frozenset(self.op.skip_checks)
1653 if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1654 raise errors.OpPrereqError("Invalid checks to be skipped specified",
1657 def BuildHooksEnv(self):
Cluster-Verify hooks are run only in the post phase, and their failure causes
the output to be logged in the verify output and the verification to fail.
1664 all_nodes = self.cfg.GetNodeList()
1666 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1668 for node in self.cfg.GetAllNodesInfo().values():
1669 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1671 return env, [], all_nodes
1673 def Exec(self, feedback_fn):
1674 """Verify integrity of cluster, performing various test on nodes.
1678 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1679 verbose = self.op.verbose
1680 self._feedback_fn = feedback_fn
1681 feedback_fn("* Verifying global settings")
1682 for msg in self.cfg.VerifyConfig():
1683 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1685 # Check the cluster certificates
1686 for cert_filename in constants.ALL_CERT_FILES:
1687 (errcode, msg) = _VerifyCertificate(cert_filename)
1688 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1690 vg_name = self.cfg.GetVGName()
1691 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1692 cluster = self.cfg.GetClusterInfo()
1693 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1694 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1695 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1696 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1697 for iname in instancelist)
1698 i_non_redundant = [] # Non redundant instances
1699 i_non_a_balanced = [] # Non auto-balanced instances
1700 n_offline = 0 # Count of offline nodes
1701 n_drained = 0 # Count of nodes being drained
1702 node_vol_should = {}
1704 # FIXME: verify OS list
1705 # do local checksums
1706 master_files = [constants.CLUSTER_CONF_FILE]
1708 file_names = ssconf.SimpleStore().GetFileList()
1709 file_names.extend(constants.ALL_CERT_FILES)
1710 file_names.extend(master_files)
1711 if cluster.modify_etc_hosts:
1712 file_names.append(constants.ETC_HOSTS)
1714 local_checksums = utils.FingerprintFiles(file_names)
1716 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1717 node_verify_param = {
1718 constants.NV_FILELIST: file_names,
1719 constants.NV_NODELIST: [node.name for node in nodeinfo
1720 if not node.offline],
1721 constants.NV_HYPERVISOR: hypervisors,
1722 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1723 node.secondary_ip) for node in nodeinfo
1724 if not node.offline],
1725 constants.NV_INSTANCELIST: hypervisors,
1726 constants.NV_VERSION: None,
1727 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1728 constants.NV_NODESETUP: None,
1729 constants.NV_TIME: None,
1732 if vg_name is not None:
1733 node_verify_param[constants.NV_VGLIST] = None
1734 node_verify_param[constants.NV_LVLIST] = vg_name
1735 node_verify_param[constants.NV_PVLIST] = [vg_name]
1736 node_verify_param[constants.NV_DRBDLIST] = None
1738 # Build our expected cluster state
1739 node_image = dict((node.name, self.NodeImage(offline=node.offline))
1740 for node in nodeinfo)
1742 for instance in instancelist:
1743 inst_config = instanceinfo[instance]
1745 for nname in inst_config.all_nodes:
1746 if nname not in node_image:
# ghost node (not in the configuration)
gnode = self.NodeImage()
gnode.ghost = True
node_image[nname] = gnode
1752 inst_config.MapLVsByNode(node_vol_should)
1754 pnode = inst_config.primary_node
1755 node_image[pnode].pinst.append(instance)
1757 for snode in inst_config.secondary_nodes:
1758 nimg = node_image[snode]
1759 nimg.sinst.append(instance)
1760 if pnode not in nimg.sbp:
1761 nimg.sbp[pnode] = []
1762 nimg.sbp[pnode].append(instance)
1764 # At this point, we have the in-memory data structures complete,
1765 # except for the runtime information, which we'll gather next
1767 # Due to the way our RPC system works, exact response times cannot be
1768 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1769 # time before and after executing the request, we can at least have a time
1771 nvinfo_starttime = time.time()
1772 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1773 self.cfg.GetClusterName())
1774 nvinfo_endtime = time.time()
1776 master_node = self.cfg.GetMasterNode()
1777 all_drbd_map = self.cfg.ComputeDRBDMap()
1779 feedback_fn("* Verifying node status")
1780 for node_i in nodeinfo:
1782 nimg = node_image[node]
1786 feedback_fn("* Skipping offline node %s" % (node,))
1790 if node == master_node:
1792 elif node_i.master_candidate:
1793 ntype = "master candidate"
1794 elif node_i.drained:
1800 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1802 msg = all_nvinfo[node].fail_msg
1803 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1805 nimg.rpc_fail = True
1808 nresult = all_nvinfo[node].payload
1810 nimg.call_ok = self._VerifyNode(node_i, nresult)
1811 self._VerifyNodeNetwork(node_i, nresult)
1812 self._VerifyNodeLVM(node_i, nresult, vg_name)
1813 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1815 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1816 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1818 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1819 self._UpdateNodeInstances(node_i, nresult, nimg)
1820 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1822 feedback_fn("* Verifying instance status")
1823 for instance in instancelist:
1825 feedback_fn("* Verifying instance %s" % instance)
1826 inst_config = instanceinfo[instance]
1827 self._VerifyInstance(instance, inst_config, node_image)
1828 inst_nodes_offline = []
1830 pnode = inst_config.primary_node
1831 pnode_img = node_image[pnode]
1832 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1833 self.ENODERPC, pnode, "instance %s, connection to"
1834 " primary node failed", instance)
1836 if pnode_img.offline:
1837 inst_nodes_offline.append(pnode)
1839 # If the instance is non-redundant we cannot survive losing its primary
1840 # node, so we are not N+1 compliant. On the other hand we have no disk
1841 # templates with more than one secondary so that situation is not well
1843 # FIXME: does not support file-backed instances
1844 if not inst_config.secondary_nodes:
1845 i_non_redundant.append(instance)
1846 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
1847 instance, "instance has multiple secondary nodes: %s",
1848 utils.CommaJoin(inst_config.secondary_nodes),
1849 code=self.ETYPE_WARNING)
1851 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1852 i_non_a_balanced.append(instance)
1854 for snode in inst_config.secondary_nodes:
1855 s_img = node_image[snode]
1856 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
1857 "instance %s, connection to secondary node failed", instance)
1860 inst_nodes_offline.append(snode)
1862 # warn that the instance lives on offline nodes
1863 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1864 "instance lives on offline node(s) %s",
1865 utils.CommaJoin(inst_nodes_offline))
1866 # ... or ghost nodes
1867 for node in inst_config.all_nodes:
1868 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
1869 "instance lives on ghost node %s", node)
1871 feedback_fn("* Verifying orphan volumes")
1872 self._VerifyOrphanVolumes(node_vol_should, node_image)
1874 feedback_fn("* Verifying orphan instances")
1875 self._VerifyOrphanInstances(instancelist, node_image)
1877 if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1878 feedback_fn("* Verifying N+1 Memory redundancy")
1879 self._VerifyNPlusOneMemory(node_image, instanceinfo)
1881 feedback_fn("* Other Notes")
1883 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
1884 % len(i_non_redundant))
1886 if i_non_a_balanced:
1887 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
1888 % len(i_non_a_balanced))
1891 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
1894 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
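# Illustrative sketch of the per-node bookkeeping built earlier in Exec (node
# and instance names are hypothetical): each NodeImage tracks the instances
# for which the node is primary (pinst), those for which it is secondary
# (sinst), and sbp, which groups the latter by their primary node, e.g.
#
#   node_image["node2"].pinst -> ["inst1"]
#   node_image["node3"].sinst -> ["inst1"]
#   node_image["node3"].sbp   -> {"node2": ["inst1"]}
#
# The N+1 memory verification above relies on this grouping to ask whether a
# node could absorb the instances whose primaries it backs.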
1898 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1899 """Analyze the post-hooks' result
1901 This method analyses the hook result, handles it, and sends some
1902 nicely-formatted feedback back to the user.
1904 @param phase: one of L{constants.HOOKS_PHASE_POST} or
1905 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1906 @param hooks_results: the results of the multi-node hooks rpc call
1907 @param feedback_fn: function used to send feedback back to the caller
1908 @param lu_result: previous Exec result
1909 @return: the new Exec result, based on the previous result
1913 # We only really run POST phase hooks, and are only interested in
1915 if phase == constants.HOOKS_PHASE_POST:
1916 # Used to change hooks' output to proper indentation
1917 indent_re = re.compile('^', re.M)
1918 feedback_fn("* Hooks Results")
1919 assert hooks_results, "invalid result from hooks"
1921 for node_name in hooks_results:
1922 res = hooks_results[node_name]
1924 test = msg and not res.offline
1925 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1926 "Communication failure in hooks execution: %s", msg)
1927 if res.offline or msg:
1928 # No need to investigate payload if node is offline or gave an error.
1929 # override manually lu_result here as _ErrorIf only
1930 # overrides self.bad
1933 for script, hkr, output in res.payload:
1934 test = hkr == constants.HKR_FAIL
1935 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1936 "Script %s failed, output:", script)
1938 output = indent_re.sub(' ', output)
1939 feedback_fn("%s" % output)
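# A sketch of the hooks result structure consumed above (node name, script
# names and output are hypothetical): each value is an RPC result whose
# payload lists one (script, status, output) tuple per hook script, e.g.
#
#   hooks_results["node1.example.com"].payload ->
#     [("10-check-something", constants.HKR_SUCCESS, ""),
#      ("20-check-other", constants.HKR_FAIL, "some error text")]
#
# Entries with HKR_FAIL are reported through _ErrorIf and their output is
# re-indented before being passed to feedback_fn.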
1945 class LUVerifyDisks(NoHooksLU):
1946 """Verifies the cluster disks status.
1952 def ExpandNames(self):
1953 self.needed_locks = {
1954 locking.LEVEL_NODE: locking.ALL_SET,
1955 locking.LEVEL_INSTANCE: locking.ALL_SET,
1957 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1959 def CheckPrereq(self):
1960 """Check prerequisites.
1962 This has no prerequisites.
1967 def Exec(self, feedback_fn):
1968 """Verify integrity of cluster disks.
1970 @rtype: tuple of three items
1971 @return: a tuple of (dict of node-to-node_error, list of instances
1972 which need activate-disks, dict of instance: (node, volume) for
1976 result = res_nodes, res_instances, res_missing = {}, [], {}
1978 vg_name = self.cfg.GetVGName()
1979 nodes = utils.NiceSort(self.cfg.GetNodeList())
1980 instances = [self.cfg.GetInstanceInfo(name)
1981 for name in self.cfg.GetInstanceList()]
1984 for inst in instances:
1986 if (not inst.admin_up or
1987 inst.disk_template not in constants.DTS_NET_MIRROR):
1989 inst.MapLVsByNode(inst_lvs)
1990 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1991 for node, vol_list in inst_lvs.iteritems():
1992 for vol in vol_list:
1993 nv_dict[(node, vol)] = inst
1998 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2002 node_res = node_lvs[node]
2003 if node_res.offline:
2005 msg = node_res.fail_msg
2007 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2008 res_nodes[node] = msg
2011 lvs = node_res.payload
2012 for lv_name, (_, _, lv_online) in lvs.items():
2013 inst = nv_dict.pop((node, lv_name), None)
2014 if (not lv_online and inst is not None
2015 and inst.name not in res_instances):
2016 res_instances.append(inst.name)
2018 # any leftover items in nv_dict are missing LVs, let's arrange the data better
2020 for key, inst in nv_dict.iteritems():
2021 if inst.name not in res_missing:
2022 res_missing[inst.name] = []
2023 res_missing[inst.name].append(key)
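# For reference, a sketch of the tuple assembled above and returned by this
# LU (node, instance and volume names are hypothetical):
#
#   ({"node3": "Error enumerating LVs: ..."},     # per-node errors
#    ["inst1"],                                   # instances needing
#                                                 # activate-disks
#    {"inst2": [("node2", "xenvg/disk0_data")]})  # missing (node, LV) pairs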
2028 class LURepairDiskSizes(NoHooksLU):
2029 """Verifies the cluster disks sizes.
2032 _OP_REQP = ["instances"]
2035 def ExpandNames(self):
2036 if not isinstance(self.op.instances, list):
2037 raise errors.OpPrereqError("Invalid argument type 'instances'",
2040 if self.op.instances:
2041 self.wanted_names = []
2042 for name in self.op.instances:
2043 full_name = _ExpandInstanceName(self.cfg, name)
2044 self.wanted_names.append(full_name)
2045 self.needed_locks = {
2046 locking.LEVEL_NODE: [],
2047 locking.LEVEL_INSTANCE: self.wanted_names,
2049 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2051 self.wanted_names = None
2052 self.needed_locks = {
2053 locking.LEVEL_NODE: locking.ALL_SET,
2054 locking.LEVEL_INSTANCE: locking.ALL_SET,
2056 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2058 def DeclareLocks(self, level):
2059 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2060 self._LockInstancesNodes(primary_only=True)
2062 def CheckPrereq(self):
2063 """Check prerequisites.
2065 This only checks the optional instance list against the existing names.
2068 if self.wanted_names is None:
2069 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2071 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2072 in self.wanted_names]
2074 def _EnsureChildSizes(self, disk):
2075 """Ensure children of the disk have the needed disk size.
2077 This is valid mainly for DRBD8 and fixes an issue where the
2078 children have a smaller disk size than the parent.
2080 @param disk: an L{ganeti.objects.Disk} object
2083 if disk.dev_type == constants.LD_DRBD8:
2084 assert disk.children, "Empty children for DRBD8?"
2085 fchild = disk.children[0]
2086 mismatch = fchild.size < disk.size
2088 self.LogInfo("Child disk has size %d, parent %d, fixing",
2089 fchild.size, disk.size)
2090 fchild.size = disk.size
2092 # and we recurse on this child only, not on the metadev
2093 return self._EnsureChildSizes(fchild) or mismatch
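# Illustrative behaviour of the helper above (sizes in MiB, hypothetical):
# for a DRBD8 disk of size 10240 whose data child reports 10200, the child
# is grown to 10240 and True is returned so the caller knows the
# configuration needs updating; if the sizes already match, nothing is
# touched and a false value is returned.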
2097 def Exec(self, feedback_fn):
2098 """Verify the size of cluster disks.
2101 # TODO: check child disks too
2102 # TODO: check differences in size between primary/secondary nodes
2104 for instance in self.wanted_instances:
2105 pnode = instance.primary_node
2106 if pnode not in per_node_disks:
2107 per_node_disks[pnode] = []
2108 for idx, disk in enumerate(instance.disks):
2109 per_node_disks[pnode].append((instance, idx, disk))
2112 for node, dskl in per_node_disks.items():
2113 newl = [v[2].Copy() for v in dskl]
2115 self.cfg.SetDiskID(dsk, node)
2116 result = self.rpc.call_blockdev_getsizes(node, newl)
2118 self.LogWarning("Failure in blockdev_getsizes call to node"
2119 " %s, ignoring", node)
2121 if len(result.data) != len(dskl):
2122 self.LogWarning("Invalid result from node %s, ignoring node results",
2125 for ((instance, idx, disk), size) in zip(dskl, result.data):
2127 self.LogWarning("Disk %d of instance %s did not return size"
2128 " information, ignoring", idx, instance.name)
2130 if not isinstance(size, (int, long)):
2131 self.LogWarning("Disk %d of instance %s did not return valid"
2132 " size information, ignoring", idx, instance.name)
2135 if size != disk.size:
2136 self.LogInfo("Disk %d of instance %s has mismatched size,"
2137 " correcting: recorded %d, actual %d", idx,
2138 instance.name, disk.size, size)
2140 self.cfg.Update(instance, feedback_fn)
2141 changed.append((instance.name, idx, size))
2142 if self._EnsureChildSizes(disk):
2143 self.cfg.Update(instance, feedback_fn)
2144 changed.append((instance.name, idx, disk.size))
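# The "changed" list built above is what this LU hands back to the caller;
# as a sketch (hypothetical instance name), an entry such as
# ("inst1", 0, 10240) means disk 0 of inst1 was recorded with a new size of
# 10240 MiB in the configuration.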
2148 class LURenameCluster(LogicalUnit):
2149 """Rename the cluster.
2152 HPATH = "cluster-rename"
2153 HTYPE = constants.HTYPE_CLUSTER
2156 def BuildHooksEnv(self):
2161 "OP_TARGET": self.cfg.GetClusterName(),
2162 "NEW_NAME": self.op.name,
2164 mn = self.cfg.GetMasterNode()
2165 all_nodes = self.cfg.GetNodeList()
2166 return env, [mn], all_nodes
2168 def CheckPrereq(self):
2169 """Verify that the passed name is a valid one.
2172 hostname = utils.GetHostInfo(self.op.name)
2174 new_name = hostname.name
2175 self.ip = new_ip = hostname.ip
2176 old_name = self.cfg.GetClusterName()
2177 old_ip = self.cfg.GetMasterIP()
2178 if new_name == old_name and new_ip == old_ip:
2179 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2180 " cluster has changed",
2182 if new_ip != old_ip:
2183 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2184 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2185 " reachable on the network. Aborting." %
2186 new_ip, errors.ECODE_NOTUNIQUE)
2188 self.op.name = new_name
2190 def Exec(self, feedback_fn):
2191 """Rename the cluster.
2194 clustername = self.op.name
2197 # shutdown the master IP
2198 master = self.cfg.GetMasterNode()
2199 result = self.rpc.call_node_stop_master(master, False)
2200 result.Raise("Could not disable the master role")
2203 cluster = self.cfg.GetClusterInfo()
2204 cluster.cluster_name = clustername
2205 cluster.master_ip = ip
2206 self.cfg.Update(cluster, feedback_fn)
2208 # update the known hosts file
2209 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2210 node_list = self.cfg.GetNodeList()
2212 node_list.remove(master)
2215 result = self.rpc.call_upload_file(node_list,
2216 constants.SSH_KNOWN_HOSTS_FILE)
2217 for to_node, to_result in result.iteritems():
2218 msg = to_result.fail_msg
2220 msg = ("Copy of file %s to node %s failed: %s" %
2221 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2222 self.proc.LogWarning(msg)
2225 result = self.rpc.call_node_start_master(master, False, False)
2226 msg = result.fail_msg
2228 self.LogWarning("Could not re-enable the master role on"
2229 " the master, please restart manually: %s", msg)
2232 def _RecursiveCheckIfLVMBased(disk):
2233 """Check if the given disk or its children are lvm-based.
2235 @type disk: L{objects.Disk}
2236 @param disk: the disk to check
2238 @return: boolean indicating whether an LD_LV dev_type was found or not
2242 for chdisk in disk.children:
2243 if _RecursiveCheckIfLVMBased(chdisk):
2245 return disk.dev_type == constants.LD_LV
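# A minimal usage sketch for the helper above (the Disk constructor keyword
# arguments here are assumptions for illustration, not taken from real
# configuration data):
#
#   data = objects.Disk(dev_type=constants.LD_LV, size=10240, children=[])
#   meta = objects.Disk(dev_type=constants.LD_LV, size=128, children=[])
#   drbd = objects.Disk(dev_type=constants.LD_DRBD8, size=10240,
#                       children=[data, meta])
#   _RecursiveCheckIfLVMBased(drbd)   # True, found via the LV children
#   _RecursiveCheckIfLVMBased(data)   # True, the disk itself is an LV
#
# LUSetClusterParams below uses this to refuse disabling LVM storage while
# lvm-backed instances still exist.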
2248 class LUSetClusterParams(LogicalUnit):
2249 """Change the parameters of the cluster.
2252 HPATH = "cluster-modify"
2253 HTYPE = constants.HTYPE_CLUSTER
2257 def CheckArguments(self):
2261 for attr in ["candidate_pool_size",
2262 "uid_pool", "add_uids", "remove_uids"]:
2263 if not hasattr(self.op, attr):
2264 setattr(self.op, attr, None)
2266 if self.op.candidate_pool_size is not None:
2268 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2269 except (ValueError, TypeError), err:
2270 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2271 str(err), errors.ECODE_INVAL)
2272 if self.op.candidate_pool_size < 1:
2273 raise errors.OpPrereqError("At least one master candidate needed",
2276 _CheckBooleanOpField(self.op, "maintain_node_health")
2278 if self.op.uid_pool:
2279 uidpool.CheckUidPool(self.op.uid_pool)
2281 if self.op.add_uids:
2282 uidpool.CheckUidPool(self.op.add_uids)
2284 if self.op.remove_uids:
2285 uidpool.CheckUidPool(self.op.remove_uids)
2287 def ExpandNames(self):
2288 # FIXME: in the future maybe other cluster params won't require checking on
2289 # all nodes to be modified.
2290 self.needed_locks = {
2291 locking.LEVEL_NODE: locking.ALL_SET,
2293 self.share_locks[locking.LEVEL_NODE] = 1
2295 def BuildHooksEnv(self):
2300 "OP_TARGET": self.cfg.GetClusterName(),
2301 "NEW_VG_NAME": self.op.vg_name,
2303 mn = self.cfg.GetMasterNode()
2304 return env, [mn], [mn]
2306 def CheckPrereq(self):
2307 """Check prerequisites.
2309 This checks whether the given parameters are consistent with each other and
2310 whether the given volume group is valid.
2313 if self.op.vg_name is not None and not self.op.vg_name:
2314 instances = self.cfg.GetAllInstancesInfo().values()
2315 for inst in instances:
2316 for disk in inst.disks:
2317 if _RecursiveCheckIfLVMBased(disk):
2318 raise errors.OpPrereqError("Cannot disable lvm storage while"
2319 " lvm-based instances exist",
2322 node_list = self.acquired_locks[locking.LEVEL_NODE]
2324 # if vg_name is not None, check the given volume group on all nodes
2326 vglist = self.rpc.call_vg_list(node_list)
2327 for node in node_list:
2328 msg = vglist[node].fail_msg
2330 # ignoring down node
2331 self.LogWarning("Error while gathering data on node %s"
2332 " (ignoring node): %s", node, msg)
2334 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2336 constants.MIN_VG_SIZE)
2338 raise errors.OpPrereqError("Error on node '%s': %s" %
2339 (node, vgstatus), errors.ECODE_ENVIRON)
2341 self.cluster = cluster = self.cfg.GetClusterInfo()
2342 # validate params changes
2343 if self.op.beparams:
2344 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2345 self.new_beparams = objects.FillDict(
2346 cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2348 if self.op.nicparams:
2349 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2350 self.new_nicparams = objects.FillDict(
2351 cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2352 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2355 # check all instances for consistency
2356 for instance in self.cfg.GetAllInstancesInfo().values():
2357 for nic_idx, nic in enumerate(instance.nics):
2358 params_copy = copy.deepcopy(nic.nicparams)
2359 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2361 # check parameter syntax
2363 objects.NIC.CheckParameterSyntax(params_filled)
2364 except errors.ConfigurationError, err:
2365 nic_errors.append("Instance %s, nic/%d: %s" %
2366 (instance.name, nic_idx, err))
2368 # if we're moving instances to routed, check that they have an ip
2369 target_mode = params_filled[constants.NIC_MODE]
2370 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2371 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2372 (instance.name, nic_idx))
2374 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2375 "\n".join(nic_errors))
2377 # hypervisor list/parameters
2378 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2379 if self.op.hvparams:
2380 if not isinstance(self.op.hvparams, dict):
2381 raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2383 for hv_name, hv_dict in self.op.hvparams.items():
2384 if hv_name not in self.new_hvparams:
2385 self.new_hvparams[hv_name] = hv_dict
2387 self.new_hvparams[hv_name].update(hv_dict)
2389 # os hypervisor parameters
2390 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2392 if not isinstance(self.op.os_hvp, dict):
2393 raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2395 for os_name, hvs in self.op.os_hvp.items():
2396 if not isinstance(hvs, dict):
2397 raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2398 " input"), errors.ECODE_INVAL)
2399 if os_name not in self.new_os_hvp:
2400 self.new_os_hvp[os_name] = hvs
2402 for hv_name, hv_dict in hvs.items():
2403 if hv_name not in self.new_os_hvp[os_name]:
2404 self.new_os_hvp[os_name][hv_name] = hv_dict
2406 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2408 # changes to the hypervisor list
2409 if self.op.enabled_hypervisors is not None:
2410 self.hv_list = self.op.enabled_hypervisors
2411 if not self.hv_list:
2412 raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2413 " least one member",
2415 invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2417 raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2419 utils.CommaJoin(invalid_hvs),
2421 for hv in self.hv_list:
2422 # if the hypervisor doesn't already exist in the cluster
2423 # hvparams, we initialize it to empty, and then (in both
2424 # cases) we make sure to fill the defaults, as we might not
2425 # have a complete defaults list if the hypervisor wasn't enabled before
2427 if hv not in new_hvp:
2429 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2430 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2432 self.hv_list = cluster.enabled_hypervisors
2434 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2435 # either the enabled list has changed, or the parameters have, validate
2436 for hv_name, hv_params in self.new_hvparams.items():
2437 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2438 (self.op.enabled_hypervisors and
2439 hv_name in self.op.enabled_hypervisors)):
2440 # either this is a new hypervisor, or its parameters have changed
2441 hv_class = hypervisor.GetHypervisor(hv_name)
2442 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2443 hv_class.CheckParameterSyntax(hv_params)
2444 _CheckHVParams(self, node_list, hv_name, hv_params)
2447 # no need to check any newly-enabled hypervisors, since the
2448 # defaults have already been checked in the above code-block
2449 for os_name, os_hvp in self.new_os_hvp.items():
2450 for hv_name, hv_params in os_hvp.items():
2451 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2452 # we need to fill in the new os_hvp on top of the actual hvparams
2453 cluster_defaults = self.new_hvparams.get(hv_name, {})
2454 new_osp = objects.FillDict(cluster_defaults, hv_params)
2455 hv_class = hypervisor.GetHypervisor(hv_name)
2456 hv_class.CheckParameterSyntax(new_osp)
2457 _CheckHVParams(self, node_list, hv_name, new_osp)
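# A sketch of the parameter layering used above, assuming objects.FillDict
# simply overlays the second dict on a copy of the first (keys and values
# below are hypothetical):
#
#   objects.FillDict({"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda"},
#                    {"root_path": "/dev/vda"})
#   -> {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/vda"}
#
# The same pattern builds new_hvparams from the cluster defaults and
# new_os_hvp from the per-OS overrides.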
2460 def Exec(self, feedback_fn):
2461 """Change the parameters of the cluster.
2464 if self.op.vg_name is not None:
2465 new_volume = self.op.vg_name
2468 if new_volume != self.cfg.GetVGName():
2469 self.cfg.SetVGName(new_volume)
2471 feedback_fn("Cluster LVM configuration already in desired"
2472 " state, not changing")
2473 if self.op.hvparams:
2474 self.cluster.hvparams = self.new_hvparams
2476 self.cluster.os_hvp = self.new_os_hvp
2477 if self.op.enabled_hypervisors is not None:
2478 self.cluster.hvparams = self.new_hvparams
2479 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2480 if self.op.beparams:
2481 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2482 if self.op.nicparams:
2483 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2485 if self.op.candidate_pool_size is not None:
2486 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2487 # we need to update the pool size here, otherwise the save will fail
2488 _AdjustCandidatePool(self, [])
2490 if self.op.maintain_node_health is not None:
2491 self.cluster.maintain_node_health = self.op.maintain_node_health
2493 if self.op.add_uids is not None:
2494 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2496 if self.op.remove_uids is not None:
2497 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2499 if self.op.uid_pool is not None:
2500 self.cluster.uid_pool = self.op.uid_pool
2502 self.cfg.Update(self.cluster, feedback_fn)
2505 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2506 """Distribute additional files which are part of the cluster configuration.
2508 ConfigWriter takes care of distributing the config and ssconf files, but
2509 there are more files which should be distributed to all nodes. This function
2510 makes sure those are copied.
2512 @param lu: calling logical unit
2513 @param additional_nodes: list of nodes not in the config to distribute to
2516 # 1. Gather target nodes
2517 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2518 dist_nodes = lu.cfg.GetOnlineNodeList()
2519 if additional_nodes is not None:
2520 dist_nodes.extend(additional_nodes)
2521 if myself.name in dist_nodes:
2522 dist_nodes.remove(myself.name)
2524 # 2. Gather files to distribute
2525 dist_files = set([constants.ETC_HOSTS,
2526 constants.SSH_KNOWN_HOSTS_FILE,
2527 constants.RAPI_CERT_FILE,
2528 constants.RAPI_USERS_FILE,
2529 constants.CONFD_HMAC_KEY,
2532 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2533 for hv_name in enabled_hypervisors:
2534 hv_class = hypervisor.GetHypervisor(hv_name)
2535 dist_files.update(hv_class.GetAncillaryFiles())
2537 # 3. Perform the files upload
2538 for fname in dist_files:
2539 if os.path.exists(fname):
2540 result = lu.rpc.call_upload_file(dist_nodes, fname)
2541 for to_node, to_result in result.items():
2542 msg = to_result.fail_msg
2544 msg = ("Copy of file %s to node %s failed: %s" %
2545 (fname, to_node, msg))
2546 lu.proc.LogWarning(msg)
2549 class LURedistributeConfig(NoHooksLU):
2550 """Force the redistribution of cluster configuration.
2552 This is a very simple LU.
2558 def ExpandNames(self):
2559 self.needed_locks = {
2560 locking.LEVEL_NODE: locking.ALL_SET,
2562 self.share_locks[locking.LEVEL_NODE] = 1
2564 def CheckPrereq(self):
2565 """Check prerequisites.
2569 def Exec(self, feedback_fn):
2570 """Redistribute the configuration.
2573 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2574 _RedistributeAncillaryFiles(self)
2577 def _WaitForSync(lu, instance, oneshot=False):
2578 """Sleep and poll for an instance's disk to sync.
2581 if not instance.disks:
2585 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2587 node = instance.primary_node
2589 for dev in instance.disks:
2590 lu.cfg.SetDiskID(dev, node)
2592 # TODO: Convert to utils.Retry
2595 degr_retries = 10 # in seconds, as we sleep 1 second each time
2599 cumul_degraded = False
2600 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2601 msg = rstats.fail_msg
2603 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2606 raise errors.RemoteError("Can't contact node %s for mirror data,"
2607 " aborting." % node)
2610 rstats = rstats.payload
2612 for i, mstat in enumerate(rstats):
2614 lu.LogWarning("Can't compute data for node %s/%s",
2615 node, instance.disks[i].iv_name)
2618 cumul_degraded = (cumul_degraded or
2619 (mstat.is_degraded and mstat.sync_percent is None))
2620 if mstat.sync_percent is not None:
2622 if mstat.estimated_time is not None:
2623 rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2624 max_time = mstat.estimated_time
2626 rem_time = "no time estimate"
2627 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2628 (instance.disks[i].iv_name, mstat.sync_percent,
2631 # if we're done but degraded, let's do a few small retries, to
2632 # make sure we see a stable and not transient situation; therefore
2633 # we force a restart of the loop
2634 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2635 logging.info("Degraded disks found, %d retries left", degr_retries)
2643 time.sleep(min(60, max_time))
2646 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2647 return not cumul_degraded
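# For reference, a sketch of the progress line emitted above (values are
# hypothetical): with sync_percent 87.3 and estimated_time 420 the message
# would read
#
#   - device disk/0: 87.30% done, 420 estimated seconds remaining
#
# while a missing estimated_time falls back to "no time estimate".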
2650 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2651 """Check that mirrors are not degraded.
2653 The ldisk parameter, if True, will change the test from the
2654 is_degraded attribute (which represents overall non-ok status for
2655 the device(s)) to the ldisk (representing the local storage status).
2658 lu.cfg.SetDiskID(dev, node)
2662 if on_primary or dev.AssembleOnSecondary():
2663 rstats = lu.rpc.call_blockdev_find(node, dev)
2664 msg = rstats.fail_msg
2666 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2668 elif not rstats.payload:
2669 lu.LogWarning("Can't find disk on node %s", node)
2673 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2675 result = result and not rstats.payload.is_degraded
2678 for child in dev.children:
2679 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
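# A usage sketch for the consistency check above (the node name is
# hypothetical): a disk-replacement style operation would typically call
#
#   _CheckDiskConsistency(self, dev, "node2.example.com", on_primary=False,
#                         ldisk=True)
#
# to require that the local storage on the secondary is LDS_OKAY, whereas
# ldisk=False only requires the device as a whole not to be degraded.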
2684 class LUDiagnoseOS(NoHooksLU):
2685 """Logical unit for OS diagnose/query.
2688 _OP_REQP = ["output_fields", "names"]
2690 _FIELDS_STATIC = utils.FieldSet()
2691 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2692 # Fields that need calculation of global os validity
2693 _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2695 def ExpandNames(self):
2697 raise errors.OpPrereqError("Selective OS query not supported",
2700 _CheckOutputFields(static=self._FIELDS_STATIC,
2701 dynamic=self._FIELDS_DYNAMIC,
2702 selected=self.op.output_fields)
2704 # Lock all nodes, in shared mode
2705 # Temporary removal of locks, should be reverted later
2706 # TODO: reintroduce locks when they are lighter-weight
2707 self.needed_locks = {}
2708 #self.share_locks[locking.LEVEL_NODE] = 1
2709 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2711 def CheckPrereq(self):
2712 """Check prerequisites.
2717 def _DiagnoseByOS(rlist):
2718 """Remaps a per-node return list into an a per-os per-node dictionary
2720 @param rlist: a map with node names as keys and OS objects as values
2723 @return: a dictionary with osnames as keys and as value another map, with
2724 nodes as keys and tuples of (path, status, diagnose) as values, eg::
2726 {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2727 (/srv/..., False, "invalid api")],
2728 "node2": [(/srv/..., True, "")]}
2733 # we build here the list of nodes that didn't fail the RPC (at RPC
2734 # level), so that nodes with a non-responding node daemon don't
2735 # make all OSes invalid
2736 good_nodes = [node_name for node_name in rlist
2737 if not rlist[node_name].fail_msg]
2738 for node_name, nr in rlist.items():
2739 if nr.fail_msg or not nr.payload:
2741 for name, path, status, diagnose, variants in nr.payload:
2742 if name not in all_os:
2743 # build a list of nodes for this os containing empty lists
2744 # for each node in node_list
2746 for nname in good_nodes:
2747 all_os[name][nname] = []
2748 all_os[name][node_name].append((path, status, diagnose, variants))
2751 def Exec(self, feedback_fn):
2752 """Compute the list of OSes.
2755 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2756 node_data = self.rpc.call_os_diagnose(valid_nodes)
2757 pol = self._DiagnoseByOS(node_data)
2759 calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2760 calc_variants = "variants" in self.op.output_fields
2762 for os_name, os_data in pol.items():
2767 for osl in os_data.values():
2768 valid = valid and osl and osl[0][1]
2773 node_variants = osl[0][3]
2774 if variants is None:
2775 variants = node_variants
2777 variants = [v for v in variants if v in node_variants]
2779 for field in self.op.output_fields:
2782 elif field == "valid":
2784 elif field == "node_status":
2785 # this is just a copy of the dict
2787 for node_name, nos_list in os_data.items():
2788 val[node_name] = nos_list
2789 elif field == "variants":
2792 raise errors.ParameterError(field)
2799 class LURemoveNode(LogicalUnit):
2800 """Logical unit for removing a node.
2803 HPATH = "node-remove"
2804 HTYPE = constants.HTYPE_NODE
2805 _OP_REQP = ["node_name"]
2807 def BuildHooksEnv(self):
2810 This doesn't run on the target node in the pre phase as a failed
2811 node would then be impossible to remove.
2815 "OP_TARGET": self.op.node_name,
2816 "NODE_NAME": self.op.node_name,
2818 all_nodes = self.cfg.GetNodeList()
2820 all_nodes.remove(self.op.node_name)
2822 logging.warning("Node %s which is about to be removed not found"
2823 " in the all nodes list", self.op.node_name)
2824 return env, all_nodes, all_nodes
2826 def CheckPrereq(self):
2827 """Check prerequisites.
2830 - the node exists in the configuration
2831 - it does not have primary or secondary instances
2832 - it's not the master
2834 Any errors are signaled by raising errors.OpPrereqError.
2837 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2838 node = self.cfg.GetNodeInfo(self.op.node_name)
2839 assert node is not None
2841 instance_list = self.cfg.GetInstanceList()
2843 masternode = self.cfg.GetMasterNode()
2844 if node.name == masternode:
2845 raise errors.OpPrereqError("Node is the master node,"
2846 " you need to fail over first.",
2849 for instance_name in instance_list:
2850 instance = self.cfg.GetInstanceInfo(instance_name)
2851 if node.name in instance.all_nodes:
2852 raise errors.OpPrereqError("Instance %s is still running on the node,"
2853 " please remove it first." % instance_name,
2855 self.op.node_name = node.name
2858 def Exec(self, feedback_fn):
2859 """Removes the node from the cluster.
2863 logging.info("Stopping the node daemon and removing configs from node %s",
2866 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2868 # Promote nodes to master candidate as needed
2869 _AdjustCandidatePool(self, exceptions=[node.name])
2870 self.context.RemoveNode(node.name)
2872 # Run post hooks on the node before it's removed
2873 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2875 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2877 # pylint: disable-msg=W0702
2878 self.LogWarning("Errors occurred running hooks on %s" % node.name)
2880 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2881 msg = result.fail_msg
2883 self.LogWarning("Errors encountered on the remote node while leaving"
2884 " the cluster: %s", msg)
2887 class LUQueryNodes(NoHooksLU):
2888 """Logical unit for querying nodes.
2891 # pylint: disable-msg=W0142
2892 _OP_REQP = ["output_fields", "names", "use_locking"]
2895 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2896 "master_candidate", "offline", "drained"]
2898 _FIELDS_DYNAMIC = utils.FieldSet(
2900 "mtotal", "mnode", "mfree",
2902 "ctotal", "cnodes", "csockets",
2905 _FIELDS_STATIC = utils.FieldSet(*[
2906 "pinst_cnt", "sinst_cnt",
2907 "pinst_list", "sinst_list",
2908 "pip", "sip", "tags",
2910 "role"] + _SIMPLE_FIELDS
2913 def ExpandNames(self):
2914 _CheckOutputFields(static=self._FIELDS_STATIC,
2915 dynamic=self._FIELDS_DYNAMIC,
2916 selected=self.op.output_fields)
2918 self.needed_locks = {}
2919 self.share_locks[locking.LEVEL_NODE] = 1
2922 self.wanted = _GetWantedNodes(self, self.op.names)
2924 self.wanted = locking.ALL_SET
2926 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2927 self.do_locking = self.do_node_query and self.op.use_locking
2929 # if we don't request only static fields, we need to lock the nodes
2930 self.needed_locks[locking.LEVEL_NODE] = self.wanted
2932 def CheckPrereq(self):
2933 """Check prerequisites.
2936 # The validation of the node list is done in _GetWantedNodes if it is
2937 # non-empty; if it is empty, there's no validation to do
2940 def Exec(self, feedback_fn):
2941 """Computes the list of nodes and their attributes.
2944 all_info = self.cfg.GetAllNodesInfo()
2946 nodenames = self.acquired_locks[locking.LEVEL_NODE]
2947 elif self.wanted != locking.ALL_SET:
2948 nodenames = self.wanted
2949 missing = set(nodenames).difference(all_info.keys())
2951 raise errors.OpExecError(
2952 "Some nodes were removed before retrieving their data: %s" % missing)
2954 nodenames = all_info.keys()
2956 nodenames = utils.NiceSort(nodenames)
2957 nodelist = [all_info[name] for name in nodenames]
2959 # begin data gathering
2961 if self.do_node_query:
2963 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2964 self.cfg.GetHypervisorType())
2965 for name in nodenames:
2966 nodeinfo = node_data[name]
2967 if not nodeinfo.fail_msg and nodeinfo.payload:
2968 nodeinfo = nodeinfo.payload
2969 fn = utils.TryConvert
2971 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2972 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2973 "mfree": fn(int, nodeinfo.get('memory_free', None)),
2974 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2975 "dfree": fn(int, nodeinfo.get('vg_free', None)),
2976 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2977 "bootid": nodeinfo.get('bootid', None),
2978 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2979 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2982 live_data[name] = {}
2984 live_data = dict.fromkeys(nodenames, {})
2986 node_to_primary = dict([(name, set()) for name in nodenames])
2987 node_to_secondary = dict([(name, set()) for name in nodenames])
2989 inst_fields = frozenset(("pinst_cnt", "pinst_list",
2990 "sinst_cnt", "sinst_list"))
2991 if inst_fields & frozenset(self.op.output_fields):
2992 inst_data = self.cfg.GetAllInstancesInfo()
2994 for inst in inst_data.values():
2995 if inst.primary_node in node_to_primary:
2996 node_to_primary[inst.primary_node].add(inst.name)
2997 for secnode in inst.secondary_nodes:
2998 if secnode in node_to_secondary:
2999 node_to_secondary[secnode].add(inst.name)
3001 master_node = self.cfg.GetMasterNode()
3003 # end data gathering
3006 for node in nodelist:
3008 for field in self.op.output_fields:
3009 if field in self._SIMPLE_FIELDS:
3010 val = getattr(node, field)
3011 elif field == "pinst_list":
3012 val = list(node_to_primary[node.name])
3013 elif field == "sinst_list":
3014 val = list(node_to_secondary[node.name])
3015 elif field == "pinst_cnt":
3016 val = len(node_to_primary[node.name])
3017 elif field == "sinst_cnt":
3018 val = len(node_to_secondary[node.name])
3019 elif field == "pip":
3020 val = node.primary_ip
3021 elif field == "sip":
3022 val = node.secondary_ip
3023 elif field == "tags":
3024 val = list(node.GetTags())
3025 elif field == "master":
3026 val = node.name == master_node
3027 elif self._FIELDS_DYNAMIC.Matches(field):
3028 val = live_data[node.name].get(field, None)
3029 elif field == "role":
3030 if node.name == master_node:
3032 elif node.master_candidate:
3041 raise errors.ParameterError(field)
3042 node_output.append(val)
3043 output.append(node_output)
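# Shape of the result assembled above, as a sketch (hypothetical nodes, with
# output_fields ["name", "pinst_cnt", "mfree"]): one row per node, values in
# the same order as self.op.output_fields, e.g.
#
#   [["node1.example.com", 2, 3072],
#    ["node2.example.com", 0, 7936]]
#
# Live fields such as "mfree" come back as None when the node could not be
# contacted, since live_data then holds an empty dict for that node.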
3048 class LUQueryNodeVolumes(NoHooksLU):
3049 """Logical unit for getting volumes on node(s).
3052 _OP_REQP = ["nodes", "output_fields"]
3054 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3055 _FIELDS_STATIC = utils.FieldSet("node")
3057 def ExpandNames(self):
3058 _CheckOutputFields(static=self._FIELDS_STATIC,
3059 dynamic=self._FIELDS_DYNAMIC,
3060 selected=self.op.output_fields)
3062 self.needed_locks = {}
3063 self.share_locks[locking.LEVEL_NODE] = 1
3064 if not self.op.nodes:
3065 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3067 self.needed_locks[locking.LEVEL_NODE] = \
3068 _GetWantedNodes(self, self.op.nodes)
3070 def CheckPrereq(self):
3071 """Check prerequisites.
3073 This checks that the fields required are valid output fields.
3076 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3078 def Exec(self, feedback_fn):
3079 """Computes the list of nodes and their attributes.
3082 nodenames = self.nodes
3083 volumes = self.rpc.call_node_volumes(nodenames)
3085 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3086 in self.cfg.GetInstanceList()]
3088 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3091 for node in nodenames:
3092 nresult = volumes[node]
3095 msg = nresult.fail_msg
3097 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3100 node_vols = nresult.payload[:]
3101 node_vols.sort(key=lambda vol: vol['dev'])
3103 for vol in node_vols:
3105 for field in self.op.output_fields:
3108 elif field == "phys":
3112 elif field == "name":
3114 elif field == "size":
3115 val = int(float(vol['size']))
3116 elif field == "instance":
3118 if node not in lv_by_node[inst]:
3120 if vol['name'] in lv_by_node[inst][node]:
3126 raise errors.ParameterError(field)
3127 node_output.append(str(val))
3129 output.append(node_output)
3134 class LUQueryNodeStorage(NoHooksLU):
3135 """Logical unit for getting information on storage units on node(s).
3138 _OP_REQP = ["nodes", "storage_type", "output_fields"]
3140 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3142 def CheckArguments(self):
3143 _CheckStorageType(self.op.storage_type)
3145 _CheckOutputFields(static=self._FIELDS_STATIC,
3146 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3147 selected=self.op.output_fields)
3149 def ExpandNames(self):
3150 self.needed_locks = {}
3151 self.share_locks[locking.LEVEL_NODE] = 1
3154 self.needed_locks[locking.LEVEL_NODE] = \
3155 _GetWantedNodes(self, self.op.nodes)
3157 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3159 def CheckPrereq(self):
3160 """Check prerequisites.
3162 This checks that the fields required are valid output fields.
3165 self.op.name = getattr(self.op, "name", None)
3167 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3169 def Exec(self, feedback_fn):
3170 """Computes the list of nodes and their attributes.
3173 # Always get name to sort by
3174 if constants.SF_NAME in self.op.output_fields:
3175 fields = self.op.output_fields[:]
3177 fields = [constants.SF_NAME] + self.op.output_fields
3179 # Never ask for node or type as it's only known to the LU
3180 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3181 while extra in fields:
3182 fields.remove(extra)
3184 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3185 name_idx = field_idx[constants.SF_NAME]
3187 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3188 data = self.rpc.call_storage_list(self.nodes,
3189 self.op.storage_type, st_args,
3190 self.op.name, fields)
3194 for node in utils.NiceSort(self.nodes):
3195 nresult = data[node]
3199 msg = nresult.fail_msg
3201 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3204 rows = dict([(row[name_idx], row) for row in nresult.payload])
3206 for name in utils.NiceSort(rows.keys()):
3211 for field in self.op.output_fields:
3212 if field == constants.SF_NODE:
3214 elif field == constants.SF_TYPE:
3215 val = self.op.storage_type
3216 elif field in field_idx:
3217 val = row[field_idx[field]]
3219 raise errors.ParameterError(field)
3228 class LUModifyNodeStorage(NoHooksLU):
3229 """Logical unit for modifying a storage volume on a node.
3232 _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3235 def CheckArguments(self):
3236 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3238 _CheckStorageType(self.op.storage_type)
3240 def ExpandNames(self):
3241 self.needed_locks = {
3242 locking.LEVEL_NODE: self.op.node_name,
3245 def CheckPrereq(self):
3246 """Check prerequisites.
3249 storage_type = self.op.storage_type
3252 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3254 raise errors.OpPrereqError("Storage units of type '%s' cannot be"
3255 " modified" % storage_type,
3258 diff = set(self.op.changes.keys()) - modifiable
3260 raise errors.OpPrereqError("The following fields cannot be modified for"
3261 " storage units of type '%s': %r" %
3262 (storage_type, list(diff)),
3265 def Exec(self, feedback_fn):
3266 """Computes the list of nodes and their attributes.
3269 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3270 result = self.rpc.call_storage_modify(self.op.node_name,
3271 self.op.storage_type, st_args,
3272 self.op.name, self.op.changes)
3273 result.Raise("Failed to modify storage unit '%s' on %s" %
3274 (self.op.name, self.op.node_name))
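# As a usage sketch (assuming, as in the standard constants, that the
# allocatable flag is the modifiable field for LVM physical volumes), an
# opcode driving this LU would carry something like:
#
#   node_name="node1.example.com", storage_type=constants.ST_LVM_PV,
#   name="/dev/sdb1", changes={constants.SF_ALLOCATABLE: False}
#
# CheckPrereq rejects any key in "changes" that is not listed in
# constants.MODIFIABLE_STORAGE_FIELDS for the given storage type.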
3277 class LUAddNode(LogicalUnit):
3278 """Logical unit for adding node to the cluster.
3282 HTYPE = constants.HTYPE_NODE
3283 _OP_REQP = ["node_name"]
3285 def CheckArguments(self):
3286 # validate/normalize the node name
3287 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3289 def BuildHooksEnv(self):
3292 This will run on all nodes before, and on all nodes + the new node after.
3296 "OP_TARGET": self.op.node_name,
3297 "NODE_NAME": self.op.node_name,
3298 "NODE_PIP": self.op.primary_ip,
3299 "NODE_SIP": self.op.secondary_ip,
3301 nodes_0 = self.cfg.GetNodeList()
3302 nodes_1 = nodes_0 + [self.op.node_name, ]
3303 return env, nodes_0, nodes_1
3305 def CheckPrereq(self):
3306 """Check prerequisites.
3309 - the new node is not already in the config
3311 - its parameters (single/dual homed) match the cluster
3313 Any errors are signaled by raising errors.OpPrereqError.
3316 node_name = self.op.node_name
3319 dns_data = utils.GetHostInfo(node_name)
3321 node = dns_data.name
3322 primary_ip = self.op.primary_ip = dns_data.ip
3323 secondary_ip = getattr(self.op, "secondary_ip", None)
3324 if secondary_ip is None:
3325 secondary_ip = primary_ip
3326 if not utils.IsValidIP(secondary_ip):
3327 raise errors.OpPrereqError("Invalid secondary IP given",
3329 self.op.secondary_ip = secondary_ip
3331 node_list = cfg.GetNodeList()
3332 if not self.op.readd and node in node_list:
3333 raise errors.OpPrereqError("Node %s is already in the configuration" %
3334 node, errors.ECODE_EXISTS)
3335 elif self.op.readd and node not in node_list:
3336 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3339 self.changed_primary_ip = False
3341 for existing_node_name in node_list:
3342 existing_node = cfg.GetNodeInfo(existing_node_name)
3344 if self.op.readd and node == existing_node_name:
3345 if existing_node.secondary_ip != secondary_ip:
3346 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3347 " address configuration as before",
3349 if existing_node.primary_ip != primary_ip:
3350 self.changed_primary_ip = True
3354 if (existing_node.primary_ip == primary_ip or
3355 existing_node.secondary_ip == primary_ip or
3356 existing_node.primary_ip == secondary_ip or
3357 existing_node.secondary_ip == secondary_ip):
3358 raise errors.OpPrereqError("New node ip address(es) conflict with"
3359 " existing node %s" % existing_node.name,
3360 errors.ECODE_NOTUNIQUE)
3362 # check that the type of the node (single versus dual homed) is the
3363 # same as for the master
3364 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3365 master_singlehomed = myself.secondary_ip == myself.primary_ip
3366 newbie_singlehomed = secondary_ip == primary_ip
3367 if master_singlehomed != newbie_singlehomed:
3368 if master_singlehomed:
3369 raise errors.OpPrereqError("The master has no private ip but the"
3370 " new node has one",
3373 raise errors.OpPrereqError("The master has a private ip but the"
3374 " new node doesn't have one",
3377 # checks reachability
3378 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3379 raise errors.OpPrereqError("Node not reachable by ping",
3380 errors.ECODE_ENVIRON)
3382 if not newbie_singlehomed:
3383 # check reachability from my secondary ip to newbie's secondary ip
3384 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3385 source=myself.secondary_ip):
3386 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3387 " based ping to noded port",
3388 errors.ECODE_ENVIRON)
3395 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3398 self.new_node = self.cfg.GetNodeInfo(node)
3399 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3401 self.new_node = objects.Node(name=node,
3402 primary_ip=primary_ip,
3403 secondary_ip=secondary_ip,
3404 master_candidate=self.master_candidate,
3405 offline=False, drained=False)
3407 def Exec(self, feedback_fn):
3408 """Adds the new node to the cluster.
3411 new_node = self.new_node
3412 node = new_node.name
3414 # for re-adds, reset the offline/drained/master-candidate flags;
3415 # we need to reset here, otherwise offline would prevent RPC calls
3416 # later in the procedure; this also means that if the re-add
3417 # fails, we are left with a non-offlined, broken node
3419 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3420 self.LogInfo("Readding a node, the offline/drained flags were reset")
3421 # if we demote the node, we do cleanup later in the procedure
3422 new_node.master_candidate = self.master_candidate
3423 if self.changed_primary_ip:
3424 new_node.primary_ip = self.op.primary_ip
3426 # notify the user about any possible mc promotion
3427 if new_node.master_candidate:
3428 self.LogInfo("Node will be a master candidate")
3430 # check connectivity
3431 result = self.rpc.call_version([node])[node]
3432 result.Raise("Can't get version information from node %s" % node)
3433 if constants.PROTOCOL_VERSION == result.payload:
3434 logging.info("Communication to node %s fine, sw version %s match",
3435 node, result.payload)
3437 raise errors.OpExecError("Version mismatch master version %s,"
3438 " node version %s" %
3439 (constants.PROTOCOL_VERSION, result.payload))
3442 if self.cfg.GetClusterInfo().modify_ssh_setup:
3443 logging.info("Copy ssh key to node %s", node)
3444 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3446 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3447 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3451 keyarray.append(utils.ReadFile(i))
3453 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3454 keyarray[2], keyarray[3], keyarray[4],
3456 result.Raise("Cannot transfer ssh keys to the new node")
3458 # Add node to our /etc/hosts, and add key to known_hosts
3459 if self.cfg.GetClusterInfo().modify_etc_hosts:
3460 utils.AddHostToEtcHosts(new_node.name)
3462 if new_node.secondary_ip != new_node.primary_ip:
3463 result = self.rpc.call_node_has_ip_address(new_node.name,
3464 new_node.secondary_ip)
3465 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3466 prereq=True, ecode=errors.ECODE_ENVIRON)
3467 if not result.payload:
3468 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3469 " you gave (%s). Please fix and re-run this"
3470 " command." % new_node.secondary_ip)
3472 node_verify_list = [self.cfg.GetMasterNode()]
3473 node_verify_param = {
3474 constants.NV_NODELIST: [node],
3475 # TODO: do a node-net-test as well?
3478 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3479 self.cfg.GetClusterName())
3480 for verifier in node_verify_list:
3481 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3482 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3484 for failed in nl_payload:
3485 feedback_fn("ssh/hostname verification failed"
3486 " (checking from %s): %s" %
3487 (verifier, nl_payload[failed]))
3488 raise errors.OpExecError("ssh/hostname verification failed.")
3491 _RedistributeAncillaryFiles(self)
3492 self.context.ReaddNode(new_node)
3493 # make sure we redistribute the config
3494 self.cfg.Update(new_node, feedback_fn)
3495 # and make sure the new node will not have old files around
3496 if not new_node.master_candidate:
3497 result = self.rpc.call_node_demote_from_mc(new_node.name)
3498 msg = result.fail_msg
3500 self.LogWarning("Node failed to demote itself from master"
3501 " candidate status: %s" % msg)
3503 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3504 self.context.AddNode(new_node, self.proc.GetECId())
3507 class LUSetNodeParams(LogicalUnit):
3508 """Modifies the parameters of a node.
3511 HPATH = "node-modify"
3512 HTYPE = constants.HTYPE_NODE
3513 _OP_REQP = ["node_name"]
3516 def CheckArguments(self):
3517 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3518 _CheckBooleanOpField(self.op, 'master_candidate')
3519 _CheckBooleanOpField(self.op, 'offline')
3520 _CheckBooleanOpField(self.op, 'drained')
3521 _CheckBooleanOpField(self.op, 'auto_promote')
3522 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3523 if all_mods.count(None) == 3:
3524 raise errors.OpPrereqError("Please pass at least one modification",
3526 if all_mods.count(True) > 1:
3527 raise errors.OpPrereqError("Can't set the node into more than one"
3528 " state at the same time",
3531 # Boolean value that tells us whether we're offlining or draining the node
3532 self.offline_or_drain = (self.op.offline == True or
3533 self.op.drained == True)
3534 self.deoffline_or_drain = (self.op.offline == False or
3535 self.op.drained == False)
3536 self.might_demote = (self.op.master_candidate == False or
3537 self.offline_or_drain)
3539 self.lock_all = self.op.auto_promote and self.might_demote
3542 def ExpandNames(self):
3544 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3546 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3548 def BuildHooksEnv(self):
3551 This runs on the master node.
3555 "OP_TARGET": self.op.node_name,
3556 "MASTER_CANDIDATE": str(self.op.master_candidate),
3557 "OFFLINE": str(self.op.offline),
3558 "DRAINED": str(self.op.drained),
3560 nl = [self.cfg.GetMasterNode(),
3564 def CheckPrereq(self):
3565 """Check prerequisites.
3567 This only checks the instance list against the existing names.
3570 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3572 if (self.op.master_candidate is not None or
3573 self.op.drained is not None or
3574 self.op.offline is not None):
3575 # we can't change the master's node flags
3576 if self.op.node_name == self.cfg.GetMasterNode():
3577 raise errors.OpPrereqError("The master role can be changed"
3578 " only via masterfailover",
3582 if node.master_candidate and self.might_demote and not self.lock_all:
3583 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3584 # check if after removing the current node, we're missing master candidates
3586 (mc_remaining, mc_should, _) = \
3587 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3588 if mc_remaining < mc_should:
3589 raise errors.OpPrereqError("Not enough master candidates, please"
3590 " pass auto_promote to allow promotion",
3593 if (self.op.master_candidate == True and
3594 ((node.offline and not self.op.offline == False) or
3595 (node.drained and not self.op.drained == False))):
3596 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3597 " to master_candidate" % node.name,
3600 # If we're being deofflined/drained, we'll MC ourself if needed
3601 if (self.deoffline_or_drain and not self.offline_or_drain and not
3602 self.op.master_candidate == True and not node.master_candidate):
3603 self.op.master_candidate = _DecideSelfPromotion(self)
3604 if self.op.master_candidate:
3605 self.LogInfo("Autopromoting node to master candidate")
3609 def Exec(self, feedback_fn):
3618 if self.op.offline is not None:
3619 node.offline = self.op.offline
3620 result.append(("offline", str(self.op.offline)))
3621 if self.op.offline == True:
3622 if node.master_candidate:
3623 node.master_candidate = False
3625 result.append(("master_candidate", "auto-demotion due to offline"))
3627 node.drained = False
3628 result.append(("drained", "clear drained status due to offline"))
3630 if self.op.master_candidate is not None:
3631 node.master_candidate = self.op.master_candidate
3633 result.append(("master_candidate", str(self.op.master_candidate)))
3634 if self.op.master_candidate == False:
3635 rrc = self.rpc.call_node_demote_from_mc(node.name)
3638 self.LogWarning("Node failed to demote itself: %s" % msg)
3640 if self.op.drained is not None:
3641 node.drained = self.op.drained
3642 result.append(("drained", str(self.op.drained)))
3643 if self.op.drained == True:
3644 if node.master_candidate:
3645 node.master_candidate = False
3647 result.append(("master_candidate", "auto-demotion due to drain"))
3648 rrc = self.rpc.call_node_demote_from_mc(node.name)
3651 self.LogWarning("Node failed to demote itself: %s" % msg)
3653 node.offline = False
3654 result.append(("offline", "clear offline status due to drain"))
3656 # we locked all nodes, we adjust the CP before updating this node
3658 _AdjustCandidatePool(self, [node.name])
3660 # this will trigger configuration file update, if needed
3661 self.cfg.Update(node, feedback_fn)
3663 # this will trigger job queue propagation or cleanup
3665 self.context.ReaddNode(node)
3670 class LUPowercycleNode(NoHooksLU):
3671 """Powercycles a node.
3674 _OP_REQP = ["node_name", "force"]
3677 def CheckArguments(self):
3678 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3679 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3680 raise errors.OpPrereqError("The node is the master and the force"
3681 " parameter was not set",
3684 def ExpandNames(self):
3685 """Locking for PowercycleNode.
3687 This is a last-resort option and shouldn't block on other
3688 jobs. Therefore, we grab no locks.
3691 self.needed_locks = {}
3693 def CheckPrereq(self):
3694 """Check prerequisites.
3696 This LU has no prereqs.
3701 def Exec(self, feedback_fn):
3705 result = self.rpc.call_node_powercycle(self.op.node_name,
3706 self.cfg.GetHypervisorType())
3707 result.Raise("Failed to schedule the reboot")
3708 return result.payload
3711 class LUQueryClusterInfo(NoHooksLU):
3712 """Query cluster configuration.
3718 def ExpandNames(self):
3719 self.needed_locks = {}
3721 def CheckPrereq(self):
3722 """No prerequsites needed for this LU.
3727 def Exec(self, feedback_fn):
3728 """Return cluster config.
3731 cluster = self.cfg.GetClusterInfo()
3734 # Filter just for enabled hypervisors
3735 for os_name, hv_dict in cluster.os_hvp.items():
3736 os_hvp[os_name] = {}
3737 for hv_name, hv_params in hv_dict.items():
3738 if hv_name in cluster.enabled_hypervisors:
3739 os_hvp[os_name][hv_name] = hv_params
3742 "software_version": constants.RELEASE_VERSION,
3743 "protocol_version": constants.PROTOCOL_VERSION,
3744 "config_version": constants.CONFIG_VERSION,
3745 "os_api_version": max(constants.OS_API_VERSIONS),
3746 "export_version": constants.EXPORT_VERSION,
3747 "architecture": (platform.architecture()[0], platform.machine()),
3748 "name": cluster.cluster_name,
3749 "master": cluster.master_node,
3750 "default_hypervisor": cluster.enabled_hypervisors[0],
3751 "enabled_hypervisors": cluster.enabled_hypervisors,
3752 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3753 for hypervisor_name in cluster.enabled_hypervisors]),
3755 "beparams": cluster.beparams,
3756 "nicparams": cluster.nicparams,
3757 "candidate_pool_size": cluster.candidate_pool_size,
3758 "master_netdev": cluster.master_netdev,
3759 "volume_group_name": cluster.volume_group_name,
3760 "file_storage_dir": cluster.file_storage_dir,
3761 "maintain_node_health": cluster.maintain_node_health,
3762 "ctime": cluster.ctime,
3763 "mtime": cluster.mtime,
3764 "uuid": cluster.uuid,
3765 "tags": list(cluster.GetTags()),
3766 "uid_pool": cluster.uid_pool,
3772 class LUQueryConfigValues(NoHooksLU):
3773 """Return configuration values.
3778 _FIELDS_DYNAMIC = utils.FieldSet()
3779 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3782 def ExpandNames(self):
3783 self.needed_locks = {}
3785 _CheckOutputFields(static=self._FIELDS_STATIC,
3786 dynamic=self._FIELDS_DYNAMIC,
3787 selected=self.op.output_fields)
3789 def CheckPrereq(self):
3790 """No prerequisites.
3795 def Exec(self, feedback_fn):
3796 """Dump a representation of the cluster config to the standard output.
3800 for field in self.op.output_fields:
3801 if field == "cluster_name":
3802 entry = self.cfg.GetClusterName()
3803 elif field == "master_node":
3804 entry = self.cfg.GetMasterNode()
3805 elif field == "drain_flag":
3806 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3807 elif field == "watcher_pause":
3808 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3810 raise errors.ParameterError(field)
3811 values.append(entry)
3815 class LUActivateInstanceDisks(NoHooksLU):
3816 """Bring up an instance's disks.
3819 _OP_REQP = ["instance_name"]
3822 def ExpandNames(self):
3823 self._ExpandAndLockInstance()
3824 self.needed_locks[locking.LEVEL_NODE] = []
3825 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3827 def DeclareLocks(self, level):
3828 if level == locking.LEVEL_NODE:
3829 self._LockInstancesNodes()
3831 def CheckPrereq(self):
3832 """Check prerequisites.
3834 This checks that the instance is in the cluster.
3837 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3838 assert self.instance is not None, \
3839 "Cannot retrieve locked instance %s" % self.op.instance_name
3840 _CheckNodeOnline(self, self.instance.primary_node)
3841 if not hasattr(self.op, "ignore_size"):
3842 self.op.ignore_size = False
3844 def Exec(self, feedback_fn):
3845 """Activate the disks.
3848 disks_ok, disks_info = \
3849 _AssembleInstanceDisks(self, self.instance,
3850 ignore_size=self.op.ignore_size)
3852 raise errors.OpExecError("Cannot activate block devices")
3857 def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3859 """Prepare the block devices for an instance.
3861 This sets up the block devices on all nodes.
3863 @type lu: L{LogicalUnit}
3864 @param lu: the logical unit on whose behalf we execute
3865 @type instance: L{objects.Instance}
3866 @param instance: the instance for whose disks we assemble
3867 @type ignore_secondaries: boolean
3868 @param ignore_secondaries: if true, errors on secondary nodes
3869 won't result in an error return from the function
3870 @type ignore_size: boolean
3871 @param ignore_size: if true, the current known size of the disk
3872 will not be used during the disk activation, useful for cases
3873 when the size is wrong
3874 @return: a pair (disks_ok, device_info); disks_ok is False if the operation
3875 failed, and device_info is a list of (host, instance_visible_name,
3876 node_visible_name) tuples mapping node devices to instance devices
3881 iname = instance.name
3882 # With the two-pass mechanism we try to reduce the window of
3883 # opportunity for the race condition of switching DRBD to primary
3884 # before handshaking occurred, but we do not eliminate it
3886 # The proper fix would be to wait (with some limits) until the
3887 # connection has been made and drbd transitions from WFConnection
3888 # into any other network-connected state (Connected, SyncTarget,
3891 # 1st pass, assemble on all nodes in secondary mode
3892 for inst_disk in instance.disks:
3893 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3895 node_disk = node_disk.Copy()
3896 node_disk.UnsetSize()
3897 lu.cfg.SetDiskID(node_disk, node)
3898 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3899 msg = result.fail_msg
3901 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3902 " (is_primary=False, pass=1): %s",
3903 inst_disk.iv_name, node, msg)
3904 if not ignore_secondaries:
3907 # FIXME: race condition on drbd migration to primary
3909 # 2nd pass, do only the primary node
3910 for inst_disk in instance.disks:
3913 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3914 if node != instance.primary_node:
3917 node_disk = node_disk.Copy()
3918 node_disk.UnsetSize()
3919 lu.cfg.SetDiskID(node_disk, node)
3920 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3921 msg = result.fail_msg
3923 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3924 " (is_primary=True, pass=2): %s",
3925 inst_disk.iv_name, node, msg)
3928 dev_path = result.payload
3930 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3932 # leave the disks configured for the primary node
3933 # this is a workaround that would be fixed better by
3934 # improving the logical/physical id handling
3935 for disk in instance.disks:
3936 lu.cfg.SetDiskID(disk, instance.primary_node)
3938 return disks_ok, device_info
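# A minimal usage sketch (hypothetical caller, mirroring LUActivateInstanceDisks
# above): the second return value maps node-level devices to instance-visible
# disk names.
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     logging.info("Disk %s of %s is visible on %s as %s",
#                  iv_name, instance.name, node, dev_path)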
3941 def _StartInstanceDisks(lu, instance, force):
3942 """Start the disks of an instance.
3945 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3946 ignore_secondaries=force)
3948 _ShutdownInstanceDisks(lu, instance)
3949 if force is not None and not force:
3950 lu.proc.LogWarning("", hint="If the message above refers to a"
3952 " you can retry the operation using '--force'.")
3953 raise errors.OpExecError("Disk consistency error")
3956 class LUDeactivateInstanceDisks(NoHooksLU):
3957 """Shutdown an instance's disks.
3960 _OP_REQP = ["instance_name"]
3963 def ExpandNames(self):
3964 self._ExpandAndLockInstance()
3965 self.needed_locks[locking.LEVEL_NODE] = []
3966 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3968 def DeclareLocks(self, level):
3969 if level == locking.LEVEL_NODE:
3970 self._LockInstancesNodes()
3972 def CheckPrereq(self):
3973 """Check prerequisites.
3975 This checks that the instance is in the cluster.
3978 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3979 assert self.instance is not None, \
3980 "Cannot retrieve locked instance %s" % self.op.instance_name
3982 def Exec(self, feedback_fn):
3983 """Deactivate the disks
3986 instance = self.instance
3987 _SafeShutdownInstanceDisks(self, instance)
3990 def _SafeShutdownInstanceDisks(lu, instance):
3991 """Shutdown block devices of an instance.
3993 This function checks that the instance is not running before calling
3994 _ShutdownInstanceDisks.
3997 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
3998 _ShutdownInstanceDisks(lu, instance)
4001 def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
4002 """Shutdown block devices of an instance.
4004 This does the shutdown on all nodes of the instance.
4006 If ignore_primary is false, errors on the primary node make the whole shutdown fail.
4011 for disk in instance.disks:
4012 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4013 lu.cfg.SetDiskID(top_disk, node)
4014 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4015 msg = result.fail_msg
4017 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4018 disk.iv_name, node, msg)
4019 if not ignore_primary or node != instance.primary_node:
4024 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4025 """Checks if a node has enough free memory.
4027 This function checks if a given node has the needed amount of free
4028 memory. In case the node has less memory or we cannot get the
4029 information from the node, this function raises an OpPrereqError
4032 @type lu: C{LogicalUnit}
4033 @param lu: a logical unit from which we get configuration data
4035 @param node: the node to check
4036 @type reason: C{str}
4037 @param reason: string to use in the error message
4038 @type requested: C{int}
4039 @param requested: the amount of memory in MiB to check for
4040 @type hypervisor_name: C{str}
4041 @param hypervisor_name: the hypervisor to ask for memory stats
4042 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4043 we cannot check the node
4046 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4047 nodeinfo[node].Raise("Can't get data from node %s" % node,
4048 prereq=True, ecode=errors.ECODE_ENVIRON)
4049 free_mem = nodeinfo[node].payload.get('memory_free', None)
4050 if not isinstance(free_mem, int):
4051 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4052 " was '%s'" % (node, free_mem),
4053 errors.ECODE_ENVIRON)
4054 if requested > free_mem:
4055 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4056 " needed %s MiB, available %s MiB" %
4057 (node, reason, requested, free_mem),
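# Typical use (mirroring LUStartupInstance.CheckPrereq below): make sure the
# primary node can hold the instance's configured memory before starting it.
#
#   bep = self.cfg.GetClusterInfo().FillBE(instance)
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)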
4061 def _CheckNodesFreeDisk(lu, nodenames, requested):
4062 """Checks if nodes have enough free disk space in the default VG.
4064 This function checks if all given nodes have the needed amount of
4065 free disk. In case any node has less disk or we cannot get the
4066 information from the node, this function raises an OpPrereqError
4069 @type lu: C{LogicalUnit}
4070 @param lu: a logical unit from which we get configuration data
4071 @type nodenames: C{list}
4072 @param nodenames: the list of node names to check
4073 @type requested: C{int}
4074 @param requested: the amount of disk in MiB to check for
4075 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4076 we cannot check the node
4079 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4080 lu.cfg.GetHypervisorType())
4081 for node in nodenames:
4082 info = nodeinfo[node]
4083 info.Raise("Cannot get current information from node %s" % node,
4084 prereq=True, ecode=errors.ECODE_ENVIRON)
4085 vg_free = info.payload.get("vg_free", None)
4086 if not isinstance(vg_free, int):
4087 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4088 " result was '%s'" % (node, vg_free),
4089 errors.ECODE_ENVIRON)
4090 if requested > vg_free:
4091 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4092 " required %d MiB, available %d MiB" %
4093 (node, requested, vg_free),
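# Usage sketch (hypothetical caller, not part of this excerpt): an LU that is
# about to allocate new volumes could validate space on all involved nodes
# first, e.g.
#
#   _CheckNodesFreeDisk(self, [pnode.name, snode.name], required_mib)
#
# where pnode/snode and required_mib are placeholders for the target nodes
# and the total new disk size in MiB.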
4097 class LUStartupInstance(LogicalUnit):
4098 """Starts an instance.
4101 HPATH = "instance-start"
4102 HTYPE = constants.HTYPE_INSTANCE
4103 _OP_REQP = ["instance_name", "force"]
4106 def ExpandNames(self):
4107 self._ExpandAndLockInstance()
4109 def BuildHooksEnv(self):
4112 This runs on master, primary and secondary nodes of the instance.
4116 "FORCE": self.op.force,
4118 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4119 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4122 def CheckPrereq(self):
4123 """Check prerequisites.
4125 This checks that the instance is in the cluster.
4128 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4129 assert self.instance is not None, \
4130 "Cannot retrieve locked instance %s" % self.op.instance_name
4133 self.beparams = getattr(self.op, "beparams", {})
4135 if not isinstance(self.beparams, dict):
4136 raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4137 " dict" % (type(self.beparams), ),
4139 # fill the beparams dict
4140 utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
4141 self.op.beparams = self.beparams
4144 self.hvparams = getattr(self.op, "hvparams", {})
4146 if not isinstance(self.hvparams, dict):
4147 raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4148 " dict" % (type(self.hvparams), ),
4151 # check hypervisor parameter syntax (locally)
4152 cluster = self.cfg.GetClusterInfo()
4153 utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
4154 filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
4156 filled_hvp.update(self.hvparams)
4157 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4158 hv_type.CheckParameterSyntax(filled_hvp)
4159 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4160 self.op.hvparams = self.hvparams
4162 _CheckNodeOnline(self, instance.primary_node)
4164 bep = self.cfg.GetClusterInfo().FillBE(instance)
4165 # check bridges existence
4166 _CheckInstanceBridgesExist(self, instance)
4168 remote_info = self.rpc.call_instance_info(instance.primary_node,
4170 instance.hypervisor)
4171 remote_info.Raise("Error checking node %s" % instance.primary_node,
4172 prereq=True, ecode=errors.ECODE_ENVIRON)
4173 if not remote_info.payload: # not running already
4174 _CheckNodeFreeMemory(self, instance.primary_node,
4175 "starting instance %s" % instance.name,
4176 bep[constants.BE_MEMORY], instance.hypervisor)
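# Illustrative sketch of the parameter layering done in CheckPrereq above
# (hypervisor values are made up): the cluster-level defaults for the
# instance's hypervisor are filled first, then the one-off overrides passed
# with the opcode win, so the override applies to this start only:
#
#   cluster.hvparams[instance.hypervisor] -> {"root_path": "/dev/sda1", ...}
#   self.hvparams                         -> {"root_path": "/dev/xvda1"}
#   filled_hvp after update               -> {"root_path": "/dev/xvda1", ...}
#
# The merged dict is only syntax-checked here; it is not written back to the
# cluster configuration.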
4178 def Exec(self, feedback_fn):
4179 """Start the instance.
4182 instance = self.instance
4183 force = self.op.force
4185 self.cfg.MarkInstanceUp(instance.name)
4187 node_current = instance.primary_node
4189 _StartInstanceDisks(self, instance, force)
4191 result = self.rpc.call_instance_start(node_current, instance,
4192 self.hvparams, self.beparams)
4193 msg = result.fail_msg
4195 _ShutdownInstanceDisks(self, instance)
4196 raise errors.OpExecError("Could not start instance: %s" % msg)
4199 class LURebootInstance(LogicalUnit):
4200 """Reboot an instance.
4203 HPATH = "instance-reboot"
4204 HTYPE = constants.HTYPE_INSTANCE
4205 _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
4208 def CheckArguments(self):
4209 """Check the arguments.
4212 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4213 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4215 def ExpandNames(self):
4216 if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
4217 constants.INSTANCE_REBOOT_HARD,
4218 constants.INSTANCE_REBOOT_FULL]:
4219 raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
4220 (constants.INSTANCE_REBOOT_SOFT,
4221 constants.INSTANCE_REBOOT_HARD,
4222 constants.INSTANCE_REBOOT_FULL))
4223 self._ExpandAndLockInstance()
4225 def BuildHooksEnv(self):
4228 This runs on master, primary and secondary nodes of the instance.
4232 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4233 "REBOOT_TYPE": self.op.reboot_type,
4234 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4236 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4237 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4240 def CheckPrereq(self):
4241 """Check prerequisites.
4243 This checks that the instance is in the cluster.
4246 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4247 assert self.instance is not None, \
4248 "Cannot retrieve locked instance %s" % self.op.instance_name
4250 _CheckNodeOnline(self, instance.primary_node)
4252 # check bridges existence
4253 _CheckInstanceBridgesExist(self, instance)
4255 def Exec(self, feedback_fn):
4256 """Reboot the instance.
4259 instance = self.instance
4260 ignore_secondaries = self.op.ignore_secondaries
4261 reboot_type = self.op.reboot_type
4263 node_current = instance.primary_node
4265 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4266 constants.INSTANCE_REBOOT_HARD]:
4267 for disk in instance.disks:
4268 self.cfg.SetDiskID(disk, node_current)
4269 result = self.rpc.call_instance_reboot(node_current, instance,
4271 self.shutdown_timeout)
4272 result.Raise("Could not reboot instance")
4274 result = self.rpc.call_instance_shutdown(node_current, instance,
4275 self.shutdown_timeout)
4276 result.Raise("Could not shutdown instance for full reboot")
4277 _ShutdownInstanceDisks(self, instance)
4278 _StartInstanceDisks(self, instance, ignore_secondaries)
4279 result = self.rpc.call_instance_start(node_current, instance, None, None)
4280 msg = result.fail_msg
4282 _ShutdownInstanceDisks(self, instance)
4283 raise errors.OpExecError("Could not start instance for"
4284 " full reboot: %s" % msg)
4286 self.cfg.MarkInstanceUp(instance.name)
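# Summary of the branches above: a soft or hard reboot is delegated to a
# single call_instance_reboot RPC on the primary node, while a full reboot is
# emulated as shutdown, disk deactivation, disk activation and a fresh
# instance start.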
4289 class LUShutdownInstance(LogicalUnit):
4290 """Shutdown an instance.
4293 HPATH = "instance-stop"
4294 HTYPE = constants.HTYPE_INSTANCE
4295 _OP_REQP = ["instance_name"]
4298 def CheckArguments(self):
4299 """Check the arguments.
4302 self.timeout = getattr(self.op, "timeout",
4303 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4305 def ExpandNames(self):
4306 self._ExpandAndLockInstance()
4308 def BuildHooksEnv(self):
4311 This runs on master, primary and secondary nodes of the instance.
4314 env = _BuildInstanceHookEnvByObject(self, self.instance)
4315 env["TIMEOUT"] = self.timeout
4316 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4319 def CheckPrereq(self):
4320 """Check prerequisites.
4322 This checks that the instance is in the cluster.
4325 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4326 assert self.instance is not None, \
4327 "Cannot retrieve locked instance %s" % self.op.instance_name
4328 _CheckNodeOnline(self, self.instance.primary_node)
4330 def Exec(self, feedback_fn):
4331 """Shutdown the instance.
4334 instance = self.instance
4335 node_current = instance.primary_node
4336 timeout = self.timeout
4337 self.cfg.MarkInstanceDown(instance.name)
4338 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4339 msg = result.fail_msg
4341 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4343 _ShutdownInstanceDisks(self, instance)
4346 class LUReinstallInstance(LogicalUnit):
4347 """Reinstall an instance.
4350 HPATH = "instance-reinstall"
4351 HTYPE = constants.HTYPE_INSTANCE
4352 _OP_REQP = ["instance_name"]
4355 def ExpandNames(self):
4356 self._ExpandAndLockInstance()
4358 def BuildHooksEnv(self):
4361 This runs on master, primary and secondary nodes of the instance.
4364 env = _BuildInstanceHookEnvByObject(self, self.instance)
4365 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4368 def CheckPrereq(self):
4369 """Check prerequisites.
4371 This checks that the instance is in the cluster and is not running.
4374 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4375 assert instance is not None, \
4376 "Cannot retrieve locked instance %s" % self.op.instance_name
4377 _CheckNodeOnline(self, instance.primary_node)
4379 if instance.disk_template == constants.DT_DISKLESS:
4380 raise errors.OpPrereqError("Instance '%s' has no disks" %
4381 self.op.instance_name,
4383 _CheckInstanceDown(self, instance, "cannot reinstall")
4385 self.op.os_type = getattr(self.op, "os_type", None)
4386 self.op.force_variant = getattr(self.op, "force_variant", False)
4387 if self.op.os_type is not None:
4389 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4390 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4392 self.instance = instance
4394 def Exec(self, feedback_fn):
4395 """Reinstall the instance.
4398 inst = self.instance
4400 if self.op.os_type is not None:
4401 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4402 inst.os = self.op.os_type
4403 self.cfg.Update(inst, feedback_fn)
4405 _StartInstanceDisks(self, inst, None)
4407 feedback_fn("Running the instance OS create scripts...")
4408 # FIXME: pass debug option from opcode to backend
4409 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4410 self.op.debug_level)
4411 result.Raise("Could not install OS for instance %s on node %s" %
4412 (inst.name, inst.primary_node))
4414 _ShutdownInstanceDisks(self, inst)
4417 class LURecreateInstanceDisks(LogicalUnit):
4418 """Recreate an instance's missing disks.
4421 HPATH = "instance-recreate-disks"
4422 HTYPE = constants.HTYPE_INSTANCE
4423 _OP_REQP = ["instance_name", "disks"]
4426 def CheckArguments(self):
4427 """Check the arguments.
4430 if not isinstance(self.op.disks, list):
4431 raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4432 for item in self.op.disks:
4433 if (not isinstance(item, int) or
4435 raise errors.OpPrereqError("Invalid disk specification '%s'" %
4436 str(item), errors.ECODE_INVAL)
4438 def ExpandNames(self):
4439 self._ExpandAndLockInstance()
4441 def BuildHooksEnv(self):
4444 This runs on master, primary and secondary nodes of the instance.
4447 env = _BuildInstanceHookEnvByObject(self, self.instance)
4448 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4451 def CheckPrereq(self):
4452 """Check prerequisites.
4454 This checks that the instance is in the cluster and is not running.
4457 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4458 assert instance is not None, \
4459 "Cannot retrieve locked instance %s" % self.op.instance_name
4460 _CheckNodeOnline(self, instance.primary_node)
4462 if instance.disk_template == constants.DT_DISKLESS:
4463 raise errors.OpPrereqError("Instance '%s' has no disks" %
4464 self.op.instance_name, errors.ECODE_INVAL)
4465 _CheckInstanceDown(self, instance, "cannot recreate disks")
4467 if not self.op.disks:
4468 self.op.disks = range(len(instance.disks))
4470 for idx in self.op.disks:
4471 if idx >= len(instance.disks):
4472 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4475 self.instance = instance
4477 def Exec(self, feedback_fn):
4478 """Recreate the disks.
4482 for idx, _ in enumerate(self.instance.disks):
4483 if idx not in self.op.disks: # disk idx has not been passed in
4487 _CreateDisks(self, self.instance, to_skip=to_skip)
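# Example of the skip computation above (indexes are made up): with three
# instance disks and self.op.disks == [1], only disk 1 is recreated;
# to_skip collects the other indexes, i.e. [0, 2], and is handed to
# _CreateDisks. An empty self.op.disks (see CheckPrereq) means every disk is
# recreated.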
4490 class LURenameInstance(LogicalUnit):
4491 """Rename an instance.
4494 HPATH = "instance-rename"
4495 HTYPE = constants.HTYPE_INSTANCE
4496 _OP_REQP = ["instance_name", "new_name"]
4498 def BuildHooksEnv(self):
4501 This runs on master, primary and secondary nodes of the instance.
4504 env = _BuildInstanceHookEnvByObject(self, self.instance)
4505 env["INSTANCE_NEW_NAME"] = self.op.new_name
4506 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4509 def CheckPrereq(self):
4510 """Check prerequisites.
4512 This checks that the instance is in the cluster and is not running.
4515 self.op.instance_name = _ExpandInstanceName(self.cfg,
4516 self.op.instance_name)
4517 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4518 assert instance is not None
4519 _CheckNodeOnline(self, instance.primary_node)
4520 _CheckInstanceDown(self, instance, "cannot rename")
4521 self.instance = instance
4523 # new name verification
4524 name_info = utils.GetHostInfo(self.op.new_name)
4526 self.op.new_name = new_name = name_info.name
4527 instance_list = self.cfg.GetInstanceList()
4528 if new_name in instance_list:
4529 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4530 new_name, errors.ECODE_EXISTS)
4532 if not getattr(self.op, "ignore_ip", False):
4533 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4534 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4535 (name_info.ip, new_name),
4536 errors.ECODE_NOTUNIQUE)
4539 def Exec(self, feedback_fn):
4540 """Reinstall the instance.
4543 inst = self.instance
4544 old_name = inst.name
4546 if inst.disk_template == constants.DT_FILE:
4547 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4549 self.cfg.RenameInstance(inst.name, self.op.new_name)
4550 # Change the instance lock. This is definitely safe while we hold the BGL
4551 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4552 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4554 # re-read the instance from the configuration after rename
4555 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4557 if inst.disk_template == constants.DT_FILE:
4558 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4559 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4560 old_file_storage_dir,
4561 new_file_storage_dir)
4562 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4563 " (but the instance has been renamed in Ganeti)" %
4564 (inst.primary_node, old_file_storage_dir,
4565 new_file_storage_dir))
4567 _StartInstanceDisks(self, inst, None)
4569 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4570 old_name, self.op.debug_level)
4571 msg = result.fail_msg
4573 msg = ("Could not run OS rename script for instance %s on node %s"
4574 " (but the instance has been renamed in Ganeti): %s" %
4575 (inst.name, inst.primary_node, msg))
4576 self.proc.LogWarning(msg)
4578 _ShutdownInstanceDisks(self, inst)
4581 class LURemoveInstance(LogicalUnit):
4582 """Remove an instance.
4585 HPATH = "instance-remove"
4586 HTYPE = constants.HTYPE_INSTANCE
4587 _OP_REQP = ["instance_name", "ignore_failures"]
4590 def CheckArguments(self):
4591 """Check the arguments.
4594 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4595 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4597 def ExpandNames(self):
4598 self._ExpandAndLockInstance()
4599 self.needed_locks[locking.LEVEL_NODE] = []
4600 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4602 def DeclareLocks(self, level):
4603 if level == locking.LEVEL_NODE:
4604 self._LockInstancesNodes()
4606 def BuildHooksEnv(self):
4609 This runs on master, primary and secondary nodes of the instance.
4612 env = _BuildInstanceHookEnvByObject(self, self.instance)
4613 env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4614 nl = [self.cfg.GetMasterNode()]
4615 nl_post = list(self.instance.all_nodes) + nl
4616 return env, nl, nl_post
4618 def CheckPrereq(self):
4619 """Check prerequisites.
4621 This checks that the instance is in the cluster.
4624 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4625 assert self.instance is not None, \
4626 "Cannot retrieve locked instance %s" % self.op.instance_name
4628 def Exec(self, feedback_fn):
4629 """Remove the instance.
4632 instance = self.instance
4633 logging.info("Shutting down instance %s on node %s",
4634 instance.name, instance.primary_node)
4636 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4637 self.shutdown_timeout)
4638 msg = result.fail_msg
4640 if self.op.ignore_failures:
4641 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4643 raise errors.OpExecError("Could not shutdown instance %s on"
4645 (instance.name, instance.primary_node, msg))
4647 logging.info("Removing block devices for instance %s", instance.name)
4649 if not _RemoveDisks(self, instance):
4650 if self.op.ignore_failures:
4651 feedback_fn("Warning: can't remove instance's disks")
4653 raise errors.OpExecError("Can't remove instance's disks")
4655 logging.info("Removing instance %s out of cluster config", instance.name)
4657 self.cfg.RemoveInstance(instance.name)
4658 self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4661 class LUQueryInstances(NoHooksLU):
4662 """Logical unit for querying instances.
4665 # pylint: disable-msg=W0142
4666 _OP_REQP = ["output_fields", "names", "use_locking"]
4668 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4669 "serial_no", "ctime", "mtime", "uuid"]
4670 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4672 "disk_template", "ip", "mac", "bridge",
4673 "nic_mode", "nic_link",
4674 "sda_size", "sdb_size", "vcpus", "tags",
4675 "network_port", "beparams",
4676 r"(disk)\.(size)/([0-9]+)",
4677 r"(disk)\.(sizes)", "disk_usage",
4678 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4679 r"(nic)\.(bridge)/([0-9]+)",
4680 r"(nic)\.(macs|ips|modes|links|bridges)",
4681 r"(disk|nic)\.(count)",
4683 ] + _SIMPLE_FIELDS +
4685 for name in constants.HVS_PARAMETERS
4686 if name not in constants.HVC_GLOBALS] +
4688 for name in constants.BES_PARAMETERS])
4689 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
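# Illustrative field names accepted by the sets above: simple fields such as
# "name" or "pnode", parametrized fields matched by the regular expressions,
# e.g. "disk.size/0" (size of the first disk) or "nic.mac/1" (MAC of the
# second NIC), aggregate fields such as "disk.sizes" or "nic.macs", and the
# dynamic fields "oper_state", "oper_ram" and "status".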
4692 def ExpandNames(self):
4693 _CheckOutputFields(static=self._FIELDS_STATIC,
4694 dynamic=self._FIELDS_DYNAMIC,
4695 selected=self.op.output_fields)
4697 self.needed_locks = {}
4698 self.share_locks[locking.LEVEL_INSTANCE] = 1
4699 self.share_locks[locking.LEVEL_NODE] = 1
4702 self.wanted = _GetWantedInstances(self, self.op.names)
4704 self.wanted = locking.ALL_SET
4706 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4707 self.do_locking = self.do_node_query and self.op.use_locking
4709 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4710 self.needed_locks[locking.LEVEL_NODE] = []
4711 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4713 def DeclareLocks(self, level):
4714 if level == locking.LEVEL_NODE and self.do_locking:
4715 self._LockInstancesNodes()
4717 def CheckPrereq(self):
4718 """Check prerequisites.
4723 def Exec(self, feedback_fn):
4724 """Computes the list of nodes and their attributes.
4727 # pylint: disable-msg=R0912
4728 # way too many branches here
4729 all_info = self.cfg.GetAllInstancesInfo()
4730 if self.wanted == locking.ALL_SET:
4731 # caller didn't specify instance names, so ordering is not important
4733 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4735 instance_names = all_info.keys()
4736 instance_names = utils.NiceSort(instance_names)
4738 # caller did specify names, so we must keep the ordering
4740 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4742 tgt_set = all_info.keys()
4743 missing = set(self.wanted).difference(tgt_set)
4745 raise errors.OpExecError("Some instances were removed before"
4746 " retrieving their data: %s" % missing)
4747 instance_names = self.wanted
4749 instance_list = [all_info[iname] for iname in instance_names]
4751 # begin data gathering
4753 nodes = frozenset([inst.primary_node for inst in instance_list])
4754 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4758 if self.do_node_query:
4760 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4762 result = node_data[name]
4764 # offline nodes will be in both lists
4765 off_nodes.append(name)
4767 bad_nodes.append(name)
4770 live_data.update(result.payload)
4771 # else no instance is alive
4773 live_data = dict([(name, {}) for name in instance_names])
4775 # end data gathering
4780 cluster = self.cfg.GetClusterInfo()
4781 for instance in instance_list:
4783 i_hv = cluster.FillHV(instance, skip_globals=True)
4784 i_be = cluster.FillBE(instance)
4785 i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4786 nic.nicparams) for nic in instance.nics]
4787 for field in self.op.output_fields:
4788 st_match = self._FIELDS_STATIC.Matches(field)
4789 if field in self._SIMPLE_FIELDS:
4790 val = getattr(instance, field)
4791 elif field == "pnode":
4792 val = instance.primary_node
4793 elif field == "snodes":
4794 val = list(instance.secondary_nodes)
4795 elif field == "admin_state":
4796 val = instance.admin_up
4797 elif field == "oper_state":
4798 if instance.primary_node in bad_nodes:
4801 val = bool(live_data.get(instance.name))
4802 elif field == "status":
4803 if instance.primary_node in off_nodes:
4804 val = "ERROR_nodeoffline"
4805 elif instance.primary_node in bad_nodes:
4806 val = "ERROR_nodedown"
4808 running = bool(live_data.get(instance.name))
4810 if instance.admin_up:
4815 if instance.admin_up:
4819 elif field == "oper_ram":
4820 if instance.primary_node in bad_nodes:
4822 elif instance.name in live_data:
4823 val = live_data[instance.name].get("memory", "?")
4826 elif field == "vcpus":
4827 val = i_be[constants.BE_VCPUS]
4828 elif field == "disk_template":
4829 val = instance.disk_template
4832 val = instance.nics[0].ip
4835 elif field == "nic_mode":
4837 val = i_nicp[0][constants.NIC_MODE]
4840 elif field == "nic_link":
4842 val = i_nicp[0][constants.NIC_LINK]
4845 elif field == "bridge":
4846 if (instance.nics and
4847 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4848 val = i_nicp[0][constants.NIC_LINK]
4851 elif field == "mac":
4853 val = instance.nics[0].mac
4856 elif field == "sda_size" or field == "sdb_size":
4857 idx = ord(field[2]) - ord('a')
4859 val = instance.FindDisk(idx).size
4860 except errors.OpPrereqError:
4862 elif field == "disk_usage": # total disk usage per node
4863 disk_sizes = [{'size': disk.size} for disk in instance.disks]
4864 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4865 elif field == "tags":
4866 val = list(instance.GetTags())
4867 elif field == "hvparams":
4869 elif (field.startswith(HVPREFIX) and
4870 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4871 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4872 val = i_hv.get(field[len(HVPREFIX):], None)
4873 elif field == "beparams":
4875 elif (field.startswith(BEPREFIX) and
4876 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4877 val = i_be.get(field[len(BEPREFIX):], None)
4878 elif st_match and st_match.groups():
4879 # matches a variable list
4880 st_groups = st_match.groups()
4881 if st_groups and st_groups[0] == "disk":
4882 if st_groups[1] == "count":
4883 val = len(instance.disks)
4884 elif st_groups[1] == "sizes":
4885 val = [disk.size for disk in instance.disks]
4886 elif st_groups[1] == "size":
4888 val = instance.FindDisk(st_groups[2]).size
4889 except errors.OpPrereqError:
4892 assert False, "Unhandled disk parameter"
4893 elif st_groups[0] == "nic":
4894 if st_groups[1] == "count":
4895 val = len(instance.nics)
4896 elif st_groups[1] == "macs":
4897 val = [nic.mac for nic in instance.nics]
4898 elif st_groups[1] == "ips":
4899 val = [nic.ip for nic in instance.nics]
4900 elif st_groups[1] == "modes":
4901 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4902 elif st_groups[1] == "links":
4903 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4904 elif st_groups[1] == "bridges":
4907 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4908 val.append(nicp[constants.NIC_LINK])
4913 nic_idx = int(st_groups[2])
4914 if nic_idx >= len(instance.nics):
4917 if st_groups[1] == "mac":
4918 val = instance.nics[nic_idx].mac
4919 elif st_groups[1] == "ip":
4920 val = instance.nics[nic_idx].ip
4921 elif st_groups[1] == "mode":
4922 val = i_nicp[nic_idx][constants.NIC_MODE]
4923 elif st_groups[1] == "link":
4924 val = i_nicp[nic_idx][constants.NIC_LINK]
4925 elif st_groups[1] == "bridge":
4926 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4927 if nic_mode == constants.NIC_MODE_BRIDGED:
4928 val = i_nicp[nic_idx][constants.NIC_LINK]
4932 assert False, "Unhandled NIC parameter"
4934 assert False, ("Declared but unhandled variable parameter '%s'" %
4937 assert False, "Declared but unhandled parameter '%s'" % field
4944 class LUFailoverInstance(LogicalUnit):
4945 """Failover an instance.
4948 HPATH = "instance-failover"
4949 HTYPE = constants.HTYPE_INSTANCE
4950 _OP_REQP = ["instance_name", "ignore_consistency"]
4953 def CheckArguments(self):
4954 """Check the arguments.
4957 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4958 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4960 def ExpandNames(self):
4961 self._ExpandAndLockInstance()
4962 self.needed_locks[locking.LEVEL_NODE] = []
4963 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4965 def DeclareLocks(self, level):
4966 if level == locking.LEVEL_NODE:
4967 self._LockInstancesNodes()
4969 def BuildHooksEnv(self):
4972 This runs on master, primary and secondary nodes of the instance.
4975 instance = self.instance
4976 source_node = instance.primary_node
4977 target_node = instance.secondary_nodes[0]
4979 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4980 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4981 "OLD_PRIMARY": source_node,
4982 "OLD_SECONDARY": target_node,
4983 "NEW_PRIMARY": target_node,
4984 "NEW_SECONDARY": source_node,
4986 env.update(_BuildInstanceHookEnvByObject(self, instance))
4987 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4989 nl_post.append(source_node)
4990 return env, nl, nl_post
4992 def CheckPrereq(self):
4993 """Check prerequisites.
4995 This checks that the instance is in the cluster.
4998 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4999 assert self.instance is not None, \
5000 "Cannot retrieve locked instance %s" % self.op.instance_name
5002 bep = self.cfg.GetClusterInfo().FillBE(instance)
5003 if instance.disk_template not in constants.DTS_NET_MIRROR:
5004 raise errors.OpPrereqError("Instance's disk layout is not"
5005 " network mirrored, cannot failover.",
5008 secondary_nodes = instance.secondary_nodes
5009 if not secondary_nodes:
5010 raise errors.ProgrammerError("no secondary node but using "
5011 "a mirrored disk template")
5013 target_node = secondary_nodes[0]
5014 _CheckNodeOnline(self, target_node)
5015 _CheckNodeNotDrained(self, target_node)
5016 if instance.admin_up:
5017 # check memory requirements on the secondary node
5018 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5019 instance.name, bep[constants.BE_MEMORY],
5020 instance.hypervisor)
5022 self.LogInfo("Not checking memory on the secondary node as"
5023 " instance will not be started")
5025 # check bridge existence
5026 _CheckInstanceBridgesExist(self, instance, node=target_node)
5028 def Exec(self, feedback_fn):
5029 """Failover an instance.
5031 The failover is done by shutting it down on its present node and
5032 starting it on the secondary.
5035 instance = self.instance
5037 source_node = instance.primary_node
5038 target_node = instance.secondary_nodes[0]
5040 if instance.admin_up:
5041 feedback_fn("* checking disk consistency between source and target")
5042 for dev in instance.disks:
5043 # for drbd, these are drbd over lvm
5044 if not _CheckDiskConsistency(self, dev, target_node, False):
5045 if not self.op.ignore_consistency:
5046 raise errors.OpExecError("Disk %s is degraded on target node,"
5047 " aborting failover." % dev.iv_name)
5049 feedback_fn("* not checking disk consistency as instance is not running")
5051 feedback_fn("* shutting down instance on source node")
5052 logging.info("Shutting down instance %s on node %s",
5053 instance.name, source_node)
5055 result = self.rpc.call_instance_shutdown(source_node, instance,
5056 self.shutdown_timeout)
5057 msg = result.fail_msg
5059 if self.op.ignore_consistency:
5060 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5061 " Proceeding anyway. Please make sure node"
5062 " %s is down. Error details: %s",
5063 instance.name, source_node, source_node, msg)
5065 raise errors.OpExecError("Could not shutdown instance %s on"
5067 (instance.name, source_node, msg))
5069 feedback_fn("* deactivating the instance's disks on source node")
5070 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5071 raise errors.OpExecError("Can't shut down the instance's disks.")
5073 instance.primary_node = target_node
5074 # distribute new instance config to the other nodes
5075 self.cfg.Update(instance, feedback_fn)
5077 # Only start the instance if it's marked as up
5078 if instance.admin_up:
5079 feedback_fn("* activating the instance's disks on target node")
5080 logging.info("Starting instance %s on node %s",
5081 instance.name, target_node)
5083 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5084 ignore_secondaries=True)
5086 _ShutdownInstanceDisks(self, instance)
5087 raise errors.OpExecError("Can't activate the instance's disks")
5089 feedback_fn("* starting the instance on the target node")
5090 result = self.rpc.call_instance_start(target_node, instance, None, None)
5091 msg = result.fail_msg
5093 _ShutdownInstanceDisks(self, instance)
5094 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5095 (instance.name, target_node, msg))
5098 class LUMigrateInstance(LogicalUnit):
5099 """Migrate an instance.
5101 This is migration without shutting down, compared to the failover,
5102 which is done with shutdown.
5105 HPATH = "instance-migrate"
5106 HTYPE = constants.HTYPE_INSTANCE
5107 _OP_REQP = ["instance_name", "live", "cleanup"]
5111 def ExpandNames(self):
5112 self._ExpandAndLockInstance()
5114 self.needed_locks[locking.LEVEL_NODE] = []
5115 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5117 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5118 self.op.live, self.op.cleanup)
5119 self.tasklets = [self._migrater]
5121 def DeclareLocks(self, level):
5122 if level == locking.LEVEL_NODE:
5123 self._LockInstancesNodes()
5125 def BuildHooksEnv(self):
5128 This runs on master, primary and secondary nodes of the instance.
5131 instance = self._migrater.instance
5132 source_node = instance.primary_node
5133 target_node = instance.secondary_nodes[0]
5134 env = _BuildInstanceHookEnvByObject(self, instance)
5135 env["MIGRATE_LIVE"] = self.op.live
5136 env["MIGRATE_CLEANUP"] = self.op.cleanup
5138 "OLD_PRIMARY": source_node,
5139 "OLD_SECONDARY": target_node,
5140 "NEW_PRIMARY": target_node,
5141 "NEW_SECONDARY": source_node,
5143 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5145 nl_post.append(source_node)
5146 return env, nl, nl_post
5149 class LUMoveInstance(LogicalUnit):
5150 """Move an instance by data-copying.
5153 HPATH = "instance-move"
5154 HTYPE = constants.HTYPE_INSTANCE
5155 _OP_REQP = ["instance_name", "target_node"]
5158 def CheckArguments(self):
5159 """Check the arguments.
5162 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5163 constants.DEFAULT_SHUTDOWN_TIMEOUT)
5165 def ExpandNames(self):
5166 self._ExpandAndLockInstance()
5167 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5168 self.op.target_node = target_node
5169 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5170 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5172 def DeclareLocks(self, level):
5173 if level == locking.LEVEL_NODE:
5174 self._LockInstancesNodes(primary_only=True)
5176 def BuildHooksEnv(self):
5179 This runs on master, primary and secondary nodes of the instance.
5183 "TARGET_NODE": self.op.target_node,
5184 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5186 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5187 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5188 self.op.target_node]
5191 def CheckPrereq(self):
5192 """Check prerequisites.
5194 This checks that the instance is in the cluster.
5197 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5198 assert self.instance is not None, \
5199 "Cannot retrieve locked instance %s" % self.op.instance_name
5201 node = self.cfg.GetNodeInfo(self.op.target_node)
5202 assert node is not None, \
5203 "Cannot retrieve locked node %s" % self.op.target_node
5205 self.target_node = target_node = node.name
5207 if target_node == instance.primary_node:
5208 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5209 (instance.name, target_node),
5212 bep = self.cfg.GetClusterInfo().FillBE(instance)
5214 for idx, dsk in enumerate(instance.disks):
5215 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5216 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5217 " cannot copy" % idx, errors.ECODE_STATE)
5219 _CheckNodeOnline(self, target_node)
5220 _CheckNodeNotDrained(self, target_node)
5222 if instance.admin_up:
5223 # check memory requirements on the secondary node
5224 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5225 instance.name, bep[constants.BE_MEMORY],
5226 instance.hypervisor)
5228 self.LogInfo("Not checking memory on the secondary node as"
5229 " instance will not be started")
5231 # check bridge existence
5232 _CheckInstanceBridgesExist(self, instance, node=target_node)
5234 def Exec(self, feedback_fn):
5235 """Move an instance.
5237 The move is done by shutting it down on its present node, copying
5238 the data over (slow) and starting it on the new node.
5241 instance = self.instance
5243 source_node = instance.primary_node
5244 target_node = self.target_node
5246 self.LogInfo("Shutting down instance %s on source node %s",
5247 instance.name, source_node)
5249 result = self.rpc.call_instance_shutdown(source_node, instance,
5250 self.shutdown_timeout)
5251 msg = result.fail_msg
5253 if self.op.ignore_consistency:
5254 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5255 " Proceeding anyway. Please make sure node"
5256 " %s is down. Error details: %s",
5257 instance.name, source_node, source_node, msg)
5259 raise errors.OpExecError("Could not shutdown instance %s on"
5261 (instance.name, source_node, msg))
5263 # create the target disks
5265 _CreateDisks(self, instance, target_node=target_node)
5266 except errors.OpExecError:
5267 self.LogWarning("Device creation failed, reverting...")
5269 _RemoveDisks(self, instance, target_node=target_node)
5271 self.cfg.ReleaseDRBDMinors(instance.name)
5274 cluster_name = self.cfg.GetClusterInfo().cluster_name
5277 # activate, get path, copy the data over
5278 for idx, disk in enumerate(instance.disks):
5279 self.LogInfo("Copying data for disk %d", idx)
5280 result = self.rpc.call_blockdev_assemble(target_node, disk,
5281 instance.name, True)
5283 self.LogWarning("Can't assemble newly created disk %d: %s",
5284 idx, result.fail_msg)
5285 errs.append(result.fail_msg)
5287 dev_path = result.payload
5288 result = self.rpc.call_blockdev_export(source_node, disk,
5289 target_node, dev_path,
5292 self.LogWarning("Can't copy data over for disk %d: %s",
5293 idx, result.fail_msg)
5294 errs.append(result.fail_msg)
5298 self.LogWarning("Some disks failed to copy, aborting")
5300 _RemoveDisks(self, instance, target_node=target_node)
5302 self.cfg.ReleaseDRBDMinors(instance.name)
5303 raise errors.OpExecError("Errors during disk copy: %s" %
5306 instance.primary_node = target_node
5307 self.cfg.Update(instance, feedback_fn)
5309 self.LogInfo("Removing the disks on the original node")
5310 _RemoveDisks(self, instance, target_node=source_node)
5312 # Only start the instance if it's marked as up
5313 if instance.admin_up:
5314 self.LogInfo("Starting instance %s on node %s",
5315 instance.name, target_node)
5317 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5318 ignore_secondaries=True)
5320 _ShutdownInstanceDisks(self, instance)
5321 raise errors.OpExecError("Can't activate the instance's disks")
5323 result = self.rpc.call_instance_start(target_node, instance, None, None)
5324 msg = result.fail_msg
5326 _ShutdownInstanceDisks(self, instance)
5327 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5328 (instance.name, target_node, msg))
5331 class LUMigrateNode(LogicalUnit):
5332 """Migrate all instances from a node.
5335 HPATH = "node-migrate"
5336 HTYPE = constants.HTYPE_NODE
5337 _OP_REQP = ["node_name", "live"]
5340 def ExpandNames(self):
5341 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5343 self.needed_locks = {
5344 locking.LEVEL_NODE: [self.op.node_name],
5347 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5349 # Create tasklets for migrating instances for all instances on this node
5353 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5354 logging.debug("Migrating instance %s", inst.name)
5355 names.append(inst.name)
5357 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5359 self.tasklets = tasklets
5361 # Declare instance locks
5362 self.needed_locks[locking.LEVEL_INSTANCE] = names
5364 def DeclareLocks(self, level):
5365 if level == locking.LEVEL_NODE:
5366 self._LockInstancesNodes()
5368 def BuildHooksEnv(self):
5371 This runs on the master, the primary and all the secondaries.
5375 "NODE_NAME": self.op.node_name,
5378 nl = [self.cfg.GetMasterNode()]
5380 return (env, nl, nl)
5383 class TLMigrateInstance(Tasklet):
5384 def __init__(self, lu, instance_name, live, cleanup):
5385 """Initializes this class.
5388 Tasklet.__init__(self, lu)
5391 self.instance_name = instance_name
5393 self.cleanup = cleanup
5395 def CheckPrereq(self):
5396 """Check prerequisites.
5398 This checks that the instance is in the cluster.
5401 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5402 instance = self.cfg.GetInstanceInfo(instance_name)
5403 assert instance is not None
5405 if instance.disk_template != constants.DT_DRBD8:
5406 raise errors.OpPrereqError("Instance's disk layout is not"
5407 " drbd8, cannot migrate.", errors.ECODE_STATE)
5409 secondary_nodes = instance.secondary_nodes
5410 if not secondary_nodes:
5411 raise errors.ConfigurationError("No secondary node but using"
5412 " drbd8 disk template")
5414 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5416 target_node = secondary_nodes[0]
5417 # check memory requirements on the secondary node
5418 _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5419 instance.name, i_be[constants.BE_MEMORY],
5420 instance.hypervisor)
5422 # check bridge existence
5423 _CheckInstanceBridgesExist(self, instance, node=target_node)
5425 if not self.cleanup:
5426 _CheckNodeNotDrained(self, target_node)
5427 result = self.rpc.call_instance_migratable(instance.primary_node,
5429 result.Raise("Can't migrate, please use failover",
5430 prereq=True, ecode=errors.ECODE_STATE)
5432 self.instance = instance
5434 def _WaitUntilSync(self):
5435 """Poll with custom rpc for disk sync.
5437 This uses our own step-based rpc call.
5440 self.feedback_fn("* wait until resync is done")
5444 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5446 self.instance.disks)
5448 for node, nres in result.items():
5449 nres.Raise("Cannot resync disks on node %s" % node)
5450 node_done, node_percent = nres.payload
5451 all_done = all_done and node_done
5452 if node_percent is not None:
5453 min_percent = min(min_percent, node_percent)
5455 if min_percent < 100:
5456 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5459 def _EnsureSecondary(self, node):
5460 """Demote a node to secondary.
5463 self.feedback_fn("* switching node %s to secondary mode" % node)
5465 for dev in self.instance.disks:
5466 self.cfg.SetDiskID(dev, node)
5468 result = self.rpc.call_blockdev_close(node, self.instance.name,
5469 self.instance.disks)
5470 result.Raise("Cannot change disk to secondary on node %s" % node)
5472 def _GoStandalone(self):
5473 """Disconnect from the network.
5476 self.feedback_fn("* changing into standalone mode")
5477 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5478 self.instance.disks)
5479 for node, nres in result.items():
5480 nres.Raise("Cannot disconnect disks node %s" % node)
5482 def _GoReconnect(self, multimaster):
5483 """Reconnect to the network.
5489 msg = "single-master"
5490 self.feedback_fn("* changing disks into %s mode" % msg)
5491 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5492 self.instance.disks,
5493 self.instance.name, multimaster)
5494 for node, nres in result.items():
5495 nres.Raise("Cannot change disks config on node %s" % node)
5497 def _ExecCleanup(self):
5498 """Try to cleanup after a failed migration.
5500 The cleanup is done by:
5501 - check that the instance is running only on one node
5502 (and update the config if needed)
5503 - change disks on its secondary node to secondary
5504 - wait until disks are fully synchronized
5505 - disconnect from the network
5506 - change disks into single-master mode
5507 - wait again until disks are fully synchronized
5510 instance = self.instance
5511 target_node = self.target_node
5512 source_node = self.source_node
5514 # check running on only one node
5515 self.feedback_fn("* checking where the instance actually runs"
5516 " (if this hangs, the hypervisor might be in"
5518 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5519 for node, result in ins_l.items():
5520 result.Raise("Can't contact node %s" % node)
5522 runningon_source = instance.name in ins_l[source_node].payload
5523 runningon_target = instance.name in ins_l[target_node].payload
5525 if runningon_source and runningon_target:
5526 raise errors.OpExecError("Instance seems to be running on two nodes,"
5527 " or the hypervisor is confused. You will have"
5528 " to ensure manually that it runs only on one"
5529 " and restart this operation.")
5531 if not (runningon_source or runningon_target):
5532 raise errors.OpExecError("Instance does not seem to be running at all."
5533 " In this case, it's safer to repair by"
5534 " running 'gnt-instance stop' to ensure disk"
5535 " shutdown, and then restarting it.")
5537 if runningon_target:
5538 # the migration has actually succeeded, we need to update the config
5539 self.feedback_fn("* instance running on secondary node (%s),"
5540 " updating config" % target_node)
5541 instance.primary_node = target_node
5542 self.cfg.Update(instance, self.feedback_fn)
5543 demoted_node = source_node
5545 self.feedback_fn("* instance confirmed to be running on its"
5546 " primary node (%s)" % source_node)
5547 demoted_node = target_node
5549 self._EnsureSecondary(demoted_node)
5551 self._WaitUntilSync()
5552 except errors.OpExecError:
5553 # we ignore here errors, since if the device is standalone, it
5554 # won't be able to sync
5556 self._GoStandalone()
5557 self._GoReconnect(False)
5558 self._WaitUntilSync()
5560 self.feedback_fn("* done")
5562 def _RevertDiskStatus(self):
5563 """Try to revert the disk status after a failed migration.
5566 target_node = self.target_node
5568 self._EnsureSecondary(target_node)
5569 self._GoStandalone()
5570 self._GoReconnect(False)
5571 self._WaitUntilSync()
5572 except errors.OpExecError, err:
5573 self.lu.LogWarning("Migration failed and I can't reconnect the"
5574 " drives: error '%s'\n"
5575 "Please look and recover the instance status" %
5578 def _AbortMigration(self):
5579 """Call the hypervisor code to abort a started migration.
5582 instance = self.instance
5583 target_node = self.target_node
5584 migration_info = self.migration_info
5586 abort_result = self.rpc.call_finalize_migration(target_node,
5590 abort_msg = abort_result.fail_msg
5592 logging.error("Aborting migration failed on target node %s: %s",
5593 target_node, abort_msg)
5594 # Don't raise an exception here, as we still have to try to revert the
5595 # disk status, even if this step failed.
5597 def _ExecMigration(self):
5598 """Migrate an instance.
5600 The migrate is done by:
5601 - change the disks into dual-master mode
5602 - wait until disks are fully synchronized again
5603 - migrate the instance
5604 - change disks on the new secondary node (the old primary) to secondary
5605 - wait until disks are fully synchronized
5606 - change disks into single-master mode
5609 instance = self.instance
5610 target_node = self.target_node
5611 source_node = self.source_node
5613 self.feedback_fn("* checking disk consistency between source and target")
5614 for dev in instance.disks:
5615 if not _CheckDiskConsistency(self, dev, target_node, False):
5616 raise errors.OpExecError("Disk %s is degraded or not fully"
5617 " synchronized on target node,"
5618 " aborting migrate." % dev.iv_name)
5620 # First get the migration information from the remote node
5621 result = self.rpc.call_migration_info(source_node, instance)
5622 msg = result.fail_msg
5624 log_err = ("Failed fetching source migration information from %s: %s" %
5626 logging.error(log_err)
5627 raise errors.OpExecError(log_err)
5629 self.migration_info = migration_info = result.payload
5631 # Then switch the disks to master/master mode
5632 self._EnsureSecondary(target_node)
5633 self._GoStandalone()
5634 self._GoReconnect(True)
5635 self._WaitUntilSync()
5637 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5638 result = self.rpc.call_accept_instance(target_node,
5641 self.nodes_ip[target_node])
5643 msg = result.fail_msg
5645 logging.error("Instance pre-migration failed, trying to revert"
5646 " disk status: %s", msg)
5647 self.feedback_fn("Pre-migration failed, aborting")
5648 self._AbortMigration()
5649 self._RevertDiskStatus()
5650 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5651 (instance.name, msg))
5653 self.feedback_fn("* migrating instance to %s" % target_node)
5655 result = self.rpc.call_instance_migrate(source_node, instance,
5656 self.nodes_ip[target_node],
5658 msg = result.fail_msg
5660 logging.error("Instance migration failed, trying to revert"
5661 " disk status: %s", msg)
5662 self.feedback_fn("Migration failed, aborting")
5663 self._AbortMigration()
5664 self._RevertDiskStatus()
5665 raise errors.OpExecError("Could not migrate instance %s: %s" %
5666 (instance.name, msg))
5669 instance.primary_node = target_node
5670 # distribute new instance config to the other nodes
5671 self.cfg.Update(instance, self.feedback_fn)
5673 result = self.rpc.call_finalize_migration(target_node,
5677 msg = result.fail_msg
5679 logging.error("Instance migration succeeded, but finalization failed:"
5681 raise errors.OpExecError("Could not finalize instance migration: %s" %
5684 self._EnsureSecondary(source_node)
5685 self._WaitUntilSync()
5686 self._GoStandalone()
5687 self._GoReconnect(False)
5688 self._WaitUntilSync()
5690 self.feedback_fn("* done")
5692 def Exec(self, feedback_fn):
5693 """Perform the migration.
5696 feedback_fn("Migrating instance %s" % self.instance.name)
5698 self.feedback_fn = feedback_fn
5700 self.source_node = self.instance.primary_node
5701 self.target_node = self.instance.secondary_nodes[0]
5702 self.all_nodes = [self.source_node, self.target_node]
5704 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5705 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5709 return self._ExecCleanup()
5711 return self._ExecMigration()
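# Note on the attributes set up in Exec above: all_nodes is always the pair
# [primary, secondary] of the instance, and nodes_ip maps each of those node
# names to its secondary_ip, which is the replication address passed to the
# drbd_disconnect_net/drbd_attach_net RPCs used by this tasklet.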
5714 def _CreateBlockDev(lu, node, instance, device, force_create,
5716 """Create a tree of block devices on a given node.
5718 If this device type has to be created on secondaries, create it and all of its children.
5721 If not, just recurse to children keeping the same 'force' value.
5723 @param lu: the lu on whose behalf we execute
5724 @param node: the node on which to create the device
5725 @type instance: L{objects.Instance}
5726 @param instance: the instance which owns the device
5727 @type device: L{objects.Disk}
5728 @param device: the device to create
5729 @type force_create: boolean
5730 @param force_create: whether to force creation of this device; this
5731 will be changed to True whenever we find a device which has the
5732 CreateOnSecondary() attribute
5733 @param info: the extra 'metadata' we should attach to the device
5734 (this will be represented as a LVM tag)
5735 @type force_open: boolean
5736 @param force_open: this parameter will be passed to the
5737 L{backend.BlockdevCreate} function where it specifies
5738 whether we run on primary or not, and it affects both
5739 the child assembly and the device's own Open() execution
5742 if device.CreateOnSecondary():
5746 for child in device.children:
5747 _CreateBlockDev(lu, node, instance, child, force_create,
5750 if not force_create:
5753 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
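# Recursion sketch (illustrative, assuming a DRBD8 device that reports
# CreateOnSecondary() and has two LV children, as generated by
# _GenerateDRBD8Branch below):
#
#   drbd8 device
#     +- data LV
#     +- meta LV
#
# Called with force_create=False on a secondary node, _CreateBlockDev flips
# force_create to True at the drbd8 level, so the two LVs and the drbd8
# device itself are all created there; a device for which force_create stays
# False is only recursed into, never created.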
5756 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5757 """Create a single block device on a given node.
5759 This will not recurse over children of the device, so they must be created in advance.
5762 @param lu: the lu on whose behalf we execute
5763 @param node: the node on which to create the device
5764 @type instance: L{objects.Instance}
5765 @param instance: the instance which owns the device
5766 @type device: L{objects.Disk}
5767 @param device: the device to create
5768 @param info: the extra 'metadata' we should attach to the device
5769 (this will be represented as a LVM tag)
5770 @type force_open: boolean
5771 @param force_open: this parameter will be passed to the
5772 L{backend.BlockdevCreate} function where it specifies
5773 whether we run on primary or not, and it affects both
5774 the child assembly and the device's own Open() execution
5777 lu.cfg.SetDiskID(device, node)
5778 result = lu.rpc.call_blockdev_create(node, device, device.size,
5779 instance.name, force_open, info)
5780 result.Raise("Can't create block device %s on"
5781 " node %s for instance %s" % (device, node, instance.name))
5782 if device.physical_id is None:
5783 device.physical_id = result.payload
5786 def _GenerateUniqueNames(lu, exts):
5787 """Generate a suitable LV name.
5789 This will generate a logical volume name for the given instance.
5794 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5795 results.append("%s%s" % (new_id, val))
5799 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5801 """Generate a drbd8 device complete with its children.
5804 port = lu.cfg.AllocatePort()
5805 vgname = lu.cfg.GetVGName()
5806 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5807 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5808 logical_id=(vgname, names[0]))
5809 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5810 logical_id=(vgname, names[1]))
5811 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5812 logical_id=(primary, secondary, port,
5815 children=[dev_data, dev_meta],
5820 def _GenerateDiskTemplate(lu, template_name,
5821 instance_name, primary_node,
5822 secondary_nodes, disk_info,
5823 file_storage_dir, file_driver,
5825 """Generate the entire disk layout for a given template type.
5828 #TODO: compute space requirements
5830 vgname = lu.cfg.GetVGName()
5831 disk_count = len(disk_info)
5833 if template_name == constants.DT_DISKLESS:
5835 elif template_name == constants.DT_PLAIN:
5836 if len(secondary_nodes) != 0:
5837 raise errors.ProgrammerError("Wrong template configuration")
5839 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5840 for i in range(disk_count)])
5841 for idx, disk in enumerate(disk_info):
5842 disk_index = idx + base_index
5843 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5844 logical_id=(vgname, names[idx]),
5845 iv_name="disk/%d" % disk_index,
5847 disks.append(disk_dev)
5848 elif template_name == constants.DT_DRBD8:
5849 if len(secondary_nodes) != 1:
5850 raise errors.ProgrammerError("Wrong template configuration")
5851 remote_node = secondary_nodes[0]
5852 minors = lu.cfg.AllocateDRBDMinor(
5853 [primary_node, remote_node] * len(disk_info), instance_name)
5856 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5857 for i in range(disk_count)]):
5858 names.append(lv_prefix + "_data")
5859 names.append(lv_prefix + "_meta")
5860 for idx, disk in enumerate(disk_info):
5861 disk_index = idx + base_index
5862 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5863 disk["size"], names[idx*2:idx*2+2],
5864 "disk/%d" % disk_index,
5865 minors[idx*2], minors[idx*2+1])
5866 disk_dev.mode = disk["mode"]
5867 disks.append(disk_dev)
5868 elif template_name == constants.DT_FILE:
5869 if len(secondary_nodes) != 0:
5870 raise errors.ProgrammerError("Wrong template configuration")
5872 _RequireFileStorage()
5874 for idx, disk in enumerate(disk_info):
5875 disk_index = idx + base_index
5876 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5877 iv_name="disk/%d" % disk_index,
5878 logical_id=(file_driver,
5879 "%s/disk%d" % (file_storage_dir,
5882 disks.append(disk_dev)
5884 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
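# Illustrative sketch (not part of Ganeti): the shape of the disk_info input
# that _GenerateDiskTemplate expects and the iv_name numbering it produces.
# Sizes are in MiB; the actual unique name prefixes come from
# _GenerateUniqueNames/GenerateUniqueID and are not reproduced here.
def _example_plain_disk_layout(disk_info, base_index=0):
  """Mimic the DT_PLAIN branch above using plain dictionaries."""
  layout = []
  for idx, disk in enumerate(disk_info):
    disk_index = idx + base_index
    layout.append({
      "iv_name": "disk/%d" % disk_index,    # stable instance-visible name
      "lv_suffix": ".disk%d" % disk_index,  # suffix fed to _GenerateUniqueNames
      "size": disk["size"],
      "mode": disk.get("mode", "rw"),
    })
  return layout

# _example_plain_disk_layout([{"size": 1024, "mode": "rw"},
#                             {"size": 2048, "mode": "ro"}])
# yields iv_names "disk/0" and "disk/1" with matching ".disk0"/".disk1" LV
# name suffixes; the DRBD8 branch additionally splits each generated name
# into a "_data" and a "_meta" logical volume.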
5888 def _GetInstanceInfoText(instance):
5889 """Compute that text that should be added to the disk's metadata.
5892 return "originstname+%s" % instance.name
5895 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5896 """Create all disks for an instance.
5898 This abstracts away some work from AddInstance.
5900 @type lu: L{LogicalUnit}
5901 @param lu: the logical unit on whose behalf we execute
5902 @type instance: L{objects.Instance}
5903 @param instance: the instance whose disks we should create
5905 @param to_skip: list of indices to skip
5906 @type target_node: string
5907 @param target_node: if passed, overrides the target node for creation
5909 @return: the success of the creation
5912 info = _GetInstanceInfoText(instance)
5913 if target_node is None:
5914 pnode = instance.primary_node
5915 all_nodes = instance.all_nodes
5920 if instance.disk_template == constants.DT_FILE:
5921 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5922 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5924 result.Raise("Failed to create directory '%s' on"
5925 " node %s" % (file_storage_dir, pnode))
5927 # Note: this needs to be kept in sync with adding of disks in
5928 # LUSetInstanceParams
5929 for idx, device in enumerate(instance.disks):
5930 if to_skip and idx in to_skip:
5932 logging.info("Creating volume %s for instance %s",
5933 device.iv_name, instance.name)
5935 for node in all_nodes:
5936 f_create = node == pnode
5937 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5940 def _RemoveDisks(lu, instance, target_node=None):
5941 """Remove all disks for an instance.
5943 This abstracts away some work from `AddInstance()` and
5944 `RemoveInstance()`. Note that in case some of the devices couldn't
5945 be removed, the removal will continue with the other ones (compare
5946 with `_CreateDisks()`).
5948 @type lu: L{LogicalUnit}
5949 @param lu: the logical unit on whose behalf we execute
5950 @type instance: L{objects.Instance}
5951 @param instance: the instance whose disks we should remove
5952 @type target_node: string
5953 @param target_node: used to override the node on which to remove the disks
5955 @return: the success of the removal
5958 logging.info("Removing block devices for instance %s", instance.name)
5961 for device in instance.disks:
5963 edata = [(target_node, device)]
5965 edata = device.ComputeNodeTree(instance.primary_node)
5966 for node, disk in edata:
5967 lu.cfg.SetDiskID(disk, node)
5968 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5970 lu.LogWarning("Could not remove block device %s on node %s,"
5971 " continuing anyway: %s", device.iv_name, node, msg)
5974 if instance.disk_template == constants.DT_FILE:
5975 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5979 tgt = instance.primary_node
5980 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5982 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5983 file_storage_dir, instance.primary_node, result.fail_msg)
5989 def _ComputeDiskSize(disk_template, disks):
5990 """Compute disk size requirements in the volume group
5993 # Required free disk space as a function of disk and swap space
5995 constants.DT_DISKLESS: None,
5996 constants.DT_PLAIN: sum(d["size"] for d in disks),
5997 # 128 MB are added for drbd metadata for each disk
5998 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
5999 constants.DT_FILE: None,
6002 if disk_template not in req_size_dict:
6003 raise errors.ProgrammerError("Disk template '%s' size requirement"
6004 " is unknown" % disk_template)
6006 return req_size_dict[disk_template]
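# Worked example (not part of Ganeti): what _ComputeDiskSize returns for the
# disk dictionaries used throughout this module; sizes are in MiB and the
# 128 MiB per-disk overhead is the DRBD metadata mentioned above.
def _example_disk_size_requirements():
  disks = [{"size": 1024}, {"size": 2048}]
  plain = _ComputeDiskSize(constants.DT_PLAIN, disks)     # 3072
  drbd = _ComputeDiskSize(constants.DT_DRBD8, disks)      # 3072 + 2 * 128 = 3328
  diskless = _ComputeDiskSize(constants.DT_DISKLESS, [])  # None, no VG space needed
  return plain, drbd, diskless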
6009 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6010 """Hypervisor parameter validation.
6012 This function abstracts the hypervisor parameter validation to be
6013 used in both instance create and instance modify.
6015 @type lu: L{LogicalUnit}
6016 @param lu: the logical unit for which we check
6017 @type nodenames: list
6018 @param nodenames: the list of nodes on which we should check
6019 @type hvname: string
6020 @param hvname: the name of the hypervisor we should use
6021 @type hvparams: dict
6022 @param hvparams: the parameters which we need to check
6023 @raise errors.OpPrereqError: if the parameters are not valid
6026 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6029 for node in nodenames:
6033 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6036 class LUCreateInstance(LogicalUnit):
6037 """Create an instance.
6040 HPATH = "instance-add"
6041 HTYPE = constants.HTYPE_INSTANCE
6042 _OP_REQP = ["instance_name", "disks",
6044 "wait_for_sync", "ip_check", "nics",
6045 "hvparams", "beparams"]
6048 def CheckArguments(self):
6052 # set optional parameters to none if they don't exist
6053 for attr in ["pnode", "snode", "iallocator", "hypervisor",
6054 "disk_template", "identify_defaults"]:
6055 if not hasattr(self.op, attr):
6056 setattr(self.op, attr, None)
6058 # do not require name_check to ease forward/backward compatibility
6060 if not hasattr(self.op, "name_check"):
6061 self.op.name_check = True
6062 if not hasattr(self.op, "no_install"):
6063 self.op.no_install = False
6064 if self.op.no_install and self.op.start:
6065 self.LogInfo("No-installation mode selected, disabling startup")
6066 self.op.start = False
6067 # validate/normalize the instance name
6068 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6069 if self.op.ip_check and not self.op.name_check:
6070 # TODO: make the ip check more flexible and not depend on the name check
6071 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6073 # check disk information: either all adopt, or no adopt
6074 has_adopt = has_no_adopt = False
6075 for disk in self.op.disks:
6080 if has_adopt and has_no_adopt:
6081 raise errors.OpPrereqError("Either all disks are adopted or none is",
6084 if self.op.disk_template != constants.DT_PLAIN:
6085 raise errors.OpPrereqError("Disk adoption is only supported for the"
6086 " 'plain' disk template",
6088 if self.op.iallocator is not None:
6089 raise errors.OpPrereqError("Disk adoption not allowed with an"
6090 " iallocator script", errors.ECODE_INVAL)
6091 if self.op.mode == constants.INSTANCE_IMPORT:
6092 raise errors.OpPrereqError("Disk adoption not allowed for"
6093 " instance import", errors.ECODE_INVAL)
6095 self.adopt_disks = has_adopt
6097 # verify creation mode
6098 if self.op.mode not in (constants.INSTANCE_CREATE,
6099 constants.INSTANCE_IMPORT):
6100 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6101 self.op.mode, errors.ECODE_INVAL)
6103 # instance name verification
6104 if self.op.name_check:
6105 self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6106 self.op.instance_name = self.hostname1.name
6107 # used in CheckPrereq for ip ping check
6108 self.check_ip = self.hostname1.ip
6110 self.check_ip = None
6112 # file storage checks
6113 if (self.op.file_driver and
6114 not self.op.file_driver in constants.FILE_DRIVER):
6115 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6116 self.op.file_driver, errors.ECODE_INVAL)
6118 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6119 raise errors.OpPrereqError("File storage directory path not absolute",
6122 ### Node/iallocator related checks
6123 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6124 raise errors.OpPrereqError("One and only one of iallocator and primary"
6125 " node must be given",
6128 if self.op.mode == constants.INSTANCE_IMPORT:
6129 # On import force_variant must be True, because if we forced it at
6130 # initial install, our only chance when importing it back is that it works again.
6132 self.op.force_variant = True
6134 if self.op.no_install:
6135 self.LogInfo("No-installation mode has no effect during import")
6137 else: # INSTANCE_CREATE
6138 if getattr(self.op, "os_type", None) is None:
6139 raise errors.OpPrereqError("No guest OS specified",
6141 self.op.force_variant = getattr(self.op, "force_variant", False)
6142 if self.op.disk_template is None:
6143 raise errors.OpPrereqError("No disk template specified",
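# Illustrative sketch (not part of Ganeti): the disk adoption rules that
# CheckArguments enforces above, expressed over the op.disks dictionaries.
# Mixing adopted and non-adopted disks is rejected, and adoption is only
# valid with the 'plain' template, without an iallocator and not on import.
def _example_adoption_allowed(disks, disk_template, iallocator, mode):
  has_adopt = any("adopt" in d for d in disks)
  has_no_adopt = any("adopt" not in d for d in disks)
  if has_adopt and has_no_adopt:
    return False                              # either all disks adopt, or none
  if has_adopt:
    return (disk_template == constants.DT_PLAIN and
            iallocator is None and
            mode != constants.INSTANCE_IMPORT)
  return True

# e.g. _example_adoption_allowed([{"adopt": "old_lv0"}, {"size": 1024}], ...)
# is False (mixed), while a list where every disk carries an "adopt" LV name
# and the template is plain would be accepted; the LV name is made up.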
6146 def ExpandNames(self):
6147 """ExpandNames for CreateInstance.
6149 Figure out the right locks for instance creation.
6152 self.needed_locks = {}
6154 instance_name = self.op.instance_name
6155 # this is just a preventive check, but someone might still add this
6156 # instance in the meantime, and creation will fail at lock-add time
6157 if instance_name in self.cfg.GetInstanceList():
6158 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6159 instance_name, errors.ECODE_EXISTS)
6161 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6163 if self.op.iallocator:
6164 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6166 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6167 nodelist = [self.op.pnode]
6168 if self.op.snode is not None:
6169 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6170 nodelist.append(self.op.snode)
6171 self.needed_locks[locking.LEVEL_NODE] = nodelist
6173 # in case of import lock the source node too
6174 if self.op.mode == constants.INSTANCE_IMPORT:
6175 src_node = getattr(self.op, "src_node", None)
6176 src_path = getattr(self.op, "src_path", None)
6178 if src_path is None:
6179 self.op.src_path = src_path = self.op.instance_name
6181 if src_node is None:
6182 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6183 self.op.src_node = None
6184 if os.path.isabs(src_path):
6185 raise errors.OpPrereqError("Importing an instance from an absolute"
6186 " path requires a source node option.",
6189 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6190 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6191 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6192 if not os.path.isabs(src_path):
6193 self.op.src_path = src_path = \
6194 utils.PathJoin(constants.EXPORT_DIR, src_path)
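# Illustrative sketch (not part of Ganeti): the two node-locking shapes that
# ExpandNames above can end up with. With an iallocator all node locks are
# taken, since the allocator may pick any node; with explicit nodes only
# those are locked (an import may additionally lock the source node). The
# node names below are made up.
def _example_create_instance_node_locks(use_iallocator):
  if use_iallocator:
    return {locking.LEVEL_NODE: locking.ALL_SET}
  return {locking.LEVEL_NODE: ["node1.example.com", "node2.example.com"]}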
6196 def _RunAllocator(self):
6197 """Run the allocator based on input opcode.
6200 nics = [n.ToDict() for n in self.nics]
6201 ial = IAllocator(self.cfg, self.rpc,
6202 mode=constants.IALLOCATOR_MODE_ALLOC,
6203 name=self.op.instance_name,
6204 disk_template=self.op.disk_template,
6207 vcpus=self.be_full[constants.BE_VCPUS],
6208 mem_size=self.be_full[constants.BE_MEMORY],
6211 hypervisor=self.op.hypervisor,
6214 ial.Run(self.op.iallocator)
6217 raise errors.OpPrereqError("Can't compute nodes using"
6218 " iallocator '%s': %s" %
6219 (self.op.iallocator, ial.info),
6221 if len(ial.result) != ial.required_nodes:
6222 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6223 " of nodes (%s), required %s" %
6224 (self.op.iallocator, len(ial.result),
6225 ial.required_nodes), errors.ECODE_FAULT)
6226 self.op.pnode = ial.result[0]
6227 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6228 self.op.instance_name, self.op.iallocator,
6229 utils.CommaJoin(ial.result))
6230 if ial.required_nodes == 2:
6231 self.op.snode = ial.result[1]
6233 def BuildHooksEnv(self):
6236 This runs on master, primary and secondary nodes of the instance.
6240 "ADD_MODE": self.op.mode,
6242 if self.op.mode == constants.INSTANCE_IMPORT:
6243 env["SRC_NODE"] = self.op.src_node
6244 env["SRC_PATH"] = self.op.src_path
6245 env["SRC_IMAGES"] = self.src_images
6247 env.update(_BuildInstanceHookEnv(
6248 name=self.op.instance_name,
6249 primary_node=self.op.pnode,
6250 secondary_nodes=self.secondaries,
6251 status=self.op.start,
6252 os_type=self.op.os_type,
6253 memory=self.be_full[constants.BE_MEMORY],
6254 vcpus=self.be_full[constants.BE_VCPUS],
6255 nics=_NICListToTuple(self, self.nics),
6256 disk_template=self.op.disk_template,
6257 disks=[(d["size"], d["mode"]) for d in self.disks],
6260 hypervisor_name=self.op.hypervisor,
6263 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6267 def _ReadExportInfo(self):
6268 """Reads the export information from disk.
6270 It will override the opcode source node and path with the actual
6271 information, if these two were not specified before.
6273 @return: the export information
6276 assert self.op.mode == constants.INSTANCE_IMPORT
6278 src_node = self.op.src_node
6279 src_path = self.op.src_path
6281 if src_node is None:
6282 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6283 exp_list = self.rpc.call_export_list(locked_nodes)
6285 for node in exp_list:
6286 if exp_list[node].fail_msg:
6288 if src_path in exp_list[node].payload:
6290 self.op.src_node = src_node = node
6291 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6295 raise errors.OpPrereqError("No export found for relative path %s" %
6296 src_path, errors.ECODE_INVAL)
6298 _CheckNodeOnline(self, src_node)
6299 result = self.rpc.call_export_info(src_node, src_path)
6300 result.Raise("No export or invalid export found in dir %s" % src_path)
6302 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6303 if not export_info.has_section(constants.INISECT_EXP):
6304 raise errors.ProgrammerError("Corrupted export config",
6305 errors.ECODE_ENVIRON)
6307 ei_version = export_info.get(constants.INISECT_EXP, "version")
6308 if (int(ei_version) != constants.EXPORT_VERSION):
6309 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6310 (ei_version, constants.EXPORT_VERSION),
6311 errors.ECODE_ENVIRON)
6314 def _ReadExportParams(self, einfo):
6315 """Use export parameters as defaults.
6317 In case the opcode doesn't specify (i.e. override) some instance
6318 parameters, try to use them from the export information, if it declares them.
6322 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6324 if self.op.disk_template is None:
6325 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6326 self.op.disk_template = einfo.get(constants.INISECT_INS,
6329 raise errors.OpPrereqError("No disk template specified and the export"
6330 " is missing the disk_template information",
6333 if not self.op.disks:
6334 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6336 # TODO: import the disk iv_name too
6337 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6338 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6339 disks.append({"size": disk_sz})
6340 self.op.disks = disks
6342 raise errors.OpPrereqError("No disk info specified and the export"
6343 " is missing the disk information",
6346 if (not self.op.nics and
6347 einfo.has_option(constants.INISECT_INS, "nic_count")):
6349 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6351 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6352 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6357 if (self.op.hypervisor is None and
6358 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6359 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6360 if einfo.has_section(constants.INISECT_HYP):
6361 # use the export parameters but do not override the ones
6362 # specified by the user
6363 for name, value in einfo.items(constants.INISECT_HYP):
6364 if name not in self.op.hvparams:
6365 self.op.hvparams[name] = value
6367 if einfo.has_section(constants.INISECT_BEP):
6368 # use the parameters, without overriding
6369 for name, value in einfo.items(constants.INISECT_BEP):
6370 if name not in self.op.beparams:
6371 self.op.beparams[name] = value
6373 # try to read the parameters old style, from the main section
6374 for name in constants.BES_PARAMETERS:
6375 if (name not in self.op.beparams and
6376 einfo.has_option(constants.INISECT_INS, name)):
6377 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
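# Illustrative sketch (not part of Ganeti): the option names that
# _ReadExportParams above looks up in the export's instance section. The
# key names follow the get()/getint() calls above; all values are invented.
def _example_export_instance_section():
  return {
    "os": "debian-image",            # used when op.os_type is unset
    "disk_template": "drbd",         # used when op.disk_template is unset
    "disk_count": "2",
    "disk0_size": "1024",
    "disk1_size": "2048",
    "nic_count": "1",
    "nic0_mac": "aa:00:00:11:22:33",
    "nic0_ip": "192.0.2.10",
    "hypervisor": "xen-pvm",         # used when op.hypervisor is unset
  }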
6379 def _RevertToDefaults(self, cluster):
6380 """Revert the instance parameters to the default values.
6384 hv_defs = cluster.GetHVDefaults(self.op.hypervisor, self.op.os_type)
6385 for name in self.op.hvparams.keys():
6386 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6387 del self.op.hvparams[name]
6389 be_defs = cluster.beparams.get(constants.PP_DEFAULT, {})
6390 for name in self.op.beparams.keys():
6391 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6392 del self.op.beparams[name]
6394 nic_defs = cluster.nicparams.get(constants.PP_DEFAULT, {})
6395 for nic in self.op.nics:
6396 for name in constants.NICS_PARAMETERS:
6397 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
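# Illustrative sketch (not part of Ganeti): the "identify defaults" idea used
# by _RevertToDefaults above, on plain dictionaries. Any value equal to the
# cluster default is dropped so the instance only stores real overrides.
def _example_strip_defaults(params, defaults):
  return dict((name, value) for name, value in params.items()
              if name not in defaults or defaults[name] != value)

# _example_strip_defaults({"memory": 128, "vcpus": 4}, {"memory": 128})
# keeps only {"vcpus": 4}; _RevertToDefaults does the same in place on
# hvparams, beparams and the per-NIC parameters.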
6400 def CheckPrereq(self):
6401 """Check prerequisites.
6404 if self.op.mode == constants.INSTANCE_IMPORT:
6405 export_info = self._ReadExportInfo()
6406 self._ReadExportParams(export_info)
6408 _CheckDiskTemplate(self.op.disk_template)
6410 if (not self.cfg.GetVGName() and
6411 self.op.disk_template not in constants.DTS_NOT_LVM):
6412 raise errors.OpPrereqError("Cluster does not support lvm-based"
6413 " instances", errors.ECODE_STATE)
6415 if self.op.hypervisor is None:
6416 self.op.hypervisor = self.cfg.GetHypervisorType()
6418 cluster = self.cfg.GetClusterInfo()
6419 enabled_hvs = cluster.enabled_hypervisors
6420 if self.op.hypervisor not in enabled_hvs:
6421 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6422 " cluster (%s)" % (self.op.hypervisor,
6423 ",".join(enabled_hvs)),
6426 # check hypervisor parameter syntax (locally)
6427 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6428 filled_hvp = objects.FillDict(cluster.GetHVDefaults(self.op.hypervisor,
6431 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6432 hv_type.CheckParameterSyntax(filled_hvp)
6433 self.hv_full = filled_hvp
6434 # check that we don't specify global parameters on an instance
6435 _CheckGlobalHvParams(self.op.hvparams)
6437 # fill and remember the beparams dict
6438 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6439 self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6442 # now that hvp/bep are in final format, let's reset to defaults, if told to do so
6444 if self.op.identify_defaults:
6445 self._RevertToDefaults(cluster)
6449 for idx, nic in enumerate(self.op.nics):
6450 nic_mode_req = nic.get("mode", None)
6451 nic_mode = nic_mode_req
6452 if nic_mode is None:
6453 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6455 # in routed mode, for the first nic, the default ip is 'auto'
6456 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6457 default_ip_mode = constants.VALUE_AUTO
6459 default_ip_mode = constants.VALUE_NONE
6461 # ip validity checks
6462 ip = nic.get("ip", default_ip_mode)
6463 if ip is None or ip.lower() == constants.VALUE_NONE:
6465 elif ip.lower() == constants.VALUE_AUTO:
6466 if not self.op.name_check:
6467 raise errors.OpPrereqError("IP address set to auto but name checks"
6468 " have been skipped. Aborting.",
6470 nic_ip = self.hostname1.ip
6472 if not utils.IsValidIP(ip):
6473 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6474 " like a valid IP" % ip,
6478 # TODO: check the ip address for uniqueness
6479 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6480 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6483 # MAC address verification
6484 mac = nic.get("mac", constants.VALUE_AUTO)
6485 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6486 mac = utils.NormalizeAndValidateMac(mac)
6489 self.cfg.ReserveMAC(mac, self.proc.GetECId())
6490 except errors.ReservationError:
6491 raise errors.OpPrereqError("MAC address %s already in use"
6492 " in cluster" % mac,
6493 errors.ECODE_NOTUNIQUE)
6495 # bridge verification
6496 bridge = nic.get("bridge", None)
6497 link = nic.get("link", None)
6499 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6500 " at the same time", errors.ECODE_INVAL)
6501 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6502 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6509 nicparams[constants.NIC_MODE] = nic_mode_req
6511 nicparams[constants.NIC_LINK] = link
6513 check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6515 objects.NIC.CheckParameterSyntax(check_params)
6516 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6518 # disk checks/pre-build
6520 for disk in self.op.disks:
6521 mode = disk.get("mode", constants.DISK_RDWR)
6522 if mode not in constants.DISK_ACCESS_SET:
6523 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6524 mode, errors.ECODE_INVAL)
6525 size = disk.get("size", None)
6527 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6530 except (TypeError, ValueError):
6531 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6533 new_disk = {"size": size, "mode": mode}
6535 new_disk["adopt"] = disk["adopt"]
6536 self.disks.append(new_disk)
6538 if self.op.mode == constants.INSTANCE_IMPORT:
6540 # Check that the new instance doesn't have fewer disks than the export
6541 instance_disks = len(self.disks)
6542 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6543 if instance_disks < export_disks:
6544 raise errors.OpPrereqError("Not enough disks to import."
6545 " (instance: %d, export: %d)" %
6546 (instance_disks, export_disks),
6550 for idx in range(export_disks):
6551 option = 'disk%d_dump' % idx
6552 if export_info.has_option(constants.INISECT_INS, option):
6553 # FIXME: are the old OSes, disk sizes, etc. useful?
6554 export_name = export_info.get(constants.INISECT_INS, option)
6555 image = utils.PathJoin(self.op.src_path, export_name)
6556 disk_images.append(image)
6558 disk_images.append(False)
6560 self.src_images = disk_images
6562 old_name = export_info.get(constants.INISECT_INS, 'name')
6564 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6565 except (TypeError, ValueError), err:
6566 raise errors.OpPrereqError("Invalid export file, nic_count is not"
6567 " an integer: %s" % str(err),
6569 if self.op.instance_name == old_name:
6570 for idx, nic in enumerate(self.nics):
6571 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6572 nic_mac_ini = 'nic%d_mac' % idx
6573 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6575 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6577 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6578 if self.op.ip_check:
6579 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6580 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6581 (self.check_ip, self.op.instance_name),
6582 errors.ECODE_NOTUNIQUE)
6584 #### mac address generation
6585 # By generating here the mac address both the allocator and the hooks get
6586 # the real final mac address rather than the 'auto' or 'generate' value.
6587 # There is a race condition between the generation and the instance object
6588 # creation, which means that we know the mac is valid now, but we're not
6589 # sure it will be when we actually add the instance. If things go bad
6590 # adding the instance will abort because of a duplicate mac, and the
6591 # creation job will fail.
6592 for nic in self.nics:
6593 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6594 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6598 if self.op.iallocator is not None:
6599 self._RunAllocator()
6601 #### node related checks
6603 # check primary node
6604 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6605 assert self.pnode is not None, \
6606 "Cannot retrieve locked node %s" % self.op.pnode
6608 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6609 pnode.name, errors.ECODE_STATE)
6611 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6612 pnode.name, errors.ECODE_STATE)
6614 self.secondaries = []
6616 # mirror node verification
6617 if self.op.disk_template in constants.DTS_NET_MIRROR:
6618 if self.op.snode is None:
6619 raise errors.OpPrereqError("The networked disk templates need"
6620 " a mirror node", errors.ECODE_INVAL)
6621 if self.op.snode == pnode.name:
6622 raise errors.OpPrereqError("The secondary node cannot be the"
6623 " primary node.", errors.ECODE_INVAL)
6624 _CheckNodeOnline(self, self.op.snode)
6625 _CheckNodeNotDrained(self, self.op.snode)
6626 self.secondaries.append(self.op.snode)
6628 nodenames = [pnode.name] + self.secondaries
6630 req_size = _ComputeDiskSize(self.op.disk_template,
6633 # Check lv size requirements, if not adopting
6634 if req_size is not None and not self.adopt_disks:
6635 _CheckNodesFreeDisk(self, nodenames, req_size)
6637 if self.adopt_disks: # instead, we must check the adoption data
6638 all_lvs = set([i["adopt"] for i in self.disks])
6639 if len(all_lvs) != len(self.disks):
6640 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6642 for lv_name in all_lvs:
6644 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6645 except errors.ReservationError:
6646 raise errors.OpPrereqError("LV named %s used by another instance" %
6647 lv_name, errors.ECODE_NOTUNIQUE)
6649 node_lvs = self.rpc.call_lv_list([pnode.name],
6650 self.cfg.GetVGName())[pnode.name]
6651 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6652 node_lvs = node_lvs.payload
6653 delta = all_lvs.difference(node_lvs.keys())
6655 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6656 utils.CommaJoin(delta),
6658 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6660 raise errors.OpPrereqError("Online logical volumes found, cannot"
6661 " adopt: %s" % utils.CommaJoin(online_lvs),
6663 # update the size of disk based on what is found
6664 for dsk in self.disks:
6665 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6667 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6669 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6671 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6673 # memory check on primary node
6675 _CheckNodeFreeMemory(self, self.pnode.name,
6676 "creating instance %s" % self.op.instance_name,
6677 self.be_full[constants.BE_MEMORY],
6680 self.dry_run_result = list(nodenames)
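# Illustrative sketch (not part of Ganeti): op.nics dictionaries of the kind
# CheckPrereq above accepts, showing how the ip/mode rules interact. The MAC
# and IP values are made up; "auto" asks for resolution via the instance
# name and is therefore only allowed together with name_check.
def _example_nics():
  return [
    # routed NIC: needs an IP; for the first NIC the default ip mode is "auto"
    {"mode": constants.NIC_MODE_ROUTED, "ip": "192.0.2.10"},
    # NIC using the cluster default mode: IP optional, MAC generated for "auto"
    {"ip": None, "mac": constants.VALUE_AUTO},
  ]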
6682 def Exec(self, feedback_fn):
6683 """Create and add the instance to the cluster.
6686 instance = self.op.instance_name
6687 pnode_name = self.pnode.name
6689 ht_kind = self.op.hypervisor
6690 if ht_kind in constants.HTS_REQ_PORT:
6691 network_port = self.cfg.AllocatePort()
6695 if constants.ENABLE_FILE_STORAGE:
6696 # this is needed because os.path.join does not accept None arguments
6697 if self.op.file_storage_dir is None:
6698 string_file_storage_dir = ""
6700 string_file_storage_dir = self.op.file_storage_dir
6702 # build the full file storage dir path
6703 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6704 string_file_storage_dir, instance)
6706 file_storage_dir = ""
6709 disks = _GenerateDiskTemplate(self,
6710 self.op.disk_template,
6711 instance, pnode_name,
6715 self.op.file_driver,
6718 iobj = objects.Instance(name=instance, os=self.op.os_type,
6719 primary_node=pnode_name,
6720 nics=self.nics, disks=disks,
6721 disk_template=self.op.disk_template,
6723 network_port=network_port,
6724 beparams=self.op.beparams,
6725 hvparams=self.op.hvparams,
6726 hypervisor=self.op.hypervisor,
6729 if self.adopt_disks:
6730 # rename LVs to the newly-generated names; we need to construct
6731 # 'fake' LV disks with the old data, plus the new unique_id
6732 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6734 for t_dsk, a_dsk in zip (tmp_disks, self.disks):
6735 rename_to.append(t_dsk.logical_id)
6736 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6737 self.cfg.SetDiskID(t_dsk, pnode_name)
6738 result = self.rpc.call_blockdev_rename(pnode_name,
6739 zip(tmp_disks, rename_to))
6740 result.Raise("Failed to rename adopted LVs")
6742 feedback_fn("* creating instance disks...")
6744 _CreateDisks(self, iobj)
6745 except errors.OpExecError:
6746 self.LogWarning("Device creation failed, reverting...")
6748 _RemoveDisks(self, iobj)
6750 self.cfg.ReleaseDRBDMinors(instance)
6753 feedback_fn("adding instance %s to cluster config" % instance)
6755 self.cfg.AddInstance(iobj, self.proc.GetECId())
6757 # Declare that we don't want to remove the instance lock anymore, as we've
6758 # added the instance to the config
6759 del self.remove_locks[locking.LEVEL_INSTANCE]
6760 # Unlock all the nodes
6761 if self.op.mode == constants.INSTANCE_IMPORT:
6762 nodes_keep = [self.op.src_node]
6763 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6764 if node != self.op.src_node]
6765 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6766 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6768 self.context.glm.release(locking.LEVEL_NODE)
6769 del self.acquired_locks[locking.LEVEL_NODE]
6771 if self.op.wait_for_sync:
6772 disk_abort = not _WaitForSync(self, iobj)
6773 elif iobj.disk_template in constants.DTS_NET_MIRROR:
6774 # make sure the disks are not degraded (still sync-ing is ok)
6776 feedback_fn("* checking mirrors status")
6777 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6782 _RemoveDisks(self, iobj)
6783 self.cfg.RemoveInstance(iobj.name)
6784 # Make sure the instance lock gets removed
6785 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6786 raise errors.OpExecError("There are some degraded disks for"
6789 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6790 if self.op.mode == constants.INSTANCE_CREATE:
6791 if not self.op.no_install:
6792 feedback_fn("* running the instance OS create scripts...")
6793 # FIXME: pass debug option from opcode to backend
6794 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6795 self.op.debug_level)
6796 result.Raise("Could not add os for instance %s"
6797 " on node %s" % (instance, pnode_name))
6799 elif self.op.mode == constants.INSTANCE_IMPORT:
6800 feedback_fn("* running the instance OS import scripts...")
6801 src_node = self.op.src_node
6802 src_images = self.src_images
6803 cluster_name = self.cfg.GetClusterName()
6804 # FIXME: pass debug option from opcode to backend
6805 import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6806 src_node, src_images,
6808 self.op.debug_level)
6809 msg = import_result.fail_msg
6811 self.LogWarning("Error while importing the disk images for instance"
6812 " %s on node %s: %s" % (instance, pnode_name, msg))
6814 # also checked in the prereq part
6815 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6819 iobj.admin_up = True
6820 self.cfg.Update(iobj, feedback_fn)
6821 logging.info("Starting instance %s on node %s", instance, pnode_name)
6822 feedback_fn("* starting instance...")
6823 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6824 result.Raise("Could not start instance")
6826 return list(iobj.all_nodes)
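# Illustrative sketch (not part of Ganeti): how the adoption branch in Exec
# above pairs each adopted LV with the freshly generated name it should be
# renamed to. The tuples stand in for Disk.logical_id, i.e. (vg, lv_name);
# the volume group and LV names below are made up.
def _example_adoption_rename_plan(generated_ids, adopted_lv_names):
  """Return (current_id, new_id) pairs like the blockdev_rename payload."""
  plan = []
  for (vg, new_lv), old_lv in zip(generated_ids, adopted_lv_names):
    # the adopted LV is renamed to the newly generated, cluster-unique name
    plan.append(((vg, old_lv), (vg, new_lv)))
  return plan

# _example_adoption_rename_plan([("xenvg", "uuid1.disk0")], ["my_old_lv"])
# -> [(("xenvg", "my_old_lv"), ("xenvg", "uuid1.disk0"))]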
6829 class LUConnectConsole(NoHooksLU):
6830 """Connect to an instance's console.
6832 This is somewhat special in that it returns the command line that
6833 you need to run on the master node in order to connect to the
6837 _OP_REQP = ["instance_name"]
6840 def ExpandNames(self):
6841 self._ExpandAndLockInstance()
6843 def CheckPrereq(self):
6844 """Check prerequisites.
6846 This checks that the instance is in the cluster.
6849 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6850 assert self.instance is not None, \
6851 "Cannot retrieve locked instance %s" % self.op.instance_name
6852 _CheckNodeOnline(self, self.instance.primary_node)
6854 def Exec(self, feedback_fn):
6855 """Connect to the console of an instance
6858 instance = self.instance
6859 node = instance.primary_node
6861 node_insts = self.rpc.call_instance_list([node],
6862 [instance.hypervisor])[node]
6863 node_insts.Raise("Can't get node information from %s" % node)
6865 if instance.name not in node_insts.payload:
6866 raise errors.OpExecError("Instance %s is not running." % instance.name)
6868 logging.debug("Connecting to console of %s on %s", instance.name, node)
6870 hyper = hypervisor.GetHypervisor(instance.hypervisor)
6871 cluster = self.cfg.GetClusterInfo()
6872 # beparams and hvparams are passed separately, to avoid editing the
6873 # instance and then saving the defaults in the instance itself.
6874 hvparams = cluster.FillHV(instance)
6875 beparams = cluster.FillBE(instance)
6876 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6879 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
6882 class LUReplaceDisks(LogicalUnit):
6883 """Replace the disks of an instance.
6886 HPATH = "mirrors-replace"
6887 HTYPE = constants.HTYPE_INSTANCE
6888 _OP_REQP = ["instance_name", "mode", "disks"]
6891 def CheckArguments(self):
6892 if not hasattr(self.op, "remote_node"):
6893 self.op.remote_node = None
6894 if not hasattr(self.op, "iallocator"):
6895 self.op.iallocator = None
6896 if not hasattr(self.op, "early_release"):
6897 self.op.early_release = False
6899 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6902 def ExpandNames(self):
6903 self._ExpandAndLockInstance()
6905 if self.op.iallocator is not None:
6906 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6908 elif self.op.remote_node is not None:
6909 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6910 self.op.remote_node = remote_node
6912 # Warning: do not remove the locking of the new secondary here
6913 # unless DRBD8.AddChildren is changed to work in parallel;
6914 # currently it doesn't since parallel invocations of
6915 # FindUnusedMinor will conflict
6916 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6917 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6920 self.needed_locks[locking.LEVEL_NODE] = []
6921 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6923 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6924 self.op.iallocator, self.op.remote_node,
6925 self.op.disks, False, self.op.early_release)
6927 self.tasklets = [self.replacer]
6929 def DeclareLocks(self, level):
6930 # If we're not already locking all nodes in the set we have to declare the
6931 # instance's primary/secondary nodes.
6932 if (level == locking.LEVEL_NODE and
6933 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6934 self._LockInstancesNodes()
6936 def BuildHooksEnv(self):
6939 This runs on the master, the primary and all the secondaries.
6942 instance = self.replacer.instance
6944 "MODE": self.op.mode,
6945 "NEW_SECONDARY": self.op.remote_node,
6946 "OLD_SECONDARY": instance.secondary_nodes[0],
6948 env.update(_BuildInstanceHookEnvByObject(self, instance))
6950 self.cfg.GetMasterNode(),
6951 instance.primary_node,
6953 if self.op.remote_node is not None:
6954 nl.append(self.op.remote_node)
6958 class LUEvacuateNode(LogicalUnit):
6959 """Relocate the secondary instances from a node.
6962 HPATH = "node-evacuate"
6963 HTYPE = constants.HTYPE_NODE
6964 _OP_REQP = ["node_name"]
6967 def CheckArguments(self):
6968 if not hasattr(self.op, "remote_node"):
6969 self.op.remote_node = None
6970 if not hasattr(self.op, "iallocator"):
6971 self.op.iallocator = None
6972 if not hasattr(self.op, "early_release"):
6973 self.op.early_release = False
6975 TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6976 self.op.remote_node,
6979 def ExpandNames(self):
6980 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6982 self.needed_locks = {}
6984 # Declare node locks
6985 if self.op.iallocator is not None:
6986 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6988 elif self.op.remote_node is not None:
6989 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6991 # Warning: do not remove the locking of the new secondary here
6992 # unless DRBD8.AddChildren is changed to work in parallel;
6993 # currently it doesn't since parallel invocations of
6994 # FindUnusedMinor will conflict
6995 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
6996 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6999 raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
7001 # Create tasklets for replacing disks for all secondary instances on this
7006 for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
7007 logging.debug("Replacing disks for instance %s", inst.name)
7008 names.append(inst.name)
7010 replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
7011 self.op.iallocator, self.op.remote_node, [],
7012 True, self.op.early_release)
7013 tasklets.append(replacer)
7015 self.tasklets = tasklets
7016 self.instance_names = names
7018 # Declare instance locks
7019 self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
7021 def DeclareLocks(self, level):
7022 # If we're not already locking all nodes in the set we have to declare the
7023 # instance's primary/secondary nodes.
7024 if (level == locking.LEVEL_NODE and
7025 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7026 self._LockInstancesNodes()
7028 def BuildHooksEnv(self):
7031 This runs on the master, the primary and all the secondaries.
7035 "NODE_NAME": self.op.node_name,
7038 nl = [self.cfg.GetMasterNode()]
7040 if self.op.remote_node is not None:
7041 env["NEW_SECONDARY"] = self.op.remote_node
7042 nl.append(self.op.remote_node)
7044 return (env, nl, nl)
7047 class TLReplaceDisks(Tasklet):
7048 """Replaces disks for an instance.
7050 Note: Locking is not within the scope of this class.
7053 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7054 disks, delay_iallocator, early_release):
7055 """Initializes this class.
7058 Tasklet.__init__(self, lu)
7061 self.instance_name = instance_name
7063 self.iallocator_name = iallocator_name
7064 self.remote_node = remote_node
7066 self.delay_iallocator = delay_iallocator
7067 self.early_release = early_release
7070 self.instance = None
7071 self.new_node = None
7072 self.target_node = None
7073 self.other_node = None
7074 self.remote_node_info = None
7075 self.node_secondary_ip = None
7078 def CheckArguments(mode, remote_node, iallocator):
7079 """Helper function for users of this class.
7082 # check for valid parameter combination
7083 if mode == constants.REPLACE_DISK_CHG:
7084 if remote_node is None and iallocator is None:
7085 raise errors.OpPrereqError("When changing the secondary either an"
7086 " iallocator script must be used or the"
7087 " new node given", errors.ECODE_INVAL)
7089 if remote_node is not None and iallocator is not None:
7090 raise errors.OpPrereqError("Give either the iallocator or the new"
7091 " secondary, not both", errors.ECODE_INVAL)
7093 elif remote_node is not None or iallocator is not None:
7094 # Not replacing the secondary
7095 raise errors.OpPrereqError("The iallocator and new node options can"
7096 " only be used when changing the"
7097 " secondary node", errors.ECODE_INVAL)
7100 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7101 """Compute a new secondary node using an IAllocator.
7104 ial = IAllocator(lu.cfg, lu.rpc,
7105 mode=constants.IALLOCATOR_MODE_RELOC,
7107 relocate_from=relocate_from)
7109 ial.Run(iallocator_name)
7112 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7113 " %s" % (iallocator_name, ial.info),
7116 if len(ial.result) != ial.required_nodes:
7117 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7118 " of nodes (%s), required %s" %
7120 len(ial.result), ial.required_nodes),
7123 remote_node_name = ial.result[0]
7125 lu.LogInfo("Selected new secondary for instance '%s': %s",
7126 instance_name, remote_node_name)
7128 return remote_node_name
7130 def _FindFaultyDisks(self, node_name):
7131 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7134 def CheckPrereq(self):
7135 """Check prerequisites.
7137 This checks that the instance is in the cluster.
7140 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7141 assert instance is not None, \
7142 "Cannot retrieve locked instance %s" % self.instance_name
7144 if instance.disk_template != constants.DT_DRBD8:
7145 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7146 " instances", errors.ECODE_INVAL)
7148 if len(instance.secondary_nodes) != 1:
7149 raise errors.OpPrereqError("The instance has a strange layout,"
7150 " expected one secondary but found %d" %
7151 len(instance.secondary_nodes),
7154 if not self.delay_iallocator:
7155 self._CheckPrereq2()
7157 def _CheckPrereq2(self):
7158 """Check prerequisites, second part.
7160 This function should always be part of CheckPrereq. It was separated and is
7161 now called from Exec because during node evacuation iallocator was only
7162 called with an unmodified cluster model, not taking planned changes into account.
7166 instance = self.instance
7167 secondary_node = instance.secondary_nodes[0]
7169 if self.iallocator_name is None:
7170 remote_node = self.remote_node
7172 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7173 instance.name, instance.secondary_nodes)
7175 if remote_node is not None:
7176 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7177 assert self.remote_node_info is not None, \
7178 "Cannot retrieve locked node %s" % remote_node
7180 self.remote_node_info = None
7182 if remote_node == self.instance.primary_node:
7183 raise errors.OpPrereqError("The specified node is the primary node of"
7184 " the instance.", errors.ECODE_INVAL)
7186 if remote_node == secondary_node:
7187 raise errors.OpPrereqError("The specified node is already the"
7188 " secondary node of the instance.",
7191 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7192 constants.REPLACE_DISK_CHG):
7193 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7196 if self.mode == constants.REPLACE_DISK_AUTO:
7197 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7198 faulty_secondary = self._FindFaultyDisks(secondary_node)
7200 if faulty_primary and faulty_secondary:
7201 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7202 " one node and can not be repaired"
7203 " automatically" % self.instance_name,
7207 self.disks = faulty_primary
7208 self.target_node = instance.primary_node
7209 self.other_node = secondary_node
7210 check_nodes = [self.target_node, self.other_node]
7211 elif faulty_secondary:
7212 self.disks = faulty_secondary
7213 self.target_node = secondary_node
7214 self.other_node = instance.primary_node
7215 check_nodes = [self.target_node, self.other_node]
7221 # Non-automatic modes
7222 if self.mode == constants.REPLACE_DISK_PRI:
7223 self.target_node = instance.primary_node
7224 self.other_node = secondary_node
7225 check_nodes = [self.target_node, self.other_node]
7227 elif self.mode == constants.REPLACE_DISK_SEC:
7228 self.target_node = secondary_node
7229 self.other_node = instance.primary_node
7230 check_nodes = [self.target_node, self.other_node]
7232 elif self.mode == constants.REPLACE_DISK_CHG:
7233 self.new_node = remote_node
7234 self.other_node = instance.primary_node
7235 self.target_node = secondary_node
7236 check_nodes = [self.new_node, self.other_node]
7238 _CheckNodeNotDrained(self.lu, remote_node)
7240 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7241 assert old_node_info is not None
7242 if old_node_info.offline and not self.early_release:
7243 # doesn't make sense to delay the release
7244 self.early_release = True
7245 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7246 " early-release mode", secondary_node)
7249 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7252 # If not specified all disks should be replaced
7254 self.disks = range(len(self.instance.disks))
7256 for node in check_nodes:
7257 _CheckNodeOnline(self.lu, node)
7259 # Check whether disks are valid
7260 for disk_idx in self.disks:
7261 instance.FindDisk(disk_idx)
7263 # Get secondary node IP addresses
7266 for node_name in [self.target_node, self.other_node, self.new_node]:
7267 if node_name is not None:
7268 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7270 self.node_secondary_ip = node_2nd_ip
7272 def Exec(self, feedback_fn):
7273 """Execute disk replacement.
7275 This dispatches the disk replacement to the appropriate handler.
7278 if self.delay_iallocator:
7279 self._CheckPrereq2()
7282 feedback_fn("No disks need replacement")
7285 feedback_fn("Replacing disk(s) %s for %s" %
7286 (utils.CommaJoin(self.disks), self.instance.name))
7288 activate_disks = (not self.instance.admin_up)
7290 # Activate the instance disks if we're replacing them on a down instance
7292 _StartInstanceDisks(self.lu, self.instance, True)
7295 # Should we replace the secondary node?
7296 if self.new_node is not None:
7297 fn = self._ExecDrbd8Secondary
7299 fn = self._ExecDrbd8DiskOnly
7301 return fn(feedback_fn)
7304 # Deactivate the instance disks if we're replacing them on a
7307 _SafeShutdownInstanceDisks(self.lu, self.instance)
7309 def _CheckVolumeGroup(self, nodes):
7310 self.lu.LogInfo("Checking volume groups")
7312 vgname = self.cfg.GetVGName()
7314 # Make sure volume group exists on all involved nodes
7315 results = self.rpc.call_vg_list(nodes)
7317 raise errors.OpExecError("Can't list volume groups on the nodes")
7321 res.Raise("Error checking node %s" % node)
7322 if vgname not in res.payload:
7323 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7326 def _CheckDisksExistence(self, nodes):
7327 # Check disk existence
7328 for idx, dev in enumerate(self.instance.disks):
7329 if idx not in self.disks:
7333 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7334 self.cfg.SetDiskID(dev, node)
7336 result = self.rpc.call_blockdev_find(node, dev)
7338 msg = result.fail_msg
7339 if msg or not result.payload:
7341 msg = "disk not found"
7342 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7345 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7346 for idx, dev in enumerate(self.instance.disks):
7347 if idx not in self.disks:
7350 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7353 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7355 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7356 " replace disks for instance %s" %
7357 (node_name, self.instance.name))
7359 def _CreateNewStorage(self, node_name):
7360 vgname = self.cfg.GetVGName()
7363 for idx, dev in enumerate(self.instance.disks):
7364 if idx not in self.disks:
7367 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7369 self.cfg.SetDiskID(dev, node_name)
7371 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7372 names = _GenerateUniqueNames(self.lu, lv_names)
7374 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7375 logical_id=(vgname, names[0]))
7376 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7377 logical_id=(vgname, names[1]))
7379 new_lvs = [lv_data, lv_meta]
7380 old_lvs = dev.children
7381 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7383 # we pass force_create=True to force the LVM creation
7384 for new_lv in new_lvs:
7385 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7386 _GetInstanceInfoText(self.instance), False)
7390 def _CheckDevices(self, node_name, iv_names):
7391 for name, (dev, _, _) in iv_names.iteritems():
7392 self.cfg.SetDiskID(dev, node_name)
7394 result = self.rpc.call_blockdev_find(node_name, dev)
7396 msg = result.fail_msg
7397 if msg or not result.payload:
7399 msg = "disk not found"
7400 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7403 if result.payload.is_degraded:
7404 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7406 def _RemoveOldStorage(self, node_name, iv_names):
7407 for name, (_, old_lvs, _) in iv_names.iteritems():
7408 self.lu.LogInfo("Remove logical volumes for %s" % name)
7411 self.cfg.SetDiskID(lv, node_name)
7413 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7415 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7416 hint="remove unused LVs manually")
7418 def _ReleaseNodeLock(self, node_name):
7419 """Releases the lock for a given node."""
7420 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7422 def _ExecDrbd8DiskOnly(self, feedback_fn):
7423 """Replace a disk on the primary or secondary for DRBD 8.
7425 The algorithm for replace is quite complicated:
7427 1. for each disk to be replaced:
7429 1. create new LVs on the target node with unique names
7430 1. detach old LVs from the drbd device
7431 1. rename old LVs to name_replaced.<time_t>
7432 1. rename new LVs to old LVs
7433 1. attach the new LVs (with the old names now) to the drbd device
7435 1. wait for sync across all devices
7437 1. for each modified disk:
7439 1. remove old LVs (which have the name name_replaced.<time_t>)
7441 Failures are not very well handled.
7446 # Step: check device activation
7447 self.lu.LogStep(1, steps_total, "Check device existence")
7448 self._CheckDisksExistence([self.other_node, self.target_node])
7449 self._CheckVolumeGroup([self.target_node, self.other_node])
7451 # Step: check other node consistency
7452 self.lu.LogStep(2, steps_total, "Check peer consistency")
7453 self._CheckDisksConsistency(self.other_node,
7454 self.other_node == self.instance.primary_node,
7457 # Step: create new storage
7458 self.lu.LogStep(3, steps_total, "Allocate new storage")
7459 iv_names = self._CreateNewStorage(self.target_node)
7461 # Step: for each lv, detach+rename*2+attach
7462 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7463 for dev, old_lvs, new_lvs in iv_names.itervalues():
7464 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7466 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7468 result.Raise("Can't detach drbd from local storage on node"
7469 " %s for device %s" % (self.target_node, dev.iv_name))
7471 #cfg.Update(instance)
7473 # ok, we created the new LVs, so now we know we have the needed
7474 # storage; as such, we proceed on the target node to rename
7475 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7476 # using the assumption that logical_id == physical_id (which in
7477 # turn is the unique_id on that node)
7479 # FIXME(iustin): use a better name for the replaced LVs
7480 temp_suffix = int(time.time())
7481 ren_fn = lambda d, suff: (d.physical_id[0],
7482 d.physical_id[1] + "_replaced-%s" % suff)
7484 # Build the rename list based on what LVs exist on the node
7485 rename_old_to_new = []
7486 for to_ren in old_lvs:
7487 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7488 if not result.fail_msg and result.payload:
7490 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7492 self.lu.LogInfo("Renaming the old LVs on the target node")
7493 result = self.rpc.call_blockdev_rename(self.target_node,
7495 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7497 # Now we rename the new LVs to the old LVs
7498 self.lu.LogInfo("Renaming the new LVs on the target node")
7499 rename_new_to_old = [(new, old.physical_id)
7500 for old, new in zip(old_lvs, new_lvs)]
7501 result = self.rpc.call_blockdev_rename(self.target_node,
7503 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7505 for old, new in zip(old_lvs, new_lvs):
7506 new.logical_id = old.logical_id
7507 self.cfg.SetDiskID(new, self.target_node)
7509 for disk in old_lvs:
7510 disk.logical_id = ren_fn(disk, temp_suffix)
7511 self.cfg.SetDiskID(disk, self.target_node)
7513 # Now that the new lvs have the old name, we can add them to the device
7514 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7515 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7517 msg = result.fail_msg
7519 for new_lv in new_lvs:
7520 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7523 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7524 hint=("cleanup manually the unused logical"
7526 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7528 dev.children = new_lvs
7530 self.cfg.Update(self.instance, feedback_fn)
7533 if self.early_release:
7534 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7536 self._RemoveOldStorage(self.target_node, iv_names)
7537 # WARNING: we release both node locks here, do not do other RPCs
7538 # than WaitForSync to the primary node
7539 self._ReleaseNodeLock([self.target_node, self.other_node])
7542 # This can fail as the old devices are degraded and _WaitForSync
7543 # does a combined result over all disks, so we don't check its return value
7544 self.lu.LogStep(cstep, steps_total, "Sync devices")
7546 _WaitForSync(self.lu, self.instance)
7548 # Check all devices manually
7549 self._CheckDevices(self.instance.primary_node, iv_names)
7551 # Step: remove old storage
7552 if not self.early_release:
7553 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7555 self._RemoveOldStorage(self.target_node, iv_names)
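# Illustrative sketch (not part of Ganeti): the double rename performed in
# step 4 of _ExecDrbd8DiskOnly above, on plain (vg, lv_name) tuples standing
# in for physical_ids. Old LVs move out of the way under a timestamped
# suffix, then the new LVs take over the old names so DRBD can re-attach
# them transparently. The names below are made up.
def _example_lv_rename_steps(old_ids, new_ids, timestamp):
  suffix = "_replaced-%s" % timestamp
  old_to_temp = [(old, (old[0], old[1] + suffix)) for old in old_ids]
  new_to_old = [(new, old) for old, new in zip(old_ids, new_ids)]
  return old_to_temp, new_to_old

# With old_ids = [("xenvg", "disk0_data")] and
# new_ids = [("xenvg", "uuid9.disk0_data")], the first list renames the old
# LV to "disk0_data_replaced-<timestamp>" and the second gives the new LV
# the original "disk0_data" name.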
7557 def _ExecDrbd8Secondary(self, feedback_fn):
7558 """Replace the secondary node for DRBD 8.
7560 The algorithm for replace is quite complicated:
7561 - for all disks of the instance:
7562 - create new LVs on the new node with same names
7563 - shutdown the drbd device on the old secondary
7564 - disconnect the drbd network on the primary
7565 - create the drbd device on the new secondary
7566 - network attach the drbd on the primary, using an artifice:
7567 the drbd code for Attach() will connect to the network if it
7568 finds a device which is connected to the good local disks but not network enabled
7570 - wait for sync across all devices
7571 - remove all disks from the old secondary
7573 Failures are not very well handled.
7578 # Step: check device activation
7579 self.lu.LogStep(1, steps_total, "Check device existence")
7580 self._CheckDisksExistence([self.instance.primary_node])
7581 self._CheckVolumeGroup([self.instance.primary_node])
7583 # Step: check other node consistency
7584 self.lu.LogStep(2, steps_total, "Check peer consistency")
7585 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7587 # Step: create new storage
7588 self.lu.LogStep(3, steps_total, "Allocate new storage")
7589 for idx, dev in enumerate(self.instance.disks):
7590 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7591 (self.new_node, idx))
7592 # we pass force_create=True to force LVM creation
7593 for new_lv in dev.children:
7594 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7595 _GetInstanceInfoText(self.instance), False)
7597 # Step 4: drbd minors and drbd setup changes
7598 # after this, we must manually remove the drbd minors on both the
7599 # error and the success paths
7600 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7601 minors = self.cfg.AllocateDRBDMinor([self.new_node
7602 for dev in self.instance.disks],
7604 logging.debug("Allocated minors %r", minors)
7607 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7608 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7609 (self.new_node, idx))
7610 # create new devices on new_node; note that we create two IDs:
7611 # one without port, so the drbd will be activated without
7612 # networking information on the new node at this stage, and one
7613 # with network, for the latter activation in step 4
7614 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7615 if self.instance.primary_node == o_node1:
p_minor = o_minor1
else:
7618 assert self.instance.primary_node == o_node2, "Three-node instance?"
p_minor = o_minor2
7621 new_alone_id = (self.instance.primary_node, self.new_node, None,
7622 p_minor, new_minor, o_secret)
7623 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7624 p_minor, new_minor, o_secret)
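# Hedged example with invented values: if dev.logical_id were
#   ("node1.example.com", "node2.example.com", 11000, 0, 3, "secret")
# with node1 as the primary (so p_minor == 0) and new_minor == 5, then
#   new_alone_id == ("node1.example.com", self.new_node, None, 0, 5, "secret")
#   new_net_id == ("node1.example.com", self.new_node, 11000, 0, 5, "secret")
# i.e. the old secondary and its minor are swapped for the new ones, and the
# standalone variant simply omits the port.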
7626 iv_names[idx] = (dev, dev.children, new_net_id)
7627 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7629 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7630 logical_id=new_alone_id,
7631 children=dev.children,
7634 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7635 _GetInstanceInfoText(self.instance), False)
7636 except errors.GenericError:
7637 self.cfg.ReleaseDRBDMinors(self.instance.name)
7640 # We have new devices, shutdown the drbd on the old secondary
7641 for idx, dev in enumerate(self.instance.disks):
7642 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7643 self.cfg.SetDiskID(dev, self.target_node)
7644 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7646 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7647 "node: %s" % (idx, msg),
7648 hint=("Please cleanup this device manually as"
7649 " soon as possible"))
7651 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7652 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7653 self.node_secondary_ip,
7654 self.instance.disks)\
7655 [self.instance.primary_node]
7657 msg = result.fail_msg
7659 # detaches didn't succeed (unlikely)
7660 self.cfg.ReleaseDRBDMinors(self.instance.name)
7661 raise errors.OpExecError("Can't detach the disks from the network on"
7662 " old node: %s" % (msg,))
7664 # if we managed to detach at least one, we update all the disks of
7665 # the instance to point to the new secondary
7666 self.lu.LogInfo("Updating instance configuration")
7667 for dev, _, new_logical_id in iv_names.itervalues():
7668 dev.logical_id = new_logical_id
7669 self.cfg.SetDiskID(dev, self.instance.primary_node)
7671 self.cfg.Update(self.instance, feedback_fn)
7673 # and now perform the drbd attach
7674 self.lu.LogInfo("Attaching primary drbds to new secondary"
7675 " (standalone => connected)")
7676 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7678 self.node_secondary_ip,
7679 self.instance.disks,
7682 for to_node, to_result in result.items():
7683 msg = to_result.fail_msg
7685 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7687 hint=("please do a gnt-instance info to see the"
7688 " status of disks"))
7690 if self.early_release:
7691 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7693 self._RemoveOldStorage(self.target_node, iv_names)
7694 # WARNING: we release all node locks here, do not do other RPCs
7695 # than WaitForSync to the primary node
7696 self._ReleaseNodeLock([self.instance.primary_node,
7701 # This can fail as the old devices are degraded and _WaitForSync
7702 # does a combined result over all disks, so we don't check its return value
7703 self.lu.LogStep(cstep, steps_total, "Sync devices")
7705 _WaitForSync(self.lu, self.instance)
7707 # Check all devices manually
7708 self._CheckDevices(self.instance.primary_node, iv_names)
7710 # Step: remove old storage
7711 if not self.early_release:
7712 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7713 self._RemoveOldStorage(self.target_node, iv_names)
7716 class LURepairNodeStorage(NoHooksLU):
7717 """Repairs the volume group on a node.
7720 _OP_REQP = ["node_name"]
7723 def CheckArguments(self):
7724 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7726 _CheckStorageType(self.op.storage_type)
7728 def ExpandNames(self):
7729 self.needed_locks = {
7730 locking.LEVEL_NODE: [self.op.node_name],
7733 def _CheckFaultyDisks(self, instance, node_name):
7734 """Ensure faulty disks abort the opcode or at least warn."""
7736 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7738 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7739 " node '%s'" % (instance.name, node_name),
7741 except errors.OpPrereqError, err:
7742 if self.op.ignore_consistency:
7743 self.proc.LogWarning(str(err.args[0]))
7747 def CheckPrereq(self):
7748 """Check prerequisites.
7751 storage_type = self.op.storage_type
7753 if (constants.SO_FIX_CONSISTENCY not in
7754 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7755 raise errors.OpPrereqError("Storage units of type '%s' can not be"
7756 " repaired" % storage_type,
7759 # Check whether any instance on this node has faulty disks
7760 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7761 if not inst.admin_up:
7763 check_nodes = set(inst.all_nodes)
7764 check_nodes.discard(self.op.node_name)
7765 for inst_node_name in check_nodes:
7766 self._CheckFaultyDisks(inst, inst_node_name)
7768 def Exec(self, feedback_fn):
7769 feedback_fn("Repairing storage unit '%s' on %s ..." %
7770 (self.op.name, self.op.node_name))
7772 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7773 result = self.rpc.call_storage_execute(self.op.node_name,
7774 self.op.storage_type, st_args,
7776 constants.SO_FIX_CONSISTENCY)
7777 result.Raise("Failed to repair storage unit '%s' on %s" %
7778 (self.op.name, self.op.node_name))
7781 class LUNodeEvacuationStrategy(NoHooksLU):
7782 """Computes the node evacuation strategy.
7785 _OP_REQP = ["nodes"]
7788 def CheckArguments(self):
7789 if not hasattr(self.op, "remote_node"):
7790 self.op.remote_node = None
7791 if not hasattr(self.op, "iallocator"):
7792 self.op.iallocator = None
7793 if self.op.remote_node is not None and self.op.iallocator is not None:
7794 raise errors.OpPrereqError("Give either the iallocator or the new"
7795 " secondary, not both", errors.ECODE_INVAL)
7797 def ExpandNames(self):
7798 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7799 self.needed_locks = locks = {}
7800 if self.op.remote_node is None:
7801 locks[locking.LEVEL_NODE] = locking.ALL_SET
7803 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7804 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7806 def CheckPrereq(self):
7809 def Exec(self, feedback_fn):
7810 if self.op.remote_node is not None:
7812 for node in self.op.nodes:
7813 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7816 if i.primary_node == self.op.remote_node:
7817 raise errors.OpPrereqError("Node %s is the primary node of"
7818 " instance %s, cannot use it as"
7820 (self.op.remote_node, i.name),
7822 result.append([i.name, self.op.remote_node])
7824 ial = IAllocator(self.cfg, self.rpc,
7825 mode=constants.IALLOCATOR_MODE_MEVAC,
7826 evac_nodes=self.op.nodes)
7827 ial.Run(self.op.iallocator, validate=True)
7829 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
7835 class LUGrowDisk(LogicalUnit):
7836 """Grow a disk of an instance.
7840 HTYPE = constants.HTYPE_INSTANCE
7841 _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7844 def ExpandNames(self):
7845 self._ExpandAndLockInstance()
7846 self.needed_locks[locking.LEVEL_NODE] = []
7847 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7849 def DeclareLocks(self, level):
7850 if level == locking.LEVEL_NODE:
7851 self._LockInstancesNodes()
7853 def BuildHooksEnv(self):
7856 This runs on the master, the primary and all the secondaries.
7860 "DISK": self.op.disk,
7861 "AMOUNT": self.op.amount,
7863 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7864 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7867 def CheckPrereq(self):
7868 """Check prerequisites.
7870 This checks that the instance is in the cluster.
7873 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7874 assert instance is not None, \
7875 "Cannot retrieve locked instance %s" % self.op.instance_name
7876 nodenames = list(instance.all_nodes)
7877 for node in nodenames:
7878 _CheckNodeOnline(self, node)
7881 self.instance = instance
7883 if instance.disk_template not in constants.DTS_GROWABLE:
7884 raise errors.OpPrereqError("Instance's disk layout does not support"
7885 " growing.", errors.ECODE_INVAL)
7887 self.disk = instance.FindDisk(self.op.disk)
7889 if instance.disk_template != constants.DT_FILE:
7890 # TODO: check the free disk space for file, when that feature will be
7892 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
7894 def Exec(self, feedback_fn):
7895 """Execute disk grow.
7898 instance = self.instance
7900 for node in instance.all_nodes:
7901 self.cfg.SetDiskID(disk, node)
7902 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7903 result.Raise("Grow request failed to node %s" % node)
7905 # TODO: Rewrite code to work properly
7906 # DRBD goes into sync mode for a short amount of time after executing the
7907 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7908 # calling "resize" in sync mode fails. Sleeping for a short amount of
7909 # time is a work-around.
7912 disk.RecordGrow(self.op.amount)
7913 self.cfg.Update(instance, feedback_fn)
7914 if self.op.wait_for_sync:
7915 disk_abort = not _WaitForSync(self, instance)
7917 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7918 " status.\nPlease check the instance.")
7921 class LUQueryInstanceData(NoHooksLU):
7922 """Query runtime instance data.
7925 _OP_REQP = ["instances", "static"]
7928 def ExpandNames(self):
7929 self.needed_locks = {}
7930 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7932 if not isinstance(self.op.instances, list):
7933 raise errors.OpPrereqError("Invalid argument type 'instances'",
7936 if self.op.instances:
7937 self.wanted_names = []
7938 for name in self.op.instances:
7939 full_name = _ExpandInstanceName(self.cfg, name)
7940 self.wanted_names.append(full_name)
7941 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
7943 self.wanted_names = None
7944 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
7946 self.needed_locks[locking.LEVEL_NODE] = []
7947 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7949 def DeclareLocks(self, level):
7950 if level == locking.LEVEL_NODE:
7951 self._LockInstancesNodes()
7953 def CheckPrereq(self):
7954 """Check prerequisites.
7956 This only checks the optional instance list against the existing names.
7959 if self.wanted_names is None:
7960 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
7962 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
7963 in self.wanted_names]
7966 def _ComputeBlockdevStatus(self, node, instance_name, dev):
7967 """Returns the status of a block device
7970 if self.op.static or not node:
7973 self.cfg.SetDiskID(dev, node)
7975 result = self.rpc.call_blockdev_find(node, dev)
7979 result.Raise("Can't compute disk status for %s" % instance_name)
7981 status = result.payload
7985 return (status.dev_path, status.major, status.minor,
7986 status.sync_percent, status.estimated_time,
7987 status.is_degraded, status.ldisk_status)
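# For illustration only (all values invented): the tuple built above could
# look like
#   ("/dev/drbd0", 147, 0, 95.25, 30, False, constants.LDS_OKAY)
# corresponding to (dev_path, major, minor, sync_percent, estimated_time,
# is_degraded, ldisk_status).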
7989 def _ComputeDiskStatus(self, instance, snode, dev):
7990 """Compute block device status.
7993 if dev.dev_type in constants.LDS_DRBD:
7994 # we change the snode then (otherwise we use the one passed in)
7995 if dev.logical_id[0] == instance.primary_node:
7996 snode = dev.logical_id[1]
7998 snode = dev.logical_id[0]
8000 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8002 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8005 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8006 for child in dev.children]
8011 "iv_name": dev.iv_name,
8012 "dev_type": dev.dev_type,
8013 "logical_id": dev.logical_id,
8014 "physical_id": dev.physical_id,
8015 "pstatus": dev_pstatus,
8016 "sstatus": dev_sstatus,
8017 "children": dev_children,
8024 def Exec(self, feedback_fn):
8025 """Gather and return data"""
8028 cluster = self.cfg.GetClusterInfo()
8030 for instance in self.wanted_instances:
8031 if not self.op.static:
8032 remote_info = self.rpc.call_instance_info(instance.primary_node,
8034 instance.hypervisor)
8035 remote_info.Raise("Error checking node %s" % instance.primary_node)
8036 remote_info = remote_info.payload
8037 if remote_info and "state" in remote_info:
8040 remote_state = "down"
8043 if instance.admin_up:
8046 config_state = "down"
8048 disks = [self._ComputeDiskStatus(instance, None, device)
8049 for device in instance.disks]
8052 "name": instance.name,
8053 "config_state": config_state,
8054 "run_state": remote_state,
8055 "pnode": instance.primary_node,
8056 "snodes": instance.secondary_nodes,
8058 # this happens to be the same format used for hooks
8059 "nics": _NICListToTuple(self, instance.nics),
8061 "hypervisor": instance.hypervisor,
8062 "network_port": instance.network_port,
8063 "hv_instance": instance.hvparams,
8064 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8065 "be_instance": instance.beparams,
8066 "be_actual": cluster.FillBE(instance),
8067 "serial_no": instance.serial_no,
8068 "mtime": instance.mtime,
8069 "ctime": instance.ctime,
8070 "uuid": instance.uuid,
8073 result[instance.name] = idict
8078 class LUSetInstanceParams(LogicalUnit):
8079 """Modifies an instances's parameters.
8082 HPATH = "instance-modify"
8083 HTYPE = constants.HTYPE_INSTANCE
8084 _OP_REQP = ["instance_name"]
8087 def CheckArguments(self):
8088 if not hasattr(self.op, 'nics'):
8090 if not hasattr(self.op, 'disks'):
8092 if not hasattr(self.op, 'beparams'):
8093 self.op.beparams = {}
8094 if not hasattr(self.op, 'hvparams'):
8095 self.op.hvparams = {}
8096 if not hasattr(self.op, "disk_template"):
8097 self.op.disk_template = None
8098 if not hasattr(self.op, "remote_node"):
8099 self.op.remote_node = None
8100 if not hasattr(self.op, "os_name"):
8101 self.op.os_name = None
8102 if not hasattr(self.op, "force_variant"):
8103 self.op.force_variant = False
8104 self.op.force = getattr(self.op, "force", False)
8105 if not (self.op.nics or self.op.disks or self.op.disk_template or
8106 self.op.hvparams or self.op.beparams or self.op.os_name):
8107 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8109 if self.op.hvparams:
8110 _CheckGlobalHvParams(self.op.hvparams)
8114 for disk_op, disk_dict in self.op.disks:
8115 if disk_op == constants.DDM_REMOVE:
8118 elif disk_op == constants.DDM_ADD:
8121 if not isinstance(disk_op, int):
8122 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8123 if not isinstance(disk_dict, dict):
8124 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8125 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8127 if disk_op == constants.DDM_ADD:
8128 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8129 if mode not in constants.DISK_ACCESS_SET:
8130 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8132 size = disk_dict.get('size', None)
8134 raise errors.OpPrereqError("Required disk parameter size missing",
8138 except (TypeError, ValueError), err:
8139 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8140 str(err), errors.ECODE_INVAL)
8141 disk_dict['size'] = size
8143 # modification of disk
8144 if 'size' in disk_dict:
8145 raise errors.OpPrereqError("Disk size change not possible, use"
8146 " grow-disk", errors.ECODE_INVAL)
8148 if disk_addremove > 1:
8149 raise errors.OpPrereqError("Only one disk add or remove operation"
8150 " supported at a time", errors.ECODE_INVAL)
8152 if self.op.disks and self.op.disk_template is not None:
8153 raise errors.OpPrereqError("Disk template conversion and other disk"
8154 " changes not supported at the same time",
8157 if self.op.disk_template:
8158 _CheckDiskTemplate(self.op.disk_template)
8159 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8160 self.op.remote_node is None):
8161 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8162 " one requires specifying a secondary node",
8167 for nic_op, nic_dict in self.op.nics:
8168 if nic_op == constants.DDM_REMOVE:
8171 elif nic_op == constants.DDM_ADD:
8174 if not isinstance(nic_op, int):
8175 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8176 if not isinstance(nic_dict, dict):
8177 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8178 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8180 # nic_dict should be a dict
8181 nic_ip = nic_dict.get('ip', None)
8182 if nic_ip is not None:
8183 if nic_ip.lower() == constants.VALUE_NONE:
8184 nic_dict['ip'] = None
8186 if not utils.IsValidIP(nic_ip):
8187 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8190 nic_bridge = nic_dict.get('bridge', None)
8191 nic_link = nic_dict.get('link', None)
8192 if nic_bridge and nic_link:
8193 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8194 " at the same time", errors.ECODE_INVAL)
8195 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8196 nic_dict['bridge'] = None
8197 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8198 nic_dict['link'] = None
8200 if nic_op == constants.DDM_ADD:
8201 nic_mac = nic_dict.get('mac', None)
8203 nic_dict['mac'] = constants.VALUE_AUTO
8205 if 'mac' in nic_dict:
8206 nic_mac = nic_dict['mac']
8207 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8208 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8210 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8211 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8212 " modifying an existing nic",
8215 if nic_addremove > 1:
8216 raise errors.OpPrereqError("Only one NIC add or remove operation"
8217 " supported at a time", errors.ECODE_INVAL)
8219 def ExpandNames(self):
8220 self._ExpandAndLockInstance()
8221 self.needed_locks[locking.LEVEL_NODE] = []
8222 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8224 def DeclareLocks(self, level):
8225 if level == locking.LEVEL_NODE:
8226 self._LockInstancesNodes()
8227 if self.op.disk_template and self.op.remote_node:
8228 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8229 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8231 def BuildHooksEnv(self):
8234 This runs on the master, primary and secondaries.
8238 if constants.BE_MEMORY in self.be_new:
8239 args['memory'] = self.be_new[constants.BE_MEMORY]
8240 if constants.BE_VCPUS in self.be_new:
8241 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8242 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8243 # information at all.
8246 nic_override = dict(self.op.nics)
8247 c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
8248 for idx, nic in enumerate(self.instance.nics):
8249 if idx in nic_override:
8250 this_nic_override = nic_override[idx]
8252 this_nic_override = {}
8253 if 'ip' in this_nic_override:
8254 ip = this_nic_override['ip']
8257 if 'mac' in this_nic_override:
8258 mac = this_nic_override['mac']
8261 if idx in self.nic_pnew:
8262 nicparams = self.nic_pnew[idx]
8264 nicparams = objects.FillDict(c_nicparams, nic.nicparams)
8265 mode = nicparams[constants.NIC_MODE]
8266 link = nicparams[constants.NIC_LINK]
8267 args['nics'].append((ip, mac, mode, link))
8268 if constants.DDM_ADD in nic_override:
8269 ip = nic_override[constants.DDM_ADD].get('ip', None)
8270 mac = nic_override[constants.DDM_ADD]['mac']
8271 nicparams = self.nic_pnew[constants.DDM_ADD]
8272 mode = nicparams[constants.NIC_MODE]
8273 link = nicparams[constants.NIC_LINK]
8274 args['nics'].append((ip, mac, mode, link))
8275 elif constants.DDM_REMOVE in nic_override:
8276 del args['nics'][-1]
8278 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8279 if self.op.disk_template:
8280 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8281 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8285 def _GetUpdatedParams(old_params, update_dict,
8286 default_values, parameter_types):
8287 """Return the new params dict for the given params.
8289 @type old_params: dict
8290 @param old_params: old parameters
8291 @type update_dict: dict
8292 @param update_dict: dict containing new parameter values,
8293 or constants.VALUE_DEFAULT to reset the
8294 parameter to its default value
8295 @type default_values: dict
8296 @param default_values: default values for the filled parameters
8297 @type parameter_types: dict
8298 @param parameter_types: dict mapping target dict keys to types
8299 in constants.ENFORCEABLE_TYPES
8300 @rtype: (dict, dict)
8301 @return: (new_parameters, filled_parameters)
8304 params_copy = copy.deepcopy(old_params)
8305 for key, val in update_dict.iteritems():
8306 if val == constants.VALUE_DEFAULT:
8308 del params_copy[key]
8312 params_copy[key] = val
8313 utils.ForceDictType(params_copy, parameter_types)
8314 params_filled = objects.FillDict(default_values, params_copy)
8315 return (params_copy, params_filled)
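# Usage sketch (keys and values are hypothetical, type enforcement via
# parameter_types glossed over):
#   _GetUpdatedParams({"a": 1, "b": 2},
#                     {"a": constants.VALUE_DEFAULT, "c": 3},
#                     {"a": 10, "b": 20, "c": 30}, parameter_types)
# would return ({"b": 2, "c": 3}, {"a": 10, "b": 2, "c": 3}): "a" is dropped
# from the instance-level dict and falls back to its default in the filled
# dict, while "b" and "c" keep their explicit values.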
8317 def CheckPrereq(self):
8318 """Check prerequisites.
8320 This only checks the instance list against the existing names.
8323 self.force = self.op.force
8325 # checking the new params on the primary/secondary nodes
8327 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8328 cluster = self.cluster = self.cfg.GetClusterInfo()
8329 assert self.instance is not None, \
8330 "Cannot retrieve locked instance %s" % self.op.instance_name
8331 pnode = instance.primary_node
8332 nodelist = list(instance.all_nodes)
8334 if self.op.disk_template:
8335 if instance.disk_template == self.op.disk_template:
8336 raise errors.OpPrereqError("Instance already has disk template %s" %
8337 instance.disk_template, errors.ECODE_INVAL)
8339 if (instance.disk_template,
8340 self.op.disk_template) not in self._DISK_CONVERSIONS:
8341 raise errors.OpPrereqError("Unsupported disk template conversion from"
8342 " %s to %s" % (instance.disk_template,
8343 self.op.disk_template),
8345 if self.op.disk_template in constants.DTS_NET_MIRROR:
8346 _CheckNodeOnline(self, self.op.remote_node)
8347 _CheckNodeNotDrained(self, self.op.remote_node)
8348 disks = [{"size": d.size} for d in instance.disks]
8349 required = _ComputeDiskSize(self.op.disk_template, disks)
8350 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8351 _CheckInstanceDown(self, instance, "cannot change disk template")
8353 # hvparams processing
8354 if self.op.hvparams:
8355 i_hvdict, hv_new = self._GetUpdatedParams(
8356 instance.hvparams, self.op.hvparams,
8357 cluster.hvparams[instance.hypervisor],
8358 constants.HVS_PARAMETER_TYPES)
8360 hypervisor.GetHypervisor(
8361 instance.hypervisor).CheckParameterSyntax(hv_new)
8362 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8363 self.hv_new = hv_new # the new actual values
8364 self.hv_inst = i_hvdict # the new dict (without defaults)
8366 self.hv_new = self.hv_inst = {}
8368 # beparams processing
8369 if self.op.beparams:
8370 i_bedict, be_new = self._GetUpdatedParams(
8371 instance.beparams, self.op.beparams,
8372 cluster.beparams[constants.PP_DEFAULT],
8373 constants.BES_PARAMETER_TYPES)
8374 self.be_new = be_new # the new actual values
8375 self.be_inst = i_bedict # the new dict (without defaults)
8377 self.be_new = self.be_inst = {}
8381 if constants.BE_MEMORY in self.op.beparams and not self.force:
8382 mem_check_list = [pnode]
8383 if be_new[constants.BE_AUTO_BALANCE]:
8384 # either we changed auto_balance to yes or it was from before
8385 mem_check_list.extend(instance.secondary_nodes)
8386 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8387 instance.hypervisor)
8388 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8389 instance.hypervisor)
8390 pninfo = nodeinfo[pnode]
8391 msg = pninfo.fail_msg
8393 # Assume the primary node is unreachable and go ahead
8394 self.warn.append("Can't get info from primary node %s: %s" %
8396 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8397 self.warn.append("Node data from primary node %s doesn't contain"
8398 " free memory information" % pnode)
8399 elif instance_info.fail_msg:
8400 self.warn.append("Can't get instance runtime information: %s" %
8401 instance_info.fail_msg)
8403 if instance_info.payload:
8404 current_mem = int(instance_info.payload['memory'])
8406 # Assume instance not running
8407 # (there is a slight race condition here, but it's not very probable,
8408 # and we have no other way to check)
8410 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8411 pninfo.payload['memory_free'])
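# Worked example with invented numbers: raising BE_MEMORY to 2048 MB while
# the instance currently uses 1024 MB and the primary node reports only
# 512 MB free gives miss_mem == 2048 - 1024 - 512 == 512, so the check
# below would refuse the change.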
8413 raise errors.OpPrereqError("This change will prevent the instance"
8414 " from starting, due to %d MB of memory"
8415 " missing on its primary node" % miss_mem,
8418 if be_new[constants.BE_AUTO_BALANCE]:
8419 for node, nres in nodeinfo.items():
8420 if node not in instance.secondary_nodes:
8424 self.warn.append("Can't get info from secondary node %s: %s" %
8426 elif not isinstance(nres.payload.get('memory_free', None), int):
8427 self.warn.append("Secondary node %s didn't return free"
8428 " memory information" % node)
8429 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8430 self.warn.append("Not enough memory to failover instance to"
8431 " secondary node %s" % node)
8436 for nic_op, nic_dict in self.op.nics:
8437 if nic_op == constants.DDM_REMOVE:
8438 if not instance.nics:
8439 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8442 if nic_op != constants.DDM_ADD:
8444 if not instance.nics:
8445 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8446 " no NICs" % nic_op,
8448 if nic_op < 0 or nic_op >= len(instance.nics):
8449 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8451 (nic_op, len(instance.nics) - 1),
8453 old_nic_params = instance.nics[nic_op].nicparams
8454 old_nic_ip = instance.nics[nic_op].ip
8459 update_params_dict = dict([(key, nic_dict[key])
8460 for key in constants.NICS_PARAMETERS
8461 if key in nic_dict])
8463 if 'bridge' in nic_dict:
8464 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8466 new_nic_params, new_filled_nic_params = \
8467 self._GetUpdatedParams(old_nic_params, update_params_dict,
8468 cluster.nicparams[constants.PP_DEFAULT],
8469 constants.NICS_PARAMETER_TYPES)
8470 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8471 self.nic_pinst[nic_op] = new_nic_params
8472 self.nic_pnew[nic_op] = new_filled_nic_params
8473 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8475 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8476 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8477 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8479 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8481 self.warn.append(msg)
8483 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8484 if new_nic_mode == constants.NIC_MODE_ROUTED:
8485 if 'ip' in nic_dict:
8486 nic_ip = nic_dict['ip']
8490 raise errors.OpPrereqError('Cannot set the nic ip to None'
8491 ' on a routed nic', errors.ECODE_INVAL)
8492 if 'mac' in nic_dict:
8493 nic_mac = nic_dict['mac']
8495 raise errors.OpPrereqError('Cannot set the nic mac to None',
8497 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8498 # otherwise generate the mac
8499 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8501 # or validate/reserve the current one
8503 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8504 except errors.ReservationError:
8505 raise errors.OpPrereqError("MAC address %s already in use"
8506 " in cluster" % nic_mac,
8507 errors.ECODE_NOTUNIQUE)
8510 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8511 raise errors.OpPrereqError("Disk operations not supported for"
8512 " diskless instances",
8514 for disk_op, _ in self.op.disks:
8515 if disk_op == constants.DDM_REMOVE:
8516 if len(instance.disks) == 1:
8517 raise errors.OpPrereqError("Cannot remove the last disk of"
8518 " an instance", errors.ECODE_INVAL)
8519 _CheckInstanceDown(self, instance, "cannot remove disks")
8521 if (disk_op == constants.DDM_ADD and
8522 len(instance.nics) >= constants.MAX_DISKS):
8523 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8524 " add more" % constants.MAX_DISKS,
8526 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8528 if disk_op < 0 or disk_op >= len(instance.disks):
8529 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8531 (disk_op, len(instance.disks)),
8535 if self.op.os_name and not self.op.force:
8536 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8537 self.op.force_variant)
8541 def _ConvertPlainToDrbd(self, feedback_fn):
8542 """Converts an instance from plain to drbd.
8545 feedback_fn("Converting template to drbd")
8546 instance = self.instance
8547 pnode = instance.primary_node
8548 snode = self.op.remote_node
8550 # create a fake disk info for _GenerateDiskTemplate
8551 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8552 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8553 instance.name, pnode, [snode],
8554 disk_info, None, None, 0)
8555 info = _GetInstanceInfoText(instance)
8556 feedback_fn("Creating aditional volumes...")
8557 # first, create the missing data and meta devices
8558 for disk in new_disks:
8559 # unfortunately this is... not too nice
8560 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8562 for child in disk.children:
8563 _CreateSingleBlockDev(self, snode, instance, child, info, True)
8564 # at this stage, all new LVs have been created, we can rename the
8566 feedback_fn("Renaming original volumes...")
8567 rename_list = [(o, n.children[0].logical_id)
8568 for (o, n) in zip(instance.disks, new_disks)]
8569 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8570 result.Raise("Failed to rename original LVs")
8572 feedback_fn("Initializing DRBD devices...")
8573 # all child devices are in place, we can now create the DRBD devices
8574 for disk in new_disks:
8575 for node in [pnode, snode]:
8576 f_create = node == pnode
8577 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8579 # at this point, the instance has been modified
8580 instance.disk_template = constants.DT_DRBD8
8581 instance.disks = new_disks
8582 self.cfg.Update(instance, feedback_fn)
8584 # disks are created, waiting for sync
8585 disk_abort = not _WaitForSync(self, instance)
8587 raise errors.OpExecError("There are some degraded disks for"
8588 " this instance, please cleanup manually")
8590 def _ConvertDrbdToPlain(self, feedback_fn):
8591 """Converts an instance from drbd to plain.
8594 instance = self.instance
8595 assert len(instance.secondary_nodes) == 1
8596 pnode = instance.primary_node
8597 snode = instance.secondary_nodes[0]
8598 feedback_fn("Converting template to plain")
8600 old_disks = instance.disks
8601 new_disks = [d.children[0] for d in old_disks]
8603 # copy over size and mode
8604 for parent, child in zip(old_disks, new_disks):
8605 child.size = parent.size
8606 child.mode = parent.mode
8608 # update instance structure
8609 instance.disks = new_disks
8610 instance.disk_template = constants.DT_PLAIN
8611 self.cfg.Update(instance, feedback_fn)
8613 feedback_fn("Removing volumes on the secondary node...")
8614 for disk in old_disks:
8615 self.cfg.SetDiskID(disk, snode)
8616 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8618 self.LogWarning("Could not remove block device %s on node %s,"
8619 " continuing anyway: %s", disk.iv_name, snode, msg)
8621 feedback_fn("Removing unneeded volumes on the primary node...")
8622 for idx, disk in enumerate(old_disks):
8623 meta = disk.children[1]
8624 self.cfg.SetDiskID(meta, pnode)
8625 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8627 self.LogWarning("Could not remove metadata for disk %d on node %s,"
8628 " continuing anyway: %s", idx, pnode, msg)
8631 def Exec(self, feedback_fn):
8632 """Modifies an instance.
8634 All parameters take effect only at the next restart of the instance.
8637 # Process here the warnings from CheckPrereq, as we don't have a
8638 # feedback_fn there.
8639 for warn in self.warn:
8640 feedback_fn("WARNING: %s" % warn)
8643 instance = self.instance
8645 for disk_op, disk_dict in self.op.disks:
8646 if disk_op == constants.DDM_REMOVE:
8647 # remove the last disk
8648 device = instance.disks.pop()
8649 device_idx = len(instance.disks)
8650 for node, disk in device.ComputeNodeTree(instance.primary_node):
8651 self.cfg.SetDiskID(disk, node)
8652 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8654 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8655 " continuing anyway", device_idx, node, msg)
8656 result.append(("disk/%d" % device_idx, "remove"))
8657 elif disk_op == constants.DDM_ADD:
8659 if instance.disk_template == constants.DT_FILE:
8660 file_driver, file_path = instance.disks[0].logical_id
8661 file_path = os.path.dirname(file_path)
8663 file_driver = file_path = None
8664 disk_idx_base = len(instance.disks)
8665 new_disk = _GenerateDiskTemplate(self,
8666 instance.disk_template,
8667 instance.name, instance.primary_node,
8668 instance.secondary_nodes,
8673 instance.disks.append(new_disk)
8674 info = _GetInstanceInfoText(instance)
8676 logging.info("Creating volume %s for instance %s",
8677 new_disk.iv_name, instance.name)
8678 # Note: this needs to be kept in sync with _CreateDisks
8680 for node in instance.all_nodes:
8681 f_create = node == instance.primary_node
8683 _CreateBlockDev(self, node, instance, new_disk,
8684 f_create, info, f_create)
8685 except errors.OpExecError, err:
8686 self.LogWarning("Failed to create volume %s (%s) on"
8688 new_disk.iv_name, new_disk, node, err)
8689 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8690 (new_disk.size, new_disk.mode)))
8692 # change a given disk
8693 instance.disks[disk_op].mode = disk_dict['mode']
8694 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8696 if self.op.disk_template:
8697 r_shut = _ShutdownInstanceDisks(self, instance)
8699 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8700 " proceed with disk template conversion")
8701 mode = (instance.disk_template, self.op.disk_template)
8703 self._DISK_CONVERSIONS[mode](self, feedback_fn)
8705 self.cfg.ReleaseDRBDMinors(instance.name)
8707 result.append(("disk_template", self.op.disk_template))
8710 for nic_op, nic_dict in self.op.nics:
8711 if nic_op == constants.DDM_REMOVE:
8712 # remove the last nic
8713 del instance.nics[-1]
8714 result.append(("nic.%d" % len(instance.nics), "remove"))
8715 elif nic_op == constants.DDM_ADD:
8716 # mac and bridge should be set by now
8717 mac = nic_dict['mac']
8718 ip = nic_dict.get('ip', None)
8719 nicparams = self.nic_pinst[constants.DDM_ADD]
8720 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8721 instance.nics.append(new_nic)
8722 result.append(("nic.%d" % (len(instance.nics) - 1),
8723 "add:mac=%s,ip=%s,mode=%s,link=%s" %
8724 (new_nic.mac, new_nic.ip,
8725 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8726 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8729 for key in 'mac', 'ip':
8731 setattr(instance.nics[nic_op], key, nic_dict[key])
8732 if nic_op in self.nic_pinst:
8733 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8734 for key, val in nic_dict.iteritems():
8735 result.append(("nic.%s/%d" % (key, nic_op), val))
8738 if self.op.hvparams:
8739 instance.hvparams = self.hv_inst
8740 for key, val in self.op.hvparams.iteritems():
8741 result.append(("hv/%s" % key, val))
8744 if self.op.beparams:
8745 instance.beparams = self.be_inst
8746 for key, val in self.op.beparams.iteritems():
8747 result.append(("be/%s" % key, val))
8751 instance.os = self.op.os_name
8753 self.cfg.Update(instance, feedback_fn)
8757 _DISK_CONVERSIONS = {
8758 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8759 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8762 class LUQueryExports(NoHooksLU):
8763 """Query the exports list
8766 _OP_REQP = ['nodes']
8769 def ExpandNames(self):
8770 self.needed_locks = {}
8771 self.share_locks[locking.LEVEL_NODE] = 1
8772 if not self.op.nodes:
8773 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8775 self.needed_locks[locking.LEVEL_NODE] = \
8776 _GetWantedNodes(self, self.op.nodes)
8778 def CheckPrereq(self):
8779 """Check prerequisites.
8782 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8784 def Exec(self, feedback_fn):
8785 """Compute the list of all the exported system images.
8788 @return: a dictionary with the structure node->(export-list)
8789 where export-list is a list of the instances exported on
8793 rpcresult = self.rpc.call_export_list(self.nodes)
8795 for node in rpcresult:
8796 if rpcresult[node].fail_msg:
8797 result[node] = False
8799 result[node] = rpcresult[node].payload
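# The mapping returned to the caller might look like (hypothetical names):
#   {"node1.example.com": ["inst1.example.com", "inst2.example.com"],
#    "node2.example.com": False}
# where False marks a node whose export list could not be queried.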
8804 class LUExportInstance(LogicalUnit):
8805 """Export an instance to an image in the cluster.
8808 HPATH = "instance-export"
8809 HTYPE = constants.HTYPE_INSTANCE
8810 _OP_REQP = ["instance_name", "target_node", "shutdown"]
8813 def CheckArguments(self):
8814 """Check the arguments.
8817 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
8818 constants.DEFAULT_SHUTDOWN_TIMEOUT)
8820 def ExpandNames(self):
8821 self._ExpandAndLockInstance()
8822 # FIXME: lock only instance primary and destination node
8824 # Sad but true, for now we have to lock all nodes, as we don't know where
8825 # the previous export might be, and in this LU we search for it and
8826 # remove it from its current node. In the future we could fix this by:
8827 # - making a tasklet to search (share-lock all), then create the new one,
8828 # then one to remove, after
8829 # - removing the removal operation altogether
8830 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8832 def DeclareLocks(self, level):
8833 """Last minute lock declaration."""
8834 # All nodes are locked anyway, so nothing to do here.
8836 def BuildHooksEnv(self):
8839 This will run on the master, primary node and target node.
8843 "EXPORT_NODE": self.op.target_node,
8844 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
8845 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
8847 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8848 nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
8849 self.op.target_node]
8852 def CheckPrereq(self):
8853 """Check prerequisites.
8855 This checks that the instance and node names are valid.
8858 instance_name = self.op.instance_name
8859 self.instance = self.cfg.GetInstanceInfo(instance_name)
8860 assert self.instance is not None, \
8861 "Cannot retrieve locked instance %s" % self.op.instance_name
8862 _CheckNodeOnline(self, self.instance.primary_node)
8864 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8865 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
8866 assert self.dst_node is not None
8868 _CheckNodeOnline(self, self.dst_node.name)
8869 _CheckNodeNotDrained(self, self.dst_node.name)
8871 # instance disk type verification
8872 for disk in self.instance.disks:
8873 if disk.dev_type == constants.LD_FILE:
8874 raise errors.OpPrereqError("Export not supported for instances with"
8875 " file-based disks", errors.ECODE_INVAL)
8877 def _CreateSnapshots(self, feedback_fn):
8878 """Creates an LVM snapshot for every disk of the instance.
8880 @return: List of snapshots as L{objects.Disk} instances
8883 instance = self.instance
8884 src_node = instance.primary_node
8886 vgname = self.cfg.GetVGName()
8890 for idx, disk in enumerate(instance.disks):
8891 feedback_fn("Creating a snapshot of disk/%s on node %s" %
8894 # result.payload will be a snapshot of an lvm leaf of the one we
8896 result = self.rpc.call_blockdev_snapshot(src_node, disk)
8897 msg = result.fail_msg
8899 self.LogWarning("Could not snapshot disk/%s on node %s: %s",
8901 snap_disks.append(False)
8903 disk_id = (vgname, result.payload)
8904 new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
8905 logical_id=disk_id, physical_id=disk_id,
8906 iv_name=disk.iv_name)
8907 snap_disks.append(new_dev)
8911 def _RemoveSnapshot(self, feedback_fn, snap_disks, disk_index):
8912 """Removes an LVM snapshot.
8914 @type snap_disks: list
8915 @param snap_disks: The list of all snapshots as returned by
8917 @type disk_index: number
8918 @param disk_index: Index of the snapshot to be removed
8920 @return: Whether removal was successful or not
8923 disk = snap_disks[disk_index]
8925 src_node = self.instance.primary_node
8927 feedback_fn("Removing snapshot of disk/%s on node %s" %
8928 (disk_index, src_node))
8930 result = self.rpc.call_blockdev_remove(src_node, disk)
8931 if not result.fail_msg:
8934 self.LogWarning("Could not remove snapshot for disk/%d from node"
8935 " %s: %s", disk_index, src_node, result.fail_msg)
8939 def _CleanupExports(self, feedback_fn):
8940 """Removes exports of current instance from all other nodes.
8942 If an instance in a cluster with nodes A..D was exported to node C, its
8943 exports will be removed from the nodes A, B and D.
8946 nodelist = self.cfg.GetNodeList()
8947 nodelist.remove(self.dst_node.name)
8949 # on one-node clusters nodelist will be empty after the removal
8950 # if we proceed, the backup would be removed because OpQueryExports
8951 # substitutes an empty list with the full cluster node list.
8952 iname = self.instance.name
8954 feedback_fn("Removing old exports for instance %s" % iname)
8955 exportlist = self.rpc.call_export_list(nodelist)
8956 for node in exportlist:
8957 if exportlist[node].fail_msg:
8959 if iname in exportlist[node].payload:
8960 msg = self.rpc.call_export_remove(node, iname).fail_msg
8962 self.LogWarning("Could not remove older export for instance %s"
8963 " on node %s: %s", iname, node, msg)
8965 def Exec(self, feedback_fn):
8966 """Export an instance to an image in the cluster.
8969 instance = self.instance
8970 dst_node = self.dst_node
8971 src_node = instance.primary_node
8973 if self.op.shutdown:
8974 # shutdown the instance, but not the disks
8975 feedback_fn("Shutting down instance %s" % instance.name)
8976 result = self.rpc.call_instance_shutdown(src_node, instance,
8977 self.shutdown_timeout)
8978 result.Raise("Could not shutdown instance %s on"
8979 " node %s" % (instance.name, src_node))
8981 # set the disks ID correctly since call_instance_start needs the
8982 # correct drbd minor to create the symlinks
8983 for disk in instance.disks:
8984 self.cfg.SetDiskID(disk, src_node)
8986 activate_disks = (not instance.admin_up)
8989 # Activate the instance disks if we're exporting a stopped instance
8990 feedback_fn("Activating disks for %s" % instance.name)
8991 _StartInstanceDisks(self, instance, None)
8996 removed_snaps = [False] * len(instance.disks)
9001 snap_disks = self._CreateSnapshots(feedback_fn)
9003 if self.op.shutdown and instance.admin_up:
9004 feedback_fn("Starting instance %s" % instance.name)
9005 result = self.rpc.call_instance_start(src_node, instance,
9007 msg = result.fail_msg
9009 _ShutdownInstanceDisks(self, instance)
9010 raise errors.OpExecError("Could not start instance: %s" % msg)
9012 assert len(snap_disks) == len(instance.disks)
9013 assert len(removed_snaps) == len(instance.disks)
9015 # TODO: check for size
9017 cluster_name = self.cfg.GetClusterName()
9018 for idx, dev in enumerate(snap_disks):
9019 feedback_fn("Exporting snapshot %s from %s to %s" %
9020 (idx, src_node, dst_node.name))
9022 # FIXME: pass debug from opcode to backend
9023 result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
9024 instance, cluster_name,
9025 idx, self.op.debug_level)
9026 msg = result.fail_msg
9028 self.LogWarning("Could not export disk/%s from node %s to"
9029 " node %s: %s", idx, src_node, dst_node.name, msg)
9030 dresults.append(False)
9032 dresults.append(True)
9035 if self._RemoveSnapshot(feedback_fn, snap_disks, idx):
9036 removed_snaps[idx] = True
9038 dresults.append(False)
9040 assert len(dresults) == len(instance.disks)
9042 # Check for backwards compatibility
9043 assert compat.all(isinstance(i, bool) for i in dresults), \
9044 "Not all results are boolean: %r" % dresults
9046 feedback_fn("Finalizing export on %s" % dst_node.name)
9047 result = self.rpc.call_finalize_export(dst_node.name, instance,
9049 msg = result.fail_msg
9052 self.LogWarning("Could not finalize export for instance %s"
9053 " on node %s: %s", instance.name, dst_node.name, msg)
9056 # Remove all snapshots
9057 assert len(removed_snaps) == len(instance.disks)
9058 for idx, removed in enumerate(removed_snaps):
9060 self._RemoveSnapshot(feedback_fn, snap_disks, idx)
9064 feedback_fn("Deactivating disks for %s" % instance.name)
9065 _ShutdownInstanceDisks(self, instance)
9067 self._CleanupExports(feedback_fn)
9069 return fin_resu, dresults
9072 class LURemoveExport(NoHooksLU):
9073 """Remove exports related to the named instance.
9076 _OP_REQP = ["instance_name"]
9079 def ExpandNames(self):
9080 self.needed_locks = {}
9081 # We need all nodes to be locked in order for RemoveExport to work, but we
9082 # don't need to lock the instance itself, as nothing will happen to it (and
9083 # we can also remove exports for a removed instance)
9084 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9086 def CheckPrereq(self):
9087 """Check prerequisites.
9091 def Exec(self, feedback_fn):
9092 """Remove any export.
9095 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9096 # If the instance was not found we'll try with the name that was passed in.
9097 # This will only work if it was an FQDN, though.
9099 if not instance_name:
9101 instance_name = self.op.instance_name
9103 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9104 exportlist = self.rpc.call_export_list(locked_nodes)
9106 for node in exportlist:
9107 msg = exportlist[node].fail_msg
9109 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9111 if instance_name in exportlist[node].payload:
9113 result = self.rpc.call_export_remove(node, instance_name)
9114 msg = result.fail_msg
9116 logging.error("Could not remove export for instance %s"
9117 " on node %s: %s", instance_name, node, msg)
9119 if fqdn_warn and not found:
9120 feedback_fn("Export not found. If trying to remove an export belonging"
9121 " to a deleted instance please use its Fully Qualified"
9125 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9128 This is an abstract class which is the parent of all the other tags LUs.
9132 def ExpandNames(self):
9133 self.needed_locks = {}
9134 if self.op.kind == constants.TAG_NODE:
9135 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9136 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9137 elif self.op.kind == constants.TAG_INSTANCE:
9138 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9139 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9141 def CheckPrereq(self):
9142 """Check prerequisites.
9145 if self.op.kind == constants.TAG_CLUSTER:
9146 self.target = self.cfg.GetClusterInfo()
9147 elif self.op.kind == constants.TAG_NODE:
9148 self.target = self.cfg.GetNodeInfo(self.op.name)
9149 elif self.op.kind == constants.TAG_INSTANCE:
9150 self.target = self.cfg.GetInstanceInfo(self.op.name)
9152 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9153 str(self.op.kind), errors.ECODE_INVAL)
9156 class LUGetTags(TagsLU):
9157 """Returns the tags of a given object.
9160 _OP_REQP = ["kind", "name"]
9163 def Exec(self, feedback_fn):
9164 """Returns the tag list.
9167 return list(self.target.GetTags())
9170 class LUSearchTags(NoHooksLU):
9171 """Searches the tags for a given pattern.
9174 _OP_REQP = ["pattern"]
9177 def ExpandNames(self):
9178 self.needed_locks = {}
9180 def CheckPrereq(self):
9181 """Check prerequisites.
9183 This checks the pattern passed for validity by compiling it.
9187 self.re = re.compile(self.op.pattern)
9188 except re.error, err:
9189 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9190 (self.op.pattern, err), errors.ECODE_INVAL)
9192 def Exec(self, feedback_fn):
9193 """Returns the tag list.
9197 tgts = [("/cluster", cfg.GetClusterInfo())]
9198 ilist = cfg.GetAllInstancesInfo().values()
9199 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9200 nlist = cfg.GetAllNodesInfo().values()
9201 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9203 for path, target in tgts:
9204 for tag in target.GetTags():
9205 if self.re.search(tag):
9206 results.append((path, tag))
9210 class LUAddTags(TagsLU):
9211 """Sets a tag on a given object.
9214 _OP_REQP = ["kind", "name", "tags"]
9217 def CheckPrereq(self):
9218 """Check prerequisites.
9220 This checks the type and length of the tag name and value.
9223 TagsLU.CheckPrereq(self)
9224 for tag in self.op.tags:
9225 objects.TaggableObject.ValidateTag(tag)
9227 def Exec(self, feedback_fn):
9232 for tag in self.op.tags:
9233 self.target.AddTag(tag)
9234 except errors.TagError, err:
9235 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9236 self.cfg.Update(self.target, feedback_fn)
9239 class LUDelTags(TagsLU):
9240 """Delete a list of tags from a given object.
9243 _OP_REQP = ["kind", "name", "tags"]
9246 def CheckPrereq(self):
9247 """Check prerequisites.
9249 This checks that we have the given tag.
9252 TagsLU.CheckPrereq(self)
9253 for tag in self.op.tags:
9254 objects.TaggableObject.ValidateTag(tag)
9255 del_tags = frozenset(self.op.tags)
9256 cur_tags = self.target.GetTags()
9257 if not del_tags <= cur_tags:
9258 diff_tags = del_tags - cur_tags
9259 diff_names = ["'%s'" % tag for tag in diff_tags]
9261 raise errors.OpPrereqError("Tag(s) %s not found" %
9262 (",".join(diff_names)), errors.ECODE_NOENT)
9264 def Exec(self, feedback_fn):
9265 """Remove the tag from the object.
9268 for tag in self.op.tags:
9269 self.target.RemoveTag(tag)
9270 self.cfg.Update(self.target, feedback_fn)
9273 class LUTestDelay(NoHooksLU):
9274 """Sleep for a specified amount of time.
9276 This LU sleeps on the master and/or nodes for a specified amount of
9280 _OP_REQP = ["duration", "on_master", "on_nodes"]
9283 def ExpandNames(self):
9284 """Expand names and set required locks.
9286 This expands the node list, if any.
9289 self.needed_locks = {}
9290 if self.op.on_nodes:
9291 # _GetWantedNodes can be used here, but is not always appropriate to use
9292 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9294 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9295 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9297 def CheckPrereq(self):
9298 """Check prerequisites.
9302 def Exec(self, feedback_fn):
9303 """Do the actual sleep.
9306 if self.op.on_master:
9307 if not utils.TestDelay(self.op.duration):
9308 raise errors.OpExecError("Error during master delay test")
9309 if self.op.on_nodes:
9310 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9311 for node, node_result in result.items():
9312 node_result.Raise("Failure during rpc call to node %s" % node)
9315 class IAllocator(object):
9316 """IAllocator framework.
9318 An IAllocator instance has three sets of attributes:
9319 - cfg that is needed to query the cluster
9320 - input data (all members of the _KEYS class attribute are required)
9321 - four buffer attributes (in|out_data|text), that represent the
9322 input (to the external script) in text and data structure format,
9323 and the output from it, again in two formats
9324 - the result variables from the script (success, info, nodes) for
9328 # pylint: disable-msg=R0902
9329 # lots of instance attributes
9331 "name", "mem_size", "disks", "disk_template",
9332 "os", "tags", "nics", "vcpus", "hypervisor",
9335 "name", "relocate_from",
9341 def __init__(self, cfg, rpc, mode, **kwargs):
9344 # init buffer variables
9345 self.in_text = self.out_text = self.in_data = self.out_data = None
9346 # init all input fields so that pylint is happy
9348 self.mem_size = self.disks = self.disk_template = None
9349 self.os = self.tags = self.nics = self.vcpus = None
9350 self.hypervisor = None
9351 self.relocate_from = None
9353 self.evac_nodes = None
9355 self.required_nodes = None
9356 # init result fields
9357 self.success = self.info = self.result = None
9358 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9359 keyset = self._ALLO_KEYS
9360 fn = self._AddNewInstance
9361 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9362 keyset = self._RELO_KEYS
9363 fn = self._AddRelocateInstance
9364 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9365 keyset = self._EVAC_KEYS
9366 fn = self._AddEvacuateNodes
9368 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9369 " IAllocator" % self.mode)
9371 if key not in keyset:
9372 raise errors.ProgrammerError("Invalid input parameter '%s' to"
9373 " IAllocator" % key)
9374 setattr(self, key, kwargs[key])
9377 if key not in kwargs:
9378 raise errors.ProgrammerError("Missing input parameter '%s' to"
9379 " IAllocator" % key)
9380 self._BuildInputData(fn)
9382 def _ComputeClusterData(self):
9383 """Compute the generic allocator input data.
9385 This is the data that is independent of the actual operation.
9389 cluster_info = cfg.GetClusterInfo()
9392 "version": constants.IALLOCATOR_VERSION,
9393 "cluster_name": cfg.GetClusterName(),
9394 "cluster_tags": list(cluster_info.GetTags()),
9395 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9396 # we don't have job IDs
9398 iinfo = cfg.GetAllInstancesInfo().values()
9399 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9403 node_list = cfg.GetNodeList()
9405 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9406 hypervisor_name = self.hypervisor
9407 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9408 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9409 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9410 hypervisor_name = cluster_info.enabled_hypervisors[0]
9412 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9415 self.rpc.call_all_instances_info(node_list,
9416 cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)
            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr

    data["nodes"] = node_results
    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "nics": nic_data,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data
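    # Illustrative only: after this method runs, self.in_data has roughly the
    # following shape (keys as built above, values made up for the example):
    #
    #   {"version": constants.IALLOCATOR_VERSION,
    #    "cluster_name": "cluster.example.com",
    #    "cluster_tags": [],
    #    "enabled_hypervisors": ["xen-pvm"],
    #    "nodes": {"node1.example.com": {"total_memory": 4096, ...}, ...},
    #    "instances": {"inst1.example.com": {"memory": 512, ...}, ...}}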
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request
  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {"evac_nodes": self.evac_nodes}
    return request
  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
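  # Illustrative only: a calling LU typically drives the allocator like this
  # (the opcode attribute name is an assumption, not taken from this module):
  #
  #   ial.Run(self.op.iallocator)
  #   if not ial.success:
  #     raise errors.OpPrereqError("Can't compute nodes using iallocator: %s"
  #                                % ial.info, errors.ECODE_NORES)
  #   chosen_nodes = ial.result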
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
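  # Illustrative only: a minimal response that passes _ValidateResult would be
  #
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node2.example.com", "node3.example.com"]}
  #
  # (in JSON form; "result" must always be a list, even on failure).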
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
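      # Illustrative only: values that satisfy the checks above would be e.g.
      #   nics  = [{"mac": "aa:00:00:35:a7:01", "ip": None, "bridge": "xen-br0"}]
      #   disks = [{"size": 1024, "mode": "w"}]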
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result