# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
import logging
import re
import time

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True
  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()
118 """Returns the SshRunner object
122 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
125 ssh = property(fget=__GetSSH)
  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass
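
  # Illustrative sketch (not from the original module): a typical
  # CheckArguments override does purely syntactic validation; the opcode
  # field "mode" and its values below are hypothetical.
  #
  #   def CheckArguments(self):
  #     if self.op.mode not in ("live", "non-live"):
  #       raise errors.OpPrereqError("Invalid mode '%s'" % self.op.mode,
  #                                  errors.ECODE_INVAL)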
  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values, following these rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError
  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """
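
  # Illustrative sketch (not from the original module): the usual pattern
  # is to recompute node-level locks once the instance locks are held,
  # using the _LockInstancesNodes helper defined below.
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()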
  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError
  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError
  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not be prefixed with 'GANETI_', as this
    prefix is handled in the hooks runner. Also note that additional
    keys will be added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If there are no nodes to return, use an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged, but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # The API must be kept, so we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result
  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
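
  # Illustrative usage sketch (not from the original module): an
  # instance-level LU would typically combine this helper with deferred
  # node locking in its ExpandNames.
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE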
  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check whether we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"
380 """Tasklet base class.
382 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
383 they can mix legacy code with tasklets. Locking needs to be done in the LU,
384 tasklets know nothing about locks.
386 Subclasses must follow these rules:
387 - Implement CheckPrereq
391 def __init__(self, lu):
398 def CheckPrereq(self):
399 """Check prerequisites for this tasklets.
401 This method should check whether the prerequisites for the execution of
402 this tasklet are fulfilled. It can do internode communication, but it
403 should be idempotent - no cluster or system changes are allowed.
405 The method should raise errors.OpPrereqError in case something is not
406 fulfilled. Its return value is ignored.
408 This method should also update all parameters to their canonical form if it
409 hasn't been done before.
412 raise NotImplementedError
414 def Exec(self, feedback_fn):
415 """Execute the tasklet.
417 This method should implement the actual work. It should raise
418 errors.OpExecError for failures that are somewhat dealt with in code, or
422 raise NotImplementedError
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())

  return wanted
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)), errors.ECODE_INVAL)
  setattr(op, name, val)
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)
def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)
def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)
def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    _RequireFileStorage()
def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()
def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)
def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name
def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))
def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
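
# Worked example (illustrative, not from the original module): with
# candidate_pool_size = 10 and 3 current candidates where 3 are desired,
# adding one node gives mc_should = min(3 + 1, 10) = 4, so mc_now (3) < 4
# and the new node promotes itself.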
def _CheckNicsBridgesExist(lu, target_nics, target_node,
                           profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
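
# Illustrative sketch (not from the original module): variants are encoded
# in the OS name after a '+'. For an OS with supported_variants set to
# ["lenny", "squeeze"], the name "debootstrap+lenny" passes, the bare name
# "debootstrap" raises "OS name must include a variant", and
# "debootstrap+sid" raises "Unsupported OS variant".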
def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
def _FormatTimestamp(secs):
  """Formats a Unix timestamp in UTC.

  """
  return time.strftime("%F %T %Z", time.gmtime(secs))
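
# Illustrative example (not from the original module): _FormatTimestamp
# renders in UTC, e.g. for secs = 1234567890 it returns
# "2009-02-13 23:31:30 GMT" (the zone abbreviation depends on the
# platform's strftime).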
class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True
class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master
def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
                            warn_days=constants.SSL_CERT_EXPIRATION_WARN,
                            error_days=constants.SSL_CERT_EXPIRATION_ERROR):
  """Verifies certificate details for LUVerifyCluster.

  """
  if expired:
    msg = "Certificate %s is expired" % filename

    if not_before is not None and not_after is not None:
      msg += (" (valid from %s to %s)" %
              (_FormatTimestamp(not_before),
               _FormatTimestamp(not_after)))
    elif not_before is not None:
      msg += " (valid from %s)" % _FormatTimestamp(not_before)
    elif not_after is not None:
      msg += " (valid until %s)" % _FormatTimestamp(not_after)

    return (LUVerifyCluster.ETYPE_ERROR, msg)

  elif not_before is not None and not_before > now:
    return (LUVerifyCluster.ETYPE_WARNING,
            "Certificate %s not yet valid (valid from %s)" %
            (filename, _FormatTimestamp(not_before)))

  elif not_after is not None:
    remaining_days = int((not_after - now) / (24 * 3600))

    msg = ("Certificate %s expires in %d days" % (filename, remaining_days))

    if remaining_days <= error_days:
      return (LUVerifyCluster.ETYPE_ERROR, msg)

    if remaining_days <= warn_days:
      return (LUVerifyCluster.ETYPE_WARNING, msg)

  return (None, None)
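
# Illustrative sketch (not from the original module): for a certificate
# expiring in 5 days, with warn_days = 30 and error_days = 7, the function
# returns
#
#   (LUVerifyCluster.ETYPE_ERROR, "Certificate /path/cert.pem expires in 5 days")
#
# while one expiring in 20 days yields an ETYPE_WARNING tuple instead, and
# one expiring in 40 days yields (None, None).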
def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  # Depending on the pyOpenSSL version, this can just return (None, None)
  (not_before, not_after) = utils.GetX509CertValidity(cert)

  return _VerifyCertificateInner(filename, cert.has_expired(),
                                 not_before, not_after, time.time())
class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} of all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call failed (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)

    """
    def __init__(self, offline=False):
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
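
  # Illustrative sketch (not from the original module): with op.error_codes
  # set, _Error emits the machine-parseable form
  #
  #   - ERROR:ENODELVM:node:node1.example.tld:unable to check volume groups
  #
  # and without it, the plain form
  #
  #   - ERROR: node node1.example.tld: unable to check volume groups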
  def _VerifyNode(self, ninfo, nresult):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True
  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)
  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)
  def _VerifyInstance(self, instance, instanceconfig, node_image):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)
  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = (node not in node_vol_should or
                volume not in node_vol_should[node])
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)
  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance"
                      " failovers should peer node %s fail", prinode)
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")
  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
                                 errors.ECODE_INVAL)
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline))
                      for node in nodeinfo)

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage()
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeLVM(node_i, nresult, vg_name)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)
      self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)

      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
      self._UpdateNodeInstances(node_i, nresult, nimg)
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1836 feedback_fn("* Verifying instance status")
1837 for instance in instancelist:
1839 feedback_fn("* Verifying instance %s" % instance)
1840 inst_config = instanceinfo[instance]
1841 self._VerifyInstance(instance, inst_config, node_image)
1842 inst_nodes_offline = []
1844 pnode = inst_config.primary_node
1845 pnode_img = node_image[pnode]
1846 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1847 self.ENODERPC, pnode, "instance %s, connection to"
1848 " primary node failed", instance)
1850 if pnode_img.offline:
1851 inst_nodes_offline.append(pnode)
1853 # If the instance is non-redundant we cannot survive losing its primary
1854 # node, so we are not N+1 compliant. On the other hand we have no disk
1855 # templates with more than one secondary so that situation is not well
1857 # FIXME: does not support file-backed instances
1858 if not inst_config.secondary_nodes:
1859 i_non_redundant.append(instance)
1860 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
1861 instance, "instance has multiple secondary nodes: %s",
1862 utils.CommaJoin(inst_config.secondary_nodes),
1863 code=self.ETYPE_WARNING)
1865 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1866 i_non_a_balanced.append(instance)
1868 for snode in inst_config.secondary_nodes:
1869 s_img = node_image[snode]
1870 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
1871 "instance %s, connection to secondary node failed", instance)
        if s_img.offline:
          inst_nodes_offline.append(snode)
1876 # warn that the instance lives on offline nodes
1877 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1878 "instance lives on offline node(s) %s",
1879 utils.CommaJoin(inst_nodes_offline))
1880 # ... or ghost nodes
1881 for node in inst_config.all_nodes:
1882 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
1883 "instance lives on ghost node %s", node)
1885 feedback_fn("* Verifying orphan volumes")
1886 self._VerifyOrphanVolumes(node_vol_should, node_image)
1888 feedback_fn("* Verifying orphan instances")
1889 self._VerifyOrphanInstances(instancelist, node_image)
1891 if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1892 feedback_fn("* Verifying N+1 Memory redundancy")
1893 self._VerifyNPlusOneMemory(node_image, instanceinfo)
1895 feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
1912 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1913 """Analyze the post-hooks' result
1915 This method analyses the hook result, handles it, and sends some
1916 nicely-formatted feedback back to the user.
1918 @param phase: one of L{constants.HOOKS_PHASE_POST} or
1919 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1920 @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
1922 @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
1929 if phase == constants.HOOKS_PHASE_POST:
1930 # Used to change hooks' output to proper indentation
1931 indent_re = re.compile('^', re.M)
1932 feedback_fn("* Hooks Results")
1933 assert hooks_results, "invalid result from hooks"
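      # Illustrative sketch (hypothetical values): hooks_results maps node
      # names to RPC results whose payload is a list of
      # (script, status, output) tuples, e.g.
      #   res.payload == [("10-foo.sh", constants.HKR_SUCCESS, "")]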
1935 for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
1939 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1940 "Communication failure in hooks execution: %s", msg)
1941 if res.offline or msg:
1942 # No need to investigate payload if node is offline or gave an error.
1943 # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = False
          continue
1947 for script, hkr, output in res.payload:
1948 test = hkr == constants.HKR_FAIL
1949 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1950 "Script %s failed, output:", script)
1952 output = indent_re.sub(' ', output)
1953 feedback_fn("%s" % output)
1959 class LUVerifyDisks(NoHooksLU):
1960 """Verifies the cluster disks status.
1966 def ExpandNames(self):
1967 self.needed_locks = {
1968 locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
      }
1971 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1973 def CheckPrereq(self):
1974 """Check prerequisites.
    This has no prerequisites.

    """
1981 def Exec(self, feedback_fn):
1982 """Verify integrity of cluster disks.
1984 @rtype: tuple of three items
1985 @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
1990 result = res_nodes, res_instances, res_missing = {}, [], {}
1992 vg_name = self.cfg.GetVGName()
1993 nodes = utils.NiceSort(self.cfg.GetNodeList())
1994 instances = [self.cfg.GetInstanceInfo(name)
1995 for name in self.cfg.GetInstanceList()]
    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
2004 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2005 for node, vol_list in inst_lvs.iteritems():
2006 for vol in vol_list:
2007 nv_dict[(node, vol)] = inst
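    # A minimal sketch of the inversion above, with hypothetical data:
    #   inst_lvs == {"node1": ["xenvg/lv1"], "node2": ["xenvg/lv1"]}
    # ends up in nv_dict as
    #   {("node1", "xenvg/lv1"): inst, ("node2", "xenvg/lv1"): inst}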
2012 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
    for node in nodes:
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue
2025 lvs = node_res.payload
2026 for lv_name, (_, _, lv_online) in lvs.items():
2027 inst = nv_dict.pop((node, lv_name), None)
2028 if (not lv_online and inst is not None
2029 and inst.name not in res_instances):
2030 res_instances.append(inst.name)
    # any leftover items in nv_dict are missing LVs, let's arrange the data
    # better
2034 for key, inst in nv_dict.iteritems():
2035 if inst.name not in res_missing:
2036 res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result
2042 class LURepairDiskSizes(NoHooksLU):
2043 """Verifies the cluster disks sizes.
2046 _OP_REQP = ["instances"]
2049 def ExpandNames(self):
2050 if not isinstance(self.op.instances, list):
2051 raise errors.OpPrereqError("Invalid argument type 'instances'",
2054 if self.op.instances:
2055 self.wanted_names = []
2056 for name in self.op.instances:
2057 full_name = _ExpandInstanceName(self.cfg, name)
2058 self.wanted_names.append(full_name)
2059 self.needed_locks = {
2060 locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
2066 self.needed_locks = {
2067 locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }

    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2072 def DeclareLocks(self, level):
2073 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2074 self._LockInstancesNodes(primary_only=True)
2076 def CheckPrereq(self):
2077 """Check prerequisites.
    This only checks the optional instance list against the existing names.

    """
2082 if self.wanted_names is None:
2083 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2085 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2086 in self.wanted_names]
2088 def _EnsureChildSizes(self, disk):
2089 """Ensure children of the disk have the needed disk size.
2091 This is valid mainly for DRBD8 and fixes an issue where the
2092 children have smaller disk size.
    @param disk: an L{ganeti.objects.Disk} object

    """
2097 if disk.dev_type == constants.LD_DRBD8:
2098 assert disk.children, "Empty children for DRBD8?"
2099 fchild = disk.children[0]
2100 mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size
2106 # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
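  # Sketch of the fix above (hypothetical sizes): a DRBD8 disk of 10240 MiB
  # whose data child reports 10112 MiB gets the child grown back to
  # 10240 MiB; per the comment above, the metadata child is left alone.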
2111 def Exec(self, feedback_fn):
2112 """Verify the size of cluster disks.
2115 # TODO: check child disks too
2116 # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
2119 pnode = instance.primary_node
2120 if pnode not in per_node_disks:
2121 per_node_disks[pnode] = []
2122 for idx, disk in enumerate(instance.disks):
2123 per_node_disks[pnode].append((instance, idx, disk))
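    # Illustrative sketch (hypothetical names): per_node_disks groups disks
    # by primary node, e.g.
    #   {"node1": [(<inst1>, 0, <disk>), (<inst1>, 1, <disk>)],
    #    "node2": [(<inst2>, 0, <disk>)]}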
    changed = []
    for node, dskl in per_node_disks.items():
2127 newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
2130 result = self.rpc.call_blockdev_getsizes(node, newl)
2132 self.LogWarning("Failure in blockdev_getsizes call to node"
2133 " %s, ignoring", node)
2135 if len(result.data) != len(dskl):
2136 self.LogWarning("Invalid result from node %s, ignoring node results",
2139 for ((instance, idx, disk), size) in zip(dskl, result.data):
2141 self.LogWarning("Disk %d of instance %s did not return size"
2142 " information, ignoring", idx, instance.name)
2144 if not isinstance(size, (int, long)):
2145 self.LogWarning("Disk %d of instance %s did not return valid"
2146 " size information, ignoring", idx, instance.name)
2149 if size != disk.size:
2150 self.LogInfo("Disk %d of instance %s has mismatched size,"
2151 " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
2155 changed.append((instance.name, idx, size))
2156 if self._EnsureChildSizes(disk):
2157 self.cfg.Update(instance, feedback_fn)
            changed.append((instance.name, idx, disk.size))

    return changed
2162 class LURenameCluster(LogicalUnit):
2163 """Rename the cluster.
2166 HPATH = "cluster-rename"
2167 HTYPE = constants.HTYPE_CLUSTER
2170 def BuildHooksEnv(self):
2175 "OP_TARGET": self.cfg.GetClusterName(),
2176 "NEW_NAME": self.op.name,
2178 mn = self.cfg.GetMasterNode()
2179 all_nodes = self.cfg.GetNodeList()
2180 return env, [mn], all_nodes
2182 def CheckPrereq(self):
2183 """Verify that the passed name is a valid one.
2186 hostname = utils.GetHostInfo(self.op.name)
2188 new_name = hostname.name
2189 self.ip = new_ip = hostname.ip
2190 old_name = self.cfg.GetClusterName()
2191 old_ip = self.cfg.GetMasterIP()
2192 if new_name == old_name and new_ip == old_ip:
2193 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2194 " cluster has changed",
2196 if new_ip != old_ip:
2197 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2198 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2199 " reachable on the network. Aborting." %
2200 new_ip, errors.ECODE_NOTUNIQUE)
2202 self.op.name = new_name
2204 def Exec(self, feedback_fn):
2205 """Rename the cluster.
2208 clustername = self.op.name
2211 # shutdown the master IP
2212 master = self.cfg.GetMasterNode()
2213 result = self.rpc.call_node_stop_master(master, False)
2214 result.Raise("Could not disable the master role")
    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)
2246 def _RecursiveCheckIfLVMBased(disk):
2247 """Check if the given disk or its children are lvm-based.
2249 @type disk: L{objects.Disk}
2250 @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV
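# Sketch: for a DRBD8 disk whose children are LD_LV volumes (data and
# metadata), the recursion above returns True at the first LV child; a
# disk with no LVM anywhere in its tree yields False.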
2262 class LUSetClusterParams(LogicalUnit):
2263 """Change the parameters of the cluster.
2266 HPATH = "cluster-modify"
2267 HTYPE = constants.HTYPE_CLUSTER
2271 def CheckArguments(self):
2275 for attr in ["candidate_pool_size",
2276 "uid_pool", "add_uids", "remove_uids"]:
2277 if not hasattr(self.op, attr):
2278 setattr(self.op, attr, None)
2280 if self.op.candidate_pool_size is not None:
      try:
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
      except (ValueError, TypeError), err:
2284 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2285 str(err), errors.ECODE_INVAL)
2286 if self.op.candidate_pool_size < 1:
2287 raise errors.OpPrereqError("At least one master candidate needed",
2290 _CheckBooleanOpField(self.op, "maintain_node_health")
2292 if self.op.uid_pool:
2293 uidpool.CheckUidPool(self.op.uid_pool)
2295 if self.op.add_uids:
2296 uidpool.CheckUidPool(self.op.add_uids)
2298 if self.op.remove_uids:
2299 uidpool.CheckUidPool(self.op.remove_uids)
2301 def ExpandNames(self):
2302 # FIXME: in the future maybe other cluster params won't require checking on
2303 # all nodes to be modified.
2304 self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODE] = 1
2309 def BuildHooksEnv(self):
2314 "OP_TARGET": self.cfg.GetClusterName(),
2315 "NEW_VG_NAME": self.op.vg_name,
2317 mn = self.cfg.GetMasterNode()
2318 return env, [mn], [mn]
2320 def CheckPrereq(self):
2321 """Check prerequisites.
2323 This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
2327 if self.op.vg_name is not None and not self.op.vg_name:
2328 instances = self.cfg.GetAllInstancesInfo().values()
2329 for inst in instances:
2330 for disk in inst.disks:
2331 if _RecursiveCheckIfLVMBased(disk):
2332 raise errors.OpPrereqError("Cannot disable lvm storage while"
2333 " lvm-based instances exist",
2336 node_list = self.acquired_locks[locking.LEVEL_NODE]
    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2355 self.cluster = cluster = self.cfg.GetClusterInfo()
2356 # validate params changes
2357 if self.op.beparams:
2358 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2359 self.new_beparams = objects.FillDict(
2360 cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2362 if self.op.nicparams:
2363 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2364 self.new_nicparams = objects.FillDict(
2365 cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2366 objects.NIC.CheckParameterSyntax(self.new_nicparams)
    # check all instances for consistency
    nic_errors = []
2370 for instance in self.cfg.GetAllInstancesInfo().values():
2371 for nic_idx, nic in enumerate(instance.nics):
2372 params_copy = copy.deepcopy(nic.nicparams)
2373 params_filled = objects.FillDict(self.new_nicparams, params_copy)
        # check parameter syntax
        try:
          objects.NIC.CheckParameterSyntax(params_filled)
2378 except errors.ConfigurationError, err:
2379 nic_errors.append("Instance %s, nic/%d: %s" %
                            (instance.name, nic_idx, err))
          continue
2382 # if we're moving instances to routed, check that they have an ip
2383 target_mode = params_filled[constants.NIC_MODE]
2384 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2385 nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2386 (instance.name, nic_idx))
2388 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2389 "\n".join(nic_errors))
2391 # hypervisor list/parameters
2392 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2393 if self.op.hvparams:
2394 if not isinstance(self.op.hvparams, dict):
2395 raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2397 for hv_name, hv_dict in self.op.hvparams.items():
2398 if hv_name not in self.new_hvparams:
2399 self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)
2403 # os hypervisor parameters
2404 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      if not isinstance(self.op.os_hvp, dict):
        raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
                                   errors.ECODE_INVAL)
      for os_name, hvs in self.op.os_hvp.items():
        if not isinstance(hvs, dict):
          raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
                                      " input"), errors.ECODE_INVAL)
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2422 # changes to the hypervisor list
2423 if self.op.enabled_hypervisors is not None:
2424 self.hv_list = self.op.enabled_hypervisors
2425 if not self.hv_list:
2426 raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2427 " least one member",
2429 invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2431 raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2433 utils.CommaJoin(invalid_hvs),
2435 for hv in self.hv_list:
2436 # if the hypervisor doesn't already exist in the cluster
2437 # hvparams, we initialize it to empty, and then (in both
2438 # cases) we make sure to fill the defaults, as we might not
      # have a complete defaults list if the hypervisor wasn't
      # enabled before
      if hv not in new_hvp:
        new_hvp[hv] = {}
2443 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2444 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors
2448 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2449 # either the enabled list has changed, or the parameters have, validate
2450 for hv_name, hv_params in self.new_hvparams.items():
2451 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2452 (self.op.enabled_hypervisors and
2453 hv_name in self.op.enabled_hypervisors)):
2454 # either this is a new hypervisor, or its parameters have changed
2455 hv_class = hypervisor.GetHypervisor(hv_name)
2456 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2457 hv_class.CheckParameterSyntax(hv_params)
2458 _CheckHVParams(self, node_list, hv_name, hv_params)
    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)
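    # Illustrative sketch (hypothetical values): os_hvp overlays per-OS
    # hypervisor settings on top of the cluster-wide hvparams, e.g.
    #   cluster.hvparams == {"xen-pvm": {"kernel_path": "/boot/vmlinuz"}}
    #   os_hvp == {"debian-image": {"xen-pvm": {"kernel_path": "/boot/alt"}}}
    # gives an effective kernel_path of "/boot/alt" for that OS only.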
2474 def Exec(self, feedback_fn):
2475 """Change the parameters of the cluster.
2478 if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
2487 if self.op.hvparams:
2488 self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
2491 if self.op.enabled_hypervisors is not None:
2492 self.cluster.hvparams = self.new_hvparams
2493 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2494 if self.op.beparams:
2495 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2496 if self.op.nicparams:
2497 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2499 if self.op.candidate_pool_size is not None:
2500 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2501 # we need to update the pool size here, otherwise the save will fail
2502 _AdjustCandidatePool(self, [])
2504 if self.op.maintain_node_health is not None:
2505 self.cluster.maintain_node_health = self.op.maintain_node_health
2507 if self.op.add_uids is not None:
2508 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2510 if self.op.remove_uids is not None:
2511 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2513 if self.op.uid_pool is not None:
2514 self.cluster.uid_pool = self.op.uid_pool
2516 self.cfg.Update(self.cluster, feedback_fn)
2519 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2520 """Distribute additional files which are part of the cluster configuration.
2522 ConfigWriter takes care of distributing the config and ssconf files, but
2523 there are more files which should be distributed to all nodes. This function
2524 makes sure those are copied.
2526 @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
2530 # 1. Gather target nodes
2531 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2532 dist_nodes = lu.cfg.GetOnlineNodeList()
2533 if additional_nodes is not None:
2534 dist_nodes.extend(additional_nodes)
2535 if myself.name in dist_nodes:
2536 dist_nodes.remove(myself.name)
2538 # 2. Gather files to distribute
2539 dist_files = set([constants.ETC_HOSTS,
2540 constants.SSH_KNOWN_HOSTS_FILE,
2541 constants.RAPI_CERT_FILE,
2542 constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                    ])
2546 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2547 for hv_name in enabled_hypervisors:
2548 hv_class = hypervisor.GetHypervisor(hv_name)
2549 dist_files.update(hv_class.GetAncillaryFiles())
2551 # 3. Perform the files upload
2552 for fname in dist_files:
2553 if os.path.exists(fname):
2554 result = lu.rpc.call_upload_file(dist_nodes, fname)
2555 for to_node, to_result in result.items():
2556 msg = to_result.fail_msg
2558 msg = ("Copy of file %s to node %s failed: %s" %
2559 (fname, to_node, msg))
2560 lu.proc.LogWarning(msg)
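# Typical use (sketch): called as _RedistributeAncillaryFiles(lu) after a
# configuration change, or with additional_nodes=[node] while a node is
# being added and is not yet part of the configuration.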
2563 class LURedistributeConfig(NoHooksLU):
2564 """Force the redistribution of cluster configuration.
  This is a very simple LU.

  """
2572 def ExpandNames(self):
2573 self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODE] = 1
2578 def CheckPrereq(self):
2579 """Check prerequisites.
2583 def Exec(self, feedback_fn):
2584 """Redistribute the configuration.
2587 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2588 _RedistributeAncillaryFiles(self)
2591 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2592 """Sleep and poll for an instance's disk to sync.
2595 if not instance.disks or disks is not None and not disks:
2598 disks = _ExpandCheckDisks(instance, disks)
2601 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2603 node = instance.primary_node
  for dev in disks:
    lu.cfg.SetDiskID(dev, node)
2608 # TODO: Convert to utils.Retry
  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time

  while True:
    max_time = 0
    done = True
    cumul_degraded = False
2616 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    retries = 0
    rstats = rstats.payload
2628 for i, mstat in enumerate(rstats):
2630 lu.LogWarning("Can't compute data for node %s/%s",
2631 node, disks[i].iv_name)
2634 cumul_degraded = (cumul_degraded or
2635 (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
2639 rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2640 max_time = mstat.estimated_time
2642 rem_time = "no time estimate"
2643 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2644 (disks[i].iv_name, mstat.sync_percent, rem_time))
2646 # if we're done but degraded, let's do a few small retries, to
2647 # make sure we see a stable and not transient situation; therefore
2648 # we force restart of the loop
2649 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2650 logging.info("Degraded disks found, %d retries left", degr_retries)
2658 time.sleep(min(60, max_time))
2661 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2662 return not cumul_degraded
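# Sketch of the polling contract: _WaitForSync returns True when no disk is
# left degraded and False otherwise; with oneshot=True it reports the
# current status once instead of blocking until the sync finishes.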
2665 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2666 """Check that mirrors are not degraded.
2668 The ldisk parameter, if True, will change the test from the
2669 is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result
2699 class LUDiagnoseOS(NoHooksLU):
2700 """Logical unit for OS diagnose/query.
2703 _OP_REQP = ["output_fields", "names"]
2705 _FIELDS_STATIC = utils.FieldSet()
2706 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2707 # Fields that need calculation of global os validity
2708 _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2710 def ExpandNames(self):
2712 raise errors.OpPrereqError("Selective OS query not supported",
2715 _CheckOutputFields(static=self._FIELDS_STATIC,
2716 dynamic=self._FIELDS_DYNAMIC,
2717 selected=self.op.output_fields)
2719 # Lock all nodes, in shared mode
2720 # Temporary removal of locks, should be reverted later
2721 # TODO: reintroduce locks when they are lighter-weight
2722 self.needed_locks = {}
2723 #self.share_locks[locking.LEVEL_NODE] = 1
2724 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2726 def CheckPrereq(self):
2727 """Check prerequisites.
2732 def _DiagnoseByOS(rlist):
2733 """Remaps a per-node return list into an a per-os per-node dictionary
2735 @param rlist: a map with node names as keys and OS objects as values
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and tuples of (path, status, diagnose, variants)
        as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", []),
                                     (/srv/..., False, "invalid api", [])],
                           "node2": [(/srv/..., True, "", [])]
                          }
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
2749 # level), so that nodes with a non-responding node daemon don't
2750 # make all OSes invalid
2751 good_nodes = [node_name for node_name in rlist
2752 if not rlist[node_name].fail_msg]
2753 for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
2756 for name, path, status, diagnose, variants in nr.payload:
2757 if name not in all_os:
2758 # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
2762 all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose, variants))

    return all_os
2766 def Exec(self, feedback_fn):
2767 """Compute the list of OSes.
2770 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2771 node_data = self.rpc.call_os_diagnose(valid_nodes)
2772 pol = self._DiagnoseByOS(node_data)
2774 calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2775 calc_variants = "variants" in self.op.output_fields
    output = []
    for os_name, os_data in pol.items():
      row = []
      valid = True
      variants = None
      for osl in os_data.values():
        valid = valid and osl and osl[0][1]
        if not valid:
          variants = None
          break
        if calc_variants:
          node_variants = osl[0][3]
2789 if variants is None:
2790 variants = node_variants
          else:
            variants = [v for v in variants if v in node_variants]
      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = variants
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
2814 class LURemoveNode(LogicalUnit):
2815 """Logical unit for removing a node.
2818 HPATH = "node-remove"
2819 HTYPE = constants.HTYPE_NODE
2820 _OP_REQP = ["node_name"]
2822 def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
2833 all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node %s which is about to be removed not found"
2838 " in the all nodes list", self.op.node_name)
2839 return env, all_nodes, all_nodes
2841 def CheckPrereq(self):
2842 """Check prerequisites.
    This checks:
     - the node exists in the configuration
2846 - it does not have primary or secondary instances
2847 - it's not the master
    Any errors are signaled by raising errors.OpPrereqError.

    """
2852 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2853 node = self.cfg.GetNodeInfo(self.op.node_name)
2854 assert node is not None
2856 instance_list = self.cfg.GetInstanceList()
2858 masternode = self.cfg.GetMasterNode()
2859 if node.name == masternode:
2860 raise errors.OpPrereqError("Node is the master node,"
2861 " you need to failover first.",
2864 for instance_name in instance_list:
2865 instance = self.cfg.GetInstanceInfo(instance_name)
2866 if node.name in instance.all_nodes:
2867 raise errors.OpPrereqError("Instance %s is still running on the node,"
2868 " please remove first." % instance_name,
    self.op.node_name = node.name
    self.node = node
2873 def Exec(self, feedback_fn):
2874 """Removes the node from the cluster.
2878 logging.info("Stopping the node daemon and removing configs from node %s",
2881 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2883 # Promote nodes to master candidate as needed
2884 _AdjustCandidatePool(self, exceptions=[node.name])
2885 self.context.RemoveNode(node.name)
2887 # Run post hooks on the node before it's removed
2888 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
2895 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2896 msg = result.fail_msg
2898 self.LogWarning("Errors encountered on the remote node while leaving"
2899 " the cluster: %s", msg)
2901 # Remove node from our /etc/hosts
2902 if self.cfg.GetClusterInfo().modify_etc_hosts:
2903 # FIXME: this should be done via an rpc call to node daemon
2904 utils.RemoveHostFromEtcHosts(node.name)
2905 _RedistributeAncillaryFiles(self)
2908 class LUQueryNodes(NoHooksLU):
2909 """Logical unit for querying nodes.
2912 # pylint: disable-msg=W0142
2913 _OP_REQP = ["output_fields", "names", "use_locking"]
2916 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2917 "master_candidate", "offline", "drained"]
  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )
2926 _FIELDS_STATIC = utils.FieldSet(*[
2927 "pinst_cnt", "sinst_cnt",
2928 "pinst_list", "sinst_list",
2929 "pip", "sip", "tags",
2931 "role"] + _SIMPLE_FIELDS
2934 def ExpandNames(self):
2935 _CheckOutputFields(static=self._FIELDS_STATIC,
2936 dynamic=self._FIELDS_DYNAMIC,
2937 selected=self.op.output_fields)
2939 self.needed_locks = {}
2940 self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET
2947 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2948 self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2953 def CheckPrereq(self):
2954 """Check prerequisites.
2957 # The validation of the node list is done in the _GetWantedNodes,
2958 # if non empty, and if empty, there's no validation to do
2961 def Exec(self, feedback_fn):
2962 """Computes the list of nodes and their attributes.
2965 all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2968 elif self.wanted != locking.ALL_SET:
2969 nodenames = self.wanted
2970 missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()
2977 nodenames = utils.NiceSort(nodenames)
2978 nodelist = [all_info[name] for name in nodenames]
2980 # begin data gathering
    if self.do_node_query:
      live_data = {}
2984 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2985 self.cfg.GetHypervisorType())
2986 for name in nodenames:
2987 nodeinfo = node_data[name]
2988 if not nodeinfo.fail_msg and nodeinfo.payload:
2989 nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2993 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2994 "mfree": fn(int, nodeinfo.get('memory_free', None)),
2995 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2996 "dfree": fn(int, nodeinfo.get('vg_free', None)),
2997 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2998 "bootid": nodeinfo.get('bootid', None),
2999 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3000 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3003 live_data[name] = {}
3005 live_data = dict.fromkeys(nodenames, {})
3007 node_to_primary = dict([(name, set()) for name in nodenames])
3008 node_to_secondary = dict([(name, set()) for name in nodenames])
3010 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3011 "sinst_cnt", "sinst_list"))
3012 if inst_fields & frozenset(self.op.output_fields):
3013 inst_data = self.cfg.GetAllInstancesInfo()
3015 for inst in inst_data.values():
3016 if inst.primary_node in node_to_primary:
3017 node_to_primary[inst.primary_node].add(inst.name)
3018 for secnode in inst.secondary_nodes:
3019 if secnode in node_to_secondary:
3020 node_to_secondary[secnode].add(inst.name)
3022 master_node = self.cfg.GetMasterNode()
3024 # end data gathering
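    # Illustrative sketch (hypothetical cluster): with
    # output_fields=["name", "role", "pinst_cnt"] the loop below produces
    # one row per node, e.g. [["node1", "M", 2], ["node2", "R", 1]].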
    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
3030 if field in self._SIMPLE_FIELDS:
3031 val = getattr(node, field)
3032 elif field == "pinst_list":
3033 val = list(node_to_primary[node.name])
3034 elif field == "sinst_list":
3035 val = list(node_to_secondary[node.name])
3036 elif field == "pinst_cnt":
3037 val = len(node_to_primary[node.name])
3038 elif field == "sinst_cnt":
3039 val = len(node_to_secondary[node.name])
3040 elif field == "pip":
3041 val = node.primary_ip
3042 elif field == "sip":
3043 val = node.secondary_ip
3044 elif field == "tags":
3045 val = list(node.GetTags())
3046 elif field == "master":
3047 val = node.name == master_node
3048 elif self._FIELDS_DYNAMIC.Matches(field):
3049 val = live_data[node.name].get(field, None)
3050 elif field == "role":
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
3063 node_output.append(val)
      output.append(node_output)

    return output
3069 class LUQueryNodeVolumes(NoHooksLU):
3070 """Logical unit for getting volumes on node(s).
3073 _OP_REQP = ["nodes", "output_fields"]
3075 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3076 _FIELDS_STATIC = utils.FieldSet("node")
3078 def ExpandNames(self):
3079 _CheckOutputFields(static=self._FIELDS_STATIC,
3080 dynamic=self._FIELDS_DYNAMIC,
3081 selected=self.op.output_fields)
3083 self.needed_locks = {}
3084 self.share_locks[locking.LEVEL_NODE] = 1
3085 if not self.op.nodes:
3086 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
3089 _GetWantedNodes(self, self.op.nodes)
3091 def CheckPrereq(self):
3092 """Check prerequisites.
    This checks that the fields required are valid output fields.

    """
3097 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3099 def Exec(self, feedback_fn):
3100 """Computes the list of nodes and their attributes.
3103 nodenames = self.nodes
3104 volumes = self.rpc.call_node_volumes(nodenames)
3106 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3107 in self.cfg.GetInstanceList()]
3109 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
    output = []
    for node in nodenames:
3113 nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue
3121 node_vols = nresult.payload[:]
3122 node_vols.sort(key=lambda vol: vol['dev'])
      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                break
            else:
              inst = None
            val = inst and inst.name or '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
3155 class LUQueryNodeStorage(NoHooksLU):
3156 """Logical unit for getting information on storage units on node(s).
3159 _OP_REQP = ["nodes", "storage_type", "output_fields"]
3161 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3163 def CheckArguments(self):
3164 _CheckStorageType(self.op.storage_type)
3166 _CheckOutputFields(static=self._FIELDS_STATIC,
3167 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3168 selected=self.op.output_fields)
3170 def ExpandNames(self):
3171 self.needed_locks = {}
3172 self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3180 def CheckPrereq(self):
3181 """Check prerequisites.
    This checks that the fields required are valid output fields.

    """
3186 self.op.name = getattr(self.op, "name", None)
3188 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3190 def Exec(self, feedback_fn):
3191 """Computes the list of nodes and their attributes.
3194 # Always get name to sort by
3195 if constants.SF_NAME in self.op.output_fields:
3196 fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields
3200 # Never ask for node or type as it's only known to the LU
3201 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3202 while extra in fields:
3203 fields.remove(extra)
3205 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3206 name_idx = field_idx[constants.SF_NAME]
3208 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3209 data = self.rpc.call_storage_list(self.nodes,
3210 self.op.storage_type, st_args,
3211 self.op.name, fields)
    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue
3225 rows = dict([(row[name_idx], row) for row in nresult.payload])
      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result
3249 class LUModifyNodeStorage(NoHooksLU):
3250 """Logical unit for modifying a storage volume on a node.
3253 _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3256 def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3259 _CheckStorageType(self.op.storage_type)
3261 def ExpandNames(self):
3262 self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }
3266 def CheckPrereq(self):
3267 """Check prerequisites.
3270 storage_type = self.op.storage_type
    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)
3279 diff = set(self.op.changes.keys()) - modifiable
3281 raise errors.OpPrereqError("The following fields can not be modified for"
3282 " storage units of type '%s': %r" %
3283 (storage_type, list(diff)),
3286 def Exec(self, feedback_fn):
3287 """Computes the list of nodes and their attributes.
3290 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3291 result = self.rpc.call_storage_modify(self.op.node_name,
3292 self.op.storage_type, st_args,
3293 self.op.name, self.op.changes)
3294 result.Raise("Failed to modify storage unit '%s' on %s" %
3295 (self.op.name, self.op.node_name))
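    # Sketch of a typical invocation (hypothetical values): an opcode with
    # storage_type=constants.ST_LVM_PV, name="/dev/sda3" and
    # changes={constants.SF_ALLOCATABLE: False} would mark that PV as
    # non-allocatable on the node, provided the field is modifiable.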
3298 class LUAddNode(LogicalUnit):
3299 """Logical unit for adding node to the cluster.
3303 HTYPE = constants.HTYPE_NODE
3304 _OP_REQP = ["node_name"]
3306 def CheckArguments(self):
3307 # validate/normalize the node name
3308 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3310 def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
3322 nodes_0 = self.cfg.GetNodeList()
3323 nodes_1 = nodes_0 + [self.op.node_name, ]
3324 return env, nodes_0, nodes_1
3326 def CheckPrereq(self):
3327 """Check prerequisites.
    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg
3340 dns_data = utils.GetHostInfo(node_name)
3342 node = dns_data.name
3343 primary_ip = self.op.primary_ip = dns_data.ip
3344 secondary_ip = getattr(self.op, "secondary_ip", None)
3345 if secondary_ip is None:
3346 secondary_ip = primary_ip
3347 if not utils.IsValidIP(secondary_ip):
3348 raise errors.OpPrereqError("Invalid secondary IP given",
3350 self.op.secondary_ip = secondary_ip
3352 node_list = cfg.GetNodeList()
3353 if not self.op.readd and node in node_list:
3354 raise errors.OpPrereqError("Node %s is already in the configuration" %
3355 node, errors.ECODE_EXISTS)
3356 elif self.op.readd and node not in node_list:
3357 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3360 self.changed_primary_ip = False
3362 for existing_node_name in node_list:
3363 existing_node = cfg.GetNodeInfo(existing_node_name)
3365 if self.op.readd and node == existing_node_name:
3366 if existing_node.secondary_ip != secondary_ip:
3367 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3368 " address configuration as before",
3370 if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

3375 if (existing_node.primary_ip == primary_ip or
3376 existing_node.secondary_ip == primary_ip or
3377 existing_node.primary_ip == secondary_ip or
3378 existing_node.secondary_ip == secondary_ip):
3379 raise errors.OpPrereqError("New node ip address(es) conflict with"
3380 " existing node %s" % existing_node.name,
3381 errors.ECODE_NOTUNIQUE)
3383 # check that the type of the node (single versus dual homed) is the
3384 # same as for the master
3385 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3386 master_singlehomed = myself.secondary_ip == myself.primary_ip
3387 newbie_singlehomed = secondary_ip == primary_ip
3388 if master_singlehomed != newbie_singlehomed:
3389 if master_singlehomed:
3390 raise errors.OpPrereqError("The master has no private ip but the"
3391 " new node has one",
3394 raise errors.OpPrereqError("The master has a private ip but the"
3395 " new node doesn't have one",
3398 # checks reachability
3399 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3400 raise errors.OpPrereqError("Node not reachable by ping",
3401 errors.ECODE_ENVIRON)
3403 if not newbie_singlehomed:
3404 # check reachability from my secondary ip to newbie's secondary ip
3405 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3406 source=myself.secondary_ip):
3407 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3408 " based ping to noded port",
3409 errors.ECODE_ENVIRON)
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
3423 primary_ip=primary_ip,
3424 secondary_ip=secondary_ip,
3425 master_candidate=self.master_candidate,
3426 offline=False, drained=False)
3428 def Exec(self, feedback_fn):
3429 """Adds the new node to the cluster.
3432 new_node = self.new_node
3433 node = new_node.name
3435 # for re-adds, reset the offline/drained/master-candidate flags;
3436 # we need to reset here, otherwise offline would prevent RPC calls
3437 # later in the procedure; this also means that if the re-add
3438 # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3441 self.LogInfo("Readding a node, the offline/drained flags were reset")
3442 # if we demote the node, we do cleanup later in the procedure
3443 new_node.master_candidate = self.master_candidate
3444 if self.changed_primary_ip:
3445 new_node.primary_ip = self.op.primary_ip
3447 # notify the user about any possible mc promotion
3448 if new_node.master_candidate:
3449 self.LogInfo("Node will be a master candidate")
3451 # check connectivity
3452 result = self.rpc.call_version([node])[node]
3453 result.Raise("Can't get version information from node %s" % node)
3454 if constants.PROTOCOL_VERSION == result.payload:
3455 logging.info("Communication to node %s fine, sw version %s match",
3456 node, result.payload)
3458 raise errors.OpExecError("Version mismatch master version %s,"
3459 " node version %s" %
3460 (constants.PROTOCOL_VERSION, result.payload))
3463 if self.cfg.GetClusterInfo().modify_ssh_setup:
3464 logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyarray = []
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      for i in keyfiles:
        keyarray.append(utils.ReadFile(i))

      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
                                      keyarray[5])
      result.Raise("Cannot transfer ssh keys to the new node")
3479 # Add node to our /etc/hosts, and add key to known_hosts
3480 if self.cfg.GetClusterInfo().modify_etc_hosts:
3481 # FIXME: this should be done via an rpc call to node daemon
3482 utils.AddHostToEtcHosts(new_node.name)
3484 if new_node.secondary_ip != new_node.primary_ip:
3485 result = self.rpc.call_node_has_ip_address(new_node.name,
3486 new_node.secondary_ip)
3487 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3488 prereq=True, ecode=errors.ECODE_ENVIRON)
3489 if not result.payload:
3490 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3491 " you gave (%s). Please fix and re-run this"
3492 " command." % new_node.secondary_ip)
3494 node_verify_list = [self.cfg.GetMasterNode()]
3495 node_verify_param = {
3496 constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
      }
3500 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3501 self.cfg.GetClusterName())
3502 for verifier in node_verify_list:
3503 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3504 nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
3507 feedback_fn("ssh/hostname verification failed"
3508 " (checking from %s): %s" %
3509 (verifier, nl_payload[failed]))
3510 raise errors.OpExecError("ssh/hostname verification failed.")
    if self.op.readd:
      _RedistributeAncillaryFiles(self)
3514 self.context.ReaddNode(new_node)
3515 # make sure we redistribute the config
3516 self.cfg.Update(new_node, feedback_fn)
3517 # and make sure the new node will not have old files around
3518 if not new_node.master_candidate:
3519 result = self.rpc.call_node_demote_from_mc(new_node.name)
3520 msg = result.fail_msg
3522 self.LogWarning("Node failed to demote itself from master"
3523 " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())
3529 class LUSetNodeParams(LogicalUnit):
3530 """Modifies the parameters of a node.
3533 HPATH = "node-modify"
3534 HTYPE = constants.HTYPE_NODE
3535 _OP_REQP = ["node_name"]
3538 def CheckArguments(self):
3539 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3540 _CheckBooleanOpField(self.op, 'master_candidate')
3541 _CheckBooleanOpField(self.op, 'offline')
3542 _CheckBooleanOpField(self.op, 'drained')
3543 _CheckBooleanOpField(self.op, 'auto_promote')
3544 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3545 if all_mods.count(None) == 3:
3546 raise errors.OpPrereqError("Please pass at least one modification",
3548 if all_mods.count(True) > 1:
3549 raise errors.OpPrereqError("Can't set the node into more than one"
3550 " state at the same time",
3553 # Boolean value that tells us whether we're offlining or draining the node
3554 self.offline_or_drain = (self.op.offline == True or
3555 self.op.drained == True)
3556 self.deoffline_or_drain = (self.op.offline == False or
3557 self.op.drained == False)
3558 self.might_demote = (self.op.master_candidate == False or
3559 self.offline_or_drain)
3561 self.lock_all = self.op.auto_promote and self.might_demote
3564 def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3570 def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl
3586 def CheckPrereq(self):
3587 """Check prerequisites.
    This only checks the instance list against the existing names.

    """
3592 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3594 if (self.op.master_candidate is not None or
3595 self.op.drained is not None or
3596 self.op.offline is not None):
3597 # we can't change the master's node flags
3598 if self.op.node_name == self.cfg.GetMasterNode():
3599 raise errors.OpPrereqError("The master role can be changed"
3600 " only via masterfailover",
3604 if node.master_candidate and self.might_demote and not self.lock_all:
3605 assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
3608 (mc_remaining, mc_should, _) = \
3609 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3610 if mc_remaining < mc_should:
3611 raise errors.OpPrereqError("Not enough master candidates, please"
3612 " pass auto_promote to allow promotion",
3615 if (self.op.master_candidate == True and
3616 ((node.offline and not self.op.offline == False) or
3617 (node.drained and not self.op.drained == False))):
3618 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3619 " to master_candidate" % node.name,
3622 # If we're being deofflined/drained, we'll MC ourself if needed
3623 if (self.deoffline_or_drain and not self.offline_or_drain and not
3624 self.op.master_candidate == True and not node.master_candidate):
3625 self.op.master_candidate = _DecideSelfPromotion(self)
3626 if self.op.master_candidate:
3627 self.LogInfo("Autopromoting node to master candidate")
  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node

    result = []
    changed_mc = False

3640 if self.op.offline is not None:
3641 node.offline = self.op.offline
3642 result.append(("offline", str(self.op.offline)))
3643 if self.op.offline == True:
3644 if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
3650 result.append(("drained", "clear drained status due to offline"))
3652 if self.op.master_candidate is not None:
3653 node.master_candidate = self.op.master_candidate
3655 result.append(("master_candidate", str(self.op.master_candidate)))
3656 if self.op.master_candidate == False:
3657 rrc = self.rpc.call_node_demote_from_mc(node.name)
3660 self.LogWarning("Node failed to demote itself: %s" % msg)
3662 if self.op.drained is not None:
3663 node.drained = self.op.drained
3664 result.append(("drained", str(self.op.drained)))
3665 if self.op.drained == True:
3666 if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
3676 result.append(("offline", "clear offline status due to drain"))
3678 # we locked all nodes, we adjust the CP before updating this node
    if self.lock_all:
      _AdjustCandidatePool(self, [node.name])
3682 # this will trigger configuration file update, if needed
3683 self.cfg.Update(node, feedback_fn)
3685 # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result
3692 class LUPowercycleNode(NoHooksLU):
3693 """Powercycles a node.
3696 _OP_REQP = ["node_name", "force"]
3699 def CheckArguments(self):
3700 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3701 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3702 raise errors.OpPrereqError("The node is the master and the force"
3703 " parameter was not set",
3706 def ExpandNames(self):
3707 """Locking for PowercycleNode.
3709 This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
3713 self.needed_locks = {}
3715 def CheckPrereq(self):
3716 """Check prerequisites.
    This LU has no prereqs.

    """
3723 def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
3728 self.cfg.GetHypervisorType())
3729 result.Raise("Failed to schedule the reboot")
3730 return result.payload
3733 class LUQueryClusterInfo(NoHooksLU):
3734 """Query cluster configuration.
3740 def ExpandNames(self):
3741 self.needed_locks = {}
3743 def CheckPrereq(self):
3744 """No prerequsites needed for this LU.
3749 def Exec(self, feedback_fn):
3750 """Return cluster config.
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}
3756 # Filter just for enabled hypervisors
3757 for os_name, hv_dict in cluster.os_hvp.items():
3758 os_hvp[os_name] = {}
3759 for hv_name, hv_params in hv_dict.items():
3760 if hv_name in cluster.enabled_hypervisors:
3761 os_hvp[os_name][hv_name] = hv_params
3764 "software_version": constants.RELEASE_VERSION,
3765 "protocol_version": constants.PROTOCOL_VERSION,
3766 "config_version": constants.CONFIG_VERSION,
3767 "os_api_version": max(constants.OS_API_VERSIONS),
3768 "export_version": constants.EXPORT_VERSION,
3769 "architecture": (platform.architecture()[0], platform.machine()),
3770 "name": cluster.cluster_name,
3771 "master": cluster.master_node,
3772 "default_hypervisor": cluster.enabled_hypervisors[0],
3773 "enabled_hypervisors": cluster.enabled_hypervisors,
3774 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3775 for hypervisor_name in cluster.enabled_hypervisors]),
3777 "beparams": cluster.beparams,
3778 "nicparams": cluster.nicparams,
3779 "candidate_pool_size": cluster.candidate_pool_size,
3780 "master_netdev": cluster.master_netdev,
3781 "volume_group_name": cluster.volume_group_name,
3782 "file_storage_dir": cluster.file_storage_dir,
3783 "maintain_node_health": cluster.maintain_node_health,
3784 "ctime": cluster.ctime,
3785 "mtime": cluster.mtime,
3786 "uuid": cluster.uuid,
3787 "tags": list(cluster.GetTags()),
3788 "uid_pool": cluster.uid_pool,
3794 class LUQueryConfigValues(NoHooksLU):
3795 """Return configuration values.
3800 _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")
3804 def ExpandNames(self):
3805 self.needed_locks = {}
3807 _CheckOutputFields(static=self._FIELDS_STATIC,
3808 dynamic=self._FIELDS_DYNAMIC,
3809 selected=self.op.output_fields)
3811 def CheckPrereq(self):
3812 """No prerequisites.
3817 def Exec(self, feedback_fn):
3818 """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
3823 if field == "cluster_name":
3824 entry = self.cfg.GetClusterName()
3825 elif field == "master_node":
3826 entry = self.cfg.GetMasterNode()
3827 elif field == "drain_flag":
3828 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3829 elif field == "watcher_pause":
3830 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3832 raise errors.ParameterError(field)
      values.append(entry)

    return values
3837 class LUActivateInstanceDisks(NoHooksLU):
3838 """Bring up an instance's disks.
3841 _OP_REQP = ["instance_name"]
3844 def ExpandNames(self):
3845 self._ExpandAndLockInstance()
3846 self.needed_locks[locking.LEVEL_NODE] = []
3847 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3849 def DeclareLocks(self, level):
3850 if level == locking.LEVEL_NODE:
3851 self._LockInstancesNodes()
3853 def CheckPrereq(self):
3854 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
3859 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3860 assert self.instance is not None, \
3861 "Cannot retrieve locked instance %s" % self.op.instance_name
3862 _CheckNodeOnline(self, self.instance.primary_node)
3863 if not hasattr(self.op, "ignore_size"):
3864 self.op.ignore_size = False
3866 def Exec(self, feedback_fn):
3867 """Activate the disks.
3870 disks_ok, disks_info = \
3871 _AssembleInstanceDisks(self, self.instance,
3872 ignore_size=self.op.ignore_size)
3874 raise errors.OpExecError("Cannot activate block devices")
3879 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
3881 """Prepare the block devices for an instance.
3883 This sets up the block devices on all nodes.
3885 @type lu: L{LogicalUnit}
3886 @param lu: the logical unit on whose behalf we execute
3887 @type instance: L{objects.Instance}
3888 @param instance: the instance for whose disks we assemble
3889 @type disks: list of L{objects.Disk} or None
3890 @param disks: which disks to assemble (or all, if None)
3891 @type ignore_secondaries: boolean
3892 @param ignore_secondaries: if true, errors on secondary nodes
3893 won't result in an error return from the function
3894 @type ignore_size: boolean
3895 @param ignore_size: if true, the current known size of the disk
3896 will not be used during the disk activation, useful for cases
3897 when the size is wrong
3898 @return: a pair (disks_ok, device_info), where C{disks_ok} is False if
3899 the operation failed, and C{device_info} is a list of
3900 (host, instance_visible_name, node_visible_name) tuples
3905 iname = instance.name
3906 disks = _ExpandCheckDisks(instance, disks)
3908 # With the two-pass mechanism we try to reduce the window of
3909 # opportunity for the race condition of switching DRBD to primary
3910 # before handshaking occurred, but we do not eliminate it
3912 # The proper fix would be to wait (with some limits) until the
3913 # connection has been made and drbd transitions from WFConnection
3914 # into any other network-connected state (Connected, SyncTarget, SyncSource, etc.)
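# In short: pass 1 assembles every disk on all nodes with is_primary=False,
# giving the DRBD peers a chance to handshake; pass 2 then re-assembles only
# on the primary node with is_primary=True, which performs the promotion.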
3917 # 1st pass, assemble on all nodes in secondary mode
3918 for inst_disk in disks:
3919 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3921 node_disk = node_disk.Copy()
3922 node_disk.UnsetSize()
3923 lu.cfg.SetDiskID(node_disk, node)
3924 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3925 msg = result.fail_msg
3927 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3928 " (is_primary=False, pass=1): %s",
3929 inst_disk.iv_name, node, msg)
3930 if not ignore_secondaries:
3933 # FIXME: race condition on drbd migration to primary
3935 # 2nd pass, do only the primary node
3936 for inst_disk in disks:
3939 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3940 if node != instance.primary_node:
3943 node_disk = node_disk.Copy()
3944 node_disk.UnsetSize()
3945 lu.cfg.SetDiskID(node_disk, node)
3946 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3947 msg = result.fail_msg
3949 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3950 " (is_primary=True, pass=2): %s",
3951 inst_disk.iv_name, node, msg)
3954 dev_path = result.payload
3956 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3958 # leave the disks configured for the primary node
3959 # this is a workaround that would be fixed better by
3960 # improving the logical/physical id handling
3962 lu.cfg.SetDiskID(disk, instance.primary_node)
3964 return disks_ok, device_info
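# Typical call (hypothetical names): disks_ok, info = _AssembleInstanceDisks(lu, inst)
# where info could be [("node1.example.com", "disk/0", "/dev/drbd0")] for a
# single-disk DRBD instance.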
3967 def _StartInstanceDisks(lu, instance, force):
3968 """Start the disks of an instance.
3971 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3972 ignore_secondaries=force)
3974 _ShutdownInstanceDisks(lu, instance)
3975 if force is not None and not force:
3976 lu.proc.LogWarning("", hint="If the message above refers to a"
3978 " you can retry the operation using '--force'.")
3979 raise errors.OpExecError("Disk consistency error")
3982 class LUDeactivateInstanceDisks(NoHooksLU):
3983 """Shutdown an instance's disks.
3986 _OP_REQP = ["instance_name"]
3989 def ExpandNames(self):
3990 self._ExpandAndLockInstance()
3991 self.needed_locks[locking.LEVEL_NODE] = []
3992 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3994 def DeclareLocks(self, level):
3995 if level == locking.LEVEL_NODE:
3996 self._LockInstancesNodes()
3998 def CheckPrereq(self):
3999 """Check prerequisites.
4001 This checks that the instance is in the cluster.
4004 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4005 assert self.instance is not None, \
4006 "Cannot retrieve locked instance %s" % self.op.instance_name
4008 def Exec(self, feedback_fn):
4009 """Deactivate the disks
4012 instance = self.instance
4013 _SafeShutdownInstanceDisks(self, instance)
4016 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4017 """Shutdown block devices of an instance.
4019 This function checks that the instance is not running before calling
4020 _ShutdownInstanceDisks.
4023 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4024 _ShutdownInstanceDisks(lu, instance, disks=disks)
4027 def _ExpandCheckDisks(instance, disks):
4028 """Return the instance disks selected by the disks list
4030 @type disks: list of L{objects.Disk} or None
4031 @param disks: selected disks
4032 @rtype: list of L{objects.Disk}
4033 @return: selected instance disks to act on
4037 return instance.disks
4039 if not set(disks).issubset(instance.disks):
4040 raise errors.ProgrammerError("Can only act on disks belonging to the" " given instance")
4045 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4046 """Shutdown block devices of an instance.
4048 This does the shutdown on all nodes of the instance.
4050 Errors on any node mark the operation as failed, except that errors on the primary node are ignored when C{ignore_primary} is true.
4055 disks = _ExpandCheckDisks(instance, disks)
4058 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4059 lu.cfg.SetDiskID(top_disk, node)
4060 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4061 msg = result.fail_msg
4063 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4064 disk.iv_name, node, msg)
4065 if not ignore_primary or node != instance.primary_node:
4070 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4071 """Checks if a node has enough free memory.
4073 This function checks if a given node has the needed amount of free
4074 memory. In case the node has less memory or we cannot get the
4075 information from the node, this function raises an OpPrereqError exception.
4078 @type lu: C{LogicalUnit}
4079 @param lu: a logical unit from which we get configuration data
4081 @param node: the node to check
4082 @type reason: C{str}
4083 @param reason: string to use in the error message
4084 @type requested: C{int}
4085 @param requested: the amount of memory in MiB to check for
4086 @type hypervisor_name: C{str}
4087 @param hypervisor_name: the hypervisor to ask for memory stats
4088 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4089 we cannot check the node
4092 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4093 nodeinfo[node].Raise("Can't get data from node %s" % node,
4094 prereq=True, ecode=errors.ECODE_ENVIRON)
4095 free_mem = nodeinfo[node].payload.get('memory_free', None)
4096 if not isinstance(free_mem, int):
4097 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4098 " was '%s'" % (node, free_mem),
4099 errors.ECODE_ENVIRON)
4100 if requested > free_mem:
4101 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4102 " needed %s MiB, available %s MiB" %
4103 (node, reason, requested, free_mem),
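# Example (hypothetical values): before starting an instance one might call
# _CheckNodeFreeMemory(self, "node1.example.com",
# "starting instance inst1.example.com", 512, "xen-pvm"),
# which raises OpPrereqError unless node1 reports at least 512 MiB free.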
4107 def _CheckNodesFreeDisk(lu, nodenames, requested):
4108 """Checks if nodes have enough free disk space in the default VG.
4110 This function checks if all given nodes have the needed amount of
4111 free disk. In case any node has less disk or we cannot get the
4112 information from the node, this function raises an OpPrereqError exception.
4115 @type lu: C{LogicalUnit}
4116 @param lu: a logical unit from which we get configuration data
4117 @type nodenames: C{list}
4118 @param nodenames: the list of node names to check
4119 @type requested: C{int}
4120 @param requested: the amount of disk in MiB to check for
4121 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4122 we cannot check the node
4125 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4126 lu.cfg.GetHypervisorType())
4127 for node in nodenames:
4128 info = nodeinfo[node]
4129 info.Raise("Cannot get current information from node %s" % node,
4130 prereq=True, ecode=errors.ECODE_ENVIRON)
4131 vg_free = info.payload.get("vg_free", None)
4132 if not isinstance(vg_free, int):
4133 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4134 " result was '%s'" % (node, vg_free),
4135 errors.ECODE_ENVIRON)
4136 if requested > vg_free:
4137 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4138 " required %d MiB, available %d MiB" %
4139 (node, requested, vg_free),
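# Example (hypothetical values): _CheckNodesFreeDisk(self, ["node1", "node2"], 10240)
# verifies that both nodes report at least 10 GiB of free space in the
# default volume group.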
4143 class LUStartupInstance(LogicalUnit):
4144 """Starts an instance.
4147 HPATH = "instance-start"
4148 HTYPE = constants.HTYPE_INSTANCE
4149 _OP_REQP = ["instance_name", "force"]
4152 def ExpandNames(self):
4153 self._ExpandAndLockInstance()
4155 def BuildHooksEnv(self):
4158 This runs on master, primary and secondary nodes of the instance.
4162 "FORCE": self.op.force,
4164 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4165 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4168 def CheckPrereq(self):
4169 """Check prerequisites.
4171 This checks that the instance is in the cluster.
4174 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4175 assert self.instance is not None, \
4176 "Cannot retrieve locked instance %s" % self.op.instance_name
4179 self.beparams = getattr(self.op, "beparams", {})
4181 if not isinstance(self.beparams, dict):
4182 raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4183 " dict" % (type(self.beparams), ),
4185 # fill the beparams dict
4186 utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
4187 self.op.beparams = self.beparams
4190 self.hvparams = getattr(self.op, "hvparams", {})
4192 if not isinstance(self.hvparams, dict):
4193 raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4194 " dict" % (type(self.hvparams), ),
4197 # check hypervisor parameter syntax (locally)
4198 cluster = self.cfg.GetClusterInfo()
4199 utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
4200 filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
4202 filled_hvp.update(self.hvparams)
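# precedence, lowest to highest: cluster-level defaults for this hypervisor,
# the instance's own hvparams, and finally the overrides from this opcode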
4203 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4204 hv_type.CheckParameterSyntax(filled_hvp)
4205 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4206 self.op.hvparams = self.hvparams
4208 _CheckNodeOnline(self, instance.primary_node)
4210 bep = self.cfg.GetClusterInfo().FillBE(instance)
4211 # check bridges existence
4212 _CheckInstanceBridgesExist(self, instance)
4214 remote_info = self.rpc.call_instance_info(instance.primary_node,
4216 instance.hypervisor)
4217 remote_info.Raise("Error checking node %s" % instance.primary_node,
4218 prereq=True, ecode=errors.ECODE_ENVIRON)
4219 if not remote_info.payload: # not running already
4220 _CheckNodeFreeMemory(self, instance.primary_node,
4221 "starting instance %s" % instance.name,
4222 bep[constants.BE_MEMORY], instance.hypervisor)
4224 def Exec(self, feedback_fn):
4225 """Start the instance.
4228 instance = self.instance
4229 force = self.op.force
4231 self.cfg.MarkInstanceUp(instance.name)
4233 node_current = instance.primary_node
4235 _StartInstanceDisks(self, instance, force)
4237 result = self.rpc.call_instance_start(node_current, instance,
4238 self.hvparams, self.beparams)
4239 msg = result.fail_msg
4241 _ShutdownInstanceDisks(self, instance)
4242 raise errors.OpExecError("Could not start instance: %s" % msg)
4245 class LURebootInstance(LogicalUnit):
4246 """Reboot an instance.
4249 HPATH = "instance-reboot"
4250 HTYPE = constants.HTYPE_INSTANCE
4251 _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
4254 def CheckArguments(self):
4255 """Check the arguments.
4258 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4259 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4261 def ExpandNames(self):
4262 if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
4263 constants.INSTANCE_REBOOT_HARD,
4264 constants.INSTANCE_REBOOT_FULL]:
4265 raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
4266 (constants.INSTANCE_REBOOT_SOFT,
4267 constants.INSTANCE_REBOOT_HARD,
4268 constants.INSTANCE_REBOOT_FULL))
4269 self._ExpandAndLockInstance()
4271 def BuildHooksEnv(self):
4274 This runs on master, primary and secondary nodes of the instance.
4278 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4279 "REBOOT_TYPE": self.op.reboot_type,
4280 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4282 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4283 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4286 def CheckPrereq(self):
4287 """Check prerequisites.
4289 This checks that the instance is in the cluster.
4292 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4293 assert self.instance is not None, \
4294 "Cannot retrieve locked instance %s" % self.op.instance_name
4296 _CheckNodeOnline(self, instance.primary_node)
4298 # check bridges existence
4299 _CheckInstanceBridgesExist(self, instance)
4301 def Exec(self, feedback_fn):
4302 """Reboot the instance.
4305 instance = self.instance
4306 ignore_secondaries = self.op.ignore_secondaries
4307 reboot_type = self.op.reboot_type
4309 node_current = instance.primary_node
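# SOFT and HARD reboots are delegated to the hypervisor on the primary node;
# a FULL reboot is emulated below as a clean shutdown, disk re-assembly and
# a fresh start of the instance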
4311 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4312 constants.INSTANCE_REBOOT_HARD]:
4313 for disk in instance.disks:
4314 self.cfg.SetDiskID(disk, node_current)
4315 result = self.rpc.call_instance_reboot(node_current, instance,
4317 self.shutdown_timeout)
4318 result.Raise("Could not reboot instance")
4320 result = self.rpc.call_instance_shutdown(node_current, instance,
4321 self.shutdown_timeout)
4322 result.Raise("Could not shutdown instance for full reboot")
4323 _ShutdownInstanceDisks(self, instance)
4324 _StartInstanceDisks(self, instance, ignore_secondaries)
4325 result = self.rpc.call_instance_start(node_current, instance, None, None)
4326 msg = result.fail_msg
4328 _ShutdownInstanceDisks(self, instance)
4329 raise errors.OpExecError("Could not start instance for"
4330 " full reboot: %s" % msg)
4332 self.cfg.MarkInstanceUp(instance.name)
4335 class LUShutdownInstance(LogicalUnit):
4336 """Shutdown an instance.
4339 HPATH = "instance-stop"
4340 HTYPE = constants.HTYPE_INSTANCE
4341 _OP_REQP = ["instance_name"]
4344 def CheckArguments(self):
4345 """Check the arguments.
4348 self.timeout = getattr(self.op, "timeout",
4349 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4351 def ExpandNames(self):
4352 self._ExpandAndLockInstance()
4354 def BuildHooksEnv(self):
4357 This runs on master, primary and secondary nodes of the instance.
4360 env = _BuildInstanceHookEnvByObject(self, self.instance)
4361 env["TIMEOUT"] = self.timeout
4362 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4365 def CheckPrereq(self):
4366 """Check prerequisites.
4368 This checks that the instance is in the cluster.
4371 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4372 assert self.instance is not None, \
4373 "Cannot retrieve locked instance %s" % self.op.instance_name
4374 _CheckNodeOnline(self, self.instance.primary_node)
4376 def Exec(self, feedback_fn):
4377 """Shutdown the instance.
4380 instance = self.instance
4381 node_current = instance.primary_node
4382 timeout = self.timeout
4383 self.cfg.MarkInstanceDown(instance.name)
4384 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4385 msg = result.fail_msg
4387 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4389 _ShutdownInstanceDisks(self, instance)
4392 class LUReinstallInstance(LogicalUnit):
4393 """Reinstall an instance.
4396 HPATH = "instance-reinstall"
4397 HTYPE = constants.HTYPE_INSTANCE
4398 _OP_REQP = ["instance_name"]
4401 def ExpandNames(self):
4402 self._ExpandAndLockInstance()
4404 def BuildHooksEnv(self):
4407 This runs on master, primary and secondary nodes of the instance.
4410 env = _BuildInstanceHookEnvByObject(self, self.instance)
4411 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4414 def CheckPrereq(self):
4415 """Check prerequisites.
4417 This checks that the instance is in the cluster and is not running.
4420 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4421 assert instance is not None, \
4422 "Cannot retrieve locked instance %s" % self.op.instance_name
4423 _CheckNodeOnline(self, instance.primary_node)
4425 if instance.disk_template == constants.DT_DISKLESS:
4426 raise errors.OpPrereqError("Instance '%s' has no disks" %
4427 self.op.instance_name,
4429 _CheckInstanceDown(self, instance, "cannot reinstall")
4431 self.op.os_type = getattr(self.op, "os_type", None)
4432 self.op.force_variant = getattr(self.op, "force_variant", False)
4433 if self.op.os_type is not None:
4435 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4436 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4438 self.instance = instance
4440 def Exec(self, feedback_fn):
4441 """Reinstall the instance.
4444 inst = self.instance
4446 if self.op.os_type is not None:
4447 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4448 inst.os = self.op.os_type
4449 self.cfg.Update(inst, feedback_fn)
4451 _StartInstanceDisks(self, inst, None)
4453 feedback_fn("Running the instance OS create scripts...")
4454 # FIXME: pass debug option from opcode to backend
4455 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4456 self.op.debug_level)
4457 result.Raise("Could not install OS for instance %s on node %s" %
4458 (inst.name, inst.primary_node))
4460 _ShutdownInstanceDisks(self, inst)
4463 class LURecreateInstanceDisks(LogicalUnit):
4464 """Recreate an instance's missing disks.
4467 HPATH = "instance-recreate-disks"
4468 HTYPE = constants.HTYPE_INSTANCE
4469 _OP_REQP = ["instance_name", "disks"]
4472 def CheckArguments(self):
4473 """Check the arguments.
4476 if not isinstance(self.op.disks, list):
4477 raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4478 for item in self.op.disks:
4479 if (not isinstance(item, int) or item < 0):
4481 raise errors.OpPrereqError("Invalid disk specification '%s'" %
4482 str(item), errors.ECODE_INVAL)
4484 def ExpandNames(self):
4485 self._ExpandAndLockInstance()
4487 def BuildHooksEnv(self):
4490 This runs on master, primary and secondary nodes of the instance.
4493 env = _BuildInstanceHookEnvByObject(self, self.instance)
4494 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4497 def CheckPrereq(self):
4498 """Check prerequisites.
4500 This checks that the instance is in the cluster and is not running.
4503 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4504 assert instance is not None, \
4505 "Cannot retrieve locked instance %s" % self.op.instance_name
4506 _CheckNodeOnline(self, instance.primary_node)
4508 if instance.disk_template == constants.DT_DISKLESS:
4509 raise errors.OpPrereqError("Instance '%s' has no disks" %
4510 self.op.instance_name, errors.ECODE_INVAL)
4511 _CheckInstanceDown(self, instance, "cannot recreate disks")
4513 if not self.op.disks:
4514 self.op.disks = range(len(instance.disks))
4516 for idx in self.op.disks:
4517 if idx >= len(instance.disks):
4518 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4521 self.instance = instance
4523 def Exec(self, feedback_fn):
4524 """Recreate the disks.
4528 for idx, _ in enumerate(self.instance.disks):
4529 if idx not in self.op.disks: # disk idx has not been passed in
4533 _CreateDisks(self, self.instance, to_skip=to_skip)
4536 class LURenameInstance(LogicalUnit):
4537 """Rename an instance.
4540 HPATH = "instance-rename"
4541 HTYPE = constants.HTYPE_INSTANCE
4542 _OP_REQP = ["instance_name", "new_name"]
4544 def BuildHooksEnv(self):
4547 This runs on master, primary and secondary nodes of the instance.
4550 env = _BuildInstanceHookEnvByObject(self, self.instance)
4551 env["INSTANCE_NEW_NAME"] = self.op.new_name
4552 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4555 def CheckPrereq(self):
4556 """Check prerequisites.
4558 This checks that the instance is in the cluster and is not running.
4561 self.op.instance_name = _ExpandInstanceName(self.cfg,
4562 self.op.instance_name)
4563 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4564 assert instance is not None
4565 _CheckNodeOnline(self, instance.primary_node)
4566 _CheckInstanceDown(self, instance, "cannot rename")
4567 self.instance = instance
4569 # new name verification
4570 name_info = utils.GetHostInfo(self.op.new_name)
4572 self.op.new_name = new_name = name_info.name
4573 instance_list = self.cfg.GetInstanceList()
4574 if new_name in instance_list:
4575 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4576 new_name, errors.ECODE_EXISTS)
4578 if not getattr(self.op, "ignore_ip", False):
4579 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4580 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4581 (name_info.ip, new_name),
4582 errors.ECODE_NOTUNIQUE)
4585 def Exec(self, feedback_fn):
4586 """Reinstall the instance.
4589 inst = self.instance
4590 old_name = inst.name
4592 if inst.disk_template == constants.DT_FILE:
4593 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4595 self.cfg.RenameInstance(inst.name, self.op.new_name)
4596 # Change the instance lock. This is definitely safe while we hold the BGL
4597 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4598 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4600 # re-read the instance from the configuration after rename
4601 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4603 if inst.disk_template == constants.DT_FILE:
4604 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4605 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4606 old_file_storage_dir,
4607 new_file_storage_dir)
4608 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4609 " (but the instance has been renamed in Ganeti)" %
4610 (inst.primary_node, old_file_storage_dir,
4611 new_file_storage_dir))
4613 _StartInstanceDisks(self, inst, None)
4615 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4616 old_name, self.op.debug_level)
4617 msg = result.fail_msg
4619 msg = ("Could not run OS rename script for instance %s on node %s"
4620 " (but the instance has been renamed in Ganeti): %s" %
4621 (inst.name, inst.primary_node, msg))
4622 self.proc.LogWarning(msg)
4624 _ShutdownInstanceDisks(self, inst)
4627 class LURemoveInstance(LogicalUnit):
4628 """Remove an instance.
4631 HPATH = "instance-remove"
4632 HTYPE = constants.HTYPE_INSTANCE
4633 _OP_REQP = ["instance_name", "ignore_failures"]
4636 def CheckArguments(self):
4637 """Check the arguments.
4640 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4641 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4643 def ExpandNames(self):
4644 self._ExpandAndLockInstance()
4645 self.needed_locks[locking.LEVEL_NODE] = []
4646 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4648 def DeclareLocks(self, level):
4649 if level == locking.LEVEL_NODE:
4650 self._LockInstancesNodes()
4652 def BuildHooksEnv(self):
4655 This runs on master, primary and secondary nodes of the instance.
4658 env = _BuildInstanceHookEnvByObject(self, self.instance)
4659 env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4660 nl = [self.cfg.GetMasterNode()]
4661 nl_post = list(self.instance.all_nodes) + nl
4662 return env, nl, nl_post
4664 def CheckPrereq(self):
4665 """Check prerequisites.
4667 This checks that the instance is in the cluster.
4670 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4671 assert self.instance is not None, \
4672 "Cannot retrieve locked instance %s" % self.op.instance_name
4674 def Exec(self, feedback_fn):
4675 """Remove the instance.
4678 instance = self.instance
4679 logging.info("Shutting down instance %s on node %s",
4680 instance.name, instance.primary_node)
4682 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4683 self.shutdown_timeout)
4684 msg = result.fail_msg
4686 if self.op.ignore_failures:
4687 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4689 raise errors.OpExecError("Could not shutdown instance %s on"
4691 " node %s: %s" % (instance.name, instance.primary_node, msg))
4693 logging.info("Removing block devices for instance %s", instance.name)
4695 if not _RemoveDisks(self, instance):
4696 if self.op.ignore_failures:
4697 feedback_fn("Warning: can't remove instance's disks")
4699 raise errors.OpExecError("Can't remove instance's disks")
4701 logging.info("Removing instance %s out of cluster config", instance.name)
4703 self.cfg.RemoveInstance(instance.name)
4704 self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4707 class LUQueryInstances(NoHooksLU):
4708 """Logical unit for querying instances.
4711 # pylint: disable-msg=W0142
4712 _OP_REQP = ["output_fields", "names", "use_locking"]
4714 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4715 "serial_no", "ctime", "mtime", "uuid"]
4716 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4718 "disk_template", "ip", "mac", "bridge",
4719 "nic_mode", "nic_link",
4720 "sda_size", "sdb_size", "vcpus", "tags",
4721 "network_port", "beparams",
4722 r"(disk)\.(size)/([0-9]+)",
4723 r"(disk)\.(sizes)", "disk_usage",
4724 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4725 r"(nic)\.(bridge)/([0-9]+)",
4726 r"(nic)\.(macs|ips|modes|links|bridges)",
4727 r"(disk|nic)\.(count)",
4729 ] + _SIMPLE_FIELDS +
4731 for name in constants.HVS_PARAMETERS
4732 if name not in constants.HVC_GLOBALS] +
4734 for name in constants.BES_PARAMETERS])
4735 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
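# Output fields are either plain names or parameterized patterns; e.g. a
# request for "disk.size/0" matches r"(disk)\.(size)/([0-9]+)" and yields the
# size of the first disk, while "nic.macs" lists the MACs of all NICs. The
# dynamic fields (oper_state, oper_ram, status) require live data from nodes.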
4738 def ExpandNames(self):
4739 _CheckOutputFields(static=self._FIELDS_STATIC,
4740 dynamic=self._FIELDS_DYNAMIC,
4741 selected=self.op.output_fields)
4743 self.needed_locks = {}
4744 self.share_locks[locking.LEVEL_INSTANCE] = 1
4745 self.share_locks[locking.LEVEL_NODE] = 1
4748 self.wanted = _GetWantedInstances(self, self.op.names)
4750 self.wanted = locking.ALL_SET
4752 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4753 self.do_locking = self.do_node_query and self.op.use_locking
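# i.e. node RPC (and therefore locking) is only needed when at least one
# non-static field was requested and the caller explicitly allowed locking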
4755 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4756 self.needed_locks[locking.LEVEL_NODE] = []
4757 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4759 def DeclareLocks(self, level):
4760 if level == locking.LEVEL_NODE and self.do_locking:
4761 self._LockInstancesNodes()
4763 def CheckPrereq(self):
4764 """Check prerequisites.
4769 def Exec(self, feedback_fn):
4770 """Computes the list of nodes and their attributes.
4773 # pylint: disable-msg=R0912
4774 # way too many branches here
4775 all_info = self.cfg.GetAllInstancesInfo()
4776 if self.wanted == locking.ALL_SET:
4777 # caller didn't specify instance names, so ordering is not important
4779 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4781 instance_names = all_info.keys()
4782 instance_names = utils.NiceSort(instance_names)
4784 # caller did specify names, so we must keep the ordering
4786 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4788 tgt_set = all_info.keys()
4789 missing = set(self.wanted).difference(tgt_set)
4791 raise errors.OpExecError("Some instances were removed before"
4792 " retrieving their data: %s" % missing)
4793 instance_names = self.wanted
4795 instance_list = [all_info[iname] for iname in instance_names]
4797 # begin data gathering
4799 nodes = frozenset([inst.primary_node for inst in instance_list])
4800 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4804 if self.do_node_query:
4806 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4808 result = node_data[name]
4810 # offline nodes will be in both lists
4811 off_nodes.append(name)
4813 bad_nodes.append(name)
4816 live_data.update(result.payload)
4817 # else no instance is alive
4819 live_data = dict([(name, {}) for name in instance_names])
4821 # end data gathering
4826 cluster = self.cfg.GetClusterInfo()
4827 for instance in instance_list:
4829 i_hv = cluster.FillHV(instance, skip_globals=True)
4830 i_be = cluster.FillBE(instance)
4831 i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4832 nic.nicparams) for nic in instance.nics]
4833 for field in self.op.output_fields:
4834 st_match = self._FIELDS_STATIC.Matches(field)
4835 if field in self._SIMPLE_FIELDS:
4836 val = getattr(instance, field)
4837 elif field == "pnode":
4838 val = instance.primary_node
4839 elif field == "snodes":
4840 val = list(instance.secondary_nodes)
4841 elif field == "admin_state":
4842 val = instance.admin_up
4843 elif field == "oper_state":
4844 if instance.primary_node in bad_nodes:
4847 val = bool(live_data.get(instance.name))
4848 elif field == "status":
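# possible values: ERROR_nodeoffline and ERROR_nodedown for node problems,
# otherwise running/ERROR_up/ERROR_down/ADMIN_down depending on the admin
# and live state of the instance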
4849 if instance.primary_node in off_nodes:
4850 val = "ERROR_nodeoffline"
4851 elif instance.primary_node in bad_nodes:
4852 val = "ERROR_nodedown"
4854 running = bool(live_data.get(instance.name))
4856 if instance.admin_up:
4861 if instance.admin_up:
4865 elif field == "oper_ram":
4866 if instance.primary_node in bad_nodes:
4868 elif instance.name in live_data:
4869 val = live_data[instance.name].get("memory", "?")
4872 elif field == "vcpus":
4873 val = i_be[constants.BE_VCPUS]
4874 elif field == "disk_template":
4875 val = instance.disk_template
4878 val = instance.nics[0].ip
4881 elif field == "nic_mode":
4883 val = i_nicp[0][constants.NIC_MODE]
4886 elif field == "nic_link":
4888 val = i_nicp[0][constants.NIC_LINK]
4891 elif field == "bridge":
4892 if (instance.nics and
4893 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4894 val = i_nicp[0][constants.NIC_LINK]
4897 elif field == "mac":
4899 val = instance.nics[0].mac
4902 elif field == "sda_size" or field == "sdb_size":
4903 idx = ord(field[2]) - ord('a')
4905 val = instance.FindDisk(idx).size
4906 except errors.OpPrereqError:
4908 elif field == "disk_usage": # total disk usage per node
4909 disk_sizes = [{'size': disk.size} for disk in instance.disks]
4910 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4911 elif field == "tags":
4912 val = list(instance.GetTags())
4913 elif field == "hvparams":
4915 elif (field.startswith(HVPREFIX) and
4916 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4917 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4918 val = i_hv.get(field[len(HVPREFIX):], None)
4919 elif field == "beparams":
4921 elif (field.startswith(BEPREFIX) and
4922 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4923 val = i_be.get(field[len(BEPREFIX):], None)
4924 elif st_match and st_match.groups():
4925 # matches a variable list
4926 st_groups = st_match.groups()
4927 if st_groups and st_groups[0] == "disk":
4928 if st_groups[1] == "count":
4929 val = len(instance.disks)
4930 elif st_groups[1] == "sizes":
4931 val = [disk.size for disk in instance.disks]
4932 elif st_groups[1] == "size":
4934 val = instance.FindDisk(st_groups[2]).size
4935 except errors.OpPrereqError:
4938 assert False, "Unhandled disk parameter"
4939 elif st_groups[0] == "nic":
4940 if st_groups[1] == "count":
4941 val = len(instance.nics)
4942 elif st_groups[1] == "macs":
4943 val = [nic.mac for nic in instance.nics]
4944 elif st_groups[1] == "ips":
4945 val = [nic.ip for nic in instance.nics]
4946 elif st_groups[1] == "modes":
4947 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4948 elif st_groups[1] == "links":
4949 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4950 elif st_groups[1] == "bridges":
4953 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4954 val.append(nicp[constants.NIC_LINK])
4959 nic_idx = int(st_groups[2])
4960 if nic_idx >= len(instance.nics):
4963 if st_groups[1] == "mac":
4964 val = instance.nics[nic_idx].mac
4965 elif st_groups[1] == "ip":
4966 val = instance.nics[nic_idx].ip
4967 elif st_groups[1] == "mode":
4968 val = i_nicp[nic_idx][constants.NIC_MODE]
4969 elif st_groups[1] == "link":
4970 val = i_nicp[nic_idx][constants.NIC_LINK]
4971 elif st_groups[1] == "bridge":
4972 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4973 if nic_mode == constants.NIC_MODE_BRIDGED:
4974 val = i_nicp[nic_idx][constants.NIC_LINK]
4978 assert False, "Unhandled NIC parameter"
4980 assert False, ("Declared but unhandled variable parameter '%s'" %
4983 assert False, "Declared but unhandled parameter '%s'" % field
4990 class LUFailoverInstance(LogicalUnit):
4991 """Failover an instance.
4994 HPATH = "instance-failover"
4995 HTYPE = constants.HTYPE_INSTANCE
4996 _OP_REQP = ["instance_name", "ignore_consistency"]
4999 def CheckArguments(self):
5000 """Check the arguments.
5003 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5004 constants.DEFAULT_SHUTDOWN_TIMEOUT)
5006 def ExpandNames(self):
5007 self._ExpandAndLockInstance()
5008 self.needed_locks[locking.LEVEL_NODE] = []
5009 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5011 def DeclareLocks(self, level):
5012 if level == locking.LEVEL_NODE:
5013 self._LockInstancesNodes()
5015 def BuildHooksEnv(self):
5018 This runs on master, primary and secondary nodes of the instance.
5021 instance = self.instance
5022 source_node = instance.primary_node
5023 target_node = instance.secondary_nodes[0]
5025 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5026 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5027 "OLD_PRIMARY": source_node,
5028 "OLD_SECONDARY": target_node,
5029 "NEW_PRIMARY": target_node,
5030 "NEW_SECONDARY": source_node,
5032 env.update(_BuildInstanceHookEnvByObject(self, instance))
5033 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5035 nl_post.append(source_node)
5036 return env, nl, nl_post
5038 def CheckPrereq(self):
5039 """Check prerequisites.
5041 This checks that the instance is in the cluster.
5044 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5045 assert self.instance is not None, \
5046 "Cannot retrieve locked instance %s" % self.op.instance_name
5048 bep = self.cfg.GetClusterInfo().FillBE(instance)
5049 if instance.disk_template not in constants.DTS_NET_MIRROR:
5050 raise errors.OpPrereqError("Instance's disk layout is not"
5051 " network mirrored, cannot failover.",
5054 secondary_nodes = instance.secondary_nodes
5055 if not secondary_nodes:
5056 raise errors.ProgrammerError("no secondary node but using "
5057 "a mirrored disk template")
5059 target_node = secondary_nodes[0]
5060 _CheckNodeOnline(self, target_node)
5061 _CheckNodeNotDrained(self, target_node)
5062 if instance.admin_up:
5063 # check memory requirements on the secondary node
5064 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5065 instance.name, bep[constants.BE_MEMORY],
5066 instance.hypervisor)
5068 self.LogInfo("Not checking memory on the secondary node as"
5069 " instance will not be started")
5071 # check bridge existence
5072 _CheckInstanceBridgesExist(self, instance, node=target_node)
5074 def Exec(self, feedback_fn):
5075 """Failover an instance.
5077 The failover is done by shutting it down on its present node and
5078 starting it on the secondary.
5081 instance = self.instance
5083 source_node = instance.primary_node
5084 target_node = instance.secondary_nodes[0]
5086 if instance.admin_up:
5087 feedback_fn("* checking disk consistency between source and target")
5088 for dev in instance.disks:
5089 # for drbd, these are drbd over lvm
5090 if not _CheckDiskConsistency(self, dev, target_node, False):
5091 if not self.op.ignore_consistency:
5092 raise errors.OpExecError("Disk %s is degraded on target node,"
5093 " aborting failover." % dev.iv_name)
5095 feedback_fn("* not checking disk consistency as instance is not running")
5097 feedback_fn("* shutting down instance on source node")
5098 logging.info("Shutting down instance %s on node %s",
5099 instance.name, source_node)
5101 result = self.rpc.call_instance_shutdown(source_node, instance,
5102 self.shutdown_timeout)
5103 msg = result.fail_msg
5105 if self.op.ignore_consistency:
5106 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5107 " Proceeding anyway. Please make sure node"
5108 " %s is down. Error details: %s",
5109 instance.name, source_node, source_node, msg)
5111 raise errors.OpExecError("Could not shutdown instance %s on"
5113 " node %s: %s" % (instance.name, source_node, msg))
5115 feedback_fn("* deactivating the instance's disks on source node")
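# the primary node is often unreachable during a failover, which is why
# errors while shutting down its disks are only warned about
# (ignore_primary=True)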
5116 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5117 raise errors.OpExecError("Can't shut down the instance's disks.")
5119 instance.primary_node = target_node
5120 # distribute new instance config to the other nodes
5121 self.cfg.Update(instance, feedback_fn)
5123 # Only start the instance if it's marked as up
5124 if instance.admin_up:
5125 feedback_fn("* activating the instance's disks on target node")
5126 logging.info("Starting instance %s on node %s",
5127 instance.name, target_node)
5129 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5130 ignore_secondaries=True)
5132 _ShutdownInstanceDisks(self, instance)
5133 raise errors.OpExecError("Can't activate the instance's disks")
5135 feedback_fn("* starting the instance on the target node")
5136 result = self.rpc.call_instance_start(target_node, instance, None, None)
5137 msg = result.fail_msg
5139 _ShutdownInstanceDisks(self, instance)
5140 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5141 (instance.name, target_node, msg))
5144 class LUMigrateInstance(LogicalUnit):
5145 """Migrate an instance.
5147 This is a live migration: unlike failover, the instance is moved to its
5148 secondary node without being shut down first.
5151 HPATH = "instance-migrate"
5152 HTYPE = constants.HTYPE_INSTANCE
5153 _OP_REQP = ["instance_name", "live", "cleanup"]
5157 def ExpandNames(self):
5158 self._ExpandAndLockInstance()
5160 self.needed_locks[locking.LEVEL_NODE] = []
5161 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5163 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5164 self.op.live, self.op.cleanup)
5165 self.tasklets = [self._migrater]
5167 def DeclareLocks(self, level):
5168 if level == locking.LEVEL_NODE:
5169 self._LockInstancesNodes()
5171 def BuildHooksEnv(self):
5174 This runs on master, primary and secondary nodes of the instance.
5177 instance = self._migrater.instance
5178 source_node = instance.primary_node
5179 target_node = instance.secondary_nodes[0]
5180 env = _BuildInstanceHookEnvByObject(self, instance)
5181 env["MIGRATE_LIVE"] = self.op.live
5182 env["MIGRATE_CLEANUP"] = self.op.cleanup
5184 "OLD_PRIMARY": source_node,
5185 "OLD_SECONDARY": target_node,
5186 "NEW_PRIMARY": target_node,
5187 "NEW_SECONDARY": source_node,
5189 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5191 nl_post.append(source_node)
5192 return env, nl, nl_post
5195 class LUMoveInstance(LogicalUnit):
5196 """Move an instance by data-copying.
5199 HPATH = "instance-move"
5200 HTYPE = constants.HTYPE_INSTANCE
5201 _OP_REQP = ["instance_name", "target_node"]
5204 def CheckArguments(self):
5205 """Check the arguments.
5208 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5209 constants.DEFAULT_SHUTDOWN_TIMEOUT)
5211 def ExpandNames(self):
5212 self._ExpandAndLockInstance()
5213 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5214 self.op.target_node = target_node
5215 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5216 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5218 def DeclareLocks(self, level):
5219 if level == locking.LEVEL_NODE:
5220 self._LockInstancesNodes(primary_only=True)
5222 def BuildHooksEnv(self):
5225 This runs on the master, the primary node and the target node.
5229 "TARGET_NODE": self.op.target_node,
5230 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5232 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5233 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5234 self.op.target_node]
5237 def CheckPrereq(self):
5238 """Check prerequisites.
5240 This checks that the instance is in the cluster.
5243 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5244 assert self.instance is not None, \
5245 "Cannot retrieve locked instance %s" % self.op.instance_name
5247 node = self.cfg.GetNodeInfo(self.op.target_node)
5248 assert node is not None, \
5249 "Cannot retrieve locked node %s" % self.op.target_node
5251 self.target_node = target_node = node.name
5253 if target_node == instance.primary_node:
5254 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5255 (instance.name, target_node),
5258 bep = self.cfg.GetClusterInfo().FillBE(instance)
5260 for idx, dsk in enumerate(instance.disks):
5261 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5262 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5263 " cannot copy" % idx, errors.ECODE_STATE)
5265 _CheckNodeOnline(self, target_node)
5266 _CheckNodeNotDrained(self, target_node)
5268 if instance.admin_up:
5269 # check memory requirements on the target node
5270 _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
5271 instance.name, bep[constants.BE_MEMORY],
5272 instance.hypervisor)
5274 self.LogInfo("Not checking memory on the target node as"
5275 " instance will not be started")
5277 # check bridge existence
5278 _CheckInstanceBridgesExist(self, instance, node=target_node)
5280 def Exec(self, feedback_fn):
5281 """Move an instance.
5283 The move is done by shutting it down on its present node, copying
5284 the data over (slow) and starting it on the new node.
5287 instance = self.instance
5289 source_node = instance.primary_node
5290 target_node = self.target_node
5292 self.LogInfo("Shutting down instance %s on source node %s",
5293 instance.name, source_node)
5295 result = self.rpc.call_instance_shutdown(source_node, instance,
5296 self.shutdown_timeout)
5297 msg = result.fail_msg
5299 if getattr(self.op, "ignore_consistency", False):  # the move opcode may not define this slot
5300 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5301 " Proceeding anyway. Please make sure node"
5302 " %s is down. Error details: %s",
5303 instance.name, source_node, source_node, msg)
5305 raise errors.OpExecError("Could not shutdown instance %s on"
5307 " node %s: %s" % (instance.name, source_node, msg))
5309 # create the target disks
5311 _CreateDisks(self, instance, target_node=target_node)
5312 except errors.OpExecError:
5313 self.LogWarning("Device creation failed, reverting...")
5315 _RemoveDisks(self, instance, target_node=target_node)
5317 self.cfg.ReleaseDRBDMinors(instance.name)
5320 cluster_name = self.cfg.GetClusterInfo().cluster_name
5323 # activate, get path, copy the data over
5324 for idx, disk in enumerate(instance.disks):
5325 self.LogInfo("Copying data for disk %d", idx)
5326 result = self.rpc.call_blockdev_assemble(target_node, disk,
5327 instance.name, True)
5329 self.LogWarning("Can't assemble newly created disk %d: %s",
5330 idx, result.fail_msg)
5331 errs.append(result.fail_msg)
5333 dev_path = result.payload
5334 result = self.rpc.call_blockdev_export(source_node, disk,
5335 target_node, dev_path,
5338 self.LogWarning("Can't copy data over for disk %d: %s",
5339 idx, result.fail_msg)
5340 errs.append(result.fail_msg)
5344 self.LogWarning("Some disks failed to copy, aborting")
5346 _RemoveDisks(self, instance, target_node=target_node)
5348 self.cfg.ReleaseDRBDMinors(instance.name)
5349 raise errors.OpExecError("Errors during disk copy: %s" %
5352 instance.primary_node = target_node
5353 self.cfg.Update(instance, feedback_fn)
5355 self.LogInfo("Removing the disks on the original node")
5356 _RemoveDisks(self, instance, target_node=source_node)
5358 # Only start the instance if it's marked as up
5359 if instance.admin_up:
5360 self.LogInfo("Starting instance %s on node %s",
5361 instance.name, target_node)
5363 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5364 ignore_secondaries=True)
5366 _ShutdownInstanceDisks(self, instance)
5367 raise errors.OpExecError("Can't activate the instance's disks")
5369 result = self.rpc.call_instance_start(target_node, instance, None, None)
5370 msg = result.fail_msg
5372 _ShutdownInstanceDisks(self, instance)
5373 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5374 (instance.name, target_node, msg))
5377 class LUMigrateNode(LogicalUnit):
5378 """Migrate all instances from a node.
5381 HPATH = "node-migrate"
5382 HTYPE = constants.HTYPE_NODE
5383 _OP_REQP = ["node_name", "live"]
5386 def ExpandNames(self):
5387 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5389 self.needed_locks = {
5390 locking.LEVEL_NODE: [self.op.node_name],
5393 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5395 # Create tasklets for migrating instances for all instances on this node
5399 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5400 logging.debug("Migrating instance %s", inst.name)
5401 names.append(inst.name)
5403 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5405 self.tasklets = tasklets
5407 # Declare instance locks
5408 self.needed_locks[locking.LEVEL_INSTANCE] = names
5410 def DeclareLocks(self, level):
5411 if level == locking.LEVEL_NODE:
5412 self._LockInstancesNodes()
5414 def BuildHooksEnv(self):
5417 This runs on the master, the primary and all the secondaries.
5421 "NODE_NAME": self.op.node_name,
5424 nl = [self.cfg.GetMasterNode()]
5426 return (env, nl, nl)
5429 class TLMigrateInstance(Tasklet):
5430 def __init__(self, lu, instance_name, live, cleanup):
5431 """Initializes this class.
5434 Tasklet.__init__(self, lu)
5437 self.instance_name = instance_name
5439 self.cleanup = cleanup
5441 def CheckPrereq(self):
5442 """Check prerequisites.
5444 This checks that the instance is in the cluster.
5447 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5448 instance = self.cfg.GetInstanceInfo(instance_name)
5449 assert instance is not None
5451 if instance.disk_template != constants.DT_DRBD8:
5452 raise errors.OpPrereqError("Instance's disk layout is not"
5453 " drbd8, cannot migrate.", errors.ECODE_STATE)
5455 secondary_nodes = instance.secondary_nodes
5456 if not secondary_nodes:
5457 raise errors.ConfigurationError("No secondary node but using"
5458 " drbd8 disk template")
5460 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5462 target_node = secondary_nodes[0]
5463 # check memory requirements on the secondary node
5464 _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5465 instance.name, i_be[constants.BE_MEMORY],
5466 instance.hypervisor)
5468 # check bridge existence
5469 _CheckInstanceBridgesExist(self, instance, node=target_node)
5471 if not self.cleanup:
5472 _CheckNodeNotDrained(self, target_node)
5473 result = self.rpc.call_instance_migratable(instance.primary_node,
5475 result.Raise("Can't migrate, please use failover",
5476 prereq=True, ecode=errors.ECODE_STATE)
5478 self.instance = instance
5480 def _WaitUntilSync(self):
5481 """Poll with custom rpc for disk sync.
5483 This uses our own step-based rpc call.
5486 self.feedback_fn("* wait until resync is done")
5490 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5492 self.instance.disks)
5494 for node, nres in result.items():
5495 nres.Raise("Cannot resync disks on node %s" % node)
5496 node_done, node_percent = nres.payload
5497 all_done = all_done and node_done
5498 if node_percent is not None:
5499 min_percent = min(min_percent, node_percent)
5501 if min_percent < 100:
5502 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5505 def _EnsureSecondary(self, node):
5506 """Demote a node to secondary.
5509 self.feedback_fn("* switching node %s to secondary mode" % node)
5511 for dev in self.instance.disks:
5512 self.cfg.SetDiskID(dev, node)
5514 result = self.rpc.call_blockdev_close(node, self.instance.name,
5515 self.instance.disks)
5516 result.Raise("Cannot change disk to secondary on node %s" % node)
5518 def _GoStandalone(self):
5519 """Disconnect from the network.
5522 self.feedback_fn("* changing into standalone mode")
5523 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5524 self.instance.disks)
5525 for node, nres in result.items():
5526 nres.Raise("Cannot disconnect disks node %s" % node)
5528 def _GoReconnect(self, multimaster):
5529 """Reconnect to the network.
5535 msg = "single-master"
5536 self.feedback_fn("* changing disks into %s mode" % msg)
5537 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5538 self.instance.disks,
5539 self.instance.name, multimaster)
5540 for node, nres in result.items():
5541 nres.Raise("Cannot change disks config on node %s" % node)
5543 def _ExecCleanup(self):
5544 """Try to cleanup after a failed migration.
5546 The cleanup is done by:
5547 - check that the instance is running only on one node
5548 (and update the config if needed)
5549 - change disks on its secondary node to secondary
5550 - wait until disks are fully synchronized
5551 - disconnect from the network
5552 - change disks into single-master mode
5553 - wait again until disks are fully synchronized
5556 instance = self.instance
5557 target_node = self.target_node
5558 source_node = self.source_node
5560 # check running on only one node
5561 self.feedback_fn("* checking where the instance actually runs"
5562 " (if this hangs, the hypervisor might be in"
5564 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5565 for node, result in ins_l.items():
5566 result.Raise("Can't contact node %s" % node)
5568 runningon_source = instance.name in ins_l[source_node].payload
5569 runningon_target = instance.name in ins_l[target_node].payload
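# four possible cases: running on both nodes or on neither is inconsistent
# and aborts below; running only on the target means the migration actually
# succeeded; running only on the source means it never really started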
5571 if runningon_source and runningon_target:
5572 raise errors.OpExecError("Instance seems to be running on two nodes,"
5573 " or the hypervisor is confused. You will have"
5574 " to ensure manually that it runs only on one"
5575 " and restart this operation.")
5577 if not (runningon_source or runningon_target):
5578 raise errors.OpExecError("Instance does not seem to be running at all."
5579 " In this case, it's safer to repair by"
5580 " running 'gnt-instance stop' to ensure disk"
5581 " shutdown, and then restarting it.")
5583 if runningon_target:
5584 # the migration has actually succeeded, we need to update the config
5585 self.feedback_fn("* instance running on secondary node (%s),"
5586 " updating config" % target_node)
5587 instance.primary_node = target_node
5588 self.cfg.Update(instance, self.feedback_fn)
5589 demoted_node = source_node
5591 self.feedback_fn("* instance confirmed to be running on its"
5592 " primary node (%s)" % source_node)
5593 demoted_node = target_node
5595 self._EnsureSecondary(demoted_node)
5597 self._WaitUntilSync()
5598 except errors.OpExecError:
5599 # we ignore errors here, since if the device is standalone, it
5600 # won't be able to sync
5602 self._GoStandalone()
5603 self._GoReconnect(False)
5604 self._WaitUntilSync()
5606 self.feedback_fn("* done")
5608 def _RevertDiskStatus(self):
5609 """Try to revert the disk status after a failed migration.
5612 target_node = self.target_node
5614 self._EnsureSecondary(target_node)
5615 self._GoStandalone()
5616 self._GoReconnect(False)
5617 self._WaitUntilSync()
5618 except errors.OpExecError, err:
5619 self.lu.LogWarning("Migration failed and I can't reconnect the"
5620 " drives: error '%s'\n"
5621 "Please look and recover the instance status" %
5624 def _AbortMigration(self):
5625 """Call the hypervisor code to abort a started migration.
5628 instance = self.instance
5629 target_node = self.target_node
5630 migration_info = self.migration_info
5632 abort_result = self.rpc.call_finalize_migration(target_node,
5636 abort_msg = abort_result.fail_msg
5638 logging.error("Aborting migration failed on target node %s: %s",
5639 target_node, abort_msg)
5640 # Don't raise an exception here, as we still have to try to revert the
5641 # disk status, even if this step failed.
5643 def _ExecMigration(self):
5644 """Migrate an instance.
5646 The migration is done by:
5647 - change the disks into dual-master mode
5648 - wait until disks are fully synchronized again
5649 - migrate the instance
5650 - change disks on the new secondary node (the old primary) to secondary
5651 - wait until disks are fully synchronized
5652 - change disks into single-master mode
5655 instance = self.instance
5656 target_node = self.target_node
5657 source_node = self.source_node
5659 self.feedback_fn("* checking disk consistency between source and target")
5660 for dev in instance.disks:
5661 if not _CheckDiskConsistency(self, dev, target_node, False):
5662 raise errors.OpExecError("Disk %s is degraded or not fully"
5663 " synchronized on target node,"
5664 " aborting migrate." % dev.iv_name)
5666 # First get the migration information from the remote node
5667 result = self.rpc.call_migration_info(source_node, instance)
5668 msg = result.fail_msg
5670 log_err = ("Failed fetching source migration information from %s: %s" %
5672 logging.error(log_err)
5673 raise errors.OpExecError(log_err)
5675 self.migration_info = migration_info = result.payload
5677 # Then switch the disks to master/master mode
5678 self._EnsureSecondary(target_node)
5679 self._GoStandalone()
5680 self._GoReconnect(True)
5681 self._WaitUntilSync()
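# at this point both DRBD peers are connected in dual-primary (multimaster)
# mode, which is the precondition for live migration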
5683 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5684 result = self.rpc.call_accept_instance(target_node,
5687 self.nodes_ip[target_node])
5689 msg = result.fail_msg
5691 logging.error("Instance pre-migration failed, trying to revert"
5692 " disk status: %s", msg)
5693 self.feedback_fn("Pre-migration failed, aborting")
5694 self._AbortMigration()
5695 self._RevertDiskStatus()
5696 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5697 (instance.name, msg))
5699 self.feedback_fn("* migrating instance to %s" % target_node)
5701 result = self.rpc.call_instance_migrate(source_node, instance,
5702 self.nodes_ip[target_node],
5704 msg = result.fail_msg
5706 logging.error("Instance migration failed, trying to revert"
5707 " disk status: %s", msg)
5708 self.feedback_fn("Migration failed, aborting")
5709 self._AbortMigration()
5710 self._RevertDiskStatus()
5711 raise errors.OpExecError("Could not migrate instance %s: %s" %
5712 (instance.name, msg))
5715 instance.primary_node = target_node
5716 # distribute new instance config to the other nodes
5717 self.cfg.Update(instance, self.feedback_fn)
5719 result = self.rpc.call_finalize_migration(target_node,
5723 msg = result.fail_msg
5725 logging.error("Instance migration succeeded, but finalization failed:"
5727 raise errors.OpExecError("Could not finalize instance migration: %s" %
5730 self._EnsureSecondary(source_node)
5731 self._WaitUntilSync()
5732 self._GoStandalone()
5733 self._GoReconnect(False)
5734 self._WaitUntilSync()
5736 self.feedback_fn("* done")
5738 def Exec(self, feedback_fn):
5739 """Perform the migration.
5742 feedback_fn("Migrating instance %s" % self.instance.name)
5744 self.feedback_fn = feedback_fn
5746 self.source_node = self.instance.primary_node
5747 self.target_node = self.instance.secondary_nodes[0]
5748 self.all_nodes = [self.source_node, self.target_node]
5750 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5751 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5755 return self._ExecCleanup()
5757 return self._ExecMigration()
5760 def _CreateBlockDev(lu, node, instance, device, force_create,
5762 """Create a tree of block devices on a given node.
5764 If this device type has to be created on secondaries, create it and all its children.
5767 If not, just recurse to children keeping the same 'force' value.
5769 @param lu: the lu on whose behalf we execute
5770 @param node: the node on which to create the device
5771 @type instance: L{objects.Instance}
5772 @param instance: the instance which owns the device
5773 @type device: L{objects.Disk}
5774 @param device: the device to create
5775 @type force_create: boolean
5776 @param force_create: whether to force creation of this device; this
5777 will be changed to True whenever we find a device for which
5778 CreateOnSecondary() returns True
5779 @param info: the extra 'metadata' we should attach to the device
5780 (this will be represented as a LVM tag)
5781 @type force_open: boolean
5782 @param force_open: this parameter will be passed to the
5783 L{backend.BlockdevCreate} function where it specifies
5784 whether we run on primary or not, and it affects both
5785 the child assembly and the device's own Open() execution
5788 if device.CreateOnSecondary():
5792 for child in device.children:
5793 _CreateBlockDev(lu, node, instance, child, force_create,
5796 if not force_create:
5799 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5802 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5803 """Create a single block device on a given node.
5805 This will not recurse over children of the device, so they must be
5806 created in advance.
5808 @param lu: the lu on whose behalf we execute
5809 @param node: the node on which to create the device
5810 @type instance: L{objects.Instance}
5811 @param instance: the instance which owns the device
5812 @type device: L{objects.Disk}
5813 @param device: the device to create
5814 @param info: the extra 'metadata' we should attach to the device
5815 (this will be represented as a LVM tag)
5816 @type force_open: boolean
5817 @param force_open: this parameter will be passed to the
5818 L{backend.BlockdevCreate} function where it specifies
5819 whether we run on primary or not, and it affects both
5820 the child assembly and the device's own Open() execution
5823 lu.cfg.SetDiskID(device, node)
5824 result = lu.rpc.call_blockdev_create(node, device, device.size,
5825 instance.name, force_open, info)
5826 result.Raise("Can't create block device %s on"
5827 " node %s for instance %s" % (device, node, instance.name))
5828 if device.physical_id is None:
5829 device.physical_id = result.payload
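# Illustrative call (names and values hypothetical): create one LV-type
# disk on a node, without forcing it open:
#   _CreateSingleBlockDev(lu, "node1.example.com", instance, lv_disk,
#                         _GetInstanceInfoText(instance), force_open=False)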
5832 def _GenerateUniqueNames(lu, exts):
5833 """Generate a suitable LV name.
5835 This will generate a logical volume name for the given instance.
5840 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5841 results.append("%s%s" % (new_id, val))
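# Illustrative result (IDs made up): for exts [".disk0_data", ".disk0_meta"]
# this returns something like ["3d0f….disk0_data", "91c2….disk0_meta"],
# i.e. a freshly generated unique ID per requested suffix.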
5845 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5847 """Generate a drbd8 device complete with its children.
5850 port = lu.cfg.AllocatePort()
5851 vgname = lu.cfg.GetVGName()
5852 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5853 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5854 logical_id=(vgname, names[0]))
5855 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5856 logical_id=(vgname, names[1]))
5857 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5858 logical_id=(primary, secondary, port,
5861 children=[dev_data, dev_meta],
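# (Each DRBD8 branch is therefore a small tree: a DRBD device whose
# children are two plain LVs, the data volume of the requested size and a
# 128 MB metadata volume, with the logical_id tying together the two
# nodes, the network port, the two minors and the shared secret.)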
5866 def _GenerateDiskTemplate(lu, template_name,
5867 instance_name, primary_node,
5868 secondary_nodes, disk_info,
5869 file_storage_dir, file_driver,
5871 """Generate the entire disk layout for a given template type.
5874 #TODO: compute space requirements
5876 vgname = lu.cfg.GetVGName()
5877 disk_count = len(disk_info)
5879 if template_name == constants.DT_DISKLESS:
5881 elif template_name == constants.DT_PLAIN:
5882 if len(secondary_nodes) != 0:
5883 raise errors.ProgrammerError("Wrong template configuration")
5885 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5886 for i in range(disk_count)])
5887 for idx, disk in enumerate(disk_info):
5888 disk_index = idx + base_index
5889 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5890 logical_id=(vgname, names[idx]),
5891 iv_name="disk/%d" % disk_index,
5893 disks.append(disk_dev)
5894 elif template_name == constants.DT_DRBD8:
5895 if len(secondary_nodes) != 1:
5896 raise errors.ProgrammerError("Wrong template configuration")
5897 remote_node = secondary_nodes[0]
5898 minors = lu.cfg.AllocateDRBDMinor(
5899 [primary_node, remote_node] * len(disk_info), instance_name)
5902 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5903 for i in range(disk_count)]):
5904 names.append(lv_prefix + "_data")
5905 names.append(lv_prefix + "_meta")
5906 for idx, disk in enumerate(disk_info):
5907 disk_index = idx + base_index
5908 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5909 disk["size"], names[idx*2:idx*2+2],
5910 "disk/%d" % disk_index,
5911 minors[idx*2], minors[idx*2+1])
5912 disk_dev.mode = disk["mode"]
5913 disks.append(disk_dev)
5914 elif template_name == constants.DT_FILE:
5915 if len(secondary_nodes) != 0:
5916 raise errors.ProgrammerError("Wrong template configuration")
5918 _RequireFileStorage()
5920 for idx, disk in enumerate(disk_info):
5921 disk_index = idx + base_index
5922 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5923 iv_name="disk/%d" % disk_index,
5924 logical_id=(file_driver,
5925 "%s/disk%d" % (file_storage_dir,
5928 disks.append(disk_dev)
5930 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5934 def _GetInstanceInfoText(instance):
5935 """Compute that text that should be added to the disk's metadata.
5938 return "originstname+%s" % instance.name
5941 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5942 """Create all disks for an instance.
5944 This abstracts away some work from AddInstance.
5946 @type lu: L{LogicalUnit}
5947 @param lu: the logical unit on whose behalf we execute
5948 @type instance: L{objects.Instance}
5949 @param instance: the instance whose disks we should create
5951 @param to_skip: list of indices to skip
5952 @type target_node: string
5953 @param target_node: if passed, overrides the target node for creation
5955 @return: the success of the creation
5958 info = _GetInstanceInfoText(instance)
5959 if target_node is None:
5960 pnode = instance.primary_node
5961 all_nodes = instance.all_nodes
5966 if instance.disk_template == constants.DT_FILE:
5967 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5968 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5970 result.Raise("Failed to create directory '%s' on"
5971 " node %s" % (file_storage_dir, pnode))
5973 # Note: this needs to be kept in sync with adding of disks in
5974 # LUSetInstanceParams
5975 for idx, device in enumerate(instance.disks):
5976 if to_skip and idx in to_skip:
5978 logging.info("Creating volume %s for instance %s",
5979 device.iv_name, instance.name)
5981 for node in all_nodes:
5982 f_create = node == pnode
5983 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
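# (f_create doubles as force_create and force_open: both are True only on
# the primary node. Secondaries still get devices created where the disk
# type requires it (CreateOnSecondary), but they are not forced open
# there.)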
5986 def _RemoveDisks(lu, instance, target_node=None):
5987 """Remove all disks for an instance.
5989 This abstracts away some work from `AddInstance()` and
5990 `RemoveInstance()`. Note that in case some of the devices couldn't
5991 be removed, the removal will continue with the other ones (compare
5992 with `_CreateDisks()`).
5994 @type lu: L{LogicalUnit}
5995 @param lu: the logical unit on whose behalf we execute
5996 @type instance: L{objects.Instance}
5997 @param instance: the instance whose disks we should remove
5998 @type target_node: string
5999 @param target_node: used to override the node on which to remove the disks
6001 @return: the success of the removal
6004 logging.info("Removing block devices for instance %s", instance.name)
6007 for device in instance.disks:
6009 edata = [(target_node, device)]
6011 edata = device.ComputeNodeTree(instance.primary_node)
6012 for node, disk in edata:
6013 lu.cfg.SetDiskID(disk, node)
6014 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6016 lu.LogWarning("Could not remove block device %s on node %s,"
6017 " continuing anyway: %s", device.iv_name, node, msg)
6020 if instance.disk_template == constants.DT_FILE:
6021 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6025 tgt = instance.primary_node
6026 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6028 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6029 file_storage_dir, instance.primary_node, result.fail_msg)
6035 def _ComputeDiskSize(disk_template, disks):
6036 """Compute disk size requirements in the volume group
6039 # Required free disk space as a function of disk and swap space
6041 constants.DT_DISKLESS: None,
6042 constants.DT_PLAIN: sum(d["size"] for d in disks),
6043 # 128 MB are added for drbd metadata for each disk
6044 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6045 constants.DT_FILE: None,
6048 if disk_template not in req_size_dict:
6049 raise errors.ProgrammerError("Disk template '%s' size requirement"
6050 " is unknown" % disk_template)
6052 return req_size_dict[disk_template]
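# Worked example: two DRBD8 disks of 10240 and 20480 MB require
# (10240 + 128) + (20480 + 128) = 30976 MB in the volume group, while
# file-based and diskless instances need no VG space at all (None).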
6055 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6056 """Hypervisor parameter validation.
6058 This function abstracts the hypervisor parameter validation to be
6059 used in both instance create and instance modify.
6061 @type lu: L{LogicalUnit}
6062 @param lu: the logical unit for which we check
6063 @type nodenames: list
6064 @param nodenames: the list of nodes on which we should check
6065 @type hvname: string
6066 @param hvname: the name of the hypervisor we should use
6067 @type hvparams: dict
6068 @param hvparams: the parameters which we need to check
6069 @raise errors.OpPrereqError: if the parameters are not valid
6072 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6075 for node in nodenames:
6079 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6082 class LUCreateInstance(LogicalUnit):
6083 """Create an instance.
6086 HPATH = "instance-add"
6087 HTYPE = constants.HTYPE_INSTANCE
6088 _OP_REQP = ["instance_name", "disks",
6090 "wait_for_sync", "ip_check", "nics",
6091 "hvparams", "beparams"]
6094 def CheckArguments(self):
6098 # set optional parameters to none if they don't exist
6099 for attr in ["pnode", "snode", "iallocator", "hypervisor",
6100 "disk_template", "identify_defaults"]:
6101 if not hasattr(self.op, attr):
6102 setattr(self.op, attr, None)
6104 # do not require name_check to ease forward/backward compatibility
6106 if not hasattr(self.op, "name_check"):
6107 self.op.name_check = True
6108 if not hasattr(self.op, "no_install"):
6109 self.op.no_install = False
6110 if self.op.no_install and self.op.start:
6111 self.LogInfo("No-installation mode selected, disabling startup")
6112 self.op.start = False
6113 # validate/normalize the instance name
6114 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6115 if self.op.ip_check and not self.op.name_check:
6116 # TODO: make the ip check more flexible and not depend on the name check
6117 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6119 # check disk information: either all adopt, or no adopt
6120 has_adopt = has_no_adopt = False
6121 for disk in self.op.disks:
6126 if has_adopt and has_no_adopt:
6127 raise errors.OpPrereqError("Either all disks are adopted or none is",
6130 if self.op.disk_template != constants.DT_PLAIN:
6131 raise errors.OpPrereqError("Disk adoption is only supported for the"
6132 " 'plain' disk template",
6134 if self.op.iallocator is not None:
6135 raise errors.OpPrereqError("Disk adoption not allowed with an"
6136 " iallocator script", errors.ECODE_INVAL)
6137 if self.op.mode == constants.INSTANCE_IMPORT:
6138 raise errors.OpPrereqError("Disk adoption not allowed for"
6139 " instance import", errors.ECODE_INVAL)
6141 self.adopt_disks = has_adopt
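# (Adoption reuses volumes that already exist on the node instead of
# creating fresh ones, which is why it is restricted above to the plain
# LVM template and is incompatible with iallocator use and imports.)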
6143 # verify creation mode
6144 if self.op.mode not in (constants.INSTANCE_CREATE,
6145 constants.INSTANCE_IMPORT):
6146 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6147 self.op.mode, errors.ECODE_INVAL)
6149 # instance name verification
6150 if self.op.name_check:
6151 self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6152 self.op.instance_name = self.hostname1.name
6153 # used in CheckPrereq for ip ping check
6154 self.check_ip = self.hostname1.ip
6156 self.check_ip = None
6158 # file storage checks
6159 if (self.op.file_driver and
6160 not self.op.file_driver in constants.FILE_DRIVER):
6161 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6162 self.op.file_driver, errors.ECODE_INVAL)
6164 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6165 raise errors.OpPrereqError("File storage directory path not absolute",
6168 ### Node/iallocator related checks
6169 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6170 raise errors.OpPrereqError("One and only one of iallocator and primary"
6171 " node must be given",
6174 if self.op.mode == constants.INSTANCE_IMPORT:
6175 # On import force_variant must be True, because if we forced it at
6176 # initial install, our only chance when importing it back is that it
6177 # works again
6178 self.op.force_variant = True
6180 if self.op.no_install:
6181 self.LogInfo("No-installation mode has no effect during import")
6183 else: # INSTANCE_CREATE
6184 if getattr(self.op, "os_type", None) is None:
6185 raise errors.OpPrereqError("No guest OS specified",
6187 self.op.force_variant = getattr(self.op, "force_variant", False)
6188 if self.op.disk_template is None:
6189 raise errors.OpPrereqError("No disk template specified",
6192 def ExpandNames(self):
6193 """ExpandNames for CreateInstance.
6195 Figure out the right locks for instance creation.
6198 self.needed_locks = {}
6200 instance_name = self.op.instance_name
6201 # this is just a preventive check, but someone might still add this
6202 # instance in the meantime, and creation will fail at lock-add time
6203 if instance_name in self.cfg.GetInstanceList():
6204 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6205 instance_name, errors.ECODE_EXISTS)
6207 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6209 if self.op.iallocator:
6210 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6212 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6213 nodelist = [self.op.pnode]
6214 if self.op.snode is not None:
6215 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6216 nodelist.append(self.op.snode)
6217 self.needed_locks[locking.LEVEL_NODE] = nodelist
6219 # in case of import lock the source node too
6220 if self.op.mode == constants.INSTANCE_IMPORT:
6221 src_node = getattr(self.op, "src_node", None)
6222 src_path = getattr(self.op, "src_path", None)
6224 if src_path is None:
6225 self.op.src_path = src_path = self.op.instance_name
6227 if src_node is None:
6228 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6229 self.op.src_node = None
6230 if os.path.isabs(src_path):
6231 raise errors.OpPrereqError("Importing an instance from an absolute"
6232 " path requires a source node option.",
6235 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6236 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6237 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6238 if not os.path.isabs(src_path):
6239 self.op.src_path = src_path = \
6240 utils.PathJoin(constants.EXPORT_DIR, src_path)
6242 def _RunAllocator(self):
6243 """Run the allocator based on input opcode.
6246 nics = [n.ToDict() for n in self.nics]
6247 ial = IAllocator(self.cfg, self.rpc,
6248 mode=constants.IALLOCATOR_MODE_ALLOC,
6249 name=self.op.instance_name,
6250 disk_template=self.op.disk_template,
6253 vcpus=self.be_full[constants.BE_VCPUS],
6254 mem_size=self.be_full[constants.BE_MEMORY],
6257 hypervisor=self.op.hypervisor,
6260 ial.Run(self.op.iallocator)
6263 raise errors.OpPrereqError("Can't compute nodes using"
6264 " iallocator '%s': %s" %
6265 (self.op.iallocator, ial.info),
6267 if len(ial.result) != ial.required_nodes:
6268 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6269 " of nodes (%s), required %s" %
6270 (self.op.iallocator, len(ial.result),
6271 ial.required_nodes), errors.ECODE_FAULT)
6272 self.op.pnode = ial.result[0]
6273 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6274 self.op.instance_name, self.op.iallocator,
6275 utils.CommaJoin(ial.result))
6276 if ial.required_nodes == 2:
6277 self.op.snode = ial.result[1]
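# (For mirrored disk templates the allocator must return two nodes, the
# primary and the new secondary; otherwise a single node suffices, hence
# the required_nodes check above.)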
6279 def BuildHooksEnv(self):
6282 This runs on master, primary and secondary nodes of the instance.
6286 "ADD_MODE": self.op.mode,
6288 if self.op.mode == constants.INSTANCE_IMPORT:
6289 env["SRC_NODE"] = self.op.src_node
6290 env["SRC_PATH"] = self.op.src_path
6291 env["SRC_IMAGES"] = self.src_images
6293 env.update(_BuildInstanceHookEnv(
6294 name=self.op.instance_name,
6295 primary_node=self.op.pnode,
6296 secondary_nodes=self.secondaries,
6297 status=self.op.start,
6298 os_type=self.op.os_type,
6299 memory=self.be_full[constants.BE_MEMORY],
6300 vcpus=self.be_full[constants.BE_VCPUS],
6301 nics=_NICListToTuple(self, self.nics),
6302 disk_template=self.op.disk_template,
6303 disks=[(d["size"], d["mode"]) for d in self.disks],
6306 hypervisor_name=self.op.hypervisor,
6309 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6313 def _ReadExportInfo(self):
6314 """Reads the export information from disk.
6316 It will override the opcode source node and path with the actual
6317 information, if these two were not specified before.
6319 @return: the export information
6322 assert self.op.mode == constants.INSTANCE_IMPORT
6324 src_node = self.op.src_node
6325 src_path = self.op.src_path
6327 if src_node is None:
6328 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6329 exp_list = self.rpc.call_export_list(locked_nodes)
6331 for node in exp_list:
6332 if exp_list[node].fail_msg:
6334 if src_path in exp_list[node].payload:
6336 self.op.src_node = src_node = node
6337 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6341 raise errors.OpPrereqError("No export found for relative path %s" %
6342 src_path, errors.ECODE_INVAL)
6344 _CheckNodeOnline(self, src_node)
6345 result = self.rpc.call_export_info(src_node, src_path)
6346 result.Raise("No export or invalid export found in dir %s" % src_path)
6348 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6349 if not export_info.has_section(constants.INISECT_EXP):
6350 raise errors.ProgrammerError("Corrupted export config",
6351 errors.ECODE_ENVIRON)
6353 ei_version = export_info.get(constants.INISECT_EXP, "version")
6354 if (int(ei_version) != constants.EXPORT_VERSION):
6355 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6356 (ei_version, constants.EXPORT_VERSION),
6357 errors.ECODE_ENVIRON)
6360 def _ReadExportParams(self, einfo):
6361 """Use export parameters as defaults.
6363 In case the opcode doesn't specify (as in override) some instance
6364 parameters, then try to use them from the export information, if
6365 the export declares them.
6368 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6370 if self.op.disk_template is None:
6371 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6372 self.op.disk_template = einfo.get(constants.INISECT_INS,
6375 raise errors.OpPrereqError("No disk template specified and the export"
6376 " is missing the disk_template information",
6379 if not self.op.disks:
6380 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6382 # TODO: import the disk iv_name too
6383 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6384 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6385 disks.append({"size": disk_sz})
6386 self.op.disks = disks
6388 raise errors.OpPrereqError("No disk info specified and the export"
6389 " is missing the disk information",
6392 if (not self.op.nics and
6393 einfo.has_option(constants.INISECT_INS, "nic_count")):
6395 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6397 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6398 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6403 if (self.op.hypervisor is None and
6404 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6405 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6406 if einfo.has_section(constants.INISECT_HYP):
6407 # use the export parameters but do not override the ones
6408 # specified by the user
6409 for name, value in einfo.items(constants.INISECT_HYP):
6410 if name not in self.op.hvparams:
6411 self.op.hvparams[name] = value
6413 if einfo.has_section(constants.INISECT_BEP):
6414 # use the parameters, without overriding
6415 for name, value in einfo.items(constants.INISECT_BEP):
6416 if name not in self.op.beparams:
6417 self.op.beparams[name] = value
6419 # try to read the parameters old style, from the main section
6420 for name in constants.BES_PARAMETERS:
6421 if (name not in self.op.beparams and
6422 einfo.has_option(constants.INISECT_INS, name)):
6423 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6425 def _RevertToDefaults(self, cluster):
6426 """Revert the instance parameters to the default values.
6430 hv_defs = cluster.GetHVDefaults(self.op.hypervisor, self.op.os_type)
6431 for name in self.op.hvparams.keys():
6432 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6433 del self.op.hvparams[name]
6435 be_defs = cluster.beparams.get(constants.PP_DEFAULT, {})
6436 for name in self.op.beparams.keys():
6437 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6438 del self.op.beparams[name]
6440 nic_defs = cluster.nicparams.get(constants.PP_DEFAULT, {})
6441 for nic in self.op.nics:
6442 for name in constants.NICS_PARAMETERS:
6443 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6446 def CheckPrereq(self):
6447 """Check prerequisites.
6450 if self.op.mode == constants.INSTANCE_IMPORT:
6451 export_info = self._ReadExportInfo()
6452 self._ReadExportParams(export_info)
6454 _CheckDiskTemplate(self.op.disk_template)
6456 if (not self.cfg.GetVGName() and
6457 self.op.disk_template not in constants.DTS_NOT_LVM):
6458 raise errors.OpPrereqError("Cluster does not support lvm-based"
6459 " instances", errors.ECODE_STATE)
6461 if self.op.hypervisor is None:
6462 self.op.hypervisor = self.cfg.GetHypervisorType()
6464 cluster = self.cfg.GetClusterInfo()
6465 enabled_hvs = cluster.enabled_hypervisors
6466 if self.op.hypervisor not in enabled_hvs:
6467 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6468 " cluster (%s)" % (self.op.hypervisor,
6469 ",".join(enabled_hvs)),
6472 # check hypervisor parameter syntax (locally)
6473 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6474 filled_hvp = objects.FillDict(cluster.GetHVDefaults(self.op.hypervisor,
6477 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6478 hv_type.CheckParameterSyntax(filled_hvp)
6479 self.hv_full = filled_hvp
6480 # check that we don't specify global parameters on an instance
6481 _CheckGlobalHvParams(self.op.hvparams)
6483 # fill and remember the beparams dict
6484 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6485 self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6488 # now that hvp/bep are in final format, let's reset to defaults,
6490 if self.op.identify_defaults:
6491 self._RevertToDefaults(cluster)
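# (identify_defaults compares the fully filled hv/be/nic parameters
# against the cluster defaults and drops every value that matches, so the
# stored instance configuration only contains real overrides; see
# _RevertToDefaults above.)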
6495 for idx, nic in enumerate(self.op.nics):
6496 nic_mode_req = nic.get("mode", None)
6497 nic_mode = nic_mode_req
6498 if nic_mode is None:
6499 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6501 # in routed mode, for the first nic, the default ip is 'auto'
6502 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6503 default_ip_mode = constants.VALUE_AUTO
6505 default_ip_mode = constants.VALUE_NONE
6507 # ip validity checks
6508 ip = nic.get("ip", default_ip_mode)
6509 if ip is None or ip.lower() == constants.VALUE_NONE:
6511 elif ip.lower() == constants.VALUE_AUTO:
6512 if not self.op.name_check:
6513 raise errors.OpPrereqError("IP address set to auto but name checks"
6514 " have been skipped. Aborting.",
6516 nic_ip = self.hostname1.ip
6518 if not utils.IsValidIP(ip):
6519 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6520 " like a valid IP" % ip,
6524 # TODO: check the ip address for uniqueness
6525 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6526 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6529 # MAC address verification
6530 mac = nic.get("mac", constants.VALUE_AUTO)
6531 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6532 mac = utils.NormalizeAndValidateMac(mac)
6534 try:
6535 self.cfg.ReserveMAC(mac, self.proc.GetECId())
6536 except errors.ReservationError:
6537 raise errors.OpPrereqError("MAC address %s already in use"
6538 " in cluster" % mac,
6539 errors.ECODE_NOTUNIQUE)
6541 # bridge verification
6542 bridge = nic.get("bridge", None)
6543 link = nic.get("link", None)
6545 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6546 " at the same time", errors.ECODE_INVAL)
6547 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6548 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6555 nicparams[constants.NIC_MODE] = nic_mode_req
6557 nicparams[constants.NIC_LINK] = link
6559 check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6561 objects.NIC.CheckParameterSyntax(check_params)
6562 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6564 # disk checks/pre-build
6566 for disk in self.op.disks:
6567 mode = disk.get("mode", constants.DISK_RDWR)
6568 if mode not in constants.DISK_ACCESS_SET:
6569 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6570 mode, errors.ECODE_INVAL)
6571 size = disk.get("size", None)
6573 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6574 try:
6575 size = int(size)
6576 except (TypeError, ValueError):
6577 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6579 new_disk = {"size": size, "mode": mode}
6581 new_disk["adopt"] = disk["adopt"]
6582 self.disks.append(new_disk)
6584 if self.op.mode == constants.INSTANCE_IMPORT:
6586 # Check that the new instance doesn't have fewer disks than the export
6587 instance_disks = len(self.disks)
6588 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6589 if instance_disks < export_disks:
6590 raise errors.OpPrereqError("Not enough disks to import."
6591 " (instance: %d, export: %d)" %
6592 (instance_disks, export_disks),
6596 for idx in range(export_disks):
6597 option = 'disk%d_dump' % idx
6598 if export_info.has_option(constants.INISECT_INS, option):
6599 # FIXME: are the old os-es, disk sizes, etc. useful?
6600 export_name = export_info.get(constants.INISECT_INS, option)
6601 image = utils.PathJoin(self.op.src_path, export_name)
6602 disk_images.append(image)
6604 disk_images.append(False)
6606 self.src_images = disk_images
6608 old_name = export_info.get(constants.INISECT_INS, 'name')
6609 try:
6610 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6611 except (TypeError, ValueError), err:
6612 raise errors.OpPrereqError("Invalid export file, nic_count is not"
6613 " an integer: %s" % str(err),
6615 if self.op.instance_name == old_name:
6616 for idx, nic in enumerate(self.nics):
6617 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6618 nic_mac_ini = 'nic%d_mac' % idx
6619 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6621 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6623 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6624 if self.op.ip_check:
6625 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6626 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6627 (self.check_ip, self.op.instance_name),
6628 errors.ECODE_NOTUNIQUE)
6630 #### mac address generation
6631 # By generating the MAC addresses here, both the allocator and the hooks
6632 # get the real, final MAC address rather than the 'auto' or 'generate'
6633 # value. There is a race condition between the generation and the
6634 # instance object creation, which means that we know the mac is valid
6635 # now, but we're not sure it will be when we actually add the instance.
6636 # If things go bad, adding the instance will abort because of a
6637 # duplicate mac, and the creation job will fail.
6638 for nic in self.nics:
6639 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6640 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6644 if self.op.iallocator is not None:
6645 self._RunAllocator()
6647 #### node related checks
6649 # check primary node
6650 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6651 assert self.pnode is not None, \
6652 "Cannot retrieve locked node %s" % self.op.pnode
6654 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6655 pnode.name, errors.ECODE_STATE)
6657 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6658 pnode.name, errors.ECODE_STATE)
6660 self.secondaries = []
6662 # mirror node verification
6663 if self.op.disk_template in constants.DTS_NET_MIRROR:
6664 if self.op.snode is None:
6665 raise errors.OpPrereqError("The networked disk templates need"
6666 " a mirror node", errors.ECODE_INVAL)
6667 if self.op.snode == pnode.name:
6668 raise errors.OpPrereqError("The secondary node cannot be the"
6669 " primary node.", errors.ECODE_INVAL)
6670 _CheckNodeOnline(self, self.op.snode)
6671 _CheckNodeNotDrained(self, self.op.snode)
6672 self.secondaries.append(self.op.snode)
6674 nodenames = [pnode.name] + self.secondaries
6676 req_size = _ComputeDiskSize(self.op.disk_template,
6679 # Check lv size requirements, if not adopting
6680 if req_size is not None and not self.adopt_disks:
6681 _CheckNodesFreeDisk(self, nodenames, req_size)
6683 if self.adopt_disks: # instead, we must check the adoption data
6684 all_lvs = set([i["adopt"] for i in self.disks])
6685 if len(all_lvs) != len(self.disks):
6686 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6688 for lv_name in all_lvs:
6689 try:
6690 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6691 except errors.ReservationError:
6692 raise errors.OpPrereqError("LV named %s used by another instance" %
6693 lv_name, errors.ECODE_NOTUNIQUE)
6695 node_lvs = self.rpc.call_lv_list([pnode.name],
6696 self.cfg.GetVGName())[pnode.name]
6697 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6698 node_lvs = node_lvs.payload
6699 delta = all_lvs.difference(node_lvs.keys())
6701 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6702 utils.CommaJoin(delta),
6704 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6706 raise errors.OpPrereqError("Online logical volumes found, cannot"
6707 " adopt: %s" % utils.CommaJoin(online_lvs),
6709 # update the size of disk based on what is found
6710 for dsk in self.disks:
6711 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6713 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6715 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6717 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6719 # memory check on primary node
6721 _CheckNodeFreeMemory(self, self.pnode.name,
6722 "creating instance %s" % self.op.instance_name,
6723 self.be_full[constants.BE_MEMORY],
6726 self.dry_run_result = list(nodenames)
6728 def Exec(self, feedback_fn):
6729 """Create and add the instance to the cluster.
6732 instance = self.op.instance_name
6733 pnode_name = self.pnode.name
6735 ht_kind = self.op.hypervisor
6736 if ht_kind in constants.HTS_REQ_PORT:
6737 network_port = self.cfg.AllocatePort()
6741 if constants.ENABLE_FILE_STORAGE:
6742 # this is needed because os.path.join does not accept None arguments
6743 if self.op.file_storage_dir is None:
6744 string_file_storage_dir = ""
6746 string_file_storage_dir = self.op.file_storage_dir
6748 # build the full file storage dir path
6749 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6750 string_file_storage_dir, instance)
6752 file_storage_dir = ""
6755 disks = _GenerateDiskTemplate(self,
6756 self.op.disk_template,
6757 instance, pnode_name,
6761 self.op.file_driver,
6764 iobj = objects.Instance(name=instance, os=self.op.os_type,
6765 primary_node=pnode_name,
6766 nics=self.nics, disks=disks,
6767 disk_template=self.op.disk_template,
6769 network_port=network_port,
6770 beparams=self.op.beparams,
6771 hvparams=self.op.hvparams,
6772 hypervisor=self.op.hypervisor,
6775 if self.adopt_disks:
6776 # rename LVs to the newly-generated names; we need to construct
6777 # 'fake' LV disks with the old data, plus the new unique_id
6778 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6780 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
6781 rename_to.append(t_dsk.logical_id)
6782 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6783 self.cfg.SetDiskID(t_dsk, pnode_name)
6784 result = self.rpc.call_blockdev_rename(pnode_name,
6785 zip(tmp_disks, rename_to))
6786 result.Raise("Failed to rename adopted LVs")
6788 feedback_fn("* creating instance disks...")
6789 try:
6790 _CreateDisks(self, iobj)
6791 except errors.OpExecError:
6792 self.LogWarning("Device creation failed, reverting...")
6794 _RemoveDisks(self, iobj)
6796 self.cfg.ReleaseDRBDMinors(instance)
6799 feedback_fn("adding instance %s to cluster config" % instance)
6801 self.cfg.AddInstance(iobj, self.proc.GetECId())
6803 # Declare that we don't want to remove the instance lock anymore, as we've
6804 # added the instance to the config
6805 del self.remove_locks[locking.LEVEL_INSTANCE]
6806 # Unlock all the nodes
6807 if self.op.mode == constants.INSTANCE_IMPORT:
6808 nodes_keep = [self.op.src_node]
6809 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6810 if node != self.op.src_node]
6811 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6812 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6814 self.context.glm.release(locking.LEVEL_NODE)
6815 del self.acquired_locks[locking.LEVEL_NODE]
6817 if self.op.wait_for_sync:
6818 disk_abort = not _WaitForSync(self, iobj)
6819 elif iobj.disk_template in constants.DTS_NET_MIRROR:
6820 # make sure the disks are not degraded (still sync-ing is ok)
6822 feedback_fn("* checking mirrors status")
6823 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6828 _RemoveDisks(self, iobj)
6829 self.cfg.RemoveInstance(iobj.name)
6830 # Make sure the instance lock gets removed
6831 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6832 raise errors.OpExecError("There are some degraded disks for"
6835 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6836 if self.op.mode == constants.INSTANCE_CREATE:
6837 if not self.op.no_install:
6838 feedback_fn("* running the instance OS create scripts...")
6839 # FIXME: pass debug option from opcode to backend
6840 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6841 self.op.debug_level)
6842 result.Raise("Could not add os for instance %s"
6843 " on node %s" % (instance, pnode_name))
6845 elif self.op.mode == constants.INSTANCE_IMPORT:
6846 feedback_fn("* running the instance OS import scripts...")
6847 src_node = self.op.src_node
6848 src_images = self.src_images
6849 cluster_name = self.cfg.GetClusterName()
6850 # FIXME: pass debug option from opcode to backend
6851 import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6852 src_node, src_images,
6854 self.op.debug_level)
6855 msg = import_result.fail_msg
6857 self.LogWarning("Error while importing the disk images for instance"
6858 " %s on node %s: %s" % (instance, pnode_name, msg))
6860 # also checked in the prereq part
6861 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6865 iobj.admin_up = True
6866 self.cfg.Update(iobj, feedback_fn)
6867 logging.info("Starting instance %s on node %s", instance, pnode_name)
6868 feedback_fn("* starting instance...")
6869 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6870 result.Raise("Could not start instance")
6872 return list(iobj.all_nodes)
6875 class LUConnectConsole(NoHooksLU):
6876 """Connect to an instance's console.
6878 This is somewhat special in that it returns the command line that
6879 you need to run on the master node in order to connect to the
6880 console.
6883 _OP_REQP = ["instance_name"]
6886 def ExpandNames(self):
6887 self._ExpandAndLockInstance()
6889 def CheckPrereq(self):
6890 """Check prerequisites.
6892 This checks that the instance is in the cluster.
6895 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6896 assert self.instance is not None, \
6897 "Cannot retrieve locked instance %s" % self.op.instance_name
6898 _CheckNodeOnline(self, self.instance.primary_node)
6900 def Exec(self, feedback_fn):
6901 """Connect to the console of an instance
6904 instance = self.instance
6905 node = instance.primary_node
6907 node_insts = self.rpc.call_instance_list([node],
6908 [instance.hypervisor])[node]
6909 node_insts.Raise("Can't get node information from %s" % node)
6911 if instance.name not in node_insts.payload:
6912 raise errors.OpExecError("Instance %s is not running." % instance.name)
6914 logging.debug("Connecting to console of %s on %s", instance.name, node)
6916 hyper = hypervisor.GetHypervisor(instance.hypervisor)
6917 cluster = self.cfg.GetClusterInfo()
6918 # beparams and hvparams are passed separately, to avoid editing the
6919 # instance and then saving the defaults in the instance itself.
6920 hvparams = cluster.FillHV(instance)
6921 beparams = cluster.FillBE(instance)
6922 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6925 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
6928 class LUReplaceDisks(LogicalUnit):
6929 """Replace the disks of an instance.
6932 HPATH = "mirrors-replace"
6933 HTYPE = constants.HTYPE_INSTANCE
6934 _OP_REQP = ["instance_name", "mode", "disks"]
6937 def CheckArguments(self):
6938 if not hasattr(self.op, "remote_node"):
6939 self.op.remote_node = None
6940 if not hasattr(self.op, "iallocator"):
6941 self.op.iallocator = None
6942 if not hasattr(self.op, "early_release"):
6943 self.op.early_release = False
6945 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6948 def ExpandNames(self):
6949 self._ExpandAndLockInstance()
6951 if self.op.iallocator is not None:
6952 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6954 elif self.op.remote_node is not None:
6955 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6956 self.op.remote_node = remote_node
6958 # Warning: do not remove the locking of the new secondary here
6959 # unless DRBD8.AddChildren is changed to work in parallel;
6960 # currently it doesn't since parallel invocations of
6961 # FindUnusedMinor will conflict
6962 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6963 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6966 self.needed_locks[locking.LEVEL_NODE] = []
6967 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6969 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6970 self.op.iallocator, self.op.remote_node,
6971 self.op.disks, False, self.op.early_release)
6973 self.tasklets = [self.replacer]
6975 def DeclareLocks(self, level):
6976 # If we're not already locking all nodes in the set we have to declare the
6977 # instance's primary/secondary nodes.
6978 if (level == locking.LEVEL_NODE and
6979 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6980 self._LockInstancesNodes()
6982 def BuildHooksEnv(self):
6985 This runs on the master, the primary and all the secondaries.
6988 instance = self.replacer.instance
6990 "MODE": self.op.mode,
6991 "NEW_SECONDARY": self.op.remote_node,
6992 "OLD_SECONDARY": instance.secondary_nodes[0],
6994 env.update(_BuildInstanceHookEnvByObject(self, instance))
6996 self.cfg.GetMasterNode(),
6997 instance.primary_node,
6999 if self.op.remote_node is not None:
7000 nl.append(self.op.remote_node)
7004 class LUEvacuateNode(LogicalUnit):
7005 """Relocate the secondary instances from a node.
7008 HPATH = "node-evacuate"
7009 HTYPE = constants.HTYPE_NODE
7010 _OP_REQP = ["node_name"]
7013 def CheckArguments(self):
7014 if not hasattr(self.op, "remote_node"):
7015 self.op.remote_node = None
7016 if not hasattr(self.op, "iallocator"):
7017 self.op.iallocator = None
7018 if not hasattr(self.op, "early_release"):
7019 self.op.early_release = False
7021 TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
7022 self.op.remote_node,
7025 def ExpandNames(self):
7026 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7028 self.needed_locks = {}
7030 # Declare node locks
7031 if self.op.iallocator is not None:
7032 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7034 elif self.op.remote_node is not None:
7035 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7037 # Warning: do not remove the locking of the new secondary here
7038 # unless DRBD8.AddChildren is changed to work in parallel;
7039 # currently it doesn't since parallel invocations of
7040 # FindUnusedMinor will conflict
7041 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
7042 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7045 raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
7047 # Create tasklets for replacing disks for all secondary instances on this
7048 # node
7052 for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
7053 logging.debug("Replacing disks for instance %s", inst.name)
7054 names.append(inst.name)
7056 replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
7057 self.op.iallocator, self.op.remote_node, [],
7058 True, self.op.early_release)
7059 tasklets.append(replacer)
7061 self.tasklets = tasklets
7062 self.instance_names = names
7064 # Declare instance locks
7065 self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
7067 def DeclareLocks(self, level):
7068 # If we're not already locking all nodes in the set we have to declare the
7069 # instance's primary/secondary nodes.
7070 if (level == locking.LEVEL_NODE and
7071 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7072 self._LockInstancesNodes()
7074 def BuildHooksEnv(self):
7077 This runs on the master, the primary and all the secondaries.
7081 "NODE_NAME": self.op.node_name,
7084 nl = [self.cfg.GetMasterNode()]
7086 if self.op.remote_node is not None:
7087 env["NEW_SECONDARY"] = self.op.remote_node
7088 nl.append(self.op.remote_node)
7090 return (env, nl, nl)
7093 class TLReplaceDisks(Tasklet):
7094 """Replaces disks for an instance.
7096 Note: Locking is not within the scope of this class.
7099 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7100 disks, delay_iallocator, early_release):
7101 """Initializes this class.
7104 Tasklet.__init__(self, lu)
7107 self.instance_name = instance_name
7109 self.iallocator_name = iallocator_name
7110 self.remote_node = remote_node
7112 self.delay_iallocator = delay_iallocator
7113 self.early_release = early_release
7116 self.instance = None
7117 self.new_node = None
7118 self.target_node = None
7119 self.other_node = None
7120 self.remote_node_info = None
7121 self.node_secondary_ip = None
7124 def CheckArguments(mode, remote_node, iallocator):
7125 """Helper function for users of this class.
7128 # check for valid parameter combination
7129 if mode == constants.REPLACE_DISK_CHG:
7130 if remote_node is None and iallocator is None:
7131 raise errors.OpPrereqError("When changing the secondary either an"
7132 " iallocator script must be used or the"
7133 " new node given", errors.ECODE_INVAL)
7135 if remote_node is not None and iallocator is not None:
7136 raise errors.OpPrereqError("Give either the iallocator or the new"
7137 " secondary, not both", errors.ECODE_INVAL)
7139 elif remote_node is not None or iallocator is not None:
7140 # Not replacing the secondary
7141 raise errors.OpPrereqError("The iallocator and new node options can"
7142 " only be used when changing the"
7143 " secondary node", errors.ECODE_INVAL)
7146 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7147 """Compute a new secondary node using an IAllocator.
7150 ial = IAllocator(lu.cfg, lu.rpc,
7151 mode=constants.IALLOCATOR_MODE_RELOC,
7153 relocate_from=relocate_from)
7155 ial.Run(iallocator_name)
7158 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7159 " %s" % (iallocator_name, ial.info),
7162 if len(ial.result) != ial.required_nodes:
7163 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7164 " of nodes (%s), required %s" %
7166 len(ial.result), ial.required_nodes),
7169 remote_node_name = ial.result[0]
7171 lu.LogInfo("Selected new secondary for instance '%s': %s",
7172 instance_name, remote_node_name)
7174 return remote_node_name
7176 def _FindFaultyDisks(self, node_name):
7177 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7180 def CheckPrereq(self):
7181 """Check prerequisites.
7183 This checks that the instance is in the cluster.
7186 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7187 assert instance is not None, \
7188 "Cannot retrieve locked instance %s" % self.instance_name
7190 if instance.disk_template != constants.DT_DRBD8:
7191 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7192 " instances", errors.ECODE_INVAL)
7194 if len(instance.secondary_nodes) != 1:
7195 raise errors.OpPrereqError("The instance has a strange layout,"
7196 " expected one secondary but found %d" %
7197 len(instance.secondary_nodes),
7200 if not self.delay_iallocator:
7201 self._CheckPrereq2()
7203 def _CheckPrereq2(self):
7204 """Check prerequisites, second part.
7206 This function should always be part of CheckPrereq. It was separated and is
7207 now called from Exec because during node evacuation iallocator was only
7208 called with an unmodified cluster model, not taking planned changes into
7209 account.
7212 instance = self.instance
7213 secondary_node = instance.secondary_nodes[0]
7215 if self.iallocator_name is None:
7216 remote_node = self.remote_node
7218 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7219 instance.name, instance.secondary_nodes)
7221 if remote_node is not None:
7222 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7223 assert self.remote_node_info is not None, \
7224 "Cannot retrieve locked node %s" % remote_node
7226 self.remote_node_info = None
7228 if remote_node == self.instance.primary_node:
7229 raise errors.OpPrereqError("The specified node is the primary node of"
7230 " the instance.", errors.ECODE_INVAL)
7232 if remote_node == secondary_node:
7233 raise errors.OpPrereqError("The specified node is already the"
7234 " secondary node of the instance.",
7237 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7238 constants.REPLACE_DISK_CHG):
7239 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7242 if self.mode == constants.REPLACE_DISK_AUTO:
7243 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7244 faulty_secondary = self._FindFaultyDisks(secondary_node)
7246 if faulty_primary and faulty_secondary:
7247 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7248 " one node and can not be repaired"
7249 " automatically" % self.instance_name,
7253 self.disks = faulty_primary
7254 self.target_node = instance.primary_node
7255 self.other_node = secondary_node
7256 check_nodes = [self.target_node, self.other_node]
7257 elif faulty_secondary:
7258 self.disks = faulty_secondary
7259 self.target_node = secondary_node
7260 self.other_node = instance.primary_node
7261 check_nodes = [self.target_node, self.other_node]
7267 # Non-automatic modes
7268 if self.mode == constants.REPLACE_DISK_PRI:
7269 self.target_node = instance.primary_node
7270 self.other_node = secondary_node
7271 check_nodes = [self.target_node, self.other_node]
7273 elif self.mode == constants.REPLACE_DISK_SEC:
7274 self.target_node = secondary_node
7275 self.other_node = instance.primary_node
7276 check_nodes = [self.target_node, self.other_node]
7278 elif self.mode == constants.REPLACE_DISK_CHG:
7279 self.new_node = remote_node
7280 self.other_node = instance.primary_node
7281 self.target_node = secondary_node
7282 check_nodes = [self.new_node, self.other_node]
7284 _CheckNodeNotDrained(self.lu, remote_node)
7286 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7287 assert old_node_info is not None
7288 if old_node_info.offline and not self.early_release:
7289 # doesn't make sense to delay the release
7290 self.early_release = True
7291 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7292 " early-release mode", secondary_node)
7295 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7298 # If not specified, all disks should be replaced
7300 self.disks = range(len(self.instance.disks))
7302 for node in check_nodes:
7303 _CheckNodeOnline(self.lu, node)
7305 # Check whether disks are valid
7306 for disk_idx in self.disks:
7307 instance.FindDisk(disk_idx)
7309 # Get secondary node IP addresses
7312 for node_name in [self.target_node, self.other_node, self.new_node]:
7313 if node_name is not None:
7314 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7316 self.node_secondary_ip = node_2nd_ip
7318 def Exec(self, feedback_fn):
7319 """Execute disk replacement.
7321 This dispatches the disk replacement to the appropriate handler.
7324 if self.delay_iallocator:
7325 self._CheckPrereq2()
7328 feedback_fn("No disks need replacement")
7331 feedback_fn("Replacing disk(s) %s for %s" %
7332 (utils.CommaJoin(self.disks), self.instance.name))
7334 activate_disks = (not self.instance.admin_up)
7336 # Activate the instance disks if we're replacing them on a down instance
7338 _StartInstanceDisks(self.lu, self.instance, True)
7341 # Should we replace the secondary node?
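# (Replacing the secondary requires the full "move to a new node"
# procedure implemented by _ExecDrbd8Secondary; all other modes only
# replace the LVs on one of the current nodes.)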
7342 if self.new_node is not None:
7343 fn = self._ExecDrbd8Secondary
7345 fn = self._ExecDrbd8DiskOnly
7347 return fn(feedback_fn)
7350 # Deactivate the instance disks if we're replacing them on a
7353 _SafeShutdownInstanceDisks(self.lu, self.instance)
7355 def _CheckVolumeGroup(self, nodes):
7356 self.lu.LogInfo("Checking volume groups")
7358 vgname = self.cfg.GetVGName()
7360 # Make sure volume group exists on all involved nodes
7361 results = self.rpc.call_vg_list(nodes)
7363 raise errors.OpExecError("Can't list volume groups on the nodes")
7367 res.Raise("Error checking node %s" % node)
7368 if vgname not in res.payload:
7369 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7372 def _CheckDisksExistence(self, nodes):
7373 # Check disk existence
7374 for idx, dev in enumerate(self.instance.disks):
7375 if idx not in self.disks:
7379 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7380 self.cfg.SetDiskID(dev, node)
7382 result = self.rpc.call_blockdev_find(node, dev)
7384 msg = result.fail_msg
7385 if msg or not result.payload:
7387 msg = "disk not found"
7388 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7391 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7392 for idx, dev in enumerate(self.instance.disks):
7393 if idx not in self.disks:
7396 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7399 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7401 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7402 " replace disks for instance %s" %
7403 (node_name, self.instance.name))
7405 def _CreateNewStorage(self, node_name):
7406 vgname = self.cfg.GetVGName()
7409 for idx, dev in enumerate(self.instance.disks):
7410 if idx not in self.disks:
7413 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7415 self.cfg.SetDiskID(dev, node_name)
7417 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7418 names = _GenerateUniqueNames(self.lu, lv_names)
7420 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7421 logical_id=(vgname, names[0]))
7422 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7423 logical_id=(vgname, names[1]))
7425 new_lvs = [lv_data, lv_meta]
7426 old_lvs = dev.children
7427 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7429 # we pass force_create=True to force the LVM creation
7430 for new_lv in new_lvs:
7431 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7432 _GetInstanceInfoText(self.instance), False)
7436 def _CheckDevices(self, node_name, iv_names):
7437 for name, (dev, _, _) in iv_names.iteritems():
7438 self.cfg.SetDiskID(dev, node_name)
7440 result = self.rpc.call_blockdev_find(node_name, dev)
7442 msg = result.fail_msg
7443 if msg or not result.payload:
7445 msg = "disk not found"
7446 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7449 if result.payload.is_degraded:
7450 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7452 def _RemoveOldStorage(self, node_name, iv_names):
7453 for name, (_, old_lvs, _) in iv_names.iteritems():
7454 self.lu.LogInfo("Remove logical volumes for %s" % name)
7457 self.cfg.SetDiskID(lv, node_name)
7459 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7461 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7462 hint="remove unused LVs manually")
7464 def _ReleaseNodeLock(self, node_name):
7465 """Releases the lock for a given node."""
7466 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7468 def _ExecDrbd8DiskOnly(self, feedback_fn):
7469 """Replace a disk on the primary or secondary for DRBD 8.
7471 The algorithm for replace is quite complicated:
7473 1. for each disk to be replaced:
7475 1. create new LVs on the target node with unique names
7476 1. detach old LVs from the drbd device
7477 1. rename old LVs to name_replaced.<time_t>
7478 1. rename new LVs to old LVs
7479 1. attach the new LVs (with the old names now) to the drbd device
7481 1. wait for sync across all devices
7483 1. for each modified disk:
7485 1. remove old LVs (which have the name name_replaced.<time_t>)
7487 Failures are not very well handled.
7492 # Step: check device activation
7493 self.lu.LogStep(1, steps_total, "Check device existence")
7494 self._CheckDisksExistence([self.other_node, self.target_node])
7495 self._CheckVolumeGroup([self.target_node, self.other_node])
7497 # Step: check other node consistency
7498 self.lu.LogStep(2, steps_total, "Check peer consistency")
7499 self._CheckDisksConsistency(self.other_node,
7500 self.other_node == self.instance.primary_node,
7503 # Step: create new storage
7504 self.lu.LogStep(3, steps_total, "Allocate new storage")
7505 iv_names = self._CreateNewStorage(self.target_node)
7507 # Step: for each lv, detach+rename*2+attach
7508 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7509 for dev, old_lvs, new_lvs in iv_names.itervalues():
7510 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7512 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7514 result.Raise("Can't detach drbd from local storage on node"
7515 " %s for device %s" % (self.target_node, dev.iv_name))
7517 #cfg.Update(instance)
7519 # ok, we created the new LVs, so now we know we have the needed
7520 # storage; as such, we proceed on the target node to rename
7521 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7522 # using the assumption that logical_id == physical_id (which in
7523 # turn is the unique_id on that node)
7525 # FIXME(iustin): use a better name for the replaced LVs
7526 temp_suffix = int(time.time())
7527 ren_fn = lambda d, suff: (d.physical_id[0],
7528 d.physical_id[1] + "_replaced-%s" % suff)
7530 # Build the rename list based on what LVs exist on the node
7531 rename_old_to_new = []
7532 for to_ren in old_lvs:
7533 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7534 if not result.fail_msg and result.payload:
7536 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7538 self.lu.LogInfo("Renaming the old LVs on the target node")
7539 result = self.rpc.call_blockdev_rename(self.target_node,
7541 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7543 # Now we rename the new LVs to the old LVs
7544 self.lu.LogInfo("Renaming the new LVs on the target node")
7545 rename_new_to_old = [(new, old.physical_id)
7546 for old, new in zip(old_lvs, new_lvs)]
7547 result = self.rpc.call_blockdev_rename(self.target_node,
7549 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7551 for old, new in zip(old_lvs, new_lvs):
7552 new.logical_id = old.logical_id
7553 self.cfg.SetDiskID(new, self.target_node)
7555 for disk in old_lvs:
7556 disk.logical_id = ren_fn(disk, temp_suffix)
7557 self.cfg.SetDiskID(disk, self.target_node)
7559 # Now that the new lvs have the old name, we can add them to the device
7560 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7561 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7563 msg = result.fail_msg
7565 for new_lv in new_lvs:
7566 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7569 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7570 hint=("cleanup manually the unused logical"
7572 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7574 dev.children = new_lvs
7576 self.cfg.Update(self.instance, feedback_fn)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6
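    # Illustrative note (not executed code): for a disk whose logical_id
    # is (pnode, old_snode, port, p_minor, s_minor, secret), the steps
    # below first create the device on the new node under a network-less
    # id (pnode, new_node, None, p_minor, new_minor, secret) and later
    # switch the configuration to the networked id
    # (pnode, new_node, port, p_minor, new_minor, secret) for the attach.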

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)
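    # One minor was requested on new_node per instance disk, so minors[i]
    # pairs with disk i in the zip below.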

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                               [self.instance.primary_node]
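    # call_drbd_disconnect_net returns a per-node result map; only the
    # primary node was contacted, so its entry is indexed directly.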

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,
                             self.target_node,
                             self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    _CheckStorageType(self.op.storage_type)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)
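    # Note: self.op.node_name itself is deliberately discarded from
    # check_nodes above; the node under repair may legitimately have
    # faulty disks (that is what the repair fixes), while faulty disks on
    # its peers would make the repair unsafe unless ignore_consistency
    # is set.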

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  _OP_REQP = ["nodes"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if self.op.remote_node is not None and self.op.iallocator is not None:
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
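    # With an iallocator any node in the cluster may be chosen as a
    # target, hence ALL_SET; with an explicit remote_node only the
    # evacuated nodes plus that target need to be locked.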

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
    return result


class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template != constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested.")


class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)
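    # Note for callers: None is returned alike for static queries,
    # missing or offline nodes and absent devices, so a None status must
    # be treated as "unknown" rather than as an error.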

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

      result[instance.name] = idict

    return result


class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, 'nics'):
      self.op.nics = []
    if not hasattr(self.op, 'disks'):
      self.op.disks = []
    if not hasattr(self.op, 'beparams'):
      self.op.beparams = {}
    if not hasattr(self.op, 'hvparams'):
      self.op.hvparams = {}
    if not hasattr(self.op, "disk_template"):
      self.op.disk_template = None
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "os_name"):
      self.op.os_name = None
    if not hasattr(self.op, "force_variant"):
      self.op.force_variant = False
    self.op.force = getattr(self.op, "force", False)
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if self.op.disk_template:
      _CheckDiskTemplate(self.op.disk_template)
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
          self.op.remote_node is None):
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                   " one requires specifying a secondary node",
                                   errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
        else:
          if not utils.IsValidIP(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if 'ip' in this_nic_override:
          ip = this_nic_override['ip']
        else:
          ip = nic.ip
        if 'mac' in this_nic_override:
          mac = this_nic_override['mac']
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get('ip', None)
        mac = nic_override[constants.DDM_ADD]['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]
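      # args['nics'] now describes the post-modification NIC list: the
      # per-index overrides are applied, an added NIC is appended and a
      # removed NIC drops the last entry, matching how the changes are
      # applied in Exec.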

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  @staticmethod
  def _GetUpdatedParams(old_params, update_dict,
                        default_values, parameter_types):
    """Return the new params dict for the given params.

    @type old_params: dict
    @param old_params: old parameters
    @type update_dict: dict
    @param update_dict: dict containing new parameter values,
                        or constants.VALUE_DEFAULT to reset the
                        parameter to its default value
    @type default_values: dict
    @param default_values: default values for the filled parameters
    @type parameter_types: dict
    @param parameter_types: dict mapping target dict keys to types
                            in constants.ENFORCEABLE_TYPES
    @rtype: (dict, dict)
    @return: (new_parameters, filled_parameters)

    """
    params_copy = copy.deepcopy(old_params)
    for key, val in update_dict.iteritems():
      if val == constants.VALUE_DEFAULT:
        try:
          del params_copy[key]
        except KeyError:
          pass
      else:
        params_copy[key] = val
    utils.ForceDictType(params_copy, parameter_types)
    params_filled = objects.FillDict(default_values, params_copy)
    return (params_copy, params_filled)
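    # Worked example (assuming string-typed parameters so ForceDictType
    # passes): with old_params={'a': '1'},
    # update_dict={'a': constants.VALUE_DEFAULT, 'b': '2'} and
    # default_values={'a': '0', 'b': '0'}, the result is
    # ({'b': '2'}, {'a': '0', 'b': '2'}): 'a' reverts to its default and
    # only appears in the filled dict.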

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    self.force = self.op.force

    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        disks = [{"size": d.size} for d in instance.disks]
        required = _ComputeDiskSize(self.op.disk_template, disks)
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
        _CheckInstanceDown(self, instance, "cannot change disk template")

    # hvparams processing
    if self.op.hvparams:
      i_hvdict, hv_new = self._GetUpdatedParams(
                             instance.hvparams, self.op.hvparams,
                             cluster.hvparams[instance.hypervisor],
                             constants.HVS_PARAMETER_TYPES)
      # local check
      hypervisor.GetHypervisor(
        instance.hypervisor).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict, be_new = self._GetUpdatedParams(
                             instance.beparams, self.op.beparams,
                             cluster.beparams[constants.PP_DEFAULT],
                             constants.BES_PARAMETER_TYPES)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    self.warn = []

    if constants.BE_MEMORY in self.op.beparams and not self.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                         instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very
          # probable, and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
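        # miss_mem > 0 means that even counting the memory the instance
        # already uses, the primary node's free memory cannot cover the
        # requested BE_MEMORY value.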
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params, new_filled_nic_params = \
          self._GetUpdatedParams(old_nic_params, update_params_dict,
                                 cluster.nicparams[constants.PP_DEFAULT],
                                 constants.NICS_PARAMETER_TYPES)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)),
                                     errors.ECODE_INVAL)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)

    return

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")
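    # Each original LV is renamed to the name _GenerateDiskTemplate chose
    # for the corresponding DRBD data child (children[0]), so the
    # existing data becomes the local half of the new mirror without
    # being copied.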

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode
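    # children[0] of a DRBD8 disk is its data LV (children[1] holds the
    # DRBD metadata), so promoting it keeps the instance's data intact
    # under the plain template.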

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    self.cfg.Update(instance, feedback_fn)

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
    assert self.dst_node is not None

    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CreateSnapshots(self, feedback_fn):
    """Creates an LVM snapshot for every disk of the instance.

    @return: List of snapshots as L{objects.Disk} instances

    """
    instance = self.instance
    src_node = instance.primary_node

    vgname = self.cfg.GetVGName()

    snap_disks = []

    for idx, disk in enumerate(instance.disks):
      feedback_fn("Creating a snapshot of disk/%s on node %s" %
                  (idx, src_node))

      # result.payload will be a snapshot of an lvm leaf of the one we
      # passed
      result = self.rpc.call_blockdev_snapshot(src_node, disk)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                        idx, src_node, msg)
        snap_disks.append(False)
      else:
        disk_id = (vgname, result.payload)
        new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                               logical_id=disk_id, physical_id=disk_id,
                               iv_name=disk.iv_name)
        snap_disks.append(new_dev)

    return snap_disks

  def _RemoveSnapshot(self, feedback_fn, snap_disks, disk_index):
    """Removes an LVM snapshot.

    @type snap_disks: list
    @param snap_disks: The list of all snapshots as returned by
        L{_CreateSnapshots}
    @type disk_index: number
    @param disk_index: Index of the snapshot to be removed
    @rtype: bool
    @return: Whether removal was successful or not

    """
    disk = snap_disks[disk_index]
    if disk:
      src_node = self.instance.primary_node

      feedback_fn("Removing snapshot of disk/%s on node %s" %
                  (disk_index, src_node))

      result = self.rpc.call_blockdev_remove(src_node, disk)
      if not result.fail_msg:
        return True

      self.LogWarning("Could not remove snapshot for disk/%d from node"
                      " %s: %s", disk_index, src_node, result.fail_msg)

    return False

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      # per-disk results
      dresults = []
      removed_snaps = [False] * len(instance.disks)
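      # removed_snaps[idx] records whether snapshot idx has already been
      # cleaned up, so the cleanup pass in the finally block below only
      # removes the leftovers.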

      snap_disks = None
      try:
        try:
          snap_disks = self._CreateSnapshots(feedback_fn)
        finally:
          if self.op.shutdown and instance.admin_up:
            feedback_fn("Starting instance %s" % instance.name)
            result = self.rpc.call_instance_start(src_node, instance,
                                                  None, None)
            msg = result.fail_msg
            if msg:
              _ShutdownInstanceDisks(self, instance)
              raise errors.OpExecError("Could not start instance: %s" % msg)

        assert len(snap_disks) == len(instance.disks)
        assert len(removed_snaps) == len(instance.disks)

        # TODO: check for size

        cluster_name = self.cfg.GetClusterName()
        for idx, dev in enumerate(snap_disks):
          feedback_fn("Exporting snapshot %s from %s to %s" %
                      (idx, src_node, dst_node.name))
          if dev:
            # FIXME: pass debug from opcode to backend
            result = self.rpc.call_snapshot_export(src_node, dev,
                                                   dst_node.name,
                                                   instance, cluster_name,
                                                   idx, self.op.debug_level)
            msg = result.fail_msg
            if msg:
              self.LogWarning("Could not export disk/%s from node %s to"
                              " node %s: %s", idx, src_node,
                              dst_node.name, msg)
              dresults.append(False)
            else:
              dresults.append(True)

            # Remove snapshot
            if self._RemoveSnapshot(feedback_fn, snap_disks, idx):
              removed_snaps[idx] = True
          else:
            dresults.append(False)

        assert len(dresults) == len(instance.disks)

        # Check for backwards compatibility
        assert compat.all(isinstance(i, bool) for i in dresults), \
               "Not all results are boolean: %r" % dresults

        feedback_fn("Finalizing export on %s" % dst_node.name)
        result = self.rpc.call_finalize_export(dst_node.name, instance,
                                               snap_disks)
        msg = result.fail_msg
        fin_resu = not msg
        if msg:
          self.LogWarning("Could not finalize export for instance %s"
                          " on node %s: %s", instance.name, dst_node.name, msg)

      finally:
        # Remove all snapshots
        assert len(removed_snaps) == len(instance.disks)
        for idx, removed in enumerate(removed_snaps):
          if not removed:
            self._RemoveSnapshot(feedback_fn, snap_disks, idx)

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    self._CleanupExports(feedback_fn)

    return fin_resu, dresults


class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.
    """
    pass

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Name.")


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_REQP = ["pattern"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has three sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()
9463 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9464 hypervisor_name = self.hypervisor
9465 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9466 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9467 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9468 hypervisor_name = cluster_info.enabled_hypervisors[0]
9470 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9473 self.rpc.call_all_instances_info(node_list,
9474 cluster_info.enabled_hypervisors)
9475 for nname, nresult in node_data.items():
9476 # first fill in static (config-based) values
9477 ninfo = cfg.GetNodeInfo(nname)
9479 "tags": list(ninfo.GetTags()),
9480 "primary_ip": ninfo.primary_ip,
9481 "secondary_ip": ninfo.secondary_ip,
9482 "offline": ninfo.offline,
9483 "drained": ninfo.drained,
9484 "master_candidate": ninfo.master_candidate,
9487 if not (ninfo.offline or ninfo.drained):
9488 nresult.Raise("Can't get data for node %s" % nname)
9489 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9491 remote_info = nresult.payload
9493 for attr in ['memory_total', 'memory_free', 'memory_dom0',
9494 'vg_size', 'vg_free', 'cpu_total']:
9495 if attr not in remote_info:
9496 raise errors.OpExecError("Node '%s' didn't return attribute"
9497 " '%s'" % (nname, attr))
9498 if not isinstance(remote_info[attr], int):
9499 raise errors.OpExecError("Node '%s' returned invalid value"
9501 (nname, attr, remote_info[attr]))
9502 # compute memory used by primary instances
9503 i_p_mem = i_p_up_mem = 0
9504 for iinfo, beinfo in i_list:
9505 if iinfo.primary_node == nname:
9506 i_p_mem += beinfo[constants.BE_MEMORY]
9507 if iinfo.name not in node_iinfo[nname].payload:
9510 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9511 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9512 remote_info['memory_free'] -= max(0, i_mem_diff)
9515 i_p_up_mem += beinfo[constants.BE_MEMORY]
9517 # compute memory used by instances
9519 "total_memory": remote_info['memory_total'],
9520 "reserved_memory": remote_info['memory_dom0'],
9521 "free_memory": remote_info['memory_free'],
9522 "total_disk": remote_info['vg_size'],
9523 "free_disk": remote_info['vg_free'],
9524 "total_cpus": remote_info['cpu_total'],
9525 "i_pri_memory": i_p_mem,
9526 "i_pri_up_memory": i_p_up_mem,
9530 node_results[nname] = pnr
9531 data["nodes"] = node_results
9535 for iinfo, beinfo in i_list:
9537 for nic in iinfo.nics:
9538 filled_params = objects.FillDict(
9539 cluster_info.nicparams[constants.PP_DEFAULT],
9541 nic_dict = {"mac": nic.mac,
9543 "mode": filled_params[constants.NIC_MODE],
9544 "link": filled_params[constants.NIC_LINK],
9546 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9547 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9548 nic_data.append(nic_dict)
9550 "tags": list(iinfo.GetTags()),
9551 "admin_up": iinfo.admin_up,
9552 "vcpus": beinfo[constants.BE_VCPUS],
9553 "memory": beinfo[constants.BE_MEMORY],
9555 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9557 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9558 "disk_template": iinfo.disk_template,
9559 "hypervisor": iinfo.hypervisor,
9561 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9563 instance_data[iinfo.name] = pir
9565 data["instances"] = instance_data
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request
  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
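
  # Illustrative note (not part of the original code): in_text is the
  # complete JSON document handed to the external script: the cluster data
  # computed by _ComputeClusterData, with the mode-specific request merged
  # in under the "request" key, e.g. (placeholders):
  #
  #   {"version": ..., "nodes": {...}, "instances": {...},
  #    "request": {"type": "allocate", "name": ..., ...}}
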
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
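
  # Illustrative sketch (not part of the original code): a typical caller
  # drives the allocator like this ("hail" stands for whatever allocator
  # script is configured):
  #
  #   ial = IAllocator(self.cfg, self.rpc, mode=..., ...)
  #   ial.Run("hail")
  #   if not ial.success:
  #     raise errors.OpPrereqError("Can't compute nodes: %s" % ial.info,
  #                                errors.ECODE_NORES)
  #   target_nodes = ial.result
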
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
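
# Illustrative sketch (not part of the original code): a well-formed
# allocator reply, as validated by _ValidateResult above, looks roughly
# like
#
#   {"success": true, "info": "allocation successful",
#    "result": ["node1.example.com", "node2.example.com"]}
#
# Older scripts may return the node list under "nodes"; the backwards
# compatibility shim above renames it to "result".
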

class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
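
  # Illustrative sketch (not part of the original code): assuming the
  # OpTestAllocator opcode from the opcodes module, an "in"-direction
  # allocation test would be submitted roughly as (placeholders):
  #
  #   op = opcodes.OpTestAllocator(direction=constants.IALLOCATOR_DIR_IN,
  #                                mode=constants.IALLOCATOR_MODE_ALLOC,
  #                                name="instance1.example.com",
  #                                mem_size=512, disks=[...], ...)
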
  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
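
# Illustrative note (not part of the original code): with direction "in"
# the LU only returns the JSON that would be fed to the allocator; with
# direction "out" it actually runs the named allocator script. Validation
# is skipped (validate=False) so that even a malformed reply is returned
# verbatim for inspection.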