# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions
import logging
import time

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  # NOTE(review): these class-level defaults were reconstructed; subclasses
  # visibly redefine HPATH/HTYPE, and __init__ below iterates self._OP_REQP
  # and ExpandNames tests self.REQ_BGL -- confirm against upstream.
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # Lazily-created SshRunner, see the ssh property below
    self.__ssh = None
    # logging shortcuts, borrowed from the processor
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets; None means the LU uses classic CheckPrereq/Exec
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object, creating it lazily on first access.

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left as as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possible
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, ecc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-node tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
    @return: the new Exec result, based on the previous result

    """
    # API must be kept, thus we ignore the unused argument and could
    # be a function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    If should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we're really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  # No hooks path/type: the hooks machinery skips LUs whose HPATH is None
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"
class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts to the owning LU's configuration and RPC runner
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklets.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    # None/empty means "all instances", already canonical in the config
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted
473 def _CheckOutputFields(static, dynamic, selected):
474 """Checks whether all selected fields are valid.
476 @type static: L{utils.FieldSet}
477 @param static: static fields set
478 @type dynamic: L{utils.FieldSet}
479 @param dynamic: dynamic fields set
486 delta = f.NonMatching(selected)
488 raise errors.OpPrereqError("Unknown output fields selected: %s"
489 % ",".join(delta), errors.ECODE_INVAL)
492 def _CheckBooleanOpField(op, name):
493 """Validates boolean opcode parameters.
495 This will ensure that an opcode parameter is either a boolean value,
496 or None (but that it always exists).
499 val = getattr(op, name, None)
500 if not (val is None or isinstance(val, bool)):
501 raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
502 (name, str(val)), errors.ECODE_INVAL)
503 setattr(op, name, val)
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  @raise errors.OpPrereqError: if any global hypervisor parameter is used

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
521 def _CheckNodeOnline(lu, node):
522 """Ensure that a given node is online.
524 @param lu: the LU on behalf of which we make the check
525 @param node: the node to check
526 @raise errors.OpPrereqError: if the node is offline
529 if lu.cfg.GetNodeInfo(node).offline:
530 raise errors.OpPrereqError("Can't use offline node %s" % node,
534 def _CheckNodeNotDrained(lu, node):
535 """Ensure that a given node is not drained.
537 @param lu: the LU on behalf of which we make the check
538 @param node: the node to check
539 @raise errors.OpPrereqError: if the node is drained
542 if lu.cfg.GetNodeInfo(node).drained:
543 raise errors.OpPrereqError("Can't use drained node %s" % node,
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)
def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)
def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  @raise errors.OpPrereqError: if the template is unknown, or if it is
      file-based while file storage is disabled

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    # file-based templates additionally need file storage support
    _RequireFileStorage()
def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  @raise errors.OpPrereqError: if the storage type is unknown, or if it is
      file-based while file storage is disabled

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()
def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running.

  Checks both the configured state (admin_up) and the runtime state as
  reported by the instance's primary node.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param reason: text appended to the error messages
  @raise errors.OpPrereqError: if the instance is configured up or running

  """
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)
616 def _ExpandItemName(fn, name, kind):
617 """Expand an item name.
619 @param fn: the function to use for expansion
620 @param name: requested item name
621 @param kind: text description ('Node' or 'Instance')
622 @return: the resolved (full) name
623 @raise errors.OpPrereqError: if the item is not found
627 if full_name is None:
628 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
643 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
644 memory, vcpus, nics, disk_template, disks,
645 bep, hvp, hypervisor_name):
646 """Builds instance related env variables for hooks
648 This builds the hook environment from individual variables.
651 @param name: the name of the instance
652 @type primary_node: string
653 @param primary_node: the name of the instance's primary node
654 @type secondary_nodes: list
655 @param secondary_nodes: list of secondary nodes as strings
656 @type os_type: string
657 @param os_type: the name of the instance's OS
658 @type status: boolean
659 @param status: the should_run status of the instance
661 @param memory: the memory size of the instance
663 @param vcpus: the count of VCPUs the instance has
665 @param nics: list of tuples (ip, mac, mode, link) representing
666 the NICs the instance has
667 @type disk_template: string
668 @param disk_template: the disk template of the instance
670 @param disks: the list of (size, mode) pairs
672 @param bep: the backend parameters for the instance
674 @param hvp: the hypervisor parameters for the instance
675 @type hypervisor_name: string
676 @param hypervisor_name: the hypervisor for the instance
678 @return: the hook environment for this instance
687 "INSTANCE_NAME": name,
688 "INSTANCE_PRIMARY": primary_node,
689 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
690 "INSTANCE_OS_TYPE": os_type,
691 "INSTANCE_STATUS": str_status,
692 "INSTANCE_MEMORY": memory,
693 "INSTANCE_VCPUS": vcpus,
694 "INSTANCE_DISK_TEMPLATE": disk_template,
695 "INSTANCE_HYPERVISOR": hypervisor_name,
699 nic_count = len(nics)
700 for idx, (ip, mac, mode, link) in enumerate(nics):
703 env["INSTANCE_NIC%d_IP" % idx] = ip
704 env["INSTANCE_NIC%d_MAC" % idx] = mac
705 env["INSTANCE_NIC%d_MODE" % idx] = mode
706 env["INSTANCE_NIC%d_LINK" % idx] = link
707 if mode == constants.NIC_MODE_BRIDGED:
708 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
712 env["INSTANCE_NIC_COUNT"] = nic_count
715 disk_count = len(disks)
716 for idx, (size, mode) in enumerate(disks):
717 env["INSTANCE_DISK%d_SIZE" % idx] = size
718 env["INSTANCE_DISK%d_MODE" % idx] = mode
722 env["INSTANCE_DISK_COUNT"] = disk_count
724 for source, kind in [(bep, "BE"), (hvp, "HV")]:
725 for key, value in source.items():
726 env["INSTANCE_%s_%s" % (kind, key)] = value
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  # fill each NIC's parameters from the cluster-level defaults
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  Promotes nodes to master candidate role as needed and warns when more
  candidates exist than desired.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))
809 def _DecideSelfPromotion(lu, exceptions=None):
810 """Decide whether I should promote myself as a master candidate.
813 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
814 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
815 # the new node will increase mc_max with one, so:
816 mc_should = min(mc_should + 1, cp_size)
817 return mc_now < mc_should
def _CheckNicsBridgesExist(lu, target_nics, target_node,
                           profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    # default to the instance's primary node
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
845 def _CheckOSVariant(os_obj, name):
846 """Check whether an OS name conforms to the os variants specification.
848 @type os_obj: L{objects.OS}
849 @param os_obj: OS object to check
851 @param name: OS name passed by the user, to check for validity
854 if not os_obj.supported_variants:
857 variant = name.split("+", 1)[1]
859 raise errors.OpPrereqError("OS name must include a variant",
862 if variant not in os_obj.supported_variants:
863 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
866 def _GetNodeInstancesInner(cfg, fn):
867 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Return the indices of an instance's disks that are faulty on a node.

  @return: list of disk indices whose local-disk status is faulty

  """
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
923 def _FormatTimestamp(secs):
924 """Formats a Unix timestamp with the local timezone.
927 return time.strftime("%F %T %Z", time.gmtime(secs))
class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    # post-init hooks run only on the master node
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True
class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_STATE)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # deliberate best-effort: hook failures must not block destruction
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master
def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
                            warn_days=constants.SSL_CERT_EXPIRATION_WARN,
                            error_days=constants.SSL_CERT_EXPIRATION_ERROR):
  """Verifies certificate details for LUVerifyCluster.

  @return: a tuple of (error type or None, message or None)

  """
  if expired:
    msg = "Certificate %s is expired" % filename

    if not_before is not None and not_after is not None:
      msg += (" (valid from %s to %s)" %
              (_FormatTimestamp(not_before),
               _FormatTimestamp(not_after)))
    elif not_before is not None:
      msg += " (valid from %s)" % _FormatTimestamp(not_before)
    elif not_after is not None:
      msg += " (valid until %s)" % _FormatTimestamp(not_after)

    return (LUVerifyCluster.ETYPE_ERROR, msg)

  elif not_before is not None and not_before > now:
    return (LUVerifyCluster.ETYPE_WARNING,
            "Certificate %s not yet valid (valid from %s)" %
            (filename, _FormatTimestamp(not_before)))

  elif not_after is not None:
    remaining_days = int((not_after - now) / (24 * 3600))

    msg = ("Certificate %s expires in %d days" % (filename, remaining_days))

    if remaining_days <= error_days:
      return (LUVerifyCluster.ETYPE_ERROR, msg)

    if remaining_days <= warn_days:
      return (LUVerifyCluster.ETYPE_WARNING, msg)

  return (None, None)
1060 def _VerifyCertificate(filename):
1061 """Verifies a certificate for LUVerifyCluster.
1063 @type filename: string
1064 @param filename: Path to PEM file
1068 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1069 utils.ReadFile(filename))
1070 except Exception, err: # pylint: disable-msg=W0703
1071 return (LUVerifyCluster.ETYPE_ERROR,
1072 "Failed to load X509 certificate %s: %s" % (filename, err))
1074 # Depending on the pyOpenSSL version, this can just return (None, None)
1075 (not_before, not_after) = utils.GetX509CertValidity(cert)
1077 return _VerifyCertificateInner(filename, cert.has_expired(),
1078 not_before, not_after, time.time())
class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  # shared locks are used (see ExpandNames), so the BGL is not needed
  REQ_BGL = False

  # object types used to qualify error codes in the parseable output
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  # error code definitions: (object type, error code name)
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  # keyword argument used by _Error/_ErrorIf to carry the severity
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: diction of {secondary-node: list of instances} of all peers
        of this node (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successfull (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)

    """
    def __init__(self, offline=False):
      """Initializes this class.

      """
      # config/runtime data, filled in later by the verification code
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      # failure flags, set while processing the RPC results
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
1159 def ExpandNames(self):
1160 self.needed_locks = {
1161 locking.LEVEL_NODE: locking.ALL_SET,
1162 locking.LEVEL_INSTANCE: locking.ALL_SET,
1164 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1166 def _Error(self, ecode, item, msg, *args, **kwargs):
1167 """Format an error message.
1169 Based on the opcode's error_codes parameter, either format a
1170 parseable error code, or a simpler error string.
1172 This must be called only from Exec and functions called from Exec.
1175 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1177 # first complete the msg
1180 # then format the whole message
1181 if self.op.error_codes:
1182 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1188 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1189 # and finally report it via the feedback_fn
1190 self._feedback_fn(" - %s" % msg)
1192 def _ErrorIf(self, cond, *args, **kwargs):
1193 """Log an error message if the passed condition is True.
1196 cond = bool(cond) or self.op.debug_simulate_errors
1198 self._Error(*args, **kwargs)
1199 # do not mark the operation as failed for WARN cases only
1200 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1201 self.bad = self.bad or cond
1203 def _VerifyNode(self, ninfo, nresult):
1204 """Run multiple tests against a node.
1208 - compares ganeti version
1209 - checks vg existence and size > 20G
1210 - checks config file checksum
1211 - checks ssh to other nodes
1213 @type ninfo: L{objects.Node}
1214 @param ninfo: the node to check
1215 @param nresult: the results from the node
1217 @return: whether overall this call was successful (and we can expect
1218 reasonable values in the respose)
1222 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1224 # main result, nresult should be a non-empty dict
1225 test = not nresult or not isinstance(nresult, dict)
1226 _ErrorIf(test, self.ENODERPC, node,
1227 "unable to verify node: no data returned")
1231 # compares ganeti version
1232 local_version = constants.PROTOCOL_VERSION
1233 remote_version = nresult.get("version", None)
1234 test = not (remote_version and
1235 isinstance(remote_version, (list, tuple)) and
1236 len(remote_version) == 2)
1237 _ErrorIf(test, self.ENODERPC, node,
1238 "connection to node returned invalid data")
1242 test = local_version != remote_version[0]
1243 _ErrorIf(test, self.ENODEVERSION, node,
1244 "incompatible protocol versions: master %s,"
1245 " node %s", local_version, remote_version[0])
1249 # node seems compatible, we can actually try to look into its results
1251 # full package version
1252 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1253 self.ENODEVERSION, node,
1254 "software version mismatch: master %s, node %s",
1255 constants.RELEASE_VERSION, remote_version[1],
1256 code=self.ETYPE_WARNING)
1258 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1259 if isinstance(hyp_result, dict):
1260 for hv_name, hv_result in hyp_result.iteritems():
1261 test = hv_result is not None
1262 _ErrorIf(test, self.ENODEHV, node,
1263 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1266 test = nresult.get(constants.NV_NODESETUP,
1267 ["Missing NODESETUP results"])
1268 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1273 def _VerifyNodeTime(self, ninfo, nresult,
1274 nvinfo_starttime, nvinfo_endtime):
1275 """Check the node time.
1277 @type ninfo: L{objects.Node}
1278 @param ninfo: the node to check
1279 @param nresult: the remote results for the node
1280 @param nvinfo_starttime: the start time of the RPC call
1281 @param nvinfo_endtime: the end time of the RPC call
1285 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1287 ntime = nresult.get(constants.NV_TIME, None)
1289 ntime_merged = utils.MergeTime(ntime)
1290 except (ValueError, TypeError):
1291 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1294 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1295 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1296 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1297 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1301 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1302 "Node time diverges by at least %s from master node time",
1305 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1306 """Check the node time.
1308 @type ninfo: L{objects.Node}
1309 @param ninfo: the node to check
1310 @param nresult: the remote results for the node
1311 @param vg_name: the configured VG name
1318 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1320 # checks vg existence and size > 20G
1321 vglist = nresult.get(constants.NV_VGLIST, None)
1323 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1325 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1326 constants.MIN_VG_SIZE)
1327 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1330 pvlist = nresult.get(constants.NV_PVLIST, None)
1331 test = pvlist is None
1332 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1334 # check that ':' is not present in PV names, since it's a
1335 # special character for lvcreate (denotes the range of PEs to
1337 for _, pvname, owner_vg in pvlist:
1338 test = ":" in pvname
1339 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1340 " '%s' of VG '%s'", pvname, owner_vg)
1342 def _VerifyNodeNetwork(self, ninfo, nresult):
1343 """Check the node time.
1345 @type ninfo: L{objects.Node}
1346 @param ninfo: the node to check
1347 @param nresult: the remote results for the node
1351 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1353 test = constants.NV_NODELIST not in nresult
1354 _ErrorIf(test, self.ENODESSH, node,
1355 "node hasn't returned node ssh connectivity data")
1357 if nresult[constants.NV_NODELIST]:
1358 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1359 _ErrorIf(True, self.ENODESSH, node,
1360 "ssh communication with node '%s': %s", a_node, a_msg)
1362 test = constants.NV_NODENETTEST not in nresult
1363 _ErrorIf(test, self.ENODENET, node,
1364 "node hasn't returned node tcp connectivity data")
1366 if nresult[constants.NV_NODENETTEST]:
1367 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1369 _ErrorIf(True, self.ENODENET, node,
1370 "tcp communication with node '%s': %s",
1371 anode, nresult[constants.NV_NODENETTEST][anode])
1373 test = constants.NV_MASTERIP not in nresult
1374 _ErrorIf(test, self.ENODENET, node,
1375 "node hasn't returned node master IP reachability data")
1377 if not nresult[constants.NV_MASTERIP]:
1378 if node == self.master_node:
1379 msg = "the master node cannot reach the master IP (not configured?)"
1381 msg = "cannot reach the master IP"
1382 _ErrorIf(True, self.ENODENET, node, msg)
1385 def _VerifyInstance(self, instance, instanceconfig, node_image):
1386 """Verify an instance.
1388 This function checks to see if the required block devices are
1389 available on the instance's node.
1392 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1393 node_current = instanceconfig.primary_node
1395 node_vol_should = {}
1396 instanceconfig.MapLVsByNode(node_vol_should)
1398 for node in node_vol_should:
1399 n_img = node_image[node]
1400 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1401 # ignore missing volumes on offline or broken nodes
1403 for volume in node_vol_should[node]:
1404 test = volume not in n_img.volumes
1405 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1406 "volume %s missing on node %s", volume, node)
1408 if instanceconfig.admin_up:
1409 pri_img = node_image[node_current]
1410 test = instance not in pri_img.instances and not pri_img.offline
1411 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1412 "instance not running on its primary node %s",
1415 for node, n_img in node_image.items():
1416 if (not node == node_current):
1417 test = instance in n_img.instances
1418 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1419 "instance should not run on node %s", node)
1421 def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1422 """Verify if there are any unknown volumes in the cluster.
1424 The .os, .swap and backup volumes are ignored. All other volumes are
1425 reported as unknown.
1428 for node, n_img in node_image.items():
1429 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1430 # skip non-healthy nodes
1432 for volume in n_img.volumes:
1433 test = (node not in node_vol_should or
1434 volume not in node_vol_should[node])
1435 self._ErrorIf(test, self.ENODEORPHANLV, node,
1436 "volume %s is unknown", volume)
1438 def _VerifyOrphanInstances(self, instancelist, node_image):
1439 """Verify the list of running instances.
1441 This checks what instances are running but unknown to the cluster.
1444 for node, n_img in node_image.items():
1445 for o_inst in n_img.instances:
1446 test = o_inst not in instancelist
1447 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1448 "instance %s on node %s should not exist", o_inst, node)
1450 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1451 """Verify N+1 Memory Resilience.
1453 Check that if one single node dies we can still start all the
1454 instances it was primary for.
1457 for node, n_img in node_image.items():
1458 # This code checks that every node which is now listed as
1459 # secondary has enough memory to host all instances it is
1460 # supposed to should a single other node in the cluster fail.
1461 # FIXME: not ready for failover to an arbitrary node
1462 # FIXME: does not support file-backed instances
1463 # WARNING: we currently take into account down instances as well
1464 # as up ones, considering that even if they're down someone
1465 # might want to start them even in the event of a node failure.
1466 for prinode, instances in n_img.sbp.items():
1468 for instance in instances:
1469 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1470 if bep[constants.BE_AUTO_BALANCE]:
1471 needed_mem += bep[constants.BE_MEMORY]
1472 test = n_img.mfree < needed_mem
1473 self._ErrorIf(test, self.ENODEN1, node,
1474 "not enough memory on to accommodate"
1475 " failovers should peer node %s fail", prinode)
1477 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1479 """Verifies and computes the node required file checksums.
1481 @type ninfo: L{objects.Node}
1482 @param ninfo: the node to check
1483 @param nresult: the remote results for the node
1484 @param file_list: required list of files
1485 @param local_cksum: dictionary of local files and their checksums
1486 @param master_files: list of files that only masters should have
1490 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1492 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1493 test = not isinstance(remote_cksum, dict)
1494 _ErrorIf(test, self.ENODEFILECHECK, node,
1495 "node hasn't returned file checksum data")
1499 for file_name in file_list:
1500 node_is_mc = ninfo.master_candidate
1501 must_have = (file_name not in master_files) or node_is_mc
1503 test1 = file_name not in remote_cksum
1505 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1507 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1508 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1509 "file '%s' missing", file_name)
1510 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1511 "file '%s' has wrong checksum", file_name)
1512 # not candidate and this is not a must-have file
1513 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1514 "file '%s' should not exist on non master"
1515 " candidates (and the file is outdated)", file_name)
1516 # all good, except non-master/non-must have combination
1517 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1518 "file '%s' should not exist"
1519 " on non master candidates", file_name)
1521 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1522 """Verifies and the node DRBD status.
1524 @type ninfo: L{objects.Node}
1525 @param ninfo: the node to check
1526 @param nresult: the remote results for the node
1527 @param instanceinfo: the dict of instances
1528 @param drbd_map: the DRBD map as returned by
1529 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1533 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1535 # compute the DRBD minors
1537 for minor, instance in drbd_map[node].items():
1538 test = instance not in instanceinfo
1539 _ErrorIf(test, self.ECLUSTERCFG, None,
1540 "ghost instance '%s' in temporary DRBD map", instance)
1541 # ghost instance should not be running, but otherwise we
1542 # don't give double warnings (both ghost instance and
1543 # unallocated minor in use)
1545 node_drbd[minor] = (instance, False)
1547 instance = instanceinfo[instance]
1548 node_drbd[minor] = (instance.name, instance.admin_up)
1550 # and now check them
1551 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1552 test = not isinstance(used_minors, (tuple, list))
1553 _ErrorIf(test, self.ENODEDRBD, node,
1554 "cannot parse drbd status file: %s", str(used_minors))
1556 # we cannot check drbd status
1559 for minor, (iname, must_exist) in node_drbd.items():
1560 test = minor not in used_minors and must_exist
1561 _ErrorIf(test, self.ENODEDRBD, node,
1562 "drbd minor %d of instance %s is not active", minor, iname)
1563 for minor in used_minors:
1564 test = minor not in node_drbd
1565 _ErrorIf(test, self.ENODEDRBD, node,
1566 "unallocated drbd minor %d is in use", minor)
1568 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1569 """Verifies and updates the node volume data.
1571 This function will update a L{NodeImage}'s internal structures
1572 with data from the remote call.
1574 @type ninfo: L{objects.Node}
1575 @param ninfo: the node to check
1576 @param nresult: the remote results for the node
1577 @param nimg: the node image object
1578 @param vg_name: the configured VG name
1582 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1584 nimg.lvm_fail = True
1585 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1588 elif isinstance(lvdata, basestring):
1589 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1590 utils.SafeEncode(lvdata))
1591 elif not isinstance(lvdata, dict):
1592 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1594 nimg.volumes = lvdata
1595 nimg.lvm_fail = False
1597 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1598 """Verifies and updates the node instance list.
1600 If the listing was successful, then updates this node's instance
1601 list. Otherwise, it marks the RPC call as failed for the instance
1604 @type ninfo: L{objects.Node}
1605 @param ninfo: the node to check
1606 @param nresult: the remote results for the node
1607 @param nimg: the node image object
1610 idata = nresult.get(constants.NV_INSTANCELIST, None)
1611 test = not isinstance(idata, list)
1612 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1613 " (instancelist): %s", utils.SafeEncode(str(idata)))
1615 nimg.hyp_fail = True
1617 nimg.instances = idata
1619 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1620 """Verifies and computes a node information map
1622 @type ninfo: L{objects.Node}
1623 @param ninfo: the node to check
1624 @param nresult: the remote results for the node
1625 @param nimg: the node image object
1626 @param vg_name: the configured VG name
1630 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1632 # try to read free memory (from the hypervisor)
1633 hv_info = nresult.get(constants.NV_HVINFO, None)
1634 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1635 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1638 nimg.mfree = int(hv_info["memory_free"])
1639 except (ValueError, TypeError):
1640 _ErrorIf(True, self.ENODERPC, node,
1641 "node returned invalid nodeinfo, check hypervisor")
1643 # FIXME: devise a free space model for file based instances as well
1644 if vg_name is not None:
1645 test = (constants.NV_VGLIST not in nresult or
1646 vg_name not in nresult[constants.NV_VGLIST])
1647 _ErrorIf(test, self.ENODELVM, node,
1648 "node didn't return data for the volume group '%s'"
1649 " - it is either missing or broken", vg_name)
1652 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1653 except (ValueError, TypeError):
1654 _ErrorIf(True, self.ENODERPC, node,
1655 "node returned invalid LVM info, check LVM status")
1657 def CheckPrereq(self):
1658 """Check prerequisites.
1660 Transform the list of checks we're going to skip into a set and check that
1661 all its members are valid.
1664 self.skip_set = frozenset(self.op.skip_checks)
1665 if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1666 raise errors.OpPrereqError("Invalid checks to be skipped specified",
1669 def BuildHooksEnv(self):
1672 Cluster-Verify hooks just ran in the post phase and their failure makes
1673 the output be logged in the verify output and the verification to fail.
1676 all_nodes = self.cfg.GetNodeList()
1678 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1680 for node in self.cfg.GetAllNodesInfo().values():
1681 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1683 return env, [], all_nodes
1685 def Exec(self, feedback_fn):
1686 """Verify integrity of cluster, performing various test on nodes.
1690 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1691 verbose = self.op.verbose
1692 self._feedback_fn = feedback_fn
1693 feedback_fn("* Verifying global settings")
1694 for msg in self.cfg.VerifyConfig():
1695 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1697 # Check the cluster certificates
1698 for cert_filename in constants.ALL_CERT_FILES:
1699 (errcode, msg) = _VerifyCertificate(cert_filename)
1700 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1702 vg_name = self.cfg.GetVGName()
1703 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1704 cluster = self.cfg.GetClusterInfo()
1705 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1706 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1707 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1708 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1709 for iname in instancelist)
1710 i_non_redundant = [] # Non redundant instances
1711 i_non_a_balanced = [] # Non auto-balanced instances
1712 n_offline = 0 # Count of offline nodes
1713 n_drained = 0 # Count of nodes being drained
1714 node_vol_should = {}
1716 # FIXME: verify OS list
1717 # do local checksums
1718 master_files = [constants.CLUSTER_CONF_FILE]
1719 master_node = self.master_node = self.cfg.GetMasterNode()
1720 master_ip = self.cfg.GetMasterIP()
1722 file_names = ssconf.SimpleStore().GetFileList()
1723 file_names.extend(constants.ALL_CERT_FILES)
1724 file_names.extend(master_files)
1725 if cluster.modify_etc_hosts:
1726 file_names.append(constants.ETC_HOSTS)
1728 local_checksums = utils.FingerprintFiles(file_names)
1730 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1731 node_verify_param = {
1732 constants.NV_FILELIST: file_names,
1733 constants.NV_NODELIST: [node.name for node in nodeinfo
1734 if not node.offline],
1735 constants.NV_HYPERVISOR: hypervisors,
1736 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1737 node.secondary_ip) for node in nodeinfo
1738 if not node.offline],
1739 constants.NV_INSTANCELIST: hypervisors,
1740 constants.NV_VERSION: None,
1741 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1742 constants.NV_NODESETUP: None,
1743 constants.NV_TIME: None,
1744 constants.NV_MASTERIP: (master_node, master_ip),
1747 if vg_name is not None:
1748 node_verify_param[constants.NV_VGLIST] = None
1749 node_verify_param[constants.NV_LVLIST] = vg_name
1750 node_verify_param[constants.NV_PVLIST] = [vg_name]
1751 node_verify_param[constants.NV_DRBDLIST] = None
1753 # Build our expected cluster state
1754 node_image = dict((node.name, self.NodeImage(offline=node.offline))
1755 for node in nodeinfo)
1757 for instance in instancelist:
1758 inst_config = instanceinfo[instance]
1760 for nname in inst_config.all_nodes:
1761 if nname not in node_image:
1763 gnode = self.NodeImage()
1765 node_image[nname] = gnode
1767 inst_config.MapLVsByNode(node_vol_should)
1769 pnode = inst_config.primary_node
1770 node_image[pnode].pinst.append(instance)
1772 for snode in inst_config.secondary_nodes:
1773 nimg = node_image[snode]
1774 nimg.sinst.append(instance)
1775 if pnode not in nimg.sbp:
1776 nimg.sbp[pnode] = []
1777 nimg.sbp[pnode].append(instance)
1779 # At this point, we have the in-memory data structures complete,
1780 # except for the runtime information, which we'll gather next
1782 # Due to the way our RPC system works, exact response times cannot be
1783 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1784 # time before and after executing the request, we can at least have a time
1786 nvinfo_starttime = time.time()
1787 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1788 self.cfg.GetClusterName())
1789 nvinfo_endtime = time.time()
1791 all_drbd_map = self.cfg.ComputeDRBDMap()
1793 feedback_fn("* Verifying node status")
1794 for node_i in nodeinfo:
1796 nimg = node_image[node]
1800 feedback_fn("* Skipping offline node %s" % (node,))
1804 if node == master_node:
1806 elif node_i.master_candidate:
1807 ntype = "master candidate"
1808 elif node_i.drained:
1814 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1816 msg = all_nvinfo[node].fail_msg
1817 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1819 nimg.rpc_fail = True
1822 nresult = all_nvinfo[node].payload
1824 nimg.call_ok = self._VerifyNode(node_i, nresult)
1825 self._VerifyNodeNetwork(node_i, nresult)
1826 self._VerifyNodeLVM(node_i, nresult, vg_name)
1827 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1829 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1830 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1832 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1833 self._UpdateNodeInstances(node_i, nresult, nimg)
1834 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1836 feedback_fn("* Verifying instance status")
1837 for instance in instancelist:
1839 feedback_fn("* Verifying instance %s" % instance)
1840 inst_config = instanceinfo[instance]
1841 self._VerifyInstance(instance, inst_config, node_image)
1842 inst_nodes_offline = []
1844 pnode = inst_config.primary_node
1845 pnode_img = node_image[pnode]
1846 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1847 self.ENODERPC, pnode, "instance %s, connection to"
1848 " primary node failed", instance)
1850 if pnode_img.offline:
1851 inst_nodes_offline.append(pnode)
1853 # If the instance is non-redundant we cannot survive losing its primary
1854 # node, so we are not N+1 compliant. On the other hand we have no disk
1855 # templates with more than one secondary so that situation is not well
1857 # FIXME: does not support file-backed instances
1858 if not inst_config.secondary_nodes:
1859 i_non_redundant.append(instance)
1860 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
1861 instance, "instance has multiple secondary nodes: %s",
1862 utils.CommaJoin(inst_config.secondary_nodes),
1863 code=self.ETYPE_WARNING)
1865 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1866 i_non_a_balanced.append(instance)
1868 for snode in inst_config.secondary_nodes:
1869 s_img = node_image[snode]
1870 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
1871 "instance %s, connection to secondary node failed", instance)
1874 inst_nodes_offline.append(snode)
1876 # warn that the instance lives on offline nodes
1877 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1878 "instance lives on offline node(s) %s",
1879 utils.CommaJoin(inst_nodes_offline))
1880 # ... or ghost nodes
1881 for node in inst_config.all_nodes:
1882 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
1883 "instance lives on ghost node %s", node)
1885 feedback_fn("* Verifying orphan volumes")
1886 self._VerifyOrphanVolumes(node_vol_should, node_image)
1888 feedback_fn("* Verifying orphan instances")
1889 self._VerifyOrphanInstances(instancelist, node_image)
1891 if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1892 feedback_fn("* Verifying N+1 Memory redundancy")
1893 self._VerifyNPlusOneMemory(node_image, instanceinfo)
1895 feedback_fn("* Other Notes")
1897 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
1898 % len(i_non_redundant))
1900 if i_non_a_balanced:
1901 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
1902 % len(i_non_a_balanced))
1905 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
1908 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
1912 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1913 """Analyze the post-hooks' result
1915 This method analyses the hook result, handles it, and sends some
1916 nicely-formatted feedback back to the user.
1918 @param phase: one of L{constants.HOOKS_PHASE_POST} or
1919 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1920 @param hooks_results: the results of the multi-node hooks rpc call
1921 @param feedback_fn: function used send feedback back to the caller
1922 @param lu_result: previous Exec result
1923 @return: the new Exec result, based on the previous result
1927 # We only really run POST phase hooks, and are only interested in
1929 if phase == constants.HOOKS_PHASE_POST:
1930 # Used to change hooks' output to proper indentation
1931 indent_re = re.compile('^', re.M)
1932 feedback_fn("* Hooks Results")
1933 assert hooks_results, "invalid result from hooks"
1935 for node_name in hooks_results:
1936 res = hooks_results[node_name]
1938 test = msg and not res.offline
1939 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1940 "Communication failure in hooks execution: %s", msg)
1941 if res.offline or msg:
1942 # No need to investigate payload if node is offline or gave an error.
1943 # override manually lu_result here as _ErrorIf only
1944 # overrides self.bad
1947 for script, hkr, output in res.payload:
1948 test = hkr == constants.HKR_FAIL
1949 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1950 "Script %s failed, output:", script)
1952 output = indent_re.sub(' ', output)
1953 feedback_fn("%s" % output)
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  # shared locks are used (see ExpandNames), so the BGL is not needed
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    # map of (node, volume) to owning instance, for all LVs that
    # should exist for running, network-mirrored instances
    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  # shared locks are used (see ExpandNames), so the BGL is not needed
  REQ_BGL = False

  def ExpandNames(self):
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      # lock only the requested instances and their primary nodes
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      # no explicit list: process all instances
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @rtype: boolean
    @return: whether any size mismatch was found (and fixed)

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # NOTE(review): sizes are assumed to be reported in MiB here;
        # confirm against the blockdev_getsizes RPC implementation
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env: run on the master before, on all nodes after."""
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
    mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetNodeList()
    return env, [mn], all_nodes

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = utils.GetHostInfo(self.op.name)

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    # a rename must change the name or the IP, otherwise it is a no-op
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
    if new_ip != old_ip:
      # the new master IP must not already be in use on the network
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network. Aborting." %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
        node_list.remove(master)
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)

      # restart the master role with the (possibly new) IP
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)
def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      # short-circuit as soon as any child is LVM-based; without this
      # "return True" a child hit would never propagate to the caller
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV
class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER

  def CheckArguments(self):
    """Check parameters: normalize missing opcode attributes to None."""
    for attr in ["candidate_pool_size",
                 "uid_pool", "add_uids", "remove_uids"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    if self.op.candidate_pool_size is not None:
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
      except (ValueError, TypeError), err:
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
                                   str(err), errors.ECODE_INVAL)
      # the pool must never be empty
      if self.op.candidate_pool_size < 1:
        raise errors.OpPrereqError("At least one master candidate needed",
    _CheckBooleanOpField(self.op, "maintain_node_health")

    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env: run on the master node only."""
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    # disabling LVM (empty vg_name) is only allowed with no LVM instances
    if self.op.vg_name is not None and not self.op.vg_name:
      instances = self.cfg.GetAllInstancesInfo().values()
      for inst in instances:
        for disk in inst.disks:
          if _RecursiveCheckIfLVMBased(disk):
            raise errors.OpPrereqError("Cannot disable lvm storage while"
                                       " lvm-based instances exist",
    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              constants.MIN_VG_SIZE)
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = objects.FillDict(
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = objects.FillDict(
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
                              (instance.name, nic_idx))
      raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                 "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      if not isinstance(self.op.hvparams, dict):
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
      if not isinstance(self.op.os_hvp, dict):
        raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
      for os_name, hvs in self.op.os_hvp.items():
        if not isinstance(hvs, dict):
          raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
                                      " input"), errors.ECODE_INVAL)
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      if not self.hv_list:
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
                                   " least one member",
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
                                   utils.CommaJoin(invalid_hvs),
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        if hv not in new_hvp:
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    self.cfg.Update(self.cluster, feedback_fn)
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  # the master already has all these files
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
  # each enabled hypervisor may need its own ancillary files too
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    # only files present on the master are pushed out
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """

  def ExpandNames(self):
    # all nodes receive the config, locked in shared mode
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    # a no-op Update forces the config (and ssconf) to be re-pushed
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  @param lu: the calling logical unit
  @param instance: the instance whose disks are polled
  @param disks: optional subset of the instance's disks to wait for
  @param oneshot: if True, only poll once instead of looping
  @return: True if the disks are not (cumulatively) degraded

  """
  if not instance.disks or disks is not None and not disks:

  disks = _ExpandCheckDisks(instance, disks)

    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  degr_retries = 10 # in seconds, as we sleep 1 second each time
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
        # too many consecutive RPC failures: give up
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
    rstats = rstats.payload
    for i, mstat in enumerate(rstats):
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)

      # a device that is degraded with no sync progress counts as degraded
      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        if mstat.estimated_time is not None:
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
          max_time = mstat.estimated_time
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)

    time.sleep(min(60, max_time))

    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  @param lu: the calling logical unit
  @param dev: the L{objects.Disk} to check (children are checked recursively)
  @param node: the node on which to check the device
  @param on_primary: whether the device is assembled on its primary node
  @return: True if the device (and its children) are consistent

  """
  lu.cfg.SetDiskID(dev, node)

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
        # local-storage status check
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
        # overall degradation check
        result = result and not rstats.payload.is_degraded

    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
  # Fields that need calculation of global os validity
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])

  def ExpandNames(self):
      raise errors.OpPrereqError("Selective OS query not supported",
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into an a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and tuples of (path, status, diagnose) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "")]}
          }

    """
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
      for name, path, status, diagnose, variants in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          for nname in good_nodes:
            all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose, variants))

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    # which output fields require the global-validity computation
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
    calc_variants = "variants" in self.op.output_fields

    for os_name, os_data in pol.items():
        # an OS is valid only if valid on every (good) node
        for osl in os_data.values():
          valid = valid and osl and osl[0][1]
            # keep only variants common to all nodes
            node_variants = osl[0][3]
            if variants is None:
              variants = node_variants
              variants = [v for v in variants if v in node_variants]

      for field in self.op.output_fields:
        elif field == "valid":
        elif field == "node_status":
          # this is just a copy of the dict
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          raise errors.ParameterError(field)
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
    all_nodes = self.cfg.GetNodeList()
      all_nodes.remove(self.op.node_name)
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
    - the node exists in the configuration
    - it does not have primary or secondary instances
    - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
    # the node must not hold any instance, as primary or secondary
    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name,
    self.op.node_name = node.name

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    logging.info("Stopping the node daemon and removing configs from node %s",
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.RemoveHostFromEtcHosts(node.name)
      _RedistributeAncillaryFiles(self)
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]

  # fields copied verbatim from the node object
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  # fields that need a live RPC query to the node
  _FIELDS_DYNAMIC = utils.FieldSet(
    "mtotal", "mnode", "mfree",
    "ctotal", "cnodes", "csockets",
  # fields answerable from the configuration alone
  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "role"] + _SIMPLE_FIELDS

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

      self.wanted = _GetWantedNodes(self, self.op.names)
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllNodesInfo()
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      # query the nodes for the dynamic (live) fields
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
          # unreachable or failed node: no live data
          live_data[name] = {}
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      inst_data = self.cfg.GetAllInstancesInfo()

      # map each instance to the nodes it uses
      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    for node in nodelist:
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          if node.name == master_node:
          elif node.master_candidate:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      # no node list given: query all nodes
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.nodes
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    # per-instance map of node -> logical volumes, used for the
    # "instance" output field
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    for node in nodenames:
      nresult = volumes[node]
      msg = nresult.fail_msg
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)

      # sort volumes by device name for stable output
      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        for field in self.op.output_fields:
          elif field == "phys":
          elif field == "name":
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
              if node not in lv_by_node[inst]:
              if vol['name'] in lv_by_node[inst][node]:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)
class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

  def CheckArguments(self):
    _CheckStorageType(self.op.storage_type)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
      # no node list given: query all nodes
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    # "name" is an optional filter on the storage unit name
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      msg = nresult.fail_msg
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)

      # index the returned rows by storage unit name for sorted output
      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        for field in self.op.output_fields:
          if field == constants.SF_NODE:
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
            raise errors.ParameterError(field)
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]

  def CheckArguments(self):
    # Write the expanded name back to the opcode: ExpandNames and Exec
    # read self.op.node_name, so storing it on a stray attribute
    # (self.opnode_name) would silently discard the expansion.
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    _CheckStorageType(self.op.storage_type)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the requested changes only touch fields which are
    modifiable for the given storage type.

    """
    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      # storage type has no modifiable fields at all
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  NOTE(review): this copy of the file has lines elided (the embedded line
  numbering skips); dict openers, some guard conditions and statement
  tails are missing below.  Inline notes mark where that is visible.

  """
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def CheckArguments(self):
    # validate/normalize the node name
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    # NOTE(review): the "env = {" opener and its closing "}" are not
    # visible in this copy
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
    - the new node is not already in the config
    - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    # NOTE(review): the binding of the local "cfg" (presumably
    # cfg = self.cfg) is not visible in this copy
    dns_data = utils.GetHostInfo(node_name)

    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      # NOTE(review): the error-code argument of this raise is elided
      raise errors.OpPrereqError("Invalid secondary IP given",
    self.op.secondary_ip = secondary_ip

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      # NOTE(review): the error-code argument of this raise is elided
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          # NOTE(review): the error-code argument of this raise is elided
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True
      # NOTE(review): a "continue" for the readd case appears to be elided
      # before the conflict check below

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        # NOTE(review): error-code argument elided
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one",
        # NOTE(review): an "else:" branch appears to be elided before the
        # following raise; error-code argument elided as well
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one",

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    # NOTE(review): the computation of "exceptions" for the candidate
    # pool is not visible in this copy
    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    # NOTE(review): presumably guarded by "if self.op.readd: ... else:"
    # (the guard lines are not visible here)
    self.new_node = self.cfg.GetNodeInfo(node)
    assert self.new_node is not None, "Can't retrieve locked node %s" % node
    self.new_node = objects.Node(name=node,
                                 primary_ip=primary_ip,
                                 secondary_ip=secondary_ip,
                                 master_candidate=self.master_candidate,
                                 offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    # NOTE(review): presumably guarded by "if self.op.readd:" (elided)
    new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
    self.LogInfo("Readding a node, the offline/drained flags were reset")
    # if we demote the node, we do cleanup later in the procedure
    new_node.master_candidate = self.master_candidate
    if self.changed_primary_ip:
      new_node.primary_ip = self.op.primary_ip

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
      # NOTE(review): an "else:" branch appears to be elided before the
      # following raise
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
      # NOTE(review): the tail of the keyfiles list, the "keyarray"
      # initialisation and the "for i in ...:" loop header are elided
        keyarray.append(utils.ReadFile(i))

      # NOTE(review): the trailing keyarray argument(s) of this call and
      # its closing parenthesis are elided
      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                      keyarray[2], keyarray[3], keyarray[4],
      result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      # FIXME: this should be done via an rpc call to node daemon
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      # NOTE(review): presumably guarded by "if nl_payload:" (elided)
      for failed in nl_payload:
        feedback_fn("ssh/hostname verification failed"
                    " (checking from %s): %s" %
                    (verifier, nl_payload[failed]))
      raise errors.OpExecError("ssh/hostname verification failed.")

    # NOTE(review): presumably "if self.op.readd: ... else: ..." around
    # the two branches below (ReaddNode vs AddNode); guard lines elided
    _RedistributeAncillaryFiles(self)
    self.context.ReaddNode(new_node)
    # make sure we redistribute the config
    self.cfg.Update(new_node, feedback_fn)
    # and make sure the new node will not have old files around
    if not new_node.master_candidate:
      result = self.rpc.call_node_demote_from_mc(new_node.name)
      msg = result.fail_msg
      # NOTE(review): presumably guarded by "if msg:" (elided)
      self.LogWarning("Node failed to demote itself from master"
                      " candidate status: %s" % msg)
    _RedistributeAncillaryFiles(self, additional_nodes=[node])
    self.context.AddNode(new_node, self.proc.GetECId())
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  NOTE(review): this copy of the file has lines elided (the embedded
  line numbering skips); some guards, initialisers and statement tails
  below are missing.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    _CheckBooleanOpField(self.op, 'master_candidate')
    _CheckBooleanOpField(self.op, 'offline')
    _CheckBooleanOpField(self.op, 'drained')
    _CheckBooleanOpField(self.op, 'auto_promote')
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      # NOTE(review): error-code argument elided
      raise errors.OpPrereqError("Please pass at least one modification",
    if all_mods.count(True) > 1:
      # NOTE(review): error-code argument elided
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",

    # Boolean value that tells us whether we're offlining or draining the node
    self.offline_or_drain = (self.op.offline == True or
                             self.op.drained == True)
    self.deoffline_or_drain = (self.op.offline == False or
                               self.op.drained == False)
    self.might_demote = (self.op.master_candidate == False or
                         self.offline_or_drain)

    self.lock_all = self.op.auto_promote and self.might_demote

  def ExpandNames(self):
    # NOTE(review): presumably "if self.lock_all: ... else: ..." around
    # the two assignments below; the guard lines are elided
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    # NOTE(review): the "env = {" opener and its closing "}" are not
    # visible; the tail of "nl" and the final return are also elided
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
    nl = [self.cfg.GetMasterNode(),

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        # NOTE(review): error-code argument elided
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via masterfailover",

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        # NOTE(review): error-code argument elided
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",

    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      # NOTE(review): error-code argument elided
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.deoffline_or_drain and not self.offline_or_drain and not
        self.op.master_candidate == True and not node.master_candidate):
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

  def Exec(self, feedback_fn):
    """Modifies a node.

    NOTE(review): the initialisation of the locals used below
    (presumably "node = self.node" and "result = []"), the "msg"
    bindings for the demotion RPCs, and the final return of "result"
    are not visible in this copy.

    """
    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        if node.master_candidate:
          node.master_candidate = False
          result.append(("master_candidate", "auto-demotion due to offline"))
        # NOTE(review): presumably guarded by "if node.drained:" (elided)
        node.drained = False
        result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        # NOTE(review): presumably "msg = rrc.fail_msg" / "if msg:" (elided)
        self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          # NOTE(review): presumably "msg = rrc.fail_msg" / "if msg:" (elided)
          self.LogWarning("Node failed to demote itself: %s" % msg)
        # NOTE(review): presumably guarded by "if node.offline:" (elided)
        node.offline = False
        result.append(("offline", "clear offline status due to drain"))

    # we locked all nodes, we adjust the CP before updating this node
    # NOTE(review): presumably guarded by "if self.lock_all:" (elided)
    _AdjustCandidatePool(self, [node.name])

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup
    self.context.ReaddNode(node)
class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    # refuse to powercycle the master unless force was given
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      # NOTE(review): the error-code argument of this raise is elided in
      # this copy of the file
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This LU has no prereqs.

    """

  def Exec(self, feedback_fn):
    """Schedule the powercycle on the target node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload
class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequsites needed for this LU.

    """

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()

    # NOTE(review): the initialisation of "os_hvp" (presumably
    # "os_hvp = {}") is not visible in this copy of the file
    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # NOTE(review): the "result = {" opener, its closing "}" and the
    # final "return result" are not visible in this copy
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _FIELDS_DYNAMIC = utils.FieldSet()
  # NOTE(review): the tail of this FieldSet call (the remaining static
  # field names and the closing parenthesis) is elided in this copy
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",

  def ExpandNames(self):
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    NOTE(review): the initialisation of "values" (presumably
    "values = []") and the final "return values" are not visible here.

    """
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
        # NOTE(review): an "else:" appears to be elided before this raise
        raise errors.ParameterError(field)
      values.append(entry)
class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_REQP = ["instance_name"]

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    # node locks are computed from the instance lock once it is known
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)
    # older opcodes may lack ignore_size; default it to False
    if not hasattr(self.op, "ignore_size"):
      self.op.ignore_size = False

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
        _AssembleInstanceDisks(self, self.instance,
                               ignore_size=self.op.ignore_size)
    # NOTE(review): the "if not disks_ok:" guard before this raise and
    # the final return of disks_info are not visible in this copy
    raise errors.OpExecError("Cannot activate block devices")
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  # This copy of the file was truncated: the ignore_size parameter tail,
  # the local initialisations, the per-call guards and the final return
  # have been restored here.
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occured, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in disks:
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        # work on a private copy so the stored configuration keeps the
        # (possibly wrong) size untouched
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  Assembles the instance's disks and raises OpExecError if that fails,
  shutting the partially-assembled disks down again first.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  # This copy of the file was truncated: the failure guard and part of
  # the hint string have been restored here.
  if not disks_ok:
    # roll back whatever was assembled before reporting the failure
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")
class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_REQP = ["instance_name"]

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    # node locks are computed from the instance lock once it is known
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    # refuses to act if the instance is still running
    _SafeShutdownInstanceDisks(self, instance)
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  # raises if the instance is still running on its primary node
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)
4027 def _ExpandCheckDisks(instance, disks):
4028 """Return the instance disks selected by the disks list
4030 @type disks: list of L{objects.Disk} or None
4031 @param disks: selected disks
4032 @rtype: list of L{objects.Disk}
4033 @return: selected instance disks to act on
4037 return instance.disks
4039 if not set(disks).issubset(instance.disks):
4040 raise errors.ProgrammerError("Can only act on disks belonging to the"
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If the ignore_primary is false, errors on the primary node are
  ignored.

  @return: True if all devices were shut down cleanly (errors that were
      ignored per ignore_primary do not count as failures)

  """
  # This copy of the file was truncated: the accumulator initialisation,
  # the outer loop header, the failure guard and the final return have
  # been restored here.
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        # secondary-node failures only matter when ignore_primary is set
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function check if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    # the final raise was truncated in this copy; the resource-shortage
    # error code has been restored
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  This function check if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
                                   lu.cfg.GetHypervisorType())
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (node, vg_free),
                                 errors.ECODE_ENVIRON)
    if requested > vg_free:
      # the final raise was truncated in this copy; the resource-shortage
      # error code has been restored (consistent with _CheckNodeFreeMemory)
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 (node, requested, vg_free),
                                 errors.ECODE_NORES)
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  NOTE(review): this copy of the file has lines elided (the embedded
  line numbering skips); some openers, guards and argument tails below
  are missing.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    # NOTE(review): the "env = {" opener, its closing "}" and the final
    # return are not visible in this copy
      "FORCE": self.op.force,
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra beparams supplied on the opcode, if any
    self.beparams = getattr(self.op, "beparams", {})
    if not isinstance(self.beparams, dict):
      # NOTE(review): error-code argument elided
      raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                 " dict" % (type(self.beparams), ),
    # fill the beparams dict
    utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
    self.op.beparams = self.beparams

    # extra hvparams supplied on the opcode, if any
    self.hvparams = getattr(self.op, "hvparams", {})
    if not isinstance(self.hvparams, dict):
      # NOTE(review): error-code argument elided
      raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                 " dict" % (type(self.hvparams), ),

    # check hypervisor parameter syntax (locally)
    cluster = self.cfg.GetClusterInfo()
    utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
    # NOTE(review): the second argument of FillDict and the closing
    # parenthesis are elided here
    filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
    filled_hvp.update(self.hvparams)
    hv_type = hypervisor.GetHypervisor(instance.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
    self.op.hvparams = self.hvparams

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    # NOTE(review): the instance-name argument line of this call is elided
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.hvparams, self.beparams)
    msg = result.fail_msg
    # NOTE(review): presumably guarded by "if msg:" (elided); on failure
    # the disks are shut down again before re-raising
    _ShutdownInstanceDisks(self, instance)
    raise errors.OpExecError("Could not start instance: %s" % msg)
class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  NOTE(review): this copy of the file has lines elided (the embedded
  line numbering skips); some guards and argument lines below are
  missing.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]

  def CheckArguments(self):
    """Check the arguments.

    """
    # fall back to the default shutdown timeout when the opcode does not
    # carry one
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL]:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  (constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    # NOTE(review): the "env = {" opener, its closing "}" and the final
    # return are not visible in this copy
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      # NOTE(review): the reboot-type argument line of this call is elided
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             self.shutdown_timeout)
      result.Raise("Could not reboot instance")
      # NOTE(review): an "else:" branch (the full-reboot path) appears to
      # be elided before the following block
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      # NOTE(review): presumably guarded by "if msg:" (elided)
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance for"
                               " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
# Logical unit implementing the instance-stop opcode: marks the instance
# down in the config first, then issues the shutdown RPC (failure is only a
# warning), and finally deactivates the instance's disks.
4335 class LUShutdownInstance(LogicalUnit):
4336 """Shutdown an instance.
4339 HPATH = "instance-stop"
4340 HTYPE = constants.HTYPE_INSTANCE
4341 _OP_REQP = ["instance_name"]
# Optional opcode field: fall back to the cluster default shutdown timeout.
4344 def CheckArguments(self):
4345 """Check the arguments.
4348 self.timeout = getattr(self.op, "timeout",
4349 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4351 def ExpandNames(self):
4352 self._ExpandAndLockInstance()
# Hook env: standard instance variables plus the effective TIMEOUT value.
# NOTE(review): elided listing — the docstring closer and the return of
# (env, nl, nl) are not visible in this excerpt.
4354 def BuildHooksEnv(self):
4357 This runs on master, primary and secondary nodes of the instance.
4360 env = _BuildInstanceHookEnvByObject(self, self.instance)
4361 env["TIMEOUT"] = self.timeout
4362 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4365 def CheckPrereq(self):
4366 """Check prerequisites.
4368 This checks that the instance is in the cluster.
4371 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4372 assert self.instance is not None, \
4373 "Cannot retrieve locked instance %s" % self.op.instance_name
4374 _CheckNodeOnline(self, self.instance.primary_node)
# MarkInstanceDown is done before the RPC so the cluster state reflects the
# intent even if the node-side shutdown fails (which is only warned about).
4376 def Exec(self, feedback_fn):
4377 """Shutdown the instance.
4380 instance = self.instance
4381 node_current = instance.primary_node
4382 timeout = self.timeout
4383 self.cfg.MarkInstanceDown(instance.name)
4384 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4385 msg = result.fail_msg
4387 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4389 _ShutdownInstanceDisks(self, instance)
# Logical unit implementing instance-reinstall: optionally switches the OS
# type, then activates the disks, runs the OS create scripts on the primary
# node, and deactivates the disks again. The instance must be stopped.
4392 class LUReinstallInstance(LogicalUnit):
4393 """Reinstall an instance.
4396 HPATH = "instance-reinstall"
4397 HTYPE = constants.HTYPE_INSTANCE
4398 _OP_REQP = ["instance_name"]
4401 def ExpandNames(self):
4402 self._ExpandAndLockInstance()
4404 def BuildHooksEnv(self):
4407 This runs on master, primary and secondary nodes of the instance.
4410 env = _BuildInstanceHookEnvByObject(self, self.instance)
4411 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
# Prerequisites: online primary, not diskless, instance down; when an OS
# change is requested, the target OS (and variant) must exist on the pnode.
4414 def CheckPrereq(self):
4415 """Check prerequisites.
4417 This checks that the instance is in the cluster and is not running.
4420 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4421 assert instance is not None, \
4422 "Cannot retrieve locked instance %s" % self.op.instance_name
4423 _CheckNodeOnline(self, instance.primary_node)
4425 if instance.disk_template == constants.DT_DISKLESS:
4426 raise errors.OpPrereqError("Instance '%s' has no disks" %
4427 self.op.instance_name,
4429 _CheckInstanceDown(self, instance, "cannot reinstall")
4431 self.op.os_type = getattr(self.op, "os_type", None)
4432 self.op.force_variant = getattr(self.op, "force_variant", False)
4433 if self.op.os_type is not None:
4435 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4436 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4438 self.instance = instance
# The OS change is committed to the config before the create scripts run.
4440 def Exec(self, feedback_fn):
4441 """Reinstall the instance.
4444 inst = self.instance
4446 if self.op.os_type is not None:
4447 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4448 inst.os = self.op.os_type
4449 self.cfg.Update(inst, feedback_fn)
4451 _StartInstanceDisks(self, inst, None)
4453 feedback_fn("Running the instance OS create scripts...")
4454 # FIXME: pass debug option from opcode to backend
4455 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4456 self.op.debug_level)
4457 result.Raise("Could not install OS for instance %s on node %s" %
4458 (inst.name, inst.primary_node))
4460 _ShutdownInstanceDisks(self, inst)
# Logical unit recreating missing disks of a (stopped) instance. Disks not
# listed in op.disks are skipped; an empty op.disks means all disks.
4463 class LURecreateInstanceDisks(LogicalUnit):
4464 """Recreate an instance's missing disks.
4467 HPATH = "instance-recreate-disks"
4468 HTYPE = constants.HTYPE_INSTANCE
4469 _OP_REQP = ["instance_name", "disks"]
# op.disks must be a list of non-negative disk indices.
4472 def CheckArguments(self):
4473 """Check the arguments.
4476 if not isinstance(self.op.disks, list):
4477 raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4478 for item in self.op.disks:
4479 if (not isinstance(item, int) or
4481 raise errors.OpPrereqError("Invalid disk specification '%s'" %
4482 str(item), errors.ECODE_INVAL)
4484 def ExpandNames(self):
4485 self._ExpandAndLockInstance()
4487 def BuildHooksEnv(self):
4490 This runs on master, primary and secondary nodes of the instance.
4493 env = _BuildInstanceHookEnvByObject(self, self.instance)
4494 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
# Prerequisites: online primary, not diskless, instance down, and every
# requested index must be within the instance's disk range.
4497 def CheckPrereq(self):
4498 """Check prerequisites.
4500 This checks that the instance is in the cluster and is not running.
4503 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4504 assert instance is not None, \
4505 "Cannot retrieve locked instance %s" % self.op.instance_name
4506 _CheckNodeOnline(self, instance.primary_node)
4508 if instance.disk_template == constants.DT_DISKLESS:
4509 raise errors.OpPrereqError("Instance '%s' has no disks" %
4510 self.op.instance_name, errors.ECODE_INVAL)
4511 _CheckInstanceDown(self, instance, "cannot recreate disks")
4513 if not self.op.disks:
4514 self.op.disks = range(len(instance.disks))
4516 for idx in self.op.disks:
4517 if idx >= len(instance.disks):
4518 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4521 self.instance = instance
# Build the to_skip list (indices NOT requested) and delegate the actual
# device creation to the shared _CreateDisks helper.
4523 def Exec(self, feedback_fn):
4524 """Recreate the disks.
4528 for idx, _ in enumerate(self.instance.disks):
4529 if idx not in self.op.disks: # disk idx has not been passed in
4533 _CreateDisks(self, self.instance, to_skip=to_skip)
# Logical unit renaming a (stopped) instance: validates the new name (DNS
# resolution, uniqueness, optional IP-in-use check), renames it in the
# config and the lock manager, relocates file storage if needed, then runs
# the OS rename script with the disks temporarily activated.
4536 class LURenameInstance(LogicalUnit):
4537 """Rename an instance.
4540 HPATH = "instance-rename"
4541 HTYPE = constants.HTYPE_INSTANCE
4542 _OP_REQP = ["instance_name", "new_name"]
4544 def BuildHooksEnv(self):
4547 This runs on master, primary and secondary nodes of the instance.
4550 env = _BuildInstanceHookEnvByObject(self, self.instance)
4551 env["INSTANCE_NEW_NAME"] = self.op.new_name
4552 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4555 def CheckPrereq(self):
4556 """Check prerequisites.
4558 This checks that the instance is in the cluster and is not running.
4561 self.op.instance_name = _ExpandInstanceName(self.cfg,
4562 self.op.instance_name)
4563 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4564 assert instance is not None
4565 _CheckNodeOnline(self, instance.primary_node)
4566 _CheckInstanceDown(self, instance, "cannot rename")
4567 self.instance = instance
4569 # new name verification
4570 name_info = utils.GetHostInfo(self.op.new_name)
4572 self.op.new_name = new_name = name_info.name
4573 instance_list = self.cfg.GetInstanceList()
4574 if new_name in instance_list:
4575 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4576 new_name, errors.ECODE_EXISTS)
# Unless ignore_ip is set, refuse a new name whose IP already answers.
4578 if not getattr(self.op, "ignore_ip", False):
4579 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4580 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4581 (name_info.ip, new_name),
4582 errors.ECODE_NOTUNIQUE)
4585 def Exec(self, feedback_fn):
4586 """Rename the instance.
4589 inst = self.instance
4590 old_name = inst.name
4592 if inst.disk_template == constants.DT_FILE:
4593 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4595 self.cfg.RenameInstance(inst.name, self.op.new_name)
4596 # Change the instance lock. This is definitely safe while we hold the BGL
4597 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4598 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4600 # re-read the instance from the configuration after rename
4601 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4603 if inst.disk_template == constants.DT_FILE:
4604 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4605 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4606 old_file_storage_dir,
4607 new_file_storage_dir)
4608 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4609 " (but the instance has been renamed in Ganeti)" %
4610 (inst.primary_node, old_file_storage_dir,
4611 new_file_storage_dir))
4613 _StartInstanceDisks(self, inst, None)
4615 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4616 old_name, self.op.debug_level)
4617 msg = result.fail_msg
4619 msg = ("Could not run OS rename script for instance %s on node %s"
4620 " (but the instance has been renamed in Ganeti): %s" %
4621 (inst.name, inst.primary_node, msg))
4622 self.proc.LogWarning(msg)
4624 _ShutdownInstanceDisks(self, inst)
# Logical unit removing an instance: shutdown, disk removal, then removal
# from the cluster config; ignore_failures downgrades each failure to a
# feedback warning instead of aborting.
4627 class LURemoveInstance(LogicalUnit):
4628 """Remove an instance.
4631 HPATH = "instance-remove"
4632 HTYPE = constants.HTYPE_INSTANCE
4633 _OP_REQP = ["instance_name", "ignore_failures"]
# Optional opcode field: fall back to the cluster default shutdown timeout.
4636 def CheckArguments(self):
4637 """Check the arguments.
4640 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4641 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4643 def ExpandNames(self):
4644 self._ExpandAndLockInstance()
4645 self.needed_locks[locking.LEVEL_NODE] = []
4646 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4648 def DeclareLocks(self, level):
4649 if level == locking.LEVEL_NODE:
4650 self._LockInstancesNodes()
# Post-hooks also run on the (soon-removed) instance's own nodes (nl_post).
4652 def BuildHooksEnv(self):
4655 This runs on master, primary and secondary nodes of the instance.
4658 env = _BuildInstanceHookEnvByObject(self, self.instance)
4659 env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4660 nl = [self.cfg.GetMasterNode()]
4661 nl_post = list(self.instance.all_nodes) + nl
4662 return env, nl, nl_post
4664 def CheckPrereq(self):
4665 """Check prerequisites.
4667 This checks that the instance is in the cluster.
4670 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4671 assert self.instance is not None, \
4672 "Cannot retrieve locked instance %s" % self.op.instance_name
4674 def Exec(self, feedback_fn):
4675 """Remove the instance.
4678 instance = self.instance
4679 logging.info("Shutting down instance %s on node %s",
4680 instance.name, instance.primary_node)
4682 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4683 self.shutdown_timeout)
4684 msg = result.fail_msg
4686 if self.op.ignore_failures:
4687 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4689 raise errors.OpExecError("Could not shutdown instance %s on"
4691 (instance.name, instance.primary_node, msg))
4693 logging.info("Removing block devices for instance %s", instance.name)
4695 if not _RemoveDisks(self, instance):
4696 if self.op.ignore_failures:
4697 feedback_fn("Warning: can't remove instance's disks")
4699 raise errors.OpExecError("Can't remove instance's disks")
4701 logging.info("Removing instance %s out of cluster config", instance.name)
4703 self.cfg.RemoveInstance(instance.name)
# Schedule removal of the now-stale instance lock after the LU completes.
4704 self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
# No-hooks logical unit answering instance queries: static fields come from
# the configuration, dynamic fields (oper_state, oper_ram, status) require
# live data gathered via call_all_instances_info on the primary nodes.
# NOTE(review): elided listing — several branch keywords, initializers
# (off_nodes/bad_nodes/live_data, iout/output) and returns are not visible.
4707 class LUQueryInstances(NoHooksLU):
4708 """Logical unit for querying instances.
4711 # pylint: disable-msg=W0142
4712 _OP_REQP = ["output_fields", "names", "use_locking"]
4714 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4715 "serial_no", "ctime", "mtime", "uuid"]
4716 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4718 "disk_template", "ip", "mac", "bridge",
4719 "nic_mode", "nic_link",
4720 "sda_size", "sdb_size", "vcpus", "tags",
4721 "network_port", "beparams",
4722 r"(disk)\.(size)/([0-9]+)",
4723 r"(disk)\.(sizes)", "disk_usage",
4724 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4725 r"(nic)\.(bridge)/([0-9]+)",
4726 r"(nic)\.(macs|ips|modes|links|bridges)",
4727 r"(disk|nic)\.(count)",
4729 ] + _SIMPLE_FIELDS +
4731 for name in constants.HVS_PARAMETERS
4732 if name not in constants.HVC_GLOBALS] +
4734 for name in constants.BES_PARAMETERS])
4735 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
# Locking is only needed when a dynamic field is requested AND the caller
# asked for it (use_locking); otherwise shared config data suffices.
4738 def ExpandNames(self):
4739 _CheckOutputFields(static=self._FIELDS_STATIC,
4740 dynamic=self._FIELDS_DYNAMIC,
4741 selected=self.op.output_fields)
4743 self.needed_locks = {}
4744 self.share_locks[locking.LEVEL_INSTANCE] = 1
4745 self.share_locks[locking.LEVEL_NODE] = 1
4748 self.wanted = _GetWantedInstances(self, self.op.names)
4750 self.wanted = locking.ALL_SET
4752 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4753 self.do_locking = self.do_node_query and self.op.use_locking
4755 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4756 self.needed_locks[locking.LEVEL_NODE] = []
4757 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4759 def DeclareLocks(self, level):
4760 if level == locking.LEVEL_NODE and self.do_locking:
4761 self._LockInstancesNodes()
4763 def CheckPrereq(self):
4764 """Check prerequisites.
# Main query loop: resolve the instance set, optionally gather live data
# per node, then compute each requested field per instance.
4769 def Exec(self, feedback_fn):
4770 """Computes the list of nodes and their attributes.
4773 # pylint: disable-msg=R0912
4774 # way too many branches here
4775 all_info = self.cfg.GetAllInstancesInfo()
4776 if self.wanted == locking.ALL_SET:
4777 # caller didn't specify instance names, so ordering is not important
4779 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4781 instance_names = all_info.keys()
4782 instance_names = utils.NiceSort(instance_names)
4784 # caller did specify names, so we must keep the ordering
4786 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4788 tgt_set = all_info.keys()
4789 missing = set(self.wanted).difference(tgt_set)
4791 raise errors.OpExecError("Some instances were removed before"
4792 " retrieving their data: %s" % missing)
4793 instance_names = self.wanted
4795 instance_list = [all_info[iname] for iname in instance_names]
4797 # begin data gathering
4799 nodes = frozenset([inst.primary_node for inst in instance_list])
4800 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4804 if self.do_node_query:
4806 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4808 result = node_data[name]
4810 # offline nodes will be in both lists
4811 off_nodes.append(name)
4813 bad_nodes.append(name)
4816 live_data.update(result.payload)
4817 # else no instance is alive
4819 live_data = dict([(name, {}) for name in instance_names])
4821 # end data gathering
4826 cluster = self.cfg.GetClusterInfo()
4827 for instance in instance_list:
4829 i_hv = cluster.FillHV(instance, skip_globals=True)
4830 i_be = cluster.FillBE(instance)
4831 i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4832 nic.nicparams) for nic in instance.nics]
4833 for field in self.op.output_fields:
4834 st_match = self._FIELDS_STATIC.Matches(field)
4835 if field in self._SIMPLE_FIELDS:
4836 val = getattr(instance, field)
4837 elif field == "pnode":
4838 val = instance.primary_node
4839 elif field == "snodes":
4840 val = list(instance.secondary_nodes)
4841 elif field == "admin_state":
4842 val = instance.admin_up
4843 elif field == "oper_state":
4844 if instance.primary_node in bad_nodes:
4847 val = bool(live_data.get(instance.name))
4848 elif field == "status":
4849 if instance.primary_node in off_nodes:
4850 val = "ERROR_nodeoffline"
4851 elif instance.primary_node in bad_nodes:
4852 val = "ERROR_nodedown"
4854 running = bool(live_data.get(instance.name))
4856 if instance.admin_up:
4861 if instance.admin_up:
4865 elif field == "oper_ram":
4866 if instance.primary_node in bad_nodes:
4868 elif instance.name in live_data:
4869 val = live_data[instance.name].get("memory", "?")
4872 elif field == "vcpus":
4873 val = i_be[constants.BE_VCPUS]
4874 elif field == "disk_template":
4875 val = instance.disk_template
4878 val = instance.nics[0].ip
4881 elif field == "nic_mode":
4883 val = i_nicp[0][constants.NIC_MODE]
4886 elif field == "nic_link":
4888 val = i_nicp[0][constants.NIC_LINK]
# legacy "bridge" field: only meaningful for bridged NICs (then the link).
4891 elif field == "bridge":
4892 if (instance.nics and
4893 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4894 val = i_nicp[0][constants.NIC_LINK]
4897 elif field == "mac":
4899 val = instance.nics[0].mac
# legacy sda_size/sdb_size map to disk index 0/1 via the letter offset.
4902 elif field == "sda_size" or field == "sdb_size":
4903 idx = ord(field[2]) - ord('a')
4905 val = instance.FindDisk(idx).size
4906 except errors.OpPrereqError:
4908 elif field == "disk_usage": # total disk usage per node
4909 disk_sizes = [{'size': disk.size} for disk in instance.disks]
4910 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4911 elif field == "tags":
4912 val = list(instance.GetTags())
4913 elif field == "hvparams":
4915 elif (field.startswith(HVPREFIX) and
4916 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4917 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4918 val = i_hv.get(field[len(HVPREFIX):], None)
4919 elif field == "beparams":
4921 elif (field.startswith(BEPREFIX) and
4922 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4923 val = i_be.get(field[len(BEPREFIX):], None)
# Regex-style fields: disk.*/nic.* with optional numeric index in groups.
4924 elif st_match and st_match.groups():
4925 # matches a variable list
4926 st_groups = st_match.groups()
4927 if st_groups and st_groups[0] == "disk":
4928 if st_groups[1] == "count":
4929 val = len(instance.disks)
4930 elif st_groups[1] == "sizes":
4931 val = [disk.size for disk in instance.disks]
4932 elif st_groups[1] == "size":
4934 val = instance.FindDisk(st_groups[2]).size
4935 except errors.OpPrereqError:
4938 assert False, "Unhandled disk parameter"
4939 elif st_groups[0] == "nic":
4940 if st_groups[1] == "count":
4941 val = len(instance.nics)
4942 elif st_groups[1] == "macs":
4943 val = [nic.mac for nic in instance.nics]
4944 elif st_groups[1] == "ips":
4945 val = [nic.ip for nic in instance.nics]
4946 elif st_groups[1] == "modes":
4947 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4948 elif st_groups[1] == "links":
4949 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4950 elif st_groups[1] == "bridges":
4953 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4954 val.append(nicp[constants.NIC_LINK])
4959 nic_idx = int(st_groups[2])
4960 if nic_idx >= len(instance.nics):
4963 if st_groups[1] == "mac":
4964 val = instance.nics[nic_idx].mac
4965 elif st_groups[1] == "ip":
4966 val = instance.nics[nic_idx].ip
4967 elif st_groups[1] == "mode":
4968 val = i_nicp[nic_idx][constants.NIC_MODE]
4969 elif st_groups[1] == "link":
4970 val = i_nicp[nic_idx][constants.NIC_LINK]
4971 elif st_groups[1] == "bridge":
4972 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4973 if nic_mode == constants.NIC_MODE_BRIDGED:
4974 val = i_nicp[nic_idx][constants.NIC_LINK]
4978 assert False, "Unhandled NIC parameter"
4980 assert False, ("Declared but unhandled variable parameter '%s'" %
4983 assert False, "Declared but unhandled parameter '%s'" % field
# Logical unit failing an instance over to its secondary node: check disk
# consistency (unless ignored), shut the instance down on the primary,
# flip primary/secondary in the config, and restart on the new primary if
# the instance was marked up. Requires a network-mirrored disk template.
4990 class LUFailoverInstance(LogicalUnit):
4991 """Failover an instance.
4994 HPATH = "instance-failover"
4995 HTYPE = constants.HTYPE_INSTANCE
4996 _OP_REQP = ["instance_name", "ignore_consistency"]
4999 def CheckArguments(self):
5000 """Check the arguments.
5003 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5004 constants.DEFAULT_SHUTDOWN_TIMEOUT)
5006 def ExpandNames(self):
5007 self._ExpandAndLockInstance()
5008 self.needed_locks[locking.LEVEL_NODE] = []
5009 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5011 def DeclareLocks(self, level):
5012 if level == locking.LEVEL_NODE:
5013 self._LockInstancesNodes()
# Hook env exposes old/new primary+secondary (they swap on failover).
5015 def BuildHooksEnv(self):
5018 This runs on master, primary and secondary nodes of the instance.
5021 instance = self.instance
5022 source_node = instance.primary_node
5023 target_node = instance.secondary_nodes[0]
5025 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5026 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5027 "OLD_PRIMARY": source_node,
5028 "OLD_SECONDARY": target_node,
5029 "NEW_PRIMARY": target_node,
5030 "NEW_SECONDARY": source_node,
5032 env.update(_BuildInstanceHookEnvByObject(self, instance))
5033 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5035 nl_post.append(source_node)
5036 return env, nl, nl_post
# Prerequisites: mirrored template, usable (online, undrained) secondary,
# enough free memory there when the instance is up, bridges present.
5038 def CheckPrereq(self):
5039 """Check prerequisites.
5041 This checks that the instance is in the cluster.
5044 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5045 assert self.instance is not None, \
5046 "Cannot retrieve locked instance %s" % self.op.instance_name
5048 bep = self.cfg.GetClusterInfo().FillBE(instance)
5049 if instance.disk_template not in constants.DTS_NET_MIRROR:
5050 raise errors.OpPrereqError("Instance's disk layout is not"
5051 " network mirrored, cannot failover.",
5054 secondary_nodes = instance.secondary_nodes
5055 if not secondary_nodes:
5056 raise errors.ProgrammerError("no secondary node but using "
5057 "a mirrored disk template")
5059 target_node = secondary_nodes[0]
5060 _CheckNodeOnline(self, target_node)
5061 _CheckNodeNotDrained(self, target_node)
5062 if instance.admin_up:
5063 # check memory requirements on the secondary node
5064 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5065 instance.name, bep[constants.BE_MEMORY],
5066 instance.hypervisor)
5068 self.LogInfo("Not checking memory on the secondary node as"
5069 " instance will not be started")
5071 # check bridge existance
5072 _CheckInstanceBridgesExist(self, instance, node=target_node)
5074 def Exec(self, feedback_fn):
5075 """Failover an instance.
5077 The failover is done by shutting it down on its present node and
5078 starting it on the secondary.
5081 instance = self.instance
5083 source_node = instance.primary_node
5084 target_node = instance.secondary_nodes[0]
5086 if instance.admin_up:
5087 feedback_fn("* checking disk consistency between source and target")
5088 for dev in instance.disks:
5089 # for drbd, these are drbd over lvm
5090 if not _CheckDiskConsistency(self, dev, target_node, False):
5091 if not self.op.ignore_consistency:
5092 raise errors.OpExecError("Disk %s is degraded on target node,"
5093 " aborting failover." % dev.iv_name)
5095 feedback_fn("* not checking disk consistency as instance is not running")
5097 feedback_fn("* shutting down instance on source node")
5098 logging.info("Shutting down instance %s on node %s",
5099 instance.name, source_node)
5101 result = self.rpc.call_instance_shutdown(source_node, instance,
5102 self.shutdown_timeout)
5103 msg = result.fail_msg
# ignore_consistency doubles as "proceed even if shutdown failed" here;
# the operator must ensure the old primary is really down.
5105 if self.op.ignore_consistency:
5106 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5107 " Proceeding anyway. Please make sure node"
5108 " %s is down. Error details: %s",
5109 instance.name, source_node, source_node, msg)
5111 raise errors.OpExecError("Could not shutdown instance %s on"
5113 (instance.name, source_node, msg))
5115 feedback_fn("* deactivating the instance's disks on source node")
5116 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5117 raise errors.OpExecError("Can't shut down the instance's disks.")
5119 instance.primary_node = target_node
5120 # distribute new instance config to the other nodes
5121 self.cfg.Update(instance, feedback_fn)
5123 # Only start the instance if it's marked as up
5124 if instance.admin_up:
5125 feedback_fn("* activating the instance's disks on target node")
5126 logging.info("Starting instance %s on node %s",
5127 instance.name, target_node)
5129 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5130 ignore_secondaries=True)
5132 _ShutdownInstanceDisks(self, instance)
5133 raise errors.OpExecError("Can't activate the instance's disks")
5135 feedback_fn("* starting the instance on the target node")
5136 result = self.rpc.call_instance_start(target_node, instance, None, None)
5137 msg = result.fail_msg
5139 _ShutdownInstanceDisks(self, instance)
5140 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5141 (instance.name, target_node, msg))
# Logical unit for live migration (no shutdown, unlike failover). The real
# work is delegated to a TLMigrateInstance tasklet created in ExpandNames;
# this LU only handles locking and the hook environment.
5144 class LUMigrateInstance(LogicalUnit):
5145 """Migrate an instance.
5147 This is migration without shutting down, compared to the failover,
5148 which is done with shutdown.
5151 HPATH = "instance-migrate"
5152 HTYPE = constants.HTYPE_INSTANCE
5153 _OP_REQP = ["instance_name", "live", "cleanup"]
5157 def ExpandNames(self):
5158 self._ExpandAndLockInstance()
5160 self.needed_locks[locking.LEVEL_NODE] = []
5161 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5163 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5164 self.op.live, self.op.cleanup)
5165 self.tasklets = [self._migrater]
5167 def DeclareLocks(self, level):
5168 if level == locking.LEVEL_NODE:
5169 self._LockInstancesNodes()
# Hook env mirrors failover: old/new primary+secondary swap, plus the
# live/cleanup flags of this migration.
5171 def BuildHooksEnv(self):
5174 This runs on master, primary and secondary nodes of the instance.
5177 instance = self._migrater.instance
5178 source_node = instance.primary_node
5179 target_node = instance.secondary_nodes[0]
5180 env = _BuildInstanceHookEnvByObject(self, instance)
5181 env["MIGRATE_LIVE"] = self.op.live
5182 env["MIGRATE_CLEANUP"] = self.op.cleanup
5184 "OLD_PRIMARY": source_node,
5185 "OLD_SECONDARY": target_node,
5186 "NEW_PRIMARY": target_node,
5187 "NEW_SECONDARY": source_node,
5189 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5191 nl_post.append(source_node)
5192 return env, nl, nl_post
# Logical unit moving an instance to an arbitrary node by copying its disk
# data (shutdown, create disks on target, blockdev export/import per disk,
# remove old disks, restart). Only plain LV/file disks can be moved.
5195 class LUMoveInstance(LogicalUnit):
5196 """Move an instance by data-copying.
5199 HPATH = "instance-move"
5200 HTYPE = constants.HTYPE_INSTANCE
5201 _OP_REQP = ["instance_name", "target_node"]
5204 def CheckArguments(self):
5205 """Check the arguments.
5208 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5209 constants.DEFAULT_SHUTDOWN_TIMEOUT)
5211 def ExpandNames(self):
5212 self._ExpandAndLockInstance()
5213 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5214 self.op.target_node = target_node
5215 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5216 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5218 def DeclareLocks(self, level):
5219 if level == locking.LEVEL_NODE:
5220 self._LockInstancesNodes(primary_only=True)
5222 def BuildHooksEnv(self):
5225 This runs on master, primary and secondary nodes of the instance.
5229 "TARGET_NODE": self.op.target_node,
5230 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5232 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5233 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5234 self.op.target_node]
# Prerequisites: target differs from current primary, every disk is a
# simple LV/file volume, target node usable with enough memory if the
# instance is up, and the needed bridges exist on the target.
5237 def CheckPrereq(self):
5238 """Check prerequisites.
5240 This checks that the instance is in the cluster.
5243 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5244 assert self.instance is not None, \
5245 "Cannot retrieve locked instance %s" % self.op.instance_name
5247 node = self.cfg.GetNodeInfo(self.op.target_node)
5248 assert node is not None, \
5249 "Cannot retrieve locked node %s" % self.op.target_node
5251 self.target_node = target_node = node.name
5253 if target_node == instance.primary_node:
5254 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5255 (instance.name, target_node),
5258 bep = self.cfg.GetClusterInfo().FillBE(instance)
5260 for idx, dsk in enumerate(instance.disks):
5261 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5262 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5263 " cannot copy" % idx, errors.ECODE_STATE)
5265 _CheckNodeOnline(self, target_node)
5266 _CheckNodeNotDrained(self, target_node)
5268 if instance.admin_up:
5269 # check memory requirements on the secondary node
5270 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5271 instance.name, bep[constants.BE_MEMORY],
5272 instance.hypervisor)
5274 self.LogInfo("Not checking memory on the secondary node as"
5275 " instance will not be started")
5277 # check bridge existance
5278 _CheckInstanceBridgesExist(self, instance, node=target_node)
5280 def Exec(self, feedback_fn):
5281 """Move an instance.
5283 The move is done by shutting it down on its present node, copying
5284 the data over (slow) and starting it on the new node.
5287 instance = self.instance
5289 source_node = instance.primary_node
5290 target_node = self.target_node
5292 self.LogInfo("Shutting down instance %s on source node %s",
5293 instance.name, source_node)
5295 result = self.rpc.call_instance_shutdown(source_node, instance,
5296 self.shutdown_timeout)
5297 msg = result.fail_msg
5299 if self.op.ignore_consistency:
5300 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5301 " Proceeding anyway. Please make sure node"
5302 " %s is down. Error details: %s",
5303 instance.name, source_node, source_node, msg)
5305 raise errors.OpExecError("Could not shutdown instance %s on"
5307 (instance.name, source_node, msg))
# On creation failure the target disks are removed and any reserved DRBD
# minors are released before re-raising.
5309 # create the target disks
5311 _CreateDisks(self, instance, target_node=target_node)
5312 except errors.OpExecError:
5313 self.LogWarning("Device creation failed, reverting...")
5315 _RemoveDisks(self, instance, target_node=target_node)
5317 self.cfg.ReleaseDRBDMinors(instance.name)
5320 cluster_name = self.cfg.GetClusterInfo().cluster_name
5323 # activate, get path, copy the data over
5324 for idx, disk in enumerate(instance.disks):
5325 self.LogInfo("Copying data for disk %d", idx)
5326 result = self.rpc.call_blockdev_assemble(target_node, disk,
5327 instance.name, True)
5329 self.LogWarning("Can't assemble newly created disk %d: %s",
5330 idx, result.fail_msg)
5331 errs.append(result.fail_msg)
5333 dev_path = result.payload
5334 result = self.rpc.call_blockdev_export(source_node, disk,
5335 target_node, dev_path,
5338 self.LogWarning("Can't copy data over for disk %d: %s",
5339 idx, result.fail_msg)
5340 errs.append(result.fail_msg)
5344 self.LogWarning("Some disks failed to copy, aborting")
5346 _RemoveDisks(self, instance, target_node=target_node)
5348 self.cfg.ReleaseDRBDMinors(instance.name)
5349 raise errors.OpExecError("Errors during disk copy: %s" %
# Copy succeeded: commit the new primary node, drop the source disks, and
# restart the instance if it was marked up.
5352 instance.primary_node = target_node
5353 self.cfg.Update(instance, feedback_fn)
5355 self.LogInfo("Removing the disks on the original node")
5356 _RemoveDisks(self, instance, target_node=source_node)
5358 # Only start the instance if it's marked as up
5359 if instance.admin_up:
5360 self.LogInfo("Starting instance %s on node %s",
5361 instance.name, target_node)
5363 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5364 ignore_secondaries=True)
5366 _ShutdownInstanceDisks(self, instance)
5367 raise errors.OpExecError("Can't activate the instance's disks")
5369 result = self.rpc.call_instance_start(target_node, instance, None, None)
5370 msg = result.fail_msg
5372 _ShutdownInstanceDisks(self, instance)
5373 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5374 (instance.name, target_node, msg))
# Logical unit migrating every primary instance off one node: builds one
# TLMigrateInstance tasklet per primary instance of that node, and locks
# the node plus all those instances.
5377 class LUMigrateNode(LogicalUnit):
5378 """Migrate all instances from a node.
5381 HPATH = "node-migrate"
5382 HTYPE = constants.HTYPE_NODE
5383 _OP_REQP = ["node_name", "live"]
5386 def ExpandNames(self):
5387 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5389 self.needed_locks = {
5390 locking.LEVEL_NODE: [self.op.node_name],
5393 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5395 # Create tasklets for migrating instances for all instances on this node
5399 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5400 logging.debug("Migrating instance %s", inst.name)
5401 names.append(inst.name)
# cleanup=False: these are fresh migrations, not recovery of aborted ones.
5403 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5405 self.tasklets = tasklets
5407 # Declare instance locks
5408 self.needed_locks[locking.LEVEL_INSTANCE] = names
5410 def DeclareLocks(self, level):
5411 if level == locking.LEVEL_NODE:
5412 self._LockInstancesNodes()
# Node-level hooks only run on the master for this opcode.
5414 def BuildHooksEnv(self):
5417 This runs on the master, the primary and all the secondaries.
5421 "NODE_NAME": self.op.node_name,
5424 nl = [self.cfg.GetMasterNode()]
5426 return (env, nl, nl)
class TLMigrateInstance(Tasklet):
  """Tasklet performing (or cleaning up after) a single instance migration.

  Driven either directly by an instance-migration LU or, in batch, by
  L{LUMigrateNode}.  Only DRBD8-based instances are supported.

  """
  def __init__(self, lu, instance_name, live, cleanup):
    """Initializes this class.

    @param lu: the logical unit on whose behalf the tasklet runs
    @param instance_name: name of the instance to migrate
    @param live: whether to request a live migration from the hypervisor
    @param cleanup: if True, clean up after a previously failed migration
        instead of performing a new one

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None

    # Migration is only implemented over DRBD8 disks
    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      # DRBD8 implies exactly one secondary, so this is a configuration
      # (not user) error
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existance
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      # for a fresh migration also make sure the target can accept the
      # instance and the hypervisor agrees the instance is migratable
      _CheckNodeNotDrained(self.lu, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.instance.disks)
      # aggregate per-node sync state: done only when every node reports done,
      # progress reported as the minimum percentage across nodes
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if min_percent < 100:
        self.feedback_fn(" - progress: %.1f%%" % min_percent)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    Closes all the instance's block devices on the given node so DRBD
    drops it back to the secondary role there.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    Puts the instance's DRBD devices on all nodes into standalone
    (non-networked) mode.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    @type multimaster: boolean
    @param multimaster: if True, reattach in dual-master mode (needed
        while the migration is in flight), otherwise single-master

    """
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    # demote the node the instance is NOT running on, then resync
    self._EnsureSecondary(demoted_node)
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore here errors, since if the device is standalone, it
      # won't be able to sync
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    Best effort: failures are logged as warnings, never raised, since
    this runs on an already-failed migration path.

    """
    target_node = self.target_node
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
    abort_msg = abort_result.fail_msg
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
      log_err = ("Failed fetching source migration information from %s: %s" %
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           self.nodes_ip[target_node])
    msg = result.fail_msg
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
    msg = result.fail_msg
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    # migration succeeded: record the new primary and push the new config out
    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
    msg = result.fail_msg
      logging.error("Instance migration succeeded, but finalization failed:"
      raise errors.OpExecError("Could not finalize instance migration: %s" %

    # demote the old primary and bring disks back to single-master
    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    Dispatches to either the cleanup path or the actual migration,
    depending on how the tasklet was constructed.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      return self._ExecCleanup()
      return self._ExecMigration()
def _CreateBlockDev(lu, node, instance, device, force_create,
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.
  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  if device.CreateOnSecondary():
  # recurse first so children exist before the parent device is created
  for child in device.children:
    _CreateBlockDev(lu, node, instance, child, force_create,

  if not force_create:

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5802 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5803 """Create a single block device on a given node.
5805 This will not recurse over children of the device, so they must be
5808 @param lu: the lu on whose behalf we execute
5809 @param node: the node on which to create the device
5810 @type instance: L{objects.Instance}
5811 @param instance: the instance which owns the device
5812 @type device: L{objects.Disk}
5813 @param device: the device to create
5814 @param info: the extra 'metadata' we should attach to the device
5815 (this will be represented as a LVM tag)
5816 @type force_open: boolean
5817 @param force_open: this parameter will be passes to the
5818 L{backend.BlockdevCreate} function where it specifies
5819 whether we run on primary or not, and it affects both
5820 the child assembly and the device own Open() execution
5823 lu.cfg.SetDiskID(device, node)
5824 result = lu.rpc.call_blockdev_create(node, device, device.size,
5825 instance.name, force_open, info)
5826 result.Raise("Can't create block device %s on"
5827 " node %s for instance %s" % (device, node, instance.name))
5828 if device.physical_id is None:
5829 device.physical_id = result.payload
def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  @param lu: the lu on whose behalf we execute
  @param exts: list of suffixes; one unique name is generated per suffix

  """
    # each name is a cluster-unique id plus the caller-supplied suffix
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
  """Generate a drbd8 device complete with its children.

  Builds a DRBD8 disk object whose children are the data LV (full size)
  and the metadata LV (fixed 128 MB), both named from the "names" pair.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  # 128 MB is the fixed size of the DRBD metadata volume
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                          children=[dev_data, dev_meta],
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
  """Generate the entire disk layout for a given template type.

  Returns the list of L{objects.Disk} objects describing all disks of
  the instance, built according to the requested disk template.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  if template_name == constants.DT_DISKLESS:
  elif template_name == constants.DT_PLAIN:
    # plain LVM disks are only valid without secondaries
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    # DRBD8 requires exactly one secondary node
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # two minors per disk: one on the primary, one on the secondary
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)
    # each DRBD disk needs a data LV and a metadata LV, hence two names
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    # file-based disks are only valid without secondaries
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
      disks.append(disk_dev)
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5934 def _GetInstanceInfoText(instance):
5935 """Compute that text that should be added to the disk's metadata.
5938 return "originstname+%s" % instance.name
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation

  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  if instance.disk_template == constants.DT_FILE:
    # file-based disks need their containing directory created first
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    for node in all_nodes:
      # force creation (and opening) only on the primary node
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks

  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  for device in instance.disks:
      # a target_node override restricts removal to that single node
      edata = [(target_node, device)]
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
        # best-effort: warn and keep removing the remaining devices
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  @param disk_template: the disk template of the instance
  @param disks: list of disk definitions (dicts with a "size" key)
  @return: required free space in the volume group in MB, or None for
      templates that don't consume volume-group space

  """
  # Required free disk space as a function of disk and swap space
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstract the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
  # every node must validate the parameters; the first failure aborts
  for node in nodenames:
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
6082 class LUCreateInstance(LogicalUnit):
6083 """Create an instance.
6086 HPATH = "instance-add"
6087 HTYPE = constants.HTYPE_INSTANCE
6088 _OP_REQP = ["instance_name", "disks",
6090 "wait_for_sync", "ip_check", "nics",
6091 "hvparams", "beparams"]
  def CheckArguments(self):
    """Check arguments.

    Normalizes optional opcode attributes, validates the instance name,
    the nic/disk parameter dicts, the adopt strategy and the creation
    mode, before any cluster state is consulted.

    """
    # set optional parameters to none if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor",
                 "disk_template", "identify_defaults"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # do not require name_check to ease forward/backward compatibility
    if not hasattr(self.op, "name_check"):
      self.op.name_check = True
    if not hasattr(self.op, "no_install"):
      self.op.no_install = False
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
    # mixing adopted and newly-created disks is not supported
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
    if self.op.disk_template != constants.DT_PLAIN:
      raise errors.OpPrereqError("Disk adoption is only supported for the"
                                 " 'plain' disk template",
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # verify creation mode
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # instance name verification
    if self.op.name_check:
      self.hostname1 = utils.GetHostInfo(self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given",

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified",
      self.op.force_variant = getattr(self.op, "force_variant", False)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      # node choice is delegated to the allocator, so lock all nodes
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        # unknown source node: search all nodes for the export
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          # relative paths are interpreted below the cluster export directory
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    On success this fills in self.op.pnode (and self.op.snode when two
    nodes are required) from the allocator's answer.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     hypervisor=self.op.hypervisor,

    ial.Run(self.op.iallocator)

      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
    # the allocator must return exactly the number of nodes we asked for
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
      "ADD_MODE": self.op.mode,
    if self.op.mode == constants.INSTANCE_IMPORT:
      # expose the import source to the hooks
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      hypervisor_name=self.op.hypervisor,

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      # no source node given: search all locked nodes for the export
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      for node in exp_list:
        if exp_list[node].fail_msg:
        if src_path in exp_list[node].payload:
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    # the on-disk export format must match what this code understands
    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    the export declares them.

    @param einfo: parsed export information (config-parser-like object)

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({"size": disk_sz})
        self.op.disks = disks
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      # rebuild the nic definitions from the per-nic export options
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    Any hv/be/nic parameter that equals the cluster default is removed
    from the opcode, so the instance will track future default changes
    instead of pinning today's values.

    """
    # hvparams
    hv_defs = cluster.GetHVDefaults(self.op.hypervisor, self.op.os_type)
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.beparams.get(constants.PP_DEFAULT, {})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.nicparams.get(constants.PP_DEFAULT, {})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
# CheckPrereq for the instance-creation LU: validates all opcode parameters
# (disk template, hypervisor, hv/be/nic/disk parameters), handles the
# import-mode export data, runs the iallocator if requested, and checks the
# chosen nodes (online/drained, free disk, free memory, OS, bridges).
# NOTE(review): the embedded original line numbers show many elided lines
# (else-branches, `try:` openers, errors.ECODE_* arguments, assignments);
# comments below only describe the visible statements.
6452 def CheckPrereq(self):
6453 """Check prerequisites.
# In import mode, read the export info file first so its parameters can
# seed the opcode fields (see _ReadExportParams).
6456 if self.op.mode == constants.INSTANCE_IMPORT:
6457 export_info = self._ReadExportInfo()
6458 self._ReadExportParams(export_info)
6460 _CheckDiskTemplate(self.op.disk_template)
# LVM-based disk templates need a cluster volume group.
6462 if (not self.cfg.GetVGName() and
6463 self.op.disk_template not in constants.DTS_NOT_LVM):
6464 raise errors.OpPrereqError("Cluster does not support lvm-based"
6465 " instances", errors.ECODE_STATE)
# Default the hypervisor to the cluster-wide one, then require it to be
# among the enabled hypervisors.
6467 if self.op.hypervisor is None:
6468 self.op.hypervisor = self.cfg.GetHypervisorType()
6470 cluster = self.cfg.GetClusterInfo()
6471 enabled_hvs = cluster.enabled_hypervisors
6472 if self.op.hypervisor not in enabled_hvs:
6473 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6474 " cluster (%s)" % (self.op.hypervisor,
6475 ",".join(enabled_hvs)),
6478 # check hypervisor parameter syntax (locally)
6479 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6480 filled_hvp = objects.FillDict(cluster.GetHVDefaults(self.op.hypervisor,
6483 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6484 hv_type.CheckParameterSyntax(filled_hvp)
# Remember the fully-filled hv params for later use (e.g. hooks/allocator).
6485 self.hv_full = filled_hvp
6486 # check that we don't specify global parameters on an instance
6487 _CheckGlobalHvParams(self.op.hvparams)
6489 # fill and remember the beparams dict
6490 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6491 self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6494 # now that hvp/bep are in final format, let's reset to defaults,
6496 if self.op.identify_defaults:
6497 self._RevertToDefaults(cluster)
# NIC validation: mode, ip, mac, bridge/link — building self.nics as
# objects.NIC instances.
6501 for idx, nic in enumerate(self.op.nics):
6502 nic_mode_req = nic.get("mode", None)
6503 nic_mode = nic_mode_req
6504 if nic_mode is None:
6505 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6507 # in routed mode, for the first nic, the default ip is 'auto'
6508 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6509 default_ip_mode = constants.VALUE_AUTO
6511 default_ip_mode = constants.VALUE_NONE
6513 # ip validity checks
6514 ip = nic.get("ip", default_ip_mode)
# The 'none'-ip branch body is elided here (presumably sets nic_ip to
# None — confirm against full source); 'auto' requires name checks so the
# resolved instance IP (self.hostname1.ip) can be used.
6515 if ip is None or ip.lower() == constants.VALUE_NONE:
6517 elif ip.lower() == constants.VALUE_AUTO:
6518 if not self.op.name_check:
6519 raise errors.OpPrereqError("IP address set to auto but name checks"
6520 " have been skipped. Aborting.",
6522 nic_ip = self.hostname1.ip
6524 if not utils.IsValidIP(ip):
6525 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6526 " like a valid IP" % ip,
6530 # TODO: check the ip address for uniqueness
6531 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6532 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6535 # MAC address verification
6536 mac = nic.get("mac", constants.VALUE_AUTO)
6537 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6538 mac = utils.NormalizeAndValidateMac(mac)
# Reserve the explicit MAC so concurrent jobs can't take it; a
# ReservationError means it is already used in the cluster.
6541 self.cfg.ReserveMAC(mac, self.proc.GetECId())
6542 except errors.ReservationError:
6543 raise errors.OpPrereqError("MAC address %s already in use"
6544 " in cluster" % mac,
6545 errors.ECODE_NOTUNIQUE)
6547 # bridge verification
6548 bridge = nic.get("bridge", None)
6549 link = nic.get("link", None)
# bridge and link are mutually exclusive, and bridge is meaningless for
# routed NICs (the guarding condition line is elided).
6551 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6552 " at the same time", errors.ECODE_INVAL)
6553 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6554 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
# Build per-NIC override params and validate the filled result.
6561 nicparams[constants.NIC_MODE] = nic_mode_req
6563 nicparams[constants.NIC_LINK] = link
6565 check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6567 objects.NIC.CheckParameterSyntax(check_params)
6568 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6570 # disk checks/pre-build
6572 for disk in self.op.disks:
6573 mode = disk.get("mode", constants.DISK_RDWR)
6574 if mode not in constants.DISK_ACCESS_SET:
6575 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6576 mode, errors.ECODE_INVAL)
6577 size = disk.get("size", None)
6579 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
# Size is converted to int in an elided `try:` block; conversion failures
# are reported as an invalid disk size.
6582 except (TypeError, ValueError):
6583 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6585 new_disk = {"size": size, "mode": mode}
6587 new_disk["adopt"] = disk["adopt"]
6588 self.disks.append(new_disk)
# Import mode: reconcile requested disks/NICs with the export file.
6590 if self.op.mode == constants.INSTANCE_IMPORT:
6592 # Check that the new instance doesn't have less disks than the export
6593 instance_disks = len(self.disks)
6594 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6595 if instance_disks < export_disks:
6596 raise errors.OpPrereqError("Not enough disks to import."
6597 " (instance: %d, export: %d)" %
6598 (instance_disks, export_disks),
# Collect the per-disk dump image paths (False marks a disk with no dump).
6602 for idx in range(export_disks):
6603 option = 'disk%d_dump' % idx
6604 if export_info.has_option(constants.INISECT_INS, option):
6605 # FIXME: are the old os-es, disk sizes, etc. useful?
6606 export_name = export_info.get(constants.INISECT_INS, option)
6607 image = utils.PathJoin(self.op.src_path, export_name)
6608 disk_images.append(image)
6610 disk_images.append(False)
6612 self.src_images = disk_images
6614 old_name = export_info.get(constants.INISECT_INS, 'name')
6616 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6617 except (TypeError, ValueError), err:
6618 raise errors.OpPrereqError("Invalid export file, nic_count is not"
6619 " an integer: %s" % str(err),
# If re-importing under the same name, reuse the exported MACs for NICs
# still set to 'auto'.
6621 if self.op.instance_name == old_name:
6622 for idx, nic in enumerate(self.nics):
6623 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6624 nic_mac_ini = 'nic%d_mac' % idx
6625 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6627 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6629 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6630 if self.op.ip_check:
6631 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6632 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6633 (self.check_ip, self.op.instance_name),
6634 errors.ECODE_NOTUNIQUE)
6636 #### mac address generation
6637 # By generating here the mac address both the allocator and the hooks get
6638 # the real final mac address rather than the 'auto' or 'generate' value.
6639 # There is a race condition between the generation and the instance object
6640 # creation, which means that we know the mac is valid now, but we're not
6641 # sure it will be when we actually add the instance. If things go bad
6642 # adding the instance will abort because of a duplicate mac, and the
6643 # creation job will fail.
6644 for nic in self.nics:
6645 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6646 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
# Optionally let the iallocator pick the node(s).
6650 if self.op.iallocator is not None:
6651 self._RunAllocator()
6653 #### node related checks
6655 # check primary node
6656 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6657 assert self.pnode is not None, \
6658 "Cannot retrieve locked node %s" % self.op.pnode
# Primary node must be neither offline nor drained (the guarding `if`
# lines are elided).
6660 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6661 pnode.name, errors.ECODE_STATE)
6663 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6664 pnode.name, errors.ECODE_STATE)
6666 self.secondaries = []
6668 # mirror node verification
6669 if self.op.disk_template in constants.DTS_NET_MIRROR:
6670 if self.op.snode is None:
6671 raise errors.OpPrereqError("The networked disk templates need"
6672 " a mirror node", errors.ECODE_INVAL)
6673 if self.op.snode == pnode.name:
6674 raise errors.OpPrereqError("The secondary node cannot be the"
6675 " primary node.", errors.ECODE_INVAL)
6676 _CheckNodeOnline(self, self.op.snode)
6677 _CheckNodeNotDrained(self, self.op.snode)
6678 self.secondaries.append(self.op.snode)
6680 nodenames = [pnode.name] + self.secondaries
6682 req_size = _ComputeDiskSize(self.op.disk_template,
6685 # Check lv size requirements, if not adopting
6686 if req_size is not None and not self.adopt_disks:
6687 _CheckNodesFreeDisk(self, nodenames, req_size)
# Adoption path: instead of free-space checks, verify that the named LVs
# exist on the primary node, are unique, unreserved and offline.
6689 if self.adopt_disks: # instead, we must check the adoption data
6690 all_lvs = set([i["adopt"] for i in self.disks])
6691 if len(all_lvs) != len(self.disks):
6692 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6694 for lv_name in all_lvs:
6696 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6697 except errors.ReservationError:
6698 raise errors.OpPrereqError("LV named %s used by another instance" %
6699 lv_name, errors.ECODE_NOTUNIQUE)
6701 node_lvs = self.rpc.call_lv_list([pnode.name],
6702 self.cfg.GetVGName())[pnode.name]
6703 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6704 node_lvs = node_lvs.payload
6705 delta = all_lvs.difference(node_lvs.keys())
6707 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6708 utils.CommaJoin(delta),
6710 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6712 raise errors.OpPrereqError("Online logical volumes found, cannot"
6713 " adopt: %s" % utils.CommaJoin(online_lvs),
6715 # update the size of disk based on what is found
6716 for dsk in self.disks:
6717 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6719 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6721 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6723 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6725 # memory check on primary node
6727 _CheckNodeFreeMemory(self, self.pnode.name,
6728 "creating instance %s" % self.op.instance_name,
6729 self.be_full[constants.BE_MEMORY],
# Dry-run returns the list of nodes the instance would use.
6732 self.dry_run_result = list(nodenames)
# Exec for the instance-creation LU: generates/renames the disks, builds the
# Instance object, commits it to the config, waits for mirror sync, runs the
# OS create/import scripts, and optionally starts the instance.
# NOTE(review): the embedded original line numbers show elided lines
# (`else:` branches, `try:` openers, some arguments); comments describe
# only the visible statements.
6734 def Exec(self, feedback_fn):
6735 """Create and add the instance to the cluster.
6738 instance = self.op.instance_name
6739 pnode_name = self.pnode.name
# Some hypervisors need a network port (e.g. for the console) allocated
# from the cluster pool.
6741 ht_kind = self.op.hypervisor
6742 if ht_kind in constants.HTS_REQ_PORT:
6743 network_port = self.cfg.AllocatePort()
6747 if constants.ENABLE_FILE_STORAGE:
6748 # this is needed because os.path.join does not accept None arguments
6749 if self.op.file_storage_dir is None:
6750 string_file_storage_dir = ""
6752 string_file_storage_dir = self.op.file_storage_dir
6754 # build the full file storage dir path
6755 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6756 string_file_storage_dir, instance)
6758 file_storage_dir = ""
6761 disks = _GenerateDiskTemplate(self,
6762 self.op.disk_template,
6763 instance, pnode_name,
6767 self.op.file_driver,
6770 iobj = objects.Instance(name=instance, os=self.op.os_type,
6771 primary_node=pnode_name,
6772 nics=self.nics, disks=disks,
6773 disk_template=self.op.disk_template,
6775 network_port=network_port,
6776 beparams=self.op.beparams,
6777 hvparams=self.op.hvparams,
6778 hypervisor=self.op.hypervisor,
6781 if self.adopt_disks:
6782 # rename LVs to the newly-generated names; we need to construct
6783 # 'fake' LV disks with the old data, plus the new unique_id
6784 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6786 for t_dsk, a_dsk in zip (tmp_disks, self.disks):
6787 rename_to.append(t_dsk.logical_id)
6788 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6789 self.cfg.SetDiskID(t_dsk, pnode_name)
6790 result = self.rpc.call_blockdev_rename(pnode_name,
6791 zip(tmp_disks, rename_to))
# NOTE(review): "adoped" in the message below is a typo for "adopted"
# (runtime string, left untouched here).
6792 result.Raise("Failed to rename adoped LVs")
6794 feedback_fn("* creating instance disks...")
# Disk creation failures are rolled back: remove any created disks and
# release the reserved DRBD minors before re-raising (the `try:`/`raise`
# lines are elided).
6796 _CreateDisks(self, iobj)
6797 except errors.OpExecError:
6798 self.LogWarning("Device creation failed, reverting...")
6800 _RemoveDisks(self, iobj)
6802 self.cfg.ReleaseDRBDMinors(instance)
6805 feedback_fn("adding instance %s to cluster config" % instance)
6807 self.cfg.AddInstance(iobj, self.proc.GetECId())
6809 # Declare that we don't want to remove the instance lock anymore, as we've
6810 # added the instance to the config
6811 del self.remove_locks[locking.LEVEL_INSTANCE]
6812 # Unlock all the nodes
# In import mode keep the source node locked (still needed for the OS
# import below); otherwise release all node locks.
6813 if self.op.mode == constants.INSTANCE_IMPORT:
6814 nodes_keep = [self.op.src_node]
6815 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6816 if node != self.op.src_node]
6817 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6818 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6820 self.context.glm.release(locking.LEVEL_NODE)
6821 del self.acquired_locks[locking.LEVEL_NODE]
6823 if self.op.wait_for_sync:
6824 disk_abort = not _WaitForSync(self, iobj)
6825 elif iobj.disk_template in constants.DTS_NET_MIRROR:
6826 # make sure the disks are not degraded (still sync-ing is ok)
6828 feedback_fn("* checking mirrors status")
6829 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
# On degraded disks, undo everything: remove disks, drop the instance from
# the config, re-arm instance-lock removal, and fail the job (the guarding
# `if disk_abort:` line is elided).
6834 _RemoveDisks(self, iobj)
6835 self.cfg.RemoveInstance(iobj.name)
6836 # Make sure the instance lock gets removed
6837 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6838 raise errors.OpExecError("There are some degraded disks for"
# OS installation: skipped for diskless instances and adopted disks (whose
# contents must be preserved).
6841 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6842 if self.op.mode == constants.INSTANCE_CREATE:
6843 if not self.op.no_install:
6844 feedback_fn("* running the instance OS create scripts...")
6845 # FIXME: pass debug option from opcode to backend
6846 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6847 self.op.debug_level)
6848 result.Raise("Could not add os for instance %s"
6849 " on node %s" % (instance, pnode_name))
6851 elif self.op.mode == constants.INSTANCE_IMPORT:
6852 feedback_fn("* running the instance OS import scripts...")
6853 src_node = self.op.src_node
6854 src_images = self.src_images
6855 cluster_name = self.cfg.GetClusterName()
6856 # FIXME: pass debug option from opcode to backend
6857 import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6858 src_node, src_images,
6860 self.op.debug_level)
# Import failures are only warnings, not fatal (the guarding `if msg:`
# line is elided).
6861 msg = import_result.fail_msg
6863 self.LogWarning("Error while importing the disk images for instance"
6864 " %s on node %s: %s" % (instance, pnode_name, msg))
6866 # also checked in the prereq part
6867 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
# Optionally start the instance (the guarding `if self.op.start:` line is
# elided) and persist admin_up.
6871 iobj.admin_up = True
6872 self.cfg.Update(iobj, feedback_fn)
6873 logging.info("Starting instance %s on node %s", instance, pnode_name)
6874 feedback_fn("* starting instance...")
6875 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6876 result.Raise("Could not start instance")
6878 return list(iobj.all_nodes)
# LUConnectConsole: no-hooks LU that does not itself attach to the console;
# it builds and returns the ssh command line which, run on the master node,
# connects to the instance's console on its primary node.
6881 class LUConnectConsole(NoHooksLU):
6882 """Connect to an instance's console.
6884 This is somewhat special in that it returns the command line that
6885 you need to run on the master node in order to connect to the
6889 _OP_REQP = ["instance_name"]
6892 def ExpandNames(self):
# Only the instance itself needs to be locked.
6893 self._ExpandAndLockInstance()
6895 def CheckPrereq(self):
6896 """Check prerequisites.
6898 This checks that the instance is in the cluster.
6901 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6902 assert self.instance is not None, \
6903 "Cannot retrieve locked instance %s" % self.op.instance_name
# The console lives on the primary node, so that node must be online.
6904 _CheckNodeOnline(self, self.instance.primary_node)
6906 def Exec(self, feedback_fn):
6907 """Connect to the console of an instance
6910 instance = self.instance
6911 node = instance.primary_node
# Verify the instance is actually running on its primary node before
# handing out a console command.
6913 node_insts = self.rpc.call_instance_list([node],
6914 [instance.hypervisor])[node]
6915 node_insts.Raise("Can't get node information from %s" % node)
6917 if instance.name not in node_insts.payload:
6918 raise errors.OpExecError("Instance %s is not running." % instance.name)
6920 logging.debug("Connecting to console of %s on %s", instance.name, node)
6922 hyper = hypervisor.GetHypervisor(instance.hypervisor)
6923 cluster = self.cfg.GetClusterInfo()
6924 # beparams and hvparams are passed separately, to avoid editing the
6925 # instance and then saving the defaults in the instance itself.
6926 hvparams = cluster.FillHV(instance)
6927 beparams = cluster.FillBE(instance)
6928 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
# Wrap the hypervisor-specific console command in an ssh invocation to the
# primary node, as root, with a tty (interactive console).
6931 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
# LUReplaceDisks: LU wrapper around the TLReplaceDisks tasklet for replacing
# the disks of a single (DRBD-based) instance; handles opcode defaults,
# locking and hooks, while the tasklet does the actual work.
6934 class LUReplaceDisks(LogicalUnit):
6935 """Replace the disks of an instance.
6938 HPATH = "mirrors-replace"
6939 HTYPE = constants.HTYPE_INSTANCE
6940 _OP_REQP = ["instance_name", "mode", "disks"]
6943 def CheckArguments(self):
# Backfill optional opcode attributes (older clients may omit them).
6944 if not hasattr(self.op, "remote_node"):
6945 self.op.remote_node = None
6946 if not hasattr(self.op, "iallocator"):
6947 self.op.iallocator = None
6948 if not hasattr(self.op, "early_release"):
6949 self.op.early_release = False
# Delegate mode/remote_node/iallocator consistency checks to the tasklet.
6951 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6954 def ExpandNames(self):
6955 self._ExpandAndLockInstance()
# With an iallocator the new secondary is unknown yet, so lock all nodes.
6957 if self.op.iallocator is not None:
6958 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6960 elif self.op.remote_node is not None:
6961 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6962 self.op.remote_node = remote_node
6964 # Warning: do not remove the locking of the new secondary here
6965 # unless DRBD8.AddChildren is changed to work in parallel;
6966 # currently it doesn't since parallel invocations of
6967 # FindUnusedMinor will conflict
6968 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6969 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6972 self.needed_locks[locking.LEVEL_NODE] = []
6973 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
# The single tasklet performs the replacement (delay_iallocator=False).
6975 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6976 self.op.iallocator, self.op.remote_node,
6977 self.op.disks, False, self.op.early_release)
6979 self.tasklets = [self.replacer]
6981 def DeclareLocks(self, level):
6982 # If we're not already locking all nodes in the set we have to declare the
6983 # instance's primary/secondary nodes.
6984 if (level == locking.LEVEL_NODE and
6985 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6986 self._LockInstancesNodes()
6988 def BuildHooksEnv(self):
6991 This runs on the master, the primary and all the secondaries.
# Expose replacement mode and old/new secondary to the hooks environment.
6994 instance = self.replacer.instance
6996 "MODE": self.op.mode,
6997 "NEW_SECONDARY": self.op.remote_node,
6998 "OLD_SECONDARY": instance.secondary_nodes[0],
7000 env.update(_BuildInstanceHookEnvByObject(self, instance))
# Node list for hooks: master, primary, plus the explicit new secondary
# if one was given (the method's return line falls on an elided line).
7002 self.cfg.GetMasterNode(),
7003 instance.primary_node,
7005 if self.op.remote_node is not None:
7006 nl.append(self.op.remote_node)
# LUEvacuateNode: moves all DRBD secondaries away from one node by creating
# one TLReplaceDisks tasklet (REPLACE_DISK_CHG mode) per secondary instance
# on that node.
7010 class LUEvacuateNode(LogicalUnit):
7011 """Relocate the secondary instances from a node.
7014 HPATH = "node-evacuate"
7015 HTYPE = constants.HTYPE_NODE
7016 _OP_REQP = ["node_name"]
7019 def CheckArguments(self):
# Backfill optional opcode attributes (older clients may omit them).
7020 if not hasattr(self.op, "remote_node"):
7021 self.op.remote_node = None
7022 if not hasattr(self.op, "iallocator"):
7023 self.op.iallocator = None
7024 if not hasattr(self.op, "early_release"):
7025 self.op.early_release = False
# Evacuation always changes the secondary, hence REPLACE_DISK_CHG.
7027 TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
7028 self.op.remote_node,
7031 def ExpandNames(self):
7032 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7034 self.needed_locks = {}
7036 # Declare node locks
7037 if self.op.iallocator is not None:
7038 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7040 elif self.op.remote_node is not None:
7041 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7043 # Warning: do not remove the locking of the new secondary here
7044 # unless DRBD8.AddChildren is changed to work in parallel;
7045 # currently it doesn't since parallel invocations of
7046 # FindUnusedMinor will conflict
7047 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
7048 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
# Neither iallocator nor remote node given: invalid (the `else:` line is
# elided).
7051 raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
7053 # Create tasklets for replacing disks for all secondary instances on this
# One tasklet per secondary instance; delay_iallocator=True so the
# allocator sees the cluster state after earlier tasklets ran.
7058 for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
7059 logging.debug("Replacing disks for instance %s", inst.name)
7060 names.append(inst.name)
7062 replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
7063 self.op.iallocator, self.op.remote_node, [],
7064 True, self.op.early_release)
7065 tasklets.append(replacer)
7067 self.tasklets = tasklets
7068 self.instance_names = names
7070 # Declare instance locks
7071 self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
7073 def DeclareLocks(self, level):
7074 # If we're not already locking all nodes in the set we have to declare the
7075 # instance's primary/secondary nodes.
7076 if (level == locking.LEVEL_NODE and
7077 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7078 self._LockInstancesNodes()
7080 def BuildHooksEnv(self):
7083 This runs on the master, the primary and all the secondaries.
7087 "NODE_NAME": self.op.node_name,
7090 nl = [self.cfg.GetMasterNode()]
7092 if self.op.remote_node is not None:
7093 env["NEW_SECONDARY"] = self.op.remote_node
7094 nl.append(self.op.remote_node)
7096 return (env, nl, nl)
7099 class TLReplaceDisks(Tasklet):
7100 """Replaces disks for an instance.
7102 Note: Locking is not within the scope of this class.
7105 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7106 disks, delay_iallocator, early_release):
7107 """Initializes this class.
7110 Tasklet.__init__(self, lu)
7113 self.instance_name = instance_name
7115 self.iallocator_name = iallocator_name
7116 self.remote_node = remote_node
7118 self.delay_iallocator = delay_iallocator
7119 self.early_release = early_release
7122 self.instance = None
7123 self.new_node = None
7124 self.target_node = None
7125 self.other_node = None
7126 self.remote_node_info = None
7127 self.node_secondary_ip = None
7130 def CheckArguments(mode, remote_node, iallocator):
7131 """Helper function for users of this class.
7134 # check for valid parameter combination
7135 if mode == constants.REPLACE_DISK_CHG:
7136 if remote_node is None and iallocator is None:
7137 raise errors.OpPrereqError("When changing the secondary either an"
7138 " iallocator script must be used or the"
7139 " new node given", errors.ECODE_INVAL)
7141 if remote_node is not None and iallocator is not None:
7142 raise errors.OpPrereqError("Give either the iallocator or the new"
7143 " secondary, not both", errors.ECODE_INVAL)
7145 elif remote_node is not None or iallocator is not None:
7146 # Not replacing the secondary
7147 raise errors.OpPrereqError("The iallocator and new node options can"
7148 " only be used when changing the"
7149 " secondary node", errors.ECODE_INVAL)
7152 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7153 """Compute a new secondary node using an IAllocator.
7156 ial = IAllocator(lu.cfg, lu.rpc,
7157 mode=constants.IALLOCATOR_MODE_RELOC,
7159 relocate_from=relocate_from)
7161 ial.Run(iallocator_name)
7164 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7165 " %s" % (iallocator_name, ial.info),
7168 if len(ial.result) != ial.required_nodes:
7169 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7170 " of nodes (%s), required %s" %
7172 len(ial.result), ial.required_nodes),
7175 remote_node_name = ial.result[0]
7177 lu.LogInfo("Selected new secondary for instance '%s': %s",
7178 instance_name, remote_node_name)
7180 return remote_node_name
7182 def _FindFaultyDisks(self, node_name):
7183 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7186 def CheckPrereq(self):
7187 """Check prerequisites.
7189 This checks that the instance is in the cluster.
7192 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7193 assert instance is not None, \
7194 "Cannot retrieve locked instance %s" % self.instance_name
7196 if instance.disk_template != constants.DT_DRBD8:
7197 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7198 " instances", errors.ECODE_INVAL)
7200 if len(instance.secondary_nodes) != 1:
7201 raise errors.OpPrereqError("The instance has a strange layout,"
7202 " expected one secondary but found %d" %
7203 len(instance.secondary_nodes),
7206 if not self.delay_iallocator:
7207 self._CheckPrereq2()
7209 def _CheckPrereq2(self):
7210 """Check prerequisites, second part.
7212 This function should always be part of CheckPrereq. It was separated and is
7213 now called from Exec because during node evacuation iallocator was only
7214 called with an unmodified cluster model, not taking planned changes into
7218 instance = self.instance
7219 secondary_node = instance.secondary_nodes[0]
7221 if self.iallocator_name is None:
7222 remote_node = self.remote_node
7224 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7225 instance.name, instance.secondary_nodes)
7227 if remote_node is not None:
7228 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7229 assert self.remote_node_info is not None, \
7230 "Cannot retrieve locked node %s" % remote_node
7232 self.remote_node_info = None
7234 if remote_node == self.instance.primary_node:
7235 raise errors.OpPrereqError("The specified node is the primary node of"
7236 " the instance.", errors.ECODE_INVAL)
7238 if remote_node == secondary_node:
7239 raise errors.OpPrereqError("The specified node is already the"
7240 " secondary node of the instance.",
7243 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7244 constants.REPLACE_DISK_CHG):
7245 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7248 if self.mode == constants.REPLACE_DISK_AUTO:
7249 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7250 faulty_secondary = self._FindFaultyDisks(secondary_node)
7252 if faulty_primary and faulty_secondary:
7253 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7254 " one node and can not be repaired"
7255 " automatically" % self.instance_name,
7259 self.disks = faulty_primary
7260 self.target_node = instance.primary_node
7261 self.other_node = secondary_node
7262 check_nodes = [self.target_node, self.other_node]
7263 elif faulty_secondary:
7264 self.disks = faulty_secondary
7265 self.target_node = secondary_node
7266 self.other_node = instance.primary_node
7267 check_nodes = [self.target_node, self.other_node]
7273 # Non-automatic modes
7274 if self.mode == constants.REPLACE_DISK_PRI:
7275 self.target_node = instance.primary_node
7276 self.other_node = secondary_node
7277 check_nodes = [self.target_node, self.other_node]
7279 elif self.mode == constants.REPLACE_DISK_SEC:
7280 self.target_node = secondary_node
7281 self.other_node = instance.primary_node
7282 check_nodes = [self.target_node, self.other_node]
7284 elif self.mode == constants.REPLACE_DISK_CHG:
7285 self.new_node = remote_node
7286 self.other_node = instance.primary_node
7287 self.target_node = secondary_node
7288 check_nodes = [self.new_node, self.other_node]
7290 _CheckNodeNotDrained(self.lu, remote_node)
7292 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7293 assert old_node_info is not None
7294 if old_node_info.offline and not self.early_release:
7295 # doesn't make sense to delay the release
7296 self.early_release = True
7297 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7298 " early-release mode", secondary_node)
7301 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7304 # If not specified all disks should be replaced
7306 self.disks = range(len(self.instance.disks))
7308 for node in check_nodes:
7309 _CheckNodeOnline(self.lu, node)
7311 # Check whether disks are valid
7312 for disk_idx in self.disks:
7313 instance.FindDisk(disk_idx)
7315 # Get secondary node IP addresses
7318 for node_name in [self.target_node, self.other_node, self.new_node]:
7319 if node_name is not None:
7320 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7322 self.node_secondary_ip = node_2nd_ip
7324 def Exec(self, feedback_fn):
7325 """Execute disk replacement.
7327 This dispatches the disk replacement to the appropriate handler.
7330 if self.delay_iallocator:
7331 self._CheckPrereq2()
7334 feedback_fn("No disks need replacement")
7337 feedback_fn("Replacing disk(s) %s for %s" %
7338 (utils.CommaJoin(self.disks), self.instance.name))
7340 activate_disks = (not self.instance.admin_up)
7342 # Activate the instance disks if we're replacing them on a down instance
7344 _StartInstanceDisks(self.lu, self.instance, True)
7347 # Should we replace the secondary node?
7348 if self.new_node is not None:
7349 fn = self._ExecDrbd8Secondary
7351 fn = self._ExecDrbd8DiskOnly
7353 return fn(feedback_fn)
7356 # Deactivate the instance disks if we're replacing them on a
7359 _SafeShutdownInstanceDisks(self.lu, self.instance)
7361 def _CheckVolumeGroup(self, nodes):
7362 self.lu.LogInfo("Checking volume groups")
7364 vgname = self.cfg.GetVGName()
7366 # Make sure volume group exists on all involved nodes
7367 results = self.rpc.call_vg_list(nodes)
7369 raise errors.OpExecError("Can't list volume groups on the nodes")
7373 res.Raise("Error checking node %s" % node)
7374 if vgname not in res.payload:
7375 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7378 def _CheckDisksExistence(self, nodes):
7379 # Check disk existence
7380 for idx, dev in enumerate(self.instance.disks):
7381 if idx not in self.disks:
7385 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7386 self.cfg.SetDiskID(dev, node)
7388 result = self.rpc.call_blockdev_find(node, dev)
7390 msg = result.fail_msg
7391 if msg or not result.payload:
7393 msg = "disk not found"
7394 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7397 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7398 for idx, dev in enumerate(self.instance.disks):
7399 if idx not in self.disks:
7402 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7405 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7407 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7408 " replace disks for instance %s" %
7409 (node_name, self.instance.name))
7411 def _CreateNewStorage(self, node_name):
7412 vgname = self.cfg.GetVGName()
7415 for idx, dev in enumerate(self.instance.disks):
7416 if idx not in self.disks:
7419 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7421 self.cfg.SetDiskID(dev, node_name)
7423 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7424 names = _GenerateUniqueNames(self.lu, lv_names)
7426 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7427 logical_id=(vgname, names[0]))
7428 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7429 logical_id=(vgname, names[1]))
7431 new_lvs = [lv_data, lv_meta]
7432 old_lvs = dev.children
7433 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7435 # we pass force_create=True to force the LVM creation
7436 for new_lv in new_lvs:
7437 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7438 _GetInstanceInfoText(self.instance), False)
7442 def _CheckDevices(self, node_name, iv_names):
7443 for name, (dev, _, _) in iv_names.iteritems():
7444 self.cfg.SetDiskID(dev, node_name)
7446 result = self.rpc.call_blockdev_find(node_name, dev)
7448 msg = result.fail_msg
7449 if msg or not result.payload:
7451 msg = "disk not found"
7452 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7455 if result.payload.is_degraded:
7456 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7458 def _RemoveOldStorage(self, node_name, iv_names):
7459 for name, (_, old_lvs, _) in iv_names.iteritems():
7460 self.lu.LogInfo("Remove logical volumes for %s" % name)
7463 self.cfg.SetDiskID(lv, node_name)
7465 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7467 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7468 hint="remove unused LVs manually")
7470 def _ReleaseNodeLock(self, node_name):
7471 """Releases the lock for a given node."""
7472 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaces.<time_t>)

    Failures are not very well handled.

    """
    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # only rename LVs that are actually present on the node
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # point the in-memory new LVs at the (now freed) old logical ids
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # and the old LVs at their temporary "_replaced" names
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
      msg = result.fail_msg
        # attach failed: roll back by removing the freshly created LVs
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: dbrd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
    logging.debug("Allocated minors %r", minors)

    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        assert self.instance.primary_node == o_node2, "Three-node instance?"

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        # device creation failed; give the minors back before aborting
        self.cfg.ReleaseDRBDMinors(self.instance.name)

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           "node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                               [self.instance.primary_node]

    msg = result.fail_msg
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                           self.node_secondary_ip,
                                           self.instance.disks,
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
        # attach failure is non-fatal: sync can be re-established manually
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,

    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_REQP = ["node_name"]

  def CheckArguments(self):
    # canonicalize the node name and validate the requested storage type
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    _CheckStorageType(self.op.storage_type)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
      raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                 " node '%s'" % (instance.name, node_name),
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        # best-effort mode: demote the prereq failure to a warning
        self.proc.LogWarning(str(err.args[0]))

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,

    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    """Execute the storage repair RPC on the target node."""
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  _OP_REQP = ["nodes"]

  def CheckArguments(self):
    # default the optional parameters, then reject conflicting ones
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if self.op.remote_node is not None and self.op.iallocator is not None:
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      # no fixed target node: any node may be chosen, so lock them all
      locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def CheckPrereq(self):

  def Exec(self, feedback_fn):
    # with an explicit remote node, build the mapping manually; otherwise
    # delegate the placement decision to the instance allocator
    if self.op.remote_node is not None:
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     (self.op.remote_node, i.name),
        result.append([i.name, self.op.remote_node])
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template != constants.DT_FILE:
      # TODO: check the free disk space for file, when that feature will be
      _CheckNodesFreeDisk(self, nodenames, self.op.amount)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
      raise errors.OpExecError("Cannot activate block device to grow")

    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)

    # TODO: Rewrite code to work properly
    # DRBD goes into sync mode for a short amount of time after executing the
    # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
    # calling "resize" in sync mode fails. Sleeping for a short amount of
    # time is a work-around.
    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")
        if not instance.admin_up:
          _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested.")
class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]

  def ExpandNames(self):
    self.needed_locks = {}
    # queries are read-only, so all locks are taken in shared mode
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device.

    """
    if self.op.static or not node:

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]

      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,

  def Exec(self, feedback_fn):
    """Gather and return data"""
    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      # unless a static-only query was requested, ask the primary node
      # for the live run state of the instance
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "down"

      if instance.admin_up:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      "name": instance.name,
      "config_state": config_state,
      "run_state": remote_state,
      "pnode": instance.primary_node,
      "snodes": instance.secondary_nodes,
      # this happens to be the same format used for hooks
      "nics": _NICListToTuple(self, instance.nics),
      "disk_template": instance.disk_template,
      "hypervisor": instance.hypervisor,
      "network_port": instance.network_port,
      "hv_instance": instance.hvparams,
      "hv_actual": cluster.FillHV(instance, skip_globals=True),
      "be_instance": instance.beparams,
      "be_actual": cluster.FillBE(instance),
      "serial_no": instance.serial_no,
      "mtime": instance.mtime,
      "ctime": instance.ctime,
      "uuid": instance.uuid,

      result[instance.name] = idict
8142 class LUSetInstanceParams(LogicalUnit):
8143 """Modifies an instances's parameters.
8146 HPATH = "instance-modify"
8147 HTYPE = constants.HTYPE_INSTANCE
8148 _OP_REQP = ["instance_name"]
  def CheckArguments(self):
    """Normalize and validate the opcode parameters.

    Defaults the optional attributes, then syntactically validates the
    requested disk and NIC modifications before any locks are taken.

    """
    if not hasattr(self.op, 'nics'):
    if not hasattr(self.op, 'disks'):
    if not hasattr(self.op, 'beparams'):
      self.op.beparams = {}
    if not hasattr(self.op, 'hvparams'):
      self.op.hvparams = {}
    if not hasattr(self.op, "disk_template"):
      self.op.disk_template = None
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "os_name"):
      self.op.os_name = None
    if not hasattr(self.op, "force_variant"):
      self.op.force_variant = False
    self.op.force = getattr(self.op, "force", False)
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
      elif disk_op == constants.DDM_ADD:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
        size = disk_dict.get('size', None)
          raise errors.OpPrereqError("Required disk parameter size missing",
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict['size'] = size
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",

    if self.op.disk_template:
      _CheckDiskTemplate(self.op.disk_template)
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
          self.op.remote_node is None):
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                   " one requires specifying a secondary node",

    # NIC validation
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
      elif nic_op == constants.DDM_ADD:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
          if not utils.IsValidIP(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)
8285 def ExpandNames(self):
8286 self._ExpandAndLockInstance()
8287 self.needed_locks[locking.LEVEL_NODE] = []
8288 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8290 def DeclareLocks(self, level):
8291 if level == locking.LEVEL_NODE:
8292 self._LockInstancesNodes()
8293 if self.op.disk_template and self.op.remote_node:
8294 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8295 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
      # merge the requested NIC overrides over the current NIC list
      nic_override = dict(self.op.nics)
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
          this_nic_override = {}
        if 'ip' in this_nic_override:
          ip = this_nic_override['ip']
        if 'mac' in this_nic_override:
          mac = this_nic_override['mac']
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get('ip', None)
        mac = nic_override[constants.DDM_ADD]['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8351 def _GetUpdatedParams(old_params, update_dict,
8352 default_values, parameter_types):
8353 """Return the new params dict for the given params.
8355 @type old_params: dict
8356 @param old_params: old parameters
8357 @type update_dict: dict
8358 @param update_dict: dict containing new parameter values,
8359 or constants.VALUE_DEFAULT to reset the
8360 parameter to its default value
8361 @type default_values: dict
8362 @param default_values: default values for the filled parameters
8363 @type parameter_types: dict
8364 @param parameter_types: dict mapping target dict keys to types
8365 in constants.ENFORCEABLE_TYPES
8366 @rtype: (dict, dict)
8367 @return: (new_parameters, filled_parameters)
8370 params_copy = copy.deepcopy(old_params)
8371 for key, val in update_dict.iteritems():
8372 if val == constants.VALUE_DEFAULT:
8374 del params_copy[key]
8378 params_copy[key] = val
8379 utils.ForceDictType(params_copy, parameter_types)
8380 params_filled = objects.FillDict(default_values, params_copy)
8381 return (params_copy, params_filled)
8383 def CheckPrereq(self):
8384 """Check prerequisites.
8386 This only checks the instance list against the existing names.
8389 self.force = self.op.force
8391 # checking the new params on the primary/secondary nodes
8393 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8394 cluster = self.cluster = self.cfg.GetClusterInfo()
8395 assert self.instance is not None, \
8396 "Cannot retrieve locked instance %s" % self.op.instance_name
8397 pnode = instance.primary_node
8398 nodelist = list(instance.all_nodes)
8400 if self.op.disk_template:
8401 if instance.disk_template == self.op.disk_template:
8402 raise errors.OpPrereqError("Instance already has disk template %s" %
8403 instance.disk_template, errors.ECODE_INVAL)
8405 if (instance.disk_template,
8406 self.op.disk_template) not in self._DISK_CONVERSIONS:
8407 raise errors.OpPrereqError("Unsupported disk template conversion from"
8408 " %s to %s" % (instance.disk_template,
8409 self.op.disk_template),
8411 if self.op.disk_template in constants.DTS_NET_MIRROR:
8412 _CheckNodeOnline(self, self.op.remote_node)
8413 _CheckNodeNotDrained(self, self.op.remote_node)
8414 disks = [{"size": d.size} for d in instance.disks]
8415 required = _ComputeDiskSize(self.op.disk_template, disks)
8416 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8417 _CheckInstanceDown(self, instance, "cannot change disk template")
8419 # hvparams processing
8420 if self.op.hvparams:
8421 i_hvdict, hv_new = self._GetUpdatedParams(
8422 instance.hvparams, self.op.hvparams,
8423 cluster.hvparams[instance.hypervisor],
8424 constants.HVS_PARAMETER_TYPES)
8426 hypervisor.GetHypervisor(
8427 instance.hypervisor).CheckParameterSyntax(hv_new)
8428 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8429 self.hv_new = hv_new # the new actual values
8430 self.hv_inst = i_hvdict # the new dict (without defaults)
8432 self.hv_new = self.hv_inst = {}
8434 # beparams processing
8435 if self.op.beparams:
8436 i_bedict, be_new = self._GetUpdatedParams(
8437 instance.beparams, self.op.beparams,
8438 cluster.beparams[constants.PP_DEFAULT],
8439 constants.BES_PARAMETER_TYPES)
8440 self.be_new = be_new # the new actual values
8441 self.be_inst = i_bedict # the new dict (without defaults)
8443 self.be_new = self.be_inst = {}
8447 if constants.BE_MEMORY in self.op.beparams and not self.force:
8448 mem_check_list = [pnode]
8449 if be_new[constants.BE_AUTO_BALANCE]:
8450 # either we changed auto_balance to yes or it was from before
8451 mem_check_list.extend(instance.secondary_nodes)
8452 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8453 instance.hypervisor)
8454 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8455 instance.hypervisor)
8456 pninfo = nodeinfo[pnode]
8457 msg = pninfo.fail_msg
8459 # Assume the primary node is unreachable and go ahead
8460 self.warn.append("Can't get info from primary node %s: %s" %
8462 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8463 self.warn.append("Node data from primary node %s doesn't contain"
8464 " free memory information" % pnode)
8465 elif instance_info.fail_msg:
8466 self.warn.append("Can't get instance runtime information: %s" %
8467 instance_info.fail_msg)
8469 if instance_info.payload:
8470 current_mem = int(instance_info.payload['memory'])
8472 # Assume instance not running
8473 # (there is a slight race condition here, but it's not very probable,
8474 # and we have no other way to check)
8476 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8477 pninfo.payload['memory_free'])
8479 raise errors.OpPrereqError("This change will prevent the instance"
8480 " from starting, due to %d MB of memory"
8481 " missing on its primary node" % miss_mem,
8484 if be_new[constants.BE_AUTO_BALANCE]:
8485 for node, nres in nodeinfo.items():
8486 if node not in instance.secondary_nodes:
8490 self.warn.append("Can't get info from secondary node %s: %s" %
8492 elif not isinstance(nres.payload.get('memory_free', None), int):
8493 self.warn.append("Secondary node %s didn't return free"
8494 " memory information" % node)
8495 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8496 self.warn.append("Not enough memory to failover instance to"
8497 " secondary node %s" % node)
8502 for nic_op, nic_dict in self.op.nics:
8503 if nic_op == constants.DDM_REMOVE:
8504 if not instance.nics:
8505 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8508 if nic_op != constants.DDM_ADD:
8510 if not instance.nics:
8511 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8512 " no NICs" % nic_op,
8514 if nic_op < 0 or nic_op >= len(instance.nics):
8515 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8517 (nic_op, len(instance.nics) - 1),
8519 old_nic_params = instance.nics[nic_op].nicparams
8520 old_nic_ip = instance.nics[nic_op].ip
8525 update_params_dict = dict([(key, nic_dict[key])
8526 for key in constants.NICS_PARAMETERS
8527 if key in nic_dict])
8529 if 'bridge' in nic_dict:
8530 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8532 new_nic_params, new_filled_nic_params = \
8533 self._GetUpdatedParams(old_nic_params, update_params_dict,
8534 cluster.nicparams[constants.PP_DEFAULT],
8535 constants.NICS_PARAMETER_TYPES)
8536 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8537 self.nic_pinst[nic_op] = new_nic_params
8538 self.nic_pnew[nic_op] = new_filled_nic_params
8539 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8541 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8542 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8543 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8545 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8547 self.warn.append(msg)
8549 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8550 if new_nic_mode == constants.NIC_MODE_ROUTED:
8551 if 'ip' in nic_dict:
8552 nic_ip = nic_dict['ip']
8556 raise errors.OpPrereqError('Cannot set the nic ip to None'
8557 ' on a routed nic', errors.ECODE_INVAL)
8558 if 'mac' in nic_dict:
8559 nic_mac = nic_dict['mac']
8561 raise errors.OpPrereqError('Cannot set the nic mac to None',
8563 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8564 # otherwise generate the mac
8565 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8567 # or validate/reserve the current one
8569 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8570 except errors.ReservationError:
8571 raise errors.OpPrereqError("MAC address %s already in use"
8572 " in cluster" % nic_mac,
8573 errors.ECODE_NOTUNIQUE)
8576 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8577 raise errors.OpPrereqError("Disk operations not supported for"
8578 " diskless instances",
8580 for disk_op, _ in self.op.disks:
8581 if disk_op == constants.DDM_REMOVE:
8582 if len(instance.disks) == 1:
8583 raise errors.OpPrereqError("Cannot remove the last disk of"
8584 " an instance", errors.ECODE_INVAL)
8585 _CheckInstanceDown(self, instance, "cannot remove disks")
8587 if (disk_op == constants.DDM_ADD and
8588 len(instance.nics) >= constants.MAX_DISKS):
8589 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8590 " add more" % constants.MAX_DISKS,
8592 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8594 if disk_op < 0 or disk_op >= len(instance.disks):
8595 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8597 (disk_op, len(instance.disks)),
8601 if self.op.os_name and not self.op.force:
8602 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8603 self.op.force_variant)
  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    Creates the DRBD data/meta volumes on primary and secondary, renames
    the original LVs into place and waits for the devices to sync.

    NOTE(review): this listing elides several original lines (a call
    continuation and the guard before the final raise); do not treat the
    block as runnable as-is.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node
    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0)
    info = _GetInstanceInfoText(instance)
    # TODO(review): typo in user-visible message, "aditional" -> "additional"
    # (string left unchanged here; fixing it is a behavior change)
    feedback_fn("Creating aditional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      # NOTE(review): the trailing arguments of this call (presumably
      # "info, True)") are elided in this listing
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance)
    # NOTE(review): an "if disk_abort:" guard appears elided before this raise
    raise errors.OpExecError("There are some degraded disks for"
                             " this instance, please cleanup manually")
  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    Keeps only the data LVs (children[0]) on the primary node and removes
    the secondary-side volumes and the meta devices, best-effort.

    NOTE(review): the "if msg:" guards before the LogWarning calls appear
    elided in this listing.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      # removal failures are non-fatal; the conversion already happened
      self.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      self.LogWarning("Could not remove metadata for disk %d on node %s,"
                      " continuing anyway: %s", idx, pnode, msg)
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    NOTE(review): this listing elides several original lines (the
    "result = []" accumulator, try/except around device creation, and a
    few else:/guard lines); do not treat the block as runnable as-is.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    instance = self.instance
    # disk modifications: each entry is (operation, parameters)
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          self.LogWarning("Could not remove disk/%d on node %s: %s,"
                          " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        if instance.disk_template == constants.DT_FILE:
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
          # NOTE(review): an "else:" branch appears elided before this line
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        # NOTE(review): the trailing arguments of this call are elided
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          # NOTE(review): the "try:" matching the except below is elided
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      # TODO(review): typo in user-visible message, "shutdow" -> "shutdown"
      # (string left unchanged here; fixing it is a behavior change)
      raise errors.OpExecError("Cannot shutdow instance disks, unable to"
                               " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      self._DISK_CONVERSIONS[mode](self, feedback_fn)
      self.cfg.ReleaseDRBDMinors(instance.name)
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
        for key in 'mac', 'ip':
          # NOTE(review): an "if key in nic_dict:" guard appears elided here
          setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    instance.os = self.op.os_name
    self.cfg.Update(instance, feedback_fn)
  # Dispatch table mapping (current template, requested template) to the
  # conversion helper; consulted by Exec when self.op.disk_template is set.
  # NOTE(review): the closing brace of this dict is elided in this listing.
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
class LUQueryExports(NoHooksLU):
  """Query the exports list

  NOTE(review): this listing elides a few original lines (an "else:"
  branch in ExpandNames, the "result = {}" accumulator and the final
  "return result" in Exec).

  """
  _OP_REQP = ['nodes']

  def ExpandNames(self):
    # Shared node locks: this is a read-only query.
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      # NOTE(review): an "else:" appears elided before this assignment
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The nodes to query are exactly the ones we managed to lock.
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node (False for nodes that failed to answer)

    """
    rpcresult = self.rpc.call_export_list(self.nodes)
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
        # NOTE(review): an "else:" appears elided before this assignment
        result[node] = rpcresult[node].payload
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  Snapshots the instance's disks, copies the snapshots to the target
  node, finalizes the export there and removes stale exports elsewhere.

  NOTE(review): this listing elides several original lines throughout
  (dict/list openers, try/finally structure and various "if msg:"
  guards); do not treat the class as runnable as-is.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]

  def CheckArguments(self):
    """Check the arguments.

    """
    # Optional opcode field with a cluster-wide default.
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    # Sad but true, for now we have do lock all nodes, as we don't know where
    # the previous export might be, and and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """
    This will run on the master, primary node and target node.
    """
    # NOTE(review): the "env = {" opener appears elided before these entries
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
    assert self.dst_node is not None

    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CreateSnapshots(self, feedback_fn):
    """Creates an LVM snapshot for every disk of the instance.

    @return: List of snapshots as L{objects.Disk} instances; a failed
        snapshot is represented by False in the list

    """
    instance = self.instance
    src_node = instance.primary_node
    vgname = self.cfg.GetVGName()
    # NOTE(review): the "snap_disks = []" accumulator appears elided here
    for idx, disk in enumerate(instance.disks):
      feedback_fn("Creating a snapshot of disk/%s on node %s" %
      # result.payload will be a snapshot of an lvm leaf of the one we
      result = self.rpc.call_blockdev_snapshot(src_node, disk)
      msg = result.fail_msg
        self.LogWarning("Could not snapshot disk/%s on node %s: %s",
        snap_disks.append(False)
        disk_id = (vgname, result.payload)
        new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                               logical_id=disk_id, physical_id=disk_id,
                               iv_name=disk.iv_name)
        snap_disks.append(new_dev)

  def _RemoveSnapshot(self, feedback_fn, snap_disks, disk_index):
    """Removes an LVM snapshot.

    @type snap_disks: list
    @param snap_disks: The list of all snapshots as returned by
        L{_CreateSnapshots}
    @type disk_index: number
    @param disk_index: Index of the snapshot to be removed

    @return: Whether removal was successful or not

    """
    disk = snap_disks[disk_index]
    # NOTE(review): an "if disk:" guard presumably precedes this
    # (snap_disks may contain False entries) -- elided in this listing
    src_node = self.instance.primary_node

    feedback_fn("Removing snapshot of disk/%s on node %s" %
                (disk_index, src_node))

    result = self.rpc.call_blockdev_remove(src_node, disk)
    if not result.fail_msg:
      # NOTE(review): the success "return True" appears elided here
    self.LogWarning("Could not remove snapshot for disk/%d from node"
                    " %s: %s", disk_index, src_node, result.fail_msg)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    # NOTE(review): an "if nodelist:" guard appears elided before this block
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.shutdown_timeout)
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)
    # NOTE(review): an "if activate_disks:" guard appears elided here
      # Activate the instance disks if we'exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    # per-disk flags: whether each snapshot has been removed already
    removed_snaps = [False] * len(instance.disks)
      snap_disks = self._CreateSnapshots(feedback_fn)
      if self.op.shutdown and instance.admin_up:
        feedback_fn("Starting instance %s" % instance.name)
        result = self.rpc.call_instance_start(src_node, instance,
        msg = result.fail_msg
          _ShutdownInstanceDisks(self, instance)
          raise errors.OpExecError("Could not start instance: %s" % msg)

      assert len(snap_disks) == len(instance.disks)
      assert len(removed_snaps) == len(instance.disks)

      # TODO: check for size
      cluster_name = self.cfg.GetClusterName()
      # NOTE(review): the "dresults = []" accumulator appears elided here
      for idx, dev in enumerate(snap_disks):
        feedback_fn("Exporting snapshot %s from %s to %s" %
                    (idx, src_node, dst_node.name))
          # FIXME: pass debug from opcode to backend
          result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                                 instance, cluster_name,
                                                 idx, self.op.debug_level)
          msg = result.fail_msg
            self.LogWarning("Could not export disk/%s from node %s to"
                            " node %s: %s", idx, src_node, dst_node.name, msg)
            dresults.append(False)
            dresults.append(True)
          if self._RemoveSnapshot(feedback_fn, snap_disks, idx):
            removed_snaps[idx] = True
            dresults.append(False)

      assert len(dresults) == len(instance.disks)

      # Check for backwards compatibility
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

      feedback_fn("Finalizing export on %s" % dst_node.name)
      result = self.rpc.call_finalize_export(dst_node.name, instance,
      msg = result.fail_msg
        self.LogWarning("Could not finalize export for instance %s"
                        " on node %s: %s", instance.name, dst_node.name, msg)

      # Remove all snapshots
      assert len(removed_snaps) == len(instance.disks)
      for idx, removed in enumerate(removed_snaps):
          self._RemoveSnapshot(feedback_fn, snap_disks, idx)

        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    self._CleanupExports(feedback_fn)

    return fin_resu, dresults
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  NOTE(review): this listing elides a few original lines (the
  fqdn_warn/found initializers and "if msg:" guards in Exec).

  """
  _OP_REQP = ["instance_name"]

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    if not instance_name:
      # NOTE(review): the "fqdn_warn = True" assignment appears elided here
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    for node in exportlist:
      msg = exportlist[node].fail_msg
      # NOTE(review): an "if msg: ... continue" guard appears elided here
      self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
      if instance_name in exportlist[node].payload:
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """
  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    # Lock the single object whose tags we operate on (node or instance);
    # cluster tags need no extra lock.
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Resolve the tag target object from the opcode kind/name.
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
      # NOTE(review): an "else:" appears elided before this raise
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUGetTags(TagsLU):
  """Read-only LU returning the tags of a single cluster object.

  """
  _OP_REQP = ["kind", "name"]

  def Exec(self, feedback_fn):
    """Collect the target's tags and hand them back as a list.

    """
    tags = self.target.GetTags()
    return list(tags)
class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  NOTE(review): this listing elides a few original lines in this class
  (the "try:" in CheckPrereq, and the "cfg = self.cfg", "results = []"
  and "return results" lines in Exec).

  """
  _OP_REQP = ["pattern"]

  def ExpandNames(self):
    # Read-only over config data; no locks needed.
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    # Build (path, object) pairs for the cluster, every instance and node.
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    # NOTE(review): the docstring and the "try:" matching the except below
    # appear elided in this listing.
    for tag in self.op.tags:
      self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)
class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    # All requested tags must currently exist on the target.
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      # NOTE(review): a "diff_names.sort()" statement appears elided here
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time; it is a pure test opcode and changes no cluster state.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if not self.op.on_nodes:
      return
    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # the caveats.
    self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
    self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      replies = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for (node_name, reply) in replies.items():
        reply.Raise("Failure during rpc call to node %s" % node_name)
9381 class IAllocator(object):
9382 """IAllocator framework.
9384 An IAllocator instance has three sets of attributes:
9385 - cfg that is needed to query the cluster
9386 - input data (all members of the _KEYS class attribute are required)
9387 - four buffer attributes (in|out_data|text), that represent the
9388 input (to the external script) in text and data structure format,
9389 and the output from it, again in two formats
9390 - the result variables from the script (success, info, nodes) for
9394 # pylint: disable-msg=R0902
9395 # lots of instance attributes
9397 "name", "mem_size", "disks", "disk_template",
9398 "os", "tags", "nics", "vcpus", "hypervisor",
9401 "name", "relocate_from",
  def __init__(self, cfg, rpc, mode, **kwargs):
    # NOTE(review): this listing elides several original lines here (the
    # self.cfg/self.rpc/self.mode assignments, the "else:" before the
    # unknown-mode raise, and the two "for key in ...:" loop headers).
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    # Choose the per-mode key set and input-building function.
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
      # reject kwargs not in the mode's key set, then store them
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])
      # every key of the mode's key set must have been supplied
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    NOTE(review): this listing elides many original lines (the "data = {",
    per-node "pnr = {" and per-instance "pir = {" dict openers, list
    accumulators and several guards); do not treat the method as runnable
    as-is.

    """
    # NOTE(review): the "cfg = self.cfg" binding appears elided here
    cluster_info = cfg.GetClusterInfo()
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,

      # dynamic (RPC-based) data only for usable (online) nodes
      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              # NOTE(review): an "else:" branch appears elided around here
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
              i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
              remote_info['memory_free'] -= max(0, i_mem_diff)
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    for iinfo, beinfo in i_list:
      # NOTE(review): the "nic_data = []" accumulator appears elided here
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
          cluster_info.nicparams[constants.PP_DEFAULT],
        nic_dict = {"mac": nic.mac,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data
def _AddNewInstance(self):
  """Add new instance data to allocator structure.

  This in combination with _AllocatorGetClusterData will create the
  correct structure needed as input for the allocator.

  The checks for the completeness of the opcode must have already been
  done.

  @rtype: dict
  @return: the "request" part of the allocator input for an allocation

  """
  # NOTE(review): several lines of this method were lost to text
  # extraction; the request-dict keys and the else-branch were restored
  # from the upstream Ganeti code.
  disk_space = _ComputeDiskSize(self.disk_template, self.disks)

  # mirrored templates need a secondary node, everything else only one
  if self.disk_template in constants.DTS_NET_MIRROR:
    self.required_nodes = 2
  else:
    self.required_nodes = 1
  request = {
    "name": self.name,
    "disk_template": self.disk_template,
    "tags": self.tags,
    "os": self.os,
    "vcpus": self.vcpus,
    "memory": self.mem_size,
    "disks": self.disks,
    "disk_space_total": disk_space,
    "nics": self.nics,
    "required_nodes": self.required_nodes,
    }
  return request
def _AddRelocateInstance(self):
  """Add relocate instance data to allocator structure.

  This in combination with _IAllocatorGetClusterData will create the
  correct structure needed as input for the allocator.

  The checks for the completeness of the opcode must have already been
  done.

  @rtype: dict
  @return: the "request" part of the allocator input for a relocation
  @raise errors.ProgrammerError: if the instance name is unknown
  @raise errors.OpPrereqError: if the instance is not mirrored or does
      not have exactly one secondary node

  """
  # NOTE(review): the raise continuations and the request-dict
  # open/close lines were lost to text extraction and restored from the
  # upstream Ganeti code.
  instance = self.cfg.GetInstanceInfo(self.name)
  if instance is None:
    raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                 " IAllocator" % self.name)

  if instance.disk_template not in constants.DTS_NET_MIRROR:
    raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                               errors.ECODE_INVAL)

  if len(instance.secondary_nodes) != 1:
    raise errors.OpPrereqError("Instance has not exactly one secondary node",
                               errors.ECODE_STATE)

  # relocation only ever replaces the (single) secondary node
  self.required_nodes = 1
  disk_sizes = [{'size': disk.size} for disk in instance.disks]
  disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

  request = {
    "name": self.name,
    "disk_space_total": disk_space,
    "required_nodes": self.required_nodes,
    "relocate_from": self.relocate_from,
    }
  return request
9642 def _AddEvacuateNodes(self):
9643 """Add evacuate nodes data to allocator structure.
9647 "evac_nodes": self.evac_nodes
def _BuildInputData(self, fn):
  """Build input data structures.

  Computes the cluster data, then asks the mode-specific callback for
  the "request" part, and serializes the whole input to
  C{self.in_text}.

  @type fn: callable
  @param fn: one of the C{_Add*} methods, returning the request dict
      for the current allocator mode

  """
  self._ComputeClusterData()

  # NOTE(review): the "request = fn()" line was lost to text extraction
  # and restored from the upstream Ganeti code.
  request = fn()
  request["type"] = self.mode
  self.in_data["request"] = request

  self.in_text = serializer.Dump(self.in_data)
def Run(self, name, validate=True, call_fn=None):
  """Run an instance allocator and return the results.

  @type name: string
  @param name: name of the iallocator script to run
  @type validate: boolean
  @param validate: whether to run L{_ValidateResult} on the output
  @type call_fn: callable or None
  @param call_fn: custom RPC function taking (master node, script name,
      input text); defaults to the iallocator runner RPC

  @raise errors.OpExecError: (via C{result.Raise}) if the script fails

  """
  # NOTE(review): the "if call_fn is None"/"if validate" guard lines
  # were lost to text extraction and restored from the upstream Ganeti
  # code.
  if call_fn is None:
    call_fn = self.rpc.call_iallocator_runner
  result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
  result.Raise("Failure while running the iallocator script")

  self.out_text = result.payload
  if validate:
    self._ValidateResult()
9677 def _ValidateResult(self):
9678 """Process the allocator results.
9680 This will process and if successful save the result in
9681 self.out_data and the other parameters.
9685 rdict = serializer.Load(self.out_text)
9686 except Exception, err:
9687 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9689 if not isinstance(rdict, dict):
9690 raise errors.OpExecError("Can't parse iallocator results: not a dict")
9692 # TODO: remove backwards compatiblity in later versions
9693 if "nodes" in rdict and "result" not in rdict:
9694 rdict["result"] = rdict["nodes"]
9697 for key in "success", "info", "result":
9698 if key not in rdict:
9699 raise errors.OpExecError("Can't parse iallocator results:"
9700 " missing key '%s'" % key)
9701 setattr(self, key, rdict[key])
9703 if not isinstance(rdict["result"], list):
9704 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9706 self.out_data = rdict
9709 class LUTestAllocator(NoHooksLU):
9710 """Run allocator tests.
9712 This LU runs the allocator tests
9715 _OP_REQP = ["direction", "mode", "name"]
def CheckPrereq(self):
  """Check prerequisites.

  This checks the opcode parameters depending on the director and mode test.

  @raise errors.OpPrereqError: on any missing/invalid opcode parameter
      or unknown allocator mode/direction

  """
  # NOTE(review): the "mac"/"ip" membership checks, several
  # errors.ECODE_* raise continuations and the final "else:" of the
  # mode chain were lost to text extraction and restored from the
  # upstream Ganeti code.
  if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
    for attr in ["name", "mem_size", "disks", "disk_template",
                 "os", "tags", "nics", "vcpus"]:
      if not hasattr(self.op, attr):
        raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                   attr, errors.ECODE_INVAL)
    iname = self.cfg.ExpandInstanceName(self.op.name)
    if iname is not None:
      raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                 iname, errors.ECODE_EXISTS)
    if not isinstance(self.op.nics, list):
      raise errors.OpPrereqError("Invalid parameter 'nics'",
                                 errors.ECODE_INVAL)
    for row in self.op.nics:
      if (not isinstance(row, dict) or
          "mac" not in row or
          "ip" not in row or
          "bridge" not in row):
        raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                   " parameter", errors.ECODE_INVAL)
    if not isinstance(self.op.disks, list):
      raise errors.OpPrereqError("Invalid parameter 'disks'",
                                 errors.ECODE_INVAL)
    for row in self.op.disks:
      if (not isinstance(row, dict) or
          "size" not in row or
          not isinstance(row["size"], int) or
          "mode" not in row or
          row["mode"] not in ['r', 'w']):
        raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                   " parameter", errors.ECODE_INVAL)
    if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()
  elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
    if not hasattr(self.op, "name"):
      raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                 errors.ECODE_INVAL)
    fname = _ExpandInstanceName(self.cfg, self.op.name)
    self.op.name = fname
    self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
  elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
    if not hasattr(self.op, "evac_nodes"):
      raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                 " opcode input", errors.ECODE_INVAL)
  else:
    raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                               self.op.mode, errors.ECODE_INVAL)

  if self.op.direction == constants.IALLOCATOR_DIR_OUT:
    if not hasattr(self.op, "allocator") or self.op.allocator is None:
      raise errors.OpPrereqError("Missing allocator name",
                                 errors.ECODE_INVAL)
  elif self.op.direction != constants.IALLOCATOR_DIR_IN:
    raise errors.OpPrereqError("Wrong allocator test '%s'" %
                               self.op.direction, errors.ECODE_INVAL)
9779 def Exec(self, feedback_fn):
9780 """Run the allocator test.
9783 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9784 ial = IAllocator(self.cfg, self.rpc,
9787 mem_size=self.op.mem_size,
9788 disks=self.op.disks,
9789 disk_template=self.op.disk_template,
9793 vcpus=self.op.vcpus,
9794 hypervisor=self.op.hypervisor,
9796 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9797 ial = IAllocator(self.cfg, self.rpc,
9800 relocate_from=list(self.relocate_from),
9802 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9803 ial = IAllocator(self.cfg, self.rpc,
9805 evac_nodes=self.op.evac_nodes)
9807 raise errors.ProgrammerError("Uncatched mode %s in"
9808 " LUTestAllocator.Exec", self.op.mode)
9810 if self.op.direction == constants.IALLOCATOR_DIR_IN:
9811 result = ial.in_text
9813 ial.Run(self.op.allocator, validate=False)
9814 result = ial.out_text