4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
38 from ganeti import ssh
39 from ganeti import utils
40 from ganeti import errors
41 from ganeti import hypervisor
42 from ganeti import locking
43 from ganeti import constants
44 from ganeti import objects
45 from ganeti import serializer
46 from ganeti import ssconf
47 from ganeti import uidpool
48 from ganeti import compat
51 class LogicalUnit(object):
52 """Logical Unit base class.
54 Subclasses must follow these rules:
55 - implement ExpandNames
56 - implement CheckPrereq (except when tasklets are used)
57 - implement Exec (except when tasklets are used)
58 - implement BuildHooksEnv
59 - redefine HPATH and HTYPE
60 - optionally redefine their run requirements:
61 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
63 Note that all commands require root permissions.
65 @ivar dry_run_result: the value (if any) that will be returned to the caller
66 in dry-run mode (signalled by opcode dry_run parameter)
74 def __init__(self, processor, op, context, rpc):
75 """Constructor for LogicalUnit.
77 This needs to be overridden in derived classes in order to check op validity.
83 self.cfg = context.cfg
84 self.context = context
86 # Dicts used to declare locking needs to mcpu
87 self.needed_locks = None
88 self.acquired_locks = {}
89 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
91 self.remove_locks = {}
92 # Used to force good behavior when calling helper functions
93 self.recalculate_locks = {}
96 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
97 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
98 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
100 self.dry_run_result = None
101 # support for generic debug attribute
102 if (not hasattr(self.op, "debug_level") or
103 not isinstance(self.op.debug_level, int)):
104 self.op.debug_level = 0
109 for attr_name in self._OP_REQP:
110 attr_val = getattr(op, attr_name, None)
112 raise errors.OpPrereqError("Required parameter '%s' missing" %
113 attr_name, errors.ECODE_INVAL)
115 self.CheckArguments()
118 """Returns the SshRunner object
122 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
125 ssh = property(fget=__GetSSH)
127 def CheckArguments(self):
128 """Check syntactic validity for the opcode arguments.
130 This method is for doing a simple syntactic check and ensuring the
131 validity of opcode parameters, without any cluster-related
132 checks. While the same can be accomplished in ExpandNames and/or
133 CheckPrereq, doing these separately is better because:
135 - ExpandNames is left as purely a lock-related function
136 - CheckPrereq is run after we have acquired locks (and possible
139 The function is allowed to change the self.op attribute so that
140 later methods no longer need to worry about missing parameters.
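    A sketch of a typical implementation (the opcode field and values
    below are illustrative only)::

      def CheckArguments(self):
        if self.op.mode not in ("online", "offline"):
          raise errors.OpPrereqError("Invalid mode '%s'" % self.op.mode,
                                     errors.ECODE_INVAL)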
145 def ExpandNames(self):
146 """Expand names for this LU.
148 This method is called before starting to execute the opcode, and it should
149 update all the parameters of the opcode to their canonical form (e.g. a
150 short node name must be fully expanded after this method has successfully
151 completed). This way locking, hooks, logging, etc. can work correctly.
153 LUs which implement this method must also populate the self.needed_locks
154 member, as a dict with lock levels as keys, and a list of needed lock names
157 - use an empty dict if you don't need any lock
158 - if you don't need any lock at a particular level omit that level
159 - don't put anything for the BGL level
160 - if you want all locks at a level use locking.ALL_SET as a value
162 If you need to share locks (rather than acquire them exclusively) at one
163 level you can modify self.share_locks, setting a true value (usually 1) for
164 that level. By default locks are not shared.
166 This function can also define a list of tasklets, which then will be
167 executed in order instead of the usual LU-level CheckPrereq and Exec
168 functions, if those are not defined by the LU.
172 # Acquire all nodes and one instance
173 self.needed_locks = {
174 locking.LEVEL_NODE: locking.ALL_SET,
175 locking.LEVEL_INSTANCE: ['instance1.example.tld'],
177 # Acquire just two nodes
178 self.needed_locks = {
179 locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
182 self.needed_locks = {} # No, you can't leave it to the default value None
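      # Acquire all node locks in shared (read-only) mode; an illustrative
      # sketch combining share_locks with needed_locks
      self.share_locks[locking.LEVEL_NODE] = 1
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
      }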
185 # The implementation of this method is mandatory only if the new LU is
186 # concurrent, so that old LUs don't need to be changed all at the same time.
189 self.needed_locks = {} # Exclusive LUs don't need locks.
191 raise NotImplementedError
193 def DeclareLocks(self, level):
194 """Declare LU locking needs for a level
196 While most LUs can just declare their locking needs at ExpandNames time,
197 sometimes there's the need to calculate some locks after having acquired
198 the ones before. This function is called just before acquiring locks at a
199 particular level, but after acquiring the ones at lower levels, and permits
200 such calculations. It can be used to modify self.needed_locks, and by
201 default it does nothing.
203 This function is only called if you have something already set in
204 self.needed_locks for the level.
206 @param level: Locking level which is going to be locked
207 @type level: member of ganeti.locking.LEVELS
211 def CheckPrereq(self):
212 """Check prerequisites for this LU.
214 This method should check that the prerequisites for the execution
215 of this LU are fulfilled. It can do internode communication, but
216 it should be idempotent - no cluster or system changes are allowed.
219 The method should raise errors.OpPrereqError in case something is
220 not fulfilled. Its return value is ignored.
222 This method should also update all the parameters of the opcode to
223 their canonical form if it hasn't been done by ExpandNames before.
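    A sketch of a typical check (the opcode field below is illustrative)::

      instance = self.cfg.GetInstanceInfo(self.op.instance_name)
      if instance is None:
        raise errors.OpPrereqError("Instance '%s' unknown" %
                                   self.op.instance_name, errors.ECODE_INVAL)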
226 if self.tasklets is not None:
227 for (idx, tl) in enumerate(self.tasklets):
228 logging.debug("Checking prerequisites for tasklet %s/%s",
229 idx + 1, len(self.tasklets))
232 raise NotImplementedError
234 def Exec(self, feedback_fn):
237 This method should implement the actual work. It should raise
238 errors.OpExecError for failures that are somewhat dealt with in
242 if self.tasklets is not None:
243 for (idx, tl) in enumerate(self.tasklets):
244 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
247 raise NotImplementedError
249 def BuildHooksEnv(self):
250 """Build hooks environment for this LU.
252 This method should return a three-element tuple consisting of: a dict
253 containing the environment that will be used for running the
254 specific hook for this LU, a list of node names on which the hook
255 should run before the execution, and a list of node names on which
256 the hook should run after the execution.
258 The keys of the dict must not be prefixed with 'GANETI_', as that is
259 handled by the hooks runner. Also note additional keys will be
260 added by the hooks runner. If the LU doesn't define any
261 environment, an empty dict (and not None) should be returned.
263 If there are no nodes, an empty list (and not None) should be returned.
265 Note that if the HPATH for a LU class is None, this function will not be called.
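    A sketch of a cluster-wide LU that runs its hook only on the master
    node (all values illustrative)::

      env = {"OP_TARGET": self.cfg.GetClusterName()}
      mn = self.cfg.GetMasterNode()
      return env, [], [mn]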
269 raise NotImplementedError
271 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
272 """Notify the LU about the results of its hooks.
274 This method is called every time a hooks phase is executed, and notifies
275 the Logical Unit about the hooks' result. The LU can then use it to alter
276 its result based on the hooks. By default the method does nothing and the
277 previous result is passed back unchanged but any LU can define it if it
278 wants to use the local cluster hook-scripts somehow.
280 @param phase: one of L{constants.HOOKS_PHASE_POST} or
281 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
282 @param hook_results: the results of the multi-node hooks rpc call
283 @param feedback_fn: function used to send feedback back to the caller
284 @param lu_result: the previous Exec result this LU had, or None
286 @return: the new Exec result, based on the previous result
290 # API must be kept, thus we ignore the unused-argument and
291 # could-be-a-function warnings
292 # pylint: disable-msg=W0613,R0201
295 def _ExpandAndLockInstance(self):
296 """Helper function to expand and lock an instance.
298 Many LUs that work on an instance take its name in self.op.instance_name
299 and need to expand it and then declare the expanded name for locking. This
300 function does it, and then updates self.op.instance_name to the expanded
301 name. It also initializes needed_locks as a dict, if this hasn't been done before.
305 if self.needed_locks is None:
306 self.needed_locks = {}
308 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
309 "_ExpandAndLockInstance called with instance-level locks set"
310 self.op.instance_name = _ExpandInstanceName(self.cfg,
311 self.op.instance_name)
312 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
314 def _LockInstancesNodes(self, primary_only=False):
315 """Helper function to declare instances' nodes for locking.
317 This function should be called after locking one or more instances to lock
318 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
319 with all primary or secondary nodes for instances already locked and
320 present in self.needed_locks[locking.LEVEL_INSTANCE].
322 It should be called from DeclareLocks, and for safety only works if
323 self.recalculate_locks[locking.LEVEL_NODE] is set.
325 In the future it may grow parameters to just lock some instance's nodes, or
326 to just lock primaries or secondary nodes, if needed.
328 It should be called in DeclareLocks in a way similar to::
330 if level == locking.LEVEL_NODE:
331 self._LockInstancesNodes()
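    For this to work the LU must have flagged the node level for
    recalculation in ExpandNames, e.g. (illustrative)::

      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE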
333 @type primary_only: boolean
334 @param primary_only: only lock primary nodes of locked instances
337 assert locking.LEVEL_NODE in self.recalculate_locks, \
338 "_LockInstancesNodes helper function called with no nodes to recalculate"
340 # TODO: check if we've really been called with the instance locks held
342 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
343 # future we might want to have different behaviors depending on the value
344 # of self.recalculate_locks[locking.LEVEL_NODE]
346 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
347 instance = self.context.cfg.GetInstanceInfo(instance_name)
348 wanted_nodes.append(instance.primary_node)
350 wanted_nodes.extend(instance.secondary_nodes)
352 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
353 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
354 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
355 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
357 del self.recalculate_locks[locking.LEVEL_NODE]
360 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
361 """Simple LU which runs no hooks.
363 This LU is intended as a parent for other LogicalUnits which will
364 run no hooks, in order to reduce duplicate code.
370 def BuildHooksEnv(self):
371 """Empty BuildHooksEnv for NoHooksLu.
373 This just raises an error.
376 assert False, "BuildHooksEnv called for NoHooksLUs"
380 """Tasklet base class.
382 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
383 they can mix legacy code with tasklets. Locking needs to be done in the LU,
384 tasklets know nothing about locks.
386 Subclasses must follow these rules:
387 - Implement CheckPrereq
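  An LU could e.g. build its tasklet list in ExpandNames (the tasklet
  class and opcode field below are made up)::

    self.tasklets = [ExampleTasklet(self, name)
                     for name in self.op.instance_names]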
391 def __init__(self, lu):
398 def CheckPrereq(self):
399 """Check prerequisites for this tasklets.
401 This method should check whether the prerequisites for the execution of
402 this tasklet are fulfilled. It can do internode communication, but it
403 should be idempotent - no cluster or system changes are allowed.
405 The method should raise errors.OpPrereqError in case something is not
406 fulfilled. Its return value is ignored.
408 This method should also update all parameters to their canonical form if it
409 hasn't been done before.
412 raise NotImplementedError
414 def Exec(self, feedback_fn):
415 """Execute the tasklet.
417 This method should implement the actual work. It should raise
418 errors.OpExecError for failures that are somewhat dealt with in code, or
422 raise NotImplementedError
425 def _GetWantedNodes(lu, nodes):
426 """Returns list of checked and expanded node names.
428 @type lu: L{LogicalUnit}
429 @param lu: the logical unit on whose behalf we execute
431 @param nodes: list of node names or None for all nodes
433 @return: the list of nodes, sorted
434 @raise errors.ProgrammerError: if the nodes parameter is wrong type
437 if not isinstance(nodes, list):
438 raise errors.OpPrereqError("Invalid argument type 'nodes'",
442 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
443 " non-empty list of nodes whose name is to be expanded.")
445 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
446 return utils.NiceSort(wanted)
449 def _GetWantedInstances(lu, instances):
450 """Returns list of checked and expanded instance names.
452 @type lu: L{LogicalUnit}
453 @param lu: the logical unit on whose behalf we execute
454 @type instances: list
455 @param instances: list of instance names or None for all instances
457 @return: the list of instances, sorted
458 @raise errors.OpPrereqError: if the instances parameter is wrong type
459 @raise errors.OpPrereqError: if any of the passed instances is not found
462 if not isinstance(instances, list):
463 raise errors.OpPrereqError("Invalid argument type 'instances'",
467 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
469 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
473 def _CheckOutputFields(static, dynamic, selected):
474 """Checks whether all selected fields are valid.
476 @type static: L{utils.FieldSet}
477 @param static: static fields set
478 @type dynamic: L{utils.FieldSet}
479 @param dynamic: dynamic fields set
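  Illustrative use (the field names below are made up)::

    _CheckOutputFields(static=utils.FieldSet("name", "pinst_cnt"),
                       dynamic=utils.FieldSet("free_memory"),
                       selected=self.op.output_fields)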
486 delta = f.NonMatching(selected)
488 raise errors.OpPrereqError("Unknown output fields selected: %s"
489 % ",".join(delta), errors.ECODE_INVAL)
492 def _CheckBooleanOpField(op, name):
493 """Validates boolean opcode parameters.
495 This will ensure that an opcode parameter is either a boolean value,
496 or None (but that it always exists).
499 val = getattr(op, name, None)
500 if not (val is None or isinstance(val, bool)):
501 raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
502 (name, str(val)), errors.ECODE_INVAL)
503 setattr(op, name, val)
506 def _CheckGlobalHvParams(params):
507 """Validates that given hypervisor params are not global ones.
509 This will ensure that instances don't get customised versions of global parameters.
513 used_globals = constants.HVC_GLOBALS.intersection(params)
515 msg = ("The following hypervisor parameters are global and cannot"
516 " be customized at instance level, please modify them at"
517 " cluster level: %s" % utils.CommaJoin(used_globals))
518 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
521 def _CheckNodeOnline(lu, node):
522 """Ensure that a given node is online.
524 @param lu: the LU on behalf of which we make the check
525 @param node: the node to check
526 @raise errors.OpPrereqError: if the node is offline
529 if lu.cfg.GetNodeInfo(node).offline:
530 raise errors.OpPrereqError("Can't use offline node %s" % node,
534 def _CheckNodeNotDrained(lu, node):
535 """Ensure that a given node is not drained.
537 @param lu: the LU on behalf of which we make the check
538 @param node: the node to check
539 @raise errors.OpPrereqError: if the node is drained
542 if lu.cfg.GetNodeInfo(node).drained:
543 raise errors.OpPrereqError("Can't use drained node %s" % node,
547 def _CheckNodeHasOS(lu, node, os_name, force_variant):
548 """Ensure that a node supports a given OS.
550 @param lu: the LU on behalf of which we make the check
551 @param node: the node to check
552 @param os_name: the OS to query about
553 @param force_variant: whether to ignore variant errors
554 @raise errors.OpPrereqError: if the node does not support the OS
557 result = lu.rpc.call_os_get(node, os_name)
558 result.Raise("OS '%s' not in supported OS list for node %s" %
560 prereq=True, ecode=errors.ECODE_INVAL)
561 if not force_variant:
562 _CheckOSVariant(result.payload, os_name)
565 def _RequireFileStorage():
566 """Checks that file storage is enabled.
568 @raise errors.OpPrereqError: when file storage is disabled
571 if not constants.ENABLE_FILE_STORAGE:
572 raise errors.OpPrereqError("File storage disabled at configure time",
576 def _CheckDiskTemplate(template):
577 """Ensure a given disk template is valid.
580 if template not in constants.DISK_TEMPLATES:
581 msg = ("Invalid disk template name '%s', valid templates are: %s" %
582 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
583 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
584 if template == constants.DT_FILE:
585 _RequireFileStorage()
588 def _CheckStorageType(storage_type):
589 """Ensure a given storage type is valid.
592 if storage_type not in constants.VALID_STORAGE_TYPES:
593 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
595 if storage_type == constants.ST_FILE:
596 _RequireFileStorage()
600 def _CheckInstanceDown(lu, instance, reason):
601 """Ensure that an instance is not running."""
602 if instance.admin_up:
603 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
604 (instance.name, reason), errors.ECODE_STATE)
606 pnode = instance.primary_node
607 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
608 ins_l.Raise("Can't contact node %s for instance information" % pnode,
609 prereq=True, ecode=errors.ECODE_ENVIRON)
611 if instance.name in ins_l.payload:
612 raise errors.OpPrereqError("Instance %s is running, %s" %
613 (instance.name, reason), errors.ECODE_STATE)
616 def _ExpandItemName(fn, name, kind):
617 """Expand an item name.
619 @param fn: the function to use for expansion
620 @param name: requested item name
621 @param kind: text description ('Node' or 'Instance')
622 @return: the resolved (full) name
623 @raise errors.OpPrereqError: if the item is not found
627 if full_name is None:
628 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
633 def _ExpandNodeName(cfg, name):
634 """Wrapper over L{_ExpandItemName} for nodes."""
635 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
638 def _ExpandInstanceName(cfg, name):
639 """Wrapper over L{_ExpandItemName} for instance."""
640 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
643 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
644 memory, vcpus, nics, disk_template, disks,
645 bep, hvp, hypervisor_name):
646 """Builds instance related env variables for hooks
648 This builds the hook environment from individual variables.
651 @param name: the name of the instance
652 @type primary_node: string
653 @param primary_node: the name of the instance's primary node
654 @type secondary_nodes: list
655 @param secondary_nodes: list of secondary nodes as strings
656 @type os_type: string
657 @param os_type: the name of the instance's OS
658 @type status: boolean
659 @param status: the should_run status of the instance
661 @param memory: the memory size of the instance
663 @param vcpus: the count of VCPUs the instance has
665 @param nics: list of tuples (ip, mac, mode, link) representing
666 the NICs the instance has
667 @type disk_template: string
668 @param disk_template: the disk template of the instance
670 @param disks: the list of (size, mode) pairs
672 @param bep: the backend parameters for the instance
674 @param hvp: the hypervisor parameters for the instance
675 @type hypervisor_name: string
676 @param hypervisor_name: the hypervisor for the instance
678 @return: the hook environment for this instance
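  For a single-NIC, single-disk instance the result would contain keys
  such as INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_NIC_COUNT,
  INSTANCE_NIC0_MAC, INSTANCE_DISK_COUNT and INSTANCE_DISK0_SIZE, plus
  one INSTANCE_BE_*/INSTANCE_HV_* entry per backend/hypervisor
  parameter; the GANETI_ prefix is added later by the hooks runner.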
687 "INSTANCE_NAME": name,
688 "INSTANCE_PRIMARY": primary_node,
689 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
690 "INSTANCE_OS_TYPE": os_type,
691 "INSTANCE_STATUS": str_status,
692 "INSTANCE_MEMORY": memory,
693 "INSTANCE_VCPUS": vcpus,
694 "INSTANCE_DISK_TEMPLATE": disk_template,
695 "INSTANCE_HYPERVISOR": hypervisor_name,
699 nic_count = len(nics)
700 for idx, (ip, mac, mode, link) in enumerate(nics):
703 env["INSTANCE_NIC%d_IP" % idx] = ip
704 env["INSTANCE_NIC%d_MAC" % idx] = mac
705 env["INSTANCE_NIC%d_MODE" % idx] = mode
706 env["INSTANCE_NIC%d_LINK" % idx] = link
707 if mode == constants.NIC_MODE_BRIDGED:
708 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
712 env["INSTANCE_NIC_COUNT"] = nic_count
715 disk_count = len(disks)
716 for idx, (size, mode) in enumerate(disks):
717 env["INSTANCE_DISK%d_SIZE" % idx] = size
718 env["INSTANCE_DISK%d_MODE" % idx] = mode
722 env["INSTANCE_DISK_COUNT"] = disk_count
724 for source, kind in [(bep, "BE"), (hvp, "HV")]:
725 for key, value in source.items():
726 env["INSTANCE_%s_%s" % (kind, key)] = value
731 def _NICListToTuple(lu, nics):
732 """Build a list of nic information tuples.
734 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
735 value in LUQueryInstanceData.
737 @type lu: L{LogicalUnit}
738 @param lu: the logical unit on whose behalf we execute
739 @type nics: list of L{objects.NIC}
740 @param nics: list of nics to convert to hooks tuples
744 c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
748 filled_params = objects.FillDict(c_nicparams, nic.nicparams)
749 mode = filled_params[constants.NIC_MODE]
750 link = filled_params[constants.NIC_LINK]
751 hooks_nics.append((ip, mac, mode, link))
755 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
756 """Builds instance related env variables for hooks from an object.
758 @type lu: L{LogicalUnit}
759 @param lu: the logical unit on whose behalf we execute
760 @type instance: L{objects.Instance}
761 @param instance: the instance for which we should build the
764 @param override: dictionary with key/values that will override
767 @return: the hook environment dictionary
770 cluster = lu.cfg.GetClusterInfo()
771 bep = cluster.FillBE(instance)
772 hvp = cluster.FillHV(instance)
774 'name': instance.name,
775 'primary_node': instance.primary_node,
776 'secondary_nodes': instance.secondary_nodes,
777 'os_type': instance.os,
778 'status': instance.admin_up,
779 'memory': bep[constants.BE_MEMORY],
780 'vcpus': bep[constants.BE_VCPUS],
781 'nics': _NICListToTuple(lu, instance.nics),
782 'disk_template': instance.disk_template,
783 'disks': [(disk.size, disk.mode) for disk in instance.disks],
786 'hypervisor_name': instance.hypervisor,
789 args.update(override)
790 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
793 def _AdjustCandidatePool(lu, exceptions):
794 """Adjust the candidate pool after node operations.
797 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
799 lu.LogInfo("Promoted nodes to master candidate role: %s",
800 utils.CommaJoin(node.name for node in mod_list))
801 for name in mod_list:
802 lu.context.ReaddNode(name)
803 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
805 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
809 def _DecideSelfPromotion(lu, exceptions=None):
810 """Decide whether I should promote myself as a master candidate.
813 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
814 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
815 # the new node will increase mc_max by one, so:
816 mc_should = min(mc_should + 1, cp_size)
817 return mc_now < mc_should
820 def _CheckNicsBridgesExist(lu, target_nics, target_node,
821 profile=constants.PP_DEFAULT):
822 """Check that the brigdes needed by a list of nics exist.
825 c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
826 paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
827 for nic in target_nics]
828 brlist = [params[constants.NIC_LINK] for params in paramslist
829 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
831 result = lu.rpc.call_bridges_exist(target_node, brlist)
832 result.Raise("Error checking bridges on destination node '%s'" %
833 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
836 def _CheckInstanceBridgesExist(lu, instance, node=None):
837 """Check that the brigdes needed by an instance exist.
841 node = instance.primary_node
842 _CheckNicsBridgesExist(lu, instance.nics, node)
845 def _CheckOSVariant(os_obj, name):
846 """Check whether an OS name conforms to the os variants specification.
848 @type os_obj: L{objects.OS}
849 @param os_obj: OS object to check
851 @param name: OS name passed by the user, to check for validity
854 if not os_obj.supported_variants:
857 variant = name.split("+", 1)[1]
859 raise errors.OpPrereqError("OS name must include a variant",
862 if variant not in os_obj.supported_variants:
863 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
866 def _GetNodeInstancesInner(cfg, fn):
867 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
870 def _GetNodeInstances(cfg, node_name):
871 """Returns a list of all primary and secondary instances on a node.
875 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
878 def _GetNodePrimaryInstances(cfg, node_name):
879 """Returns primary instances on a node.
882 return _GetNodeInstancesInner(cfg,
883 lambda inst: node_name == inst.primary_node)
886 def _GetNodeSecondaryInstances(cfg, node_name):
887 """Returns secondary instances on a node.
890 return _GetNodeInstancesInner(cfg,
891 lambda inst: node_name in inst.secondary_nodes)
894 def _GetStorageTypeArgs(cfg, storage_type):
895 """Returns the arguments for a storage type.
898 # Special case for file storage
899 if storage_type == constants.ST_FILE:
900 # storage.FileStorage wants a list of storage directories
901 return [[cfg.GetFileStorageDir()]]
906 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
909 for dev in instance.disks:
910 cfg.SetDiskID(dev, node_name)
912 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
913 result.Raise("Failed to get disk status from node %s" % node_name,
914 prereq=prereq, ecode=errors.ECODE_ENVIRON)
916 for idx, bdev_status in enumerate(result.payload):
917 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
923 def _FormatTimestamp(secs):
924 """Formats a Unix timestamp with the local timezone.
927 return time.strftime("%F %T %Z", time.gmtime(secs))
930 class LUPostInitCluster(LogicalUnit):
931 """Logical unit for running hooks after cluster initialization.
934 HPATH = "cluster-init"
935 HTYPE = constants.HTYPE_CLUSTER
938 def BuildHooksEnv(self):
942 env = {"OP_TARGET": self.cfg.GetClusterName()}
943 mn = self.cfg.GetMasterNode()
946 def CheckPrereq(self):
947 """No prerequisites to check.
952 def Exec(self, feedback_fn):
959 class LUDestroyCluster(LogicalUnit):
960 """Logical unit for destroying the cluster.
963 HPATH = "cluster-destroy"
964 HTYPE = constants.HTYPE_CLUSTER
967 def BuildHooksEnv(self):
971 env = {"OP_TARGET": self.cfg.GetClusterName()}
974 def CheckPrereq(self):
975 """Check prerequisites.
977 This checks whether the cluster is empty.
979 Any errors are signaled by raising errors.OpPrereqError.
982 master = self.cfg.GetMasterNode()
984 nodelist = self.cfg.GetNodeList()
985 if len(nodelist) != 1 or nodelist[0] != master:
986 raise errors.OpPrereqError("There are still %d node(s) in"
987 " this cluster." % (len(nodelist) - 1),
989 instancelist = self.cfg.GetInstanceList()
991 raise errors.OpPrereqError("There are still %d instance(s) in"
992 " this cluster." % len(instancelist),
995 def Exec(self, feedback_fn):
996 """Destroys the cluster.
999 master = self.cfg.GetMasterNode()
1000 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1002 # Run post hooks on master node before it's removed
1003 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1005 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1007 # pylint: disable-msg=W0702
1008 self.LogWarning("Errors occurred running hooks on %s" % master)
1010 result = self.rpc.call_node_stop_master(master, False)
1011 result.Raise("Could not disable the master role")
1013 if modify_ssh_setup:
1014 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1015 utils.CreateBackup(priv_key)
1016 utils.CreateBackup(pub_key)
1021 def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
1022 warn_days=constants.SSL_CERT_EXPIRATION_WARN,
1023 error_days=constants.SSL_CERT_EXPIRATION_ERROR):
1024 """Verifies certificate details for LUVerifyCluster.
1028 msg = "Certificate %s is expired" % filename
1030 if not_before is not None and not_after is not None:
1031 msg += (" (valid from %s to %s)" %
1032 (_FormatTimestamp(not_before),
1033 _FormatTimestamp(not_after)))
1034 elif not_before is not None:
1035 msg += " (valid from %s)" % _FormatTimestamp(not_before)
1036 elif not_after is not None:
1037 msg += " (valid until %s)" % _FormatTimestamp(not_after)
1039 return (LUVerifyCluster.ETYPE_ERROR, msg)
1041 elif not_before is not None and not_before > now:
1042 return (LUVerifyCluster.ETYPE_WARNING,
1043 "Certificate %s not yet valid (valid from %s)" %
1044 (filename, _FormatTimestamp(not_before)))
1046 elif not_after is not None:
1047 remaining_days = int((not_after - now) / (24 * 3600))
1049 msg = ("Certificate %s expires in %d days" % (filename, remaining_days))
1051 if remaining_days <= error_days:
1052 return (LUVerifyCluster.ETYPE_ERROR, msg)
1054 if remaining_days <= warn_days:
1055 return (LUVerifyCluster.ETYPE_WARNING, msg)
1060 def _VerifyCertificate(filename):
1061 """Verifies a certificate for LUVerifyCluster.
1063 @type filename: string
1064 @param filename: Path to PEM file
1068 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1069 utils.ReadFile(filename))
1070 except Exception, err: # pylint: disable-msg=W0703
1071 return (LUVerifyCluster.ETYPE_ERROR,
1072 "Failed to load X509 certificate %s: %s" % (filename, err))
1074 # Depending on the pyOpenSSL version, this can just return (None, None)
1075 (not_before, not_after) = utils.GetX509CertValidity(cert)
1077 return _VerifyCertificateInner(filename, cert.has_expired(),
1078 not_before, not_after, time.time())
1081 class LUVerifyCluster(LogicalUnit):
1082 """Verifies the cluster status.
1085 HPATH = "cluster-verify"
1086 HTYPE = constants.HTYPE_CLUSTER
1087 _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1090 TCLUSTER = "cluster"
1092 TINSTANCE = "instance"
1094 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1095 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1096 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1097 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1098 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1099 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1101 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1102 ENODEDRBD = (TNODE, "ENODEDRBD")
1103 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1104 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1105 ENODEHV = (TNODE, "ENODEHV")
1106 ENODELVM = (TNODE, "ENODELVM")
1107 ENODEN1 = (TNODE, "ENODEN1")
1108 ENODENET = (TNODE, "ENODENET")
1109 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1110 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1111 ENODERPC = (TNODE, "ENODERPC")
1112 ENODESSH = (TNODE, "ENODESSH")
1113 ENODEVERSION = (TNODE, "ENODEVERSION")
1114 ENODESETUP = (TNODE, "ENODESETUP")
1115 ENODETIME = (TNODE, "ENODETIME")
1117 ETYPE_FIELD = "code"
1118 ETYPE_ERROR = "ERROR"
1119 ETYPE_WARNING = "WARNING"
1121 class NodeImage(object):
1122 """A class representing the logical and physical status of a node.
1124 @ivar volumes: a structure as returned from
1125 L{ganeti.backend.GetVolumeList} (runtime)
1126 @ivar instances: a list of running instances (runtime)
1127 @ivar pinst: list of configured primary instances (config)
1128 @ivar sinst: list of configured secondary instances (config)
1129 @ivar sbp: dictionary of {primary-node: list of instances} for all
1130 instances for which this node is a secondary (config)
1131 @ivar mfree: free memory, as reported by hypervisor (runtime)
1132 @ivar dfree: free disk, as reported by the node (runtime)
1133 @ivar offline: the offline status (config)
1134 @type rpc_fail: boolean
1135 @ivar rpc_fail: whether the RPC verify call failed (overall,
1136 not whether the individual keys were correct) (runtime)
1137 @type lvm_fail: boolean
1138 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1139 @type hyp_fail: boolean
1140 @ivar hyp_fail: whether the RPC call didn't return the instance list
1141 @type ghost: boolean
1142 @ivar ghost: whether this is a known node or not (config)
1145 def __init__(self, offline=False):
1153 self.offline = offline
1154 self.rpc_fail = False
1155 self.lvm_fail = False
1156 self.hyp_fail = False
1159 def ExpandNames(self):
1160 self.needed_locks = {
1161 locking.LEVEL_NODE: locking.ALL_SET,
1162 locking.LEVEL_INSTANCE: locking.ALL_SET,
1164 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1166 def _Error(self, ecode, item, msg, *args, **kwargs):
1167 """Format an error message.
1169 Based on the opcode's error_codes parameter, either format a
1170 parseable error code, or a simpler error string.
1172 This must be called only from Exec and functions called from Exec.
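    In parseable mode the emitted line would look like (illustrative)::

      ERROR:ENODELVM:node:node1.example.com:unable to check volume groups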
1175 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1177 # first complete the msg
1180 # then format the whole message
1181 if self.op.error_codes:
1182 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1188 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1189 # and finally report it via the feedback_fn
1190 self._feedback_fn(" - %s" % msg)
1192 def _ErrorIf(self, cond, *args, **kwargs):
1193 """Log an error message if the passed condition is True.
1196 cond = bool(cond) or self.op.debug_simulate_errors
1198 self._Error(*args, **kwargs)
1199 # do not mark the operation as failed for WARN cases only
1200 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1201 self.bad = self.bad or cond
1203 def _VerifyNode(self, ninfo, nresult):
1204 """Run multiple tests against a node.
1208 - compares ganeti version
1209 - checks vg existence and size > 20G
1210 - checks config file checksum
1211 - checks ssh to other nodes
1213 @type ninfo: L{objects.Node}
1214 @param ninfo: the node to check
1215 @param nresult: the results from the node
1217 @return: whether overall this call was successful (and we can expect
1218 reasonable values in the response)
1222 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1224 # main result, nresult should be a non-empty dict
1225 test = not nresult or not isinstance(nresult, dict)
1226 _ErrorIf(test, self.ENODERPC, node,
1227 "unable to verify node: no data returned")
1231 # compares ganeti version
1232 local_version = constants.PROTOCOL_VERSION
1233 remote_version = nresult.get("version", None)
1234 test = not (remote_version and
1235 isinstance(remote_version, (list, tuple)) and
1236 len(remote_version) == 2)
1237 _ErrorIf(test, self.ENODERPC, node,
1238 "connection to node returned invalid data")
1242 test = local_version != remote_version[0]
1243 _ErrorIf(test, self.ENODEVERSION, node,
1244 "incompatible protocol versions: master %s,"
1245 " node %s", local_version, remote_version[0])
1249 # node seems compatible, we can actually try to look into its results
1251 # full package version
1252 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1253 self.ENODEVERSION, node,
1254 "software version mismatch: master %s, node %s",
1255 constants.RELEASE_VERSION, remote_version[1],
1256 code=self.ETYPE_WARNING)
1258 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1259 if isinstance(hyp_result, dict):
1260 for hv_name, hv_result in hyp_result.iteritems():
1261 test = hv_result is not None
1262 _ErrorIf(test, self.ENODEHV, node,
1263 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1266 test = nresult.get(constants.NV_NODESETUP,
1267 ["Missing NODESETUP results"])
1268 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1273 def _VerifyNodeTime(self, ninfo, nresult,
1274 nvinfo_starttime, nvinfo_endtime):
1275 """Check the node time.
1277 @type ninfo: L{objects.Node}
1278 @param ninfo: the node to check
1279 @param nresult: the remote results for the node
1280 @param nvinfo_starttime: the start time of the RPC call
1281 @param nvinfo_endtime: the end time of the RPC call
1285 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1287 ntime = nresult.get(constants.NV_TIME, None)
1289 ntime_merged = utils.MergeTime(ntime)
1290 except (ValueError, TypeError):
1291 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1294 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1295 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1296 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1297 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1301 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1302 "Node time diverges by at least %s from master node time",
1305 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1306 """Check the node time.
1308 @type ninfo: L{objects.Node}
1309 @param ninfo: the node to check
1310 @param nresult: the remote results for the node
1311 @param vg_name: the configured VG name
1318 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1320 # checks vg existence and size > 20G
1321 vglist = nresult.get(constants.NV_VGLIST, None)
1323 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1325 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1326 constants.MIN_VG_SIZE)
1327 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1330 pvlist = nresult.get(constants.NV_PVLIST, None)
1331 test = pvlist is None
1332 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1334 # check that ':' is not present in PV names, since it's a
1335 # special character for lvcreate (denotes the range of PEs to
1337 for _, pvname, owner_vg in pvlist:
1338 test = ":" in pvname
1339 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1340 " '%s' of VG '%s'", pvname, owner_vg)
1342 def _VerifyNodeNetwork(self, ninfo, nresult):
1343 """Check the node time.
1345 @type ninfo: L{objects.Node}
1346 @param ninfo: the node to check
1347 @param nresult: the remote results for the node
1351 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1353 test = constants.NV_NODELIST not in nresult
1354 _ErrorIf(test, self.ENODESSH, node,
1355 "node hasn't returned node ssh connectivity data")
1357 if nresult[constants.NV_NODELIST]:
1358 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1359 _ErrorIf(True, self.ENODESSH, node,
1360 "ssh communication with node '%s': %s", a_node, a_msg)
1362 test = constants.NV_NODENETTEST not in nresult
1363 _ErrorIf(test, self.ENODENET, node,
1364 "node hasn't returned node tcp connectivity data")
1366 if nresult[constants.NV_NODENETTEST]:
1367 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1369 _ErrorIf(True, self.ENODENET, node,
1370 "tcp communication with node '%s': %s",
1371 anode, nresult[constants.NV_NODENETTEST][anode])
1373 test = constants.NV_MASTERIP not in nresult
1374 _ErrorIf(test, self.ENODENET, node,
1375 "node hasn't returned node master IP reachability data")
1377 if not nresult[constants.NV_MASTERIP]:
1378 if node == self.master_node:
1379 msg = "the master node cannot reach the master IP (not configured?)"
1381 msg = "cannot reach the master IP"
1382 _ErrorIf(True, self.ENODENET, node, msg)
1385 def _VerifyInstance(self, instance, instanceconfig, node_image):
1386 """Verify an instance.
1388 This function checks to see if the required block devices are
1389 available on the instance's node.
1392 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1393 node_current = instanceconfig.primary_node
1395 node_vol_should = {}
1396 instanceconfig.MapLVsByNode(node_vol_should)
1398 for node in node_vol_should:
1399 n_img = node_image[node]
1400 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1401 # ignore missing volumes on offline or broken nodes
1403 for volume in node_vol_should[node]:
1404 test = volume not in n_img.volumes
1405 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1406 "volume %s missing on node %s", volume, node)
1408 if instanceconfig.admin_up:
1409 pri_img = node_image[node_current]
1410 test = instance not in pri_img.instances and not pri_img.offline
1411 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1412 "instance not running on its primary node %s",
1415 for node, n_img in node_image.items():
1416 if (not node == node_current):
1417 test = instance in n_img.instances
1418 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1419 "instance should not run on node %s", node)
1421 def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1422 """Verify if there are any unknown volumes in the cluster.
1424 The .os, .swap and backup volumes are ignored. All other volumes are
1425 reported as unknown.
1428 for node, n_img in node_image.items():
1429 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1430 # skip non-healthy nodes
1432 for volume in n_img.volumes:
1433 test = (node not in node_vol_should or
1434 volume not in node_vol_should[node])
1435 self._ErrorIf(test, self.ENODEORPHANLV, node,
1436 "volume %s is unknown", volume)
1438 def _VerifyOrphanInstances(self, instancelist, node_image):
1439 """Verify the list of running instances.
1441 This checks what instances are running but unknown to the cluster.
1444 for node, n_img in node_image.items():
1445 for o_inst in n_img.instances:
1446 test = o_inst not in instancelist
1447 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1448 "instance %s on node %s should not exist", o_inst, node)
1450 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1451 """Verify N+1 Memory Resilience.
1453 Check that if one single node dies we can still start all the
1454 instances it was primary for.
1457 for node, n_img in node_image.items():
1458 # This code checks that every node which is now listed as
1459 # secondary has enough memory to host all instances it is
1460 # supposed to, should a single other node in the cluster fail.
1461 # FIXME: not ready for failover to an arbitrary node
1462 # FIXME: does not support file-backed instances
1463 # WARNING: we currently take into account down instances as well
1464 # as up ones, considering that even if they're down someone
1465 # might want to start them even in the event of a node failure.
1466 for prinode, instances in n_img.sbp.items():
1468 for instance in instances:
1469 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1470 if bep[constants.BE_AUTO_BALANCE]:
1471 needed_mem += bep[constants.BE_MEMORY]
1472 test = n_img.mfree < needed_mem
1473 self._ErrorIf(test, self.ENODEN1, node,
1474 "not enough memory on to accommodate"
1475 " failovers should peer node %s fail", prinode)
1477 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1479 """Verifies and computes the node required file checksums.
1481 @type ninfo: L{objects.Node}
1482 @param ninfo: the node to check
1483 @param nresult: the remote results for the node
1484 @param file_list: required list of files
1485 @param local_cksum: dictionary of local files and their checksums
1486 @param master_files: list of files that only masters should have
1490 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1492 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1493 test = not isinstance(remote_cksum, dict)
1494 _ErrorIf(test, self.ENODEFILECHECK, node,
1495 "node hasn't returned file checksum data")
1499 for file_name in file_list:
1500 node_is_mc = ninfo.master_candidate
1501 must_have = (file_name not in master_files) or node_is_mc
1503 test1 = file_name not in remote_cksum
1505 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1507 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1508 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1509 "file '%s' missing", file_name)
1510 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1511 "file '%s' has wrong checksum", file_name)
1512 # not candidate and this is not a must-have file
1513 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1514 "file '%s' should not exist on non master"
1515 " candidates (and the file is outdated)", file_name)
1516 # all good, except non-master/non-must have combination
1517 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1518 "file '%s' should not exist"
1519 " on non master candidates", file_name)
1521 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1522 """Verifies and the node DRBD status.
1524 @type ninfo: L{objects.Node}
1525 @param ninfo: the node to check
1526 @param nresult: the remote results for the node
1527 @param instanceinfo: the dict of instances
1528 @param drbd_map: the DRBD map as returned by
1529 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1533 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1535 # compute the DRBD minors
1537 for minor, instance in drbd_map[node].items():
1538 test = instance not in instanceinfo
1539 _ErrorIf(test, self.ECLUSTERCFG, None,
1540 "ghost instance '%s' in temporary DRBD map", instance)
1541 # ghost instance should not be running, but otherwise we
1542 # don't give double warnings (both ghost instance and
1543 # unallocated minor in use)
1545 node_drbd[minor] = (instance, False)
1547 instance = instanceinfo[instance]
1548 node_drbd[minor] = (instance.name, instance.admin_up)
1550 # and now check them
1551 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1552 test = not isinstance(used_minors, (tuple, list))
1553 _ErrorIf(test, self.ENODEDRBD, node,
1554 "cannot parse drbd status file: %s", str(used_minors))
1556 # we cannot check drbd status
1559 for minor, (iname, must_exist) in node_drbd.items():
1560 test = minor not in used_minors and must_exist
1561 _ErrorIf(test, self.ENODEDRBD, node,
1562 "drbd minor %d of instance %s is not active", minor, iname)
1563 for minor in used_minors:
1564 test = minor not in node_drbd
1565 _ErrorIf(test, self.ENODEDRBD, node,
1566 "unallocated drbd minor %d is in use", minor)
1568 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1569 """Verifies and updates the node volume data.
1571 This function will update a L{NodeImage}'s internal structures
1572 with data from the remote call.
1574 @type ninfo: L{objects.Node}
1575 @param ninfo: the node to check
1576 @param nresult: the remote results for the node
1577 @param nimg: the node image object
1578 @param vg_name: the configured VG name
1582 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1584 nimg.lvm_fail = True
1585 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1588 elif isinstance(lvdata, basestring):
1589 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1590 utils.SafeEncode(lvdata))
1591 elif not isinstance(lvdata, dict):
1592 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1594 nimg.volumes = lvdata
1595 nimg.lvm_fail = False
1597 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1598 """Verifies and updates the node instance list.
1600 If the listing was successful, then updates this node's instance
1601 list. Otherwise, it marks the RPC call as failed for the instance
1604 @type ninfo: L{objects.Node}
1605 @param ninfo: the node to check
1606 @param nresult: the remote results for the node
1607 @param nimg: the node image object
1610 idata = nresult.get(constants.NV_INSTANCELIST, None)
1611 test = not isinstance(idata, list)
1612 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1613 " (instancelist): %s", utils.SafeEncode(str(idata)))
1615 nimg.hyp_fail = True
1617 nimg.instances = idata
1619 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1620 """Verifies and computes a node information map
1622 @type ninfo: L{objects.Node}
1623 @param ninfo: the node to check
1624 @param nresult: the remote results for the node
1625 @param nimg: the node image object
1626 @param vg_name: the configured VG name
1630 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1632 # try to read free memory (from the hypervisor)
1633 hv_info = nresult.get(constants.NV_HVINFO, None)
1634 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1635 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1638 nimg.mfree = int(hv_info["memory_free"])
1639 except (ValueError, TypeError):
1640 _ErrorIf(True, self.ENODERPC, node,
1641 "node returned invalid nodeinfo, check hypervisor")
1643 # FIXME: devise a free space model for file based instances as well
1644 if vg_name is not None:
1645 test = (constants.NV_VGLIST not in nresult or
1646 vg_name not in nresult[constants.NV_VGLIST])
1647 _ErrorIf(test, self.ENODELVM, node,
1648 "node didn't return data for the volume group '%s'"
1649 " - it is either missing or broken", vg_name)
1652 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1653 except (ValueError, TypeError):
1654 _ErrorIf(True, self.ENODERPC, node,
1655 "node returned invalid LVM info, check LVM status")
1657 def CheckPrereq(self):
1658 """Check prerequisites.
1660 Transform the list of checks we're going to skip into a set and check that
1661 all its members are valid.
1664 self.skip_set = frozenset(self.op.skip_checks)
1665 if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1666 raise errors.OpPrereqError("Invalid checks to be skipped specified",
1669 def BuildHooksEnv(self):
1672 Cluster-Verify hooks run only in the post phase; their failure causes their
1673 output to be logged in the verify output and the verification to fail.
1676 all_nodes = self.cfg.GetNodeList()
1678 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1680 for node in self.cfg.GetAllNodesInfo().values():
1681 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1683 return env, [], all_nodes
1685 def Exec(self, feedback_fn):
1686 """Verify integrity of cluster, performing various test on nodes.
1690 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1691 verbose = self.op.verbose
1692 self._feedback_fn = feedback_fn
1693 feedback_fn("* Verifying global settings")
1694 for msg in self.cfg.VerifyConfig():
1695 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1697 # Check the cluster certificates
1698 for cert_filename in constants.ALL_CERT_FILES:
1699 (errcode, msg) = _VerifyCertificate(cert_filename)
1700 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1702 vg_name = self.cfg.GetVGName()
1703 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1704 cluster = self.cfg.GetClusterInfo()
1705 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1706 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1707 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1708 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1709 for iname in instancelist)
1710 i_non_redundant = [] # Non redundant instances
1711 i_non_a_balanced = [] # Non auto-balanced instances
1712 n_offline = 0 # Count of offline nodes
1713 n_drained = 0 # Count of nodes being drained
1714 node_vol_should = {}
1716 # FIXME: verify OS list
1717 # do local checksums
1718 master_files = [constants.CLUSTER_CONF_FILE]
1719 master_node = self.master_node = self.cfg.GetMasterNode()
1720 master_ip = self.cfg.GetMasterIP()
1722 file_names = ssconf.SimpleStore().GetFileList()
1723 file_names.extend(constants.ALL_CERT_FILES)
1724 file_names.extend(master_files)
1725 if cluster.modify_etc_hosts:
1726 file_names.append(constants.ETC_HOSTS)
1728 local_checksums = utils.FingerprintFiles(file_names)
1730 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1731 node_verify_param = {
1732 constants.NV_FILELIST: file_names,
1733 constants.NV_NODELIST: [node.name for node in nodeinfo
1734 if not node.offline],
1735 constants.NV_HYPERVISOR: hypervisors,
1736 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1737 node.secondary_ip) for node in nodeinfo
1738 if not node.offline],
1739 constants.NV_INSTANCELIST: hypervisors,
1740 constants.NV_VERSION: None,
1741 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1742 constants.NV_NODESETUP: None,
1743 constants.NV_TIME: None,
1744 constants.NV_MASTERIP: (master_node, master_ip),
1747 if vg_name is not None:
1748 node_verify_param[constants.NV_VGLIST] = None
1749 node_verify_param[constants.NV_LVLIST] = vg_name
1750 node_verify_param[constants.NV_PVLIST] = [vg_name]
1751 node_verify_param[constants.NV_DRBDLIST] = None
1753 # Build our expected cluster state
1754 node_image = dict((node.name, self.NodeImage(offline=node.offline))
1755 for node in nodeinfo)
1757 for instance in instancelist:
1758 inst_config = instanceinfo[instance]
1760 for nname in inst_config.all_nodes:
1761 if nname not in node_image:
1763 gnode = self.NodeImage()
1765 node_image[nname] = gnode
1767 inst_config.MapLVsByNode(node_vol_should)
1769 pnode = inst_config.primary_node
1770 node_image[pnode].pinst.append(instance)
1772 for snode in inst_config.secondary_nodes:
1773 nimg = node_image[snode]
1774 nimg.sinst.append(instance)
1775 if pnode not in nimg.sbp:
1776 nimg.sbp[pnode] = []
1777 nimg.sbp[pnode].append(instance)
1779 # At this point, we have the in-memory data structures complete,
1780 # except for the runtime information, which we'll gather next
1782 # Due to the way our RPC system works, exact response times cannot be
1783 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1784 # time before and after executing the request, we can at least have a time window.
1786 nvinfo_starttime = time.time()
1787 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1788 self.cfg.GetClusterName())
1789 nvinfo_endtime = time.time()
1791 all_drbd_map = self.cfg.ComputeDRBDMap()
1793 feedback_fn("* Verifying node status")
1794 for node_i in nodeinfo:
1796 nimg = node_image[node]
1800 feedback_fn("* Skipping offline node %s" % (node,))
1804 if node == master_node:
1806 elif node_i.master_candidate:
1807 ntype = "master candidate"
1808 elif node_i.drained:
1814 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1816 msg = all_nvinfo[node].fail_msg
1817 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1819 nimg.rpc_fail = True
1822 nresult = all_nvinfo[node].payload
1824 nimg.call_ok = self._VerifyNode(node_i, nresult)
1825 self._VerifyNodeNetwork(node_i, nresult)
1826 self._VerifyNodeLVM(node_i, nresult, vg_name)
1827 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1829 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1830 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1832 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1833 self._UpdateNodeInstances(node_i, nresult, nimg)
1834 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1836 feedback_fn("* Verifying instance status")
1837 for instance in instancelist:
1839 feedback_fn("* Verifying instance %s" % instance)
1840 inst_config = instanceinfo[instance]
1841 self._VerifyInstance(instance, inst_config, node_image)
1842 inst_nodes_offline = []
1844 pnode = inst_config.primary_node
1845 pnode_img = node_image[pnode]
1846 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1847 self.ENODERPC, pnode, "instance %s, connection to"
1848 " primary node failed", instance)
1850 if pnode_img.offline:
1851 inst_nodes_offline.append(pnode)
1853 # If the instance is non-redundant we cannot survive losing its primary
1854 # node, so we are not N+1 compliant. On the other hand we have no disk
1855 # templates with more than one secondary so that situation is not well
1857 # FIXME: does not support file-backed instances
1858 if not inst_config.secondary_nodes:
1859 i_non_redundant.append(instance)
1860 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
1861 instance, "instance has multiple secondary nodes: %s",
1862 utils.CommaJoin(inst_config.secondary_nodes),
1863 code=self.ETYPE_WARNING)
1865 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1866 i_non_a_balanced.append(instance)
1868 for snode in inst_config.secondary_nodes:
1869 s_img = node_image[snode]
1870 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
1871 "instance %s, connection to secondary node failed", instance)
1874 inst_nodes_offline.append(snode)
1876 # warn that the instance lives on offline nodes
1877 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1878 "instance lives on offline node(s) %s",
1879 utils.CommaJoin(inst_nodes_offline))
1880 # ... or ghost nodes
1881 for node in inst_config.all_nodes:
1882 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
1883 "instance lives on ghost node %s", node)
1885 feedback_fn("* Verifying orphan volumes")
1886 self._VerifyOrphanVolumes(node_vol_should, node_image)
1888 feedback_fn("* Verifying orphan instances")
1889 self._VerifyOrphanInstances(instancelist, node_image)
1891 if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1892 feedback_fn("* Verifying N+1 Memory redundancy")
1893 self._VerifyNPlusOneMemory(node_image, instanceinfo)
1895 feedback_fn("* Other Notes")
1897 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
1898 % len(i_non_redundant))
1900 if i_non_a_balanced:
1901 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
1902 % len(i_non_a_balanced))
1905 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
1908 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
1912 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1913 """Analyze the post-hooks' result
1915 This method analyses the hook result, handles it, and sends some
1916 nicely-formatted feedback back to the user.
1918 @param phase: one of L{constants.HOOKS_PHASE_POST} or
1919 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1920 @param hooks_results: the results of the multi-node hooks rpc call
1921 @param feedback_fn: function used to send feedback back to the caller
1922 @param lu_result: previous Exec result
1923 @return: the new Exec result, based on the previous result
1927 # We only really run POST phase hooks, and are only interested in their results
1929 if phase == constants.HOOKS_PHASE_POST:
1930 # Used to change hooks' output to proper indentation
1931 indent_re = re.compile('^', re.M)
1932 feedback_fn("* Hooks Results")
1933 assert hooks_results, "invalid result from hooks"
1935 for node_name in hooks_results:
1936 res = hooks_results[node_name]
1938 test = msg and not res.offline
1939 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1940 "Communication failure in hooks execution: %s", msg)
1941 if res.offline or msg:
1942 # No need to investigate payload if node is offline or gave an error.
1943 # override manually lu_result here as _ErrorIf only
1944 # overrides self.bad
1947 for script, hkr, output in res.payload:
1948 test = hkr == constants.HKR_FAIL
1949 self._ErrorIf(test, self.ENODEHOOKS, node_name,
1950 "Script %s failed, output:", script)
1952 output = indent_re.sub(' ', output)
1953 feedback_fn("%s" % output)
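# Short example of the re-indentation trick used in HooksCallBack above
# (standard library re only; the input and replacement strings are made up):
#   indent_re = re.compile('^', re.M)
#   indent_re.sub('      ', "ok\nfailed")   # -> '      ok\n      failed'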
1959 class LUVerifyDisks(NoHooksLU):
1960 """Verifies the cluster disk status.
1966 def ExpandNames(self):
1967 self.needed_locks = {
1968 locking.LEVEL_NODE: locking.ALL_SET,
1969 locking.LEVEL_INSTANCE: locking.ALL_SET,
1971 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1973 def CheckPrereq(self):
1974 """Check prerequisites.
1976 This has no prerequisites.
1981 def Exec(self, feedback_fn):
1982 """Verify integrity of cluster disks.
1984 @rtype: tuple of three items
1985 @return: a tuple of (dict of node-to-node_error, list of instances
1986 which need activate-disks, dict of instance: (node, volume) for missing volumes
1990 result = res_nodes, res_instances, res_missing = {}, [], {}
1992 vg_name = self.cfg.GetVGName()
1993 nodes = utils.NiceSort(self.cfg.GetNodeList())
1994 instances = [self.cfg.GetInstanceInfo(name)
1995 for name in self.cfg.GetInstanceList()]
1998 for inst in instances:
2000 if (not inst.admin_up or
2001 inst.disk_template not in constants.DTS_NET_MIRROR):
2003 inst.MapLVsByNode(inst_lvs)
2004 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2005 for node, vol_list in inst_lvs.iteritems():
2006 for vol in vol_list:
2007 nv_dict[(node, vol)] = inst
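# Sketch of the shapes involved (instance and volume names are made up):
# for an instance "inst1" whose MapLVsByNode result is
#   {"node1": ["xenvg/disk0_data"], "node2": ["xenvg/disk0_data"]}
# the loop above yields the inverted mapping
#   nv_dict == {("node1", "xenvg/disk0_data"): inst1,
#               ("node2", "xenvg/disk0_data"): inst1}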
2012 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2016 node_res = node_lvs[node]
2017 if node_res.offline:
2019 msg = node_res.fail_msg
2021 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2022 res_nodes[node] = msg
2025 lvs = node_res.payload
2026 for lv_name, (_, _, lv_online) in lvs.items():
2027 inst = nv_dict.pop((node, lv_name), None)
2028 if (not lv_online and inst is not None
2029 and inst.name not in res_instances):
2030 res_instances.append(inst.name)
2032 # any leftover items in nv_dict are missing LVs, let's arrange the data better
2034 for key, inst in nv_dict.iteritems():
2035 if inst.name not in res_missing:
2036 res_missing[inst.name] = []
2037 res_missing[inst.name].append(key)
2042 class LURepairDiskSizes(NoHooksLU):
2043 """Verifies the cluster disk sizes.
2046 _OP_REQP = ["instances"]
2049 def ExpandNames(self):
2050 if not isinstance(self.op.instances, list):
2051 raise errors.OpPrereqError("Invalid argument type 'instances'",
2054 if self.op.instances:
2055 self.wanted_names = []
2056 for name in self.op.instances:
2057 full_name = _ExpandInstanceName(self.cfg, name)
2058 self.wanted_names.append(full_name)
2059 self.needed_locks = {
2060 locking.LEVEL_NODE: [],
2061 locking.LEVEL_INSTANCE: self.wanted_names,
2063 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2065 self.wanted_names = None
2066 self.needed_locks = {
2067 locking.LEVEL_NODE: locking.ALL_SET,
2068 locking.LEVEL_INSTANCE: locking.ALL_SET,
2070 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2072 def DeclareLocks(self, level):
2073 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2074 self._LockInstancesNodes(primary_only=True)
2076 def CheckPrereq(self):
2077 """Check prerequisites.
2079 This only checks the optional instance list against the existing names.
2082 if self.wanted_names is None:
2083 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2085 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2086 in self.wanted_names]
2088 def _EnsureChildSizes(self, disk):
2089 """Ensure children of the disk have the needed disk size.
2091 This is valid mainly for DRBD8 and fixes an issue where the
2092 children have smaller disk size.
2094 @param disk: an L{ganeti.objects.Disk} object
2097 if disk.dev_type == constants.LD_DRBD8:
2098 assert disk.children, "Empty children for DRBD8?"
2099 fchild = disk.children[0]
2100 mismatch = fchild.size < disk.size
2102 self.LogInfo("Child disk has size %d, parent %d, fixing",
2103 fchild.size, disk.size)
2104 fchild.size = disk.size
2106 # and we recurse on this child only, not on the metadev
2107 return self._EnsureChildSizes(fchild) or mismatch
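# Illustrative example (sizes are made up): for a DRBD8 disk of size 10240
# whose data child reports 10112, the call grows the child to 10240 and
# returns True so the caller knows to write the configuration back:
#   self._EnsureChildSizes(drbd_disk)   # -> True; children[0].size now 10240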
2111 def Exec(self, feedback_fn):
2112 """Verify the size of cluster disks.
2115 # TODO: check child disks too
2116 # TODO: check differences in size between primary/secondary nodes
2118 for instance in self.wanted_instances:
2119 pnode = instance.primary_node
2120 if pnode not in per_node_disks:
2121 per_node_disks[pnode] = []
2122 for idx, disk in enumerate(instance.disks):
2123 per_node_disks[pnode].append((instance, idx, disk))
2126 for node, dskl in per_node_disks.items():
2127 newl = [v[2].Copy() for v in dskl]
2129 self.cfg.SetDiskID(dsk, node)
2130 result = self.rpc.call_blockdev_getsizes(node, newl)
2132 self.LogWarning("Failure in blockdev_getsizes call to node"
2133 " %s, ignoring", node)
2135 if len(result.data) != len(dskl):
2136 self.LogWarning("Invalid result from node %s, ignoring node results",
2139 for ((instance, idx, disk), size) in zip(dskl, result.data):
2141 self.LogWarning("Disk %d of instance %s did not return size"
2142 " information, ignoring", idx, instance.name)
2144 if not isinstance(size, (int, long)):
2145 self.LogWarning("Disk %d of instance %s did not return valid"
2146 " size information, ignoring", idx, instance.name)
2149 if size != disk.size:
2150 self.LogInfo("Disk %d of instance %s has mismatched size,"
2151 " correcting: recorded %d, actual %d", idx,
2152 instance.name, disk.size, size)
2154 self.cfg.Update(instance, feedback_fn)
2155 changed.append((instance.name, idx, size))
2156 if self._EnsureChildSizes(disk):
2157 self.cfg.Update(instance, feedback_fn)
2158 changed.append((instance.name, idx, disk.size))
2162 class LURenameCluster(LogicalUnit):
2163 """Rename the cluster.
2166 HPATH = "cluster-rename"
2167 HTYPE = constants.HTYPE_CLUSTER
2170 def BuildHooksEnv(self):
2175 "OP_TARGET": self.cfg.GetClusterName(),
2176 "NEW_NAME": self.op.name,
2178 mn = self.cfg.GetMasterNode()
2179 all_nodes = self.cfg.GetNodeList()
2180 return env, [mn], all_nodes
2182 def CheckPrereq(self):
2183 """Verify that the passed name is a valid one.
2186 hostname = utils.GetHostInfo(self.op.name)
2188 new_name = hostname.name
2189 self.ip = new_ip = hostname.ip
2190 old_name = self.cfg.GetClusterName()
2191 old_ip = self.cfg.GetMasterIP()
2192 if new_name == old_name and new_ip == old_ip:
2193 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2194 " cluster has changed",
2196 if new_ip != old_ip:
2197 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2198 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2199 " reachable on the network. Aborting." %
2200 new_ip, errors.ECODE_NOTUNIQUE)
2202 self.op.name = new_name
2204 def Exec(self, feedback_fn):
2205 """Rename the cluster.
2208 clustername = self.op.name
2211 # shutdown the master IP
2212 master = self.cfg.GetMasterNode()
2213 result = self.rpc.call_node_stop_master(master, False)
2214 result.Raise("Could not disable the master role")
2217 cluster = self.cfg.GetClusterInfo()
2218 cluster.cluster_name = clustername
2219 cluster.master_ip = ip
2220 self.cfg.Update(cluster, feedback_fn)
2222 # update the known hosts file
2223 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2224 node_list = self.cfg.GetNodeList()
2226 node_list.remove(master)
2229 result = self.rpc.call_upload_file(node_list,
2230 constants.SSH_KNOWN_HOSTS_FILE)
2231 for to_node, to_result in result.iteritems():
2232 msg = to_result.fail_msg
2234 msg = ("Copy of file %s to node %s failed: %s" %
2235 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2236 self.proc.LogWarning(msg)
2239 result = self.rpc.call_node_start_master(master, False, False)
2240 msg = result.fail_msg
2242 self.LogWarning("Could not re-enable the master role on"
2243 " the master, please restart manually: %s", msg)
2246 def _RecursiveCheckIfLVMBased(disk):
2247 """Check if the given disk or its children are lvm-based.
2249 @type disk: L{objects.Disk}
2250 @param disk: the disk to check
2252 @return: boolean indicating whether a LD_LV dev_type was found or not
2256 for chdisk in disk.children:
2257 if _RecursiveCheckIfLVMBased(chdisk):
2259 return disk.dev_type == constants.LD_LV
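# Illustrative sketch (constructor keywords assumed from objects.Disk as
# used elsewhere in this module): a DRBD8 device counts as LVM-based as
# soon as any disk in its child tree is an LD_LV device:
#   lv = objects.Disk(dev_type=constants.LD_LV, size=1024, children=[])
#   drbd = objects.Disk(dev_type=constants.LD_DRBD8, size=1024, children=[lv])
#   _RecursiveCheckIfLVMBased(drbd)   # -> True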
2262 class LUSetClusterParams(LogicalUnit):
2263 """Change the parameters of the cluster.
2266 HPATH = "cluster-modify"
2267 HTYPE = constants.HTYPE_CLUSTER
2271 def CheckArguments(self):
2275 for attr in ["candidate_pool_size",
2276 "uid_pool", "add_uids", "remove_uids"]:
2277 if not hasattr(self.op, attr):
2278 setattr(self.op, attr, None)
2280 if self.op.candidate_pool_size is not None:
2282 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2283 except (ValueError, TypeError), err:
2284 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2285 str(err), errors.ECODE_INVAL)
2286 if self.op.candidate_pool_size < 1:
2287 raise errors.OpPrereqError("At least one master candidate needed",
2290 _CheckBooleanOpField(self.op, "maintain_node_health")
2292 if self.op.uid_pool:
2293 uidpool.CheckUidPool(self.op.uid_pool)
2295 if self.op.add_uids:
2296 uidpool.CheckUidPool(self.op.add_uids)
2298 if self.op.remove_uids:
2299 uidpool.CheckUidPool(self.op.remove_uids)
2301 def ExpandNames(self):
2302 # FIXME: in the future maybe other cluster params won't require checking on
2303 # all nodes to be modified.
2304 self.needed_locks = {
2305 locking.LEVEL_NODE: locking.ALL_SET,
2307 self.share_locks[locking.LEVEL_NODE] = 1
2309 def BuildHooksEnv(self):
2314 "OP_TARGET": self.cfg.GetClusterName(),
2315 "NEW_VG_NAME": self.op.vg_name,
2317 mn = self.cfg.GetMasterNode()
2318 return env, [mn], [mn]
2320 def CheckPrereq(self):
2321 """Check prerequisites.
2323 This checks whether the given params don't conflict and
2324 if the given volume group is valid.
2327 if self.op.vg_name is not None and not self.op.vg_name:
2328 instances = self.cfg.GetAllInstancesInfo().values()
2329 for inst in instances:
2330 for disk in inst.disks:
2331 if _RecursiveCheckIfLVMBased(disk):
2332 raise errors.OpPrereqError("Cannot disable lvm storage while"
2333 " lvm-based instances exist",
2336 node_list = self.acquired_locks[locking.LEVEL_NODE]
2338 # if vg_name not None, checks given volume group on all nodes
2340 vglist = self.rpc.call_vg_list(node_list)
2341 for node in node_list:
2342 msg = vglist[node].fail_msg
2344 # ignoring down node
2345 self.LogWarning("Error while gathering data on node %s"
2346 " (ignoring node): %s", node, msg)
2348 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2350 constants.MIN_VG_SIZE)
2352 raise errors.OpPrereqError("Error on node '%s': %s" %
2353 (node, vgstatus), errors.ECODE_ENVIRON)
2355 self.cluster = cluster = self.cfg.GetClusterInfo()
2356 # validate params changes
2357 if self.op.beparams:
2358 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2359 self.new_beparams = objects.FillDict(
2360 cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
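# objects.FillDict layers the custom values on top of a copy of the
# defaults, e.g. (sketch):
#   objects.FillDict({"a": 1, "b": 2}, {"b": 3})   # -> {"a": 1, "b": 3}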
2362 if self.op.nicparams:
2363 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2364 self.new_nicparams = objects.FillDict(
2365 cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2366 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2369 # check all instances for consistency
2370 for instance in self.cfg.GetAllInstancesInfo().values():
2371 for nic_idx, nic in enumerate(instance.nics):
2372 params_copy = copy.deepcopy(nic.nicparams)
2373 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2375 # check parameter syntax
2377 objects.NIC.CheckParameterSyntax(params_filled)
2378 except errors.ConfigurationError, err:
2379 nic_errors.append("Instance %s, nic/%d: %s" %
2380 (instance.name, nic_idx, err))
2382 # if we're moving instances to routed, check that they have an ip
2383 target_mode = params_filled[constants.NIC_MODE]
2384 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2385 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2386 (instance.name, nic_idx))
2388 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2389 "\n".join(nic_errors))
2391 # hypervisor list/parameters
2392 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2393 if self.op.hvparams:
2394 if not isinstance(self.op.hvparams, dict):
2395 raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2397 for hv_name, hv_dict in self.op.hvparams.items():
2398 if hv_name not in self.new_hvparams:
2399 self.new_hvparams[hv_name] = hv_dict
2401 self.new_hvparams[hv_name].update(hv_dict)
2403 # os hypervisor parameters
2404 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2406 if not isinstance(self.op.os_hvp, dict):
2407 raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2409 for os_name, hvs in self.op.os_hvp.items():
2410 if not isinstance(hvs, dict):
2411 raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2412 " input"), errors.ECODE_INVAL)
2413 if os_name not in self.new_os_hvp:
2414 self.new_os_hvp[os_name] = hvs
2416 for hv_name, hv_dict in hvs.items():
2417 if hv_name not in self.new_os_hvp[os_name]:
2418 self.new_os_hvp[os_name][hv_name] = hv_dict
2420 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2422 # changes to the hypervisor list
2423 if self.op.enabled_hypervisors is not None:
2424 self.hv_list = self.op.enabled_hypervisors
2425 if not self.hv_list:
2426 raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2427 " least one member",
2429 invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2431 raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2433 utils.CommaJoin(invalid_hvs),
2435 for hv in self.hv_list:
2436 # if the hypervisor doesn't already exist in the cluster
2437 # hvparams, we initialize it to empty, and then (in both
2438 # cases) we make sure to fill the defaults, as we might not
2439 # have a complete defaults list if the hypervisor wasn't enabled before
2441 if hv not in new_hvp:
2443 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2444 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2446 self.hv_list = cluster.enabled_hypervisors
2448 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2449 # either the enabled list has changed, or the parameters have, validate
2450 for hv_name, hv_params in self.new_hvparams.items():
2451 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2452 (self.op.enabled_hypervisors and
2453 hv_name in self.op.enabled_hypervisors)):
2454 # either this is a new hypervisor, or its parameters have changed
2455 hv_class = hypervisor.GetHypervisor(hv_name)
2456 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2457 hv_class.CheckParameterSyntax(hv_params)
2458 _CheckHVParams(self, node_list, hv_name, hv_params)
2461 # no need to check any newly-enabled hypervisors, since the
2462 # defaults have already been checked in the above code-block
2463 for os_name, os_hvp in self.new_os_hvp.items():
2464 for hv_name, hv_params in os_hvp.items():
2465 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2466 # we need to fill in the new os_hvp on top of the actual hv_p
2467 cluster_defaults = self.new_hvparams.get(hv_name, {})
2468 new_osp = objects.FillDict(cluster_defaults, hv_params)
2469 hv_class = hypervisor.GetHypervisor(hv_name)
2470 hv_class.CheckParameterSyntax(new_osp)
2471 _CheckHVParams(self, node_list, hv_name, new_osp)
2474 def Exec(self, feedback_fn):
2475 """Change the parameters of the cluster.
2478 if self.op.vg_name is not None:
2479 new_volume = self.op.vg_name
2482 if new_volume != self.cfg.GetVGName():
2483 self.cfg.SetVGName(new_volume)
2485 feedback_fn("Cluster LVM configuration already in desired"
2486 " state, not changing")
2487 if self.op.hvparams:
2488 self.cluster.hvparams = self.new_hvparams
2490 self.cluster.os_hvp = self.new_os_hvp
2491 if self.op.enabled_hypervisors is not None:
2492 self.cluster.hvparams = self.new_hvparams
2493 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2494 if self.op.beparams:
2495 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2496 if self.op.nicparams:
2497 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2499 if self.op.candidate_pool_size is not None:
2500 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2501 # we need to update the pool size here, otherwise the save will fail
2502 _AdjustCandidatePool(self, [])
2504 if self.op.maintain_node_health is not None:
2505 self.cluster.maintain_node_health = self.op.maintain_node_health
2507 if self.op.add_uids is not None:
2508 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2510 if self.op.remove_uids is not None:
2511 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2513 if self.op.uid_pool is not None:
2514 self.cluster.uid_pool = self.op.uid_pool
2516 self.cfg.Update(self.cluster, feedback_fn)
2519 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2520 """Distribute additional files which are part of the cluster configuration.
2522 ConfigWriter takes care of distributing the config and ssconf files, but
2523 there are more files which should be distributed to all nodes. This function
2524 makes sure those are copied.
2526 @param lu: calling logical unit
2527 @param additional_nodes: list of nodes not in the config to distribute to
2530 # 1. Gather target nodes
2531 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2532 dist_nodes = lu.cfg.GetOnlineNodeList()
2533 if additional_nodes is not None:
2534 dist_nodes.extend(additional_nodes)
2535 if myself.name in dist_nodes:
2536 dist_nodes.remove(myself.name)
2538 # 2. Gather files to distribute
2539 dist_files = set([constants.ETC_HOSTS,
2540 constants.SSH_KNOWN_HOSTS_FILE,
2541 constants.RAPI_CERT_FILE,
2542 constants.RAPI_USERS_FILE,
2543 constants.CONFD_HMAC_KEY,
2546 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2547 for hv_name in enabled_hypervisors:
2548 hv_class = hypervisor.GetHypervisor(hv_name)
2549 dist_files.update(hv_class.GetAncillaryFiles())
2551 # 3. Perform the files upload
2552 for fname in dist_files:
2553 if os.path.exists(fname):
2554 result = lu.rpc.call_upload_file(dist_nodes, fname)
2555 for to_node, to_result in result.items():
2556 msg = to_result.fail_msg
2558 msg = ("Copy of file %s to node %s failed: %s" %
2559 (fname, to_node, msg))
2560 lu.proc.LogWarning(msg)
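# Typical invocations from the LUs below (see LURedistributeConfig.Exec and
# LUAddNode.Exec):
#   _RedistributeAncillaryFiles(self)
#   _RedistributeAncillaryFiles(self, additional_nodes=[node])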
2563 class LURedistributeConfig(NoHooksLU):
2564 """Force the redistribution of cluster configuration.
2566 This is a very simple LU.
2572 def ExpandNames(self):
2573 self.needed_locks = {
2574 locking.LEVEL_NODE: locking.ALL_SET,
2576 self.share_locks[locking.LEVEL_NODE] = 1
2578 def CheckPrereq(self):
2579 """Check prerequisites.
2583 def Exec(self, feedback_fn):
2584 """Redistribute the configuration.
2587 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2588 _RedistributeAncillaryFiles(self)
2591 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2592 """Sleep and poll for an instance's disk to sync.
2595 if not instance.disks or disks is not None and not disks:
2598 disks = _ExpandCheckDisks(instance, disks)
2601 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2603 node = instance.primary_node
2606 lu.cfg.SetDiskID(dev, node)
2608 # TODO: Convert to utils.Retry
2611 degr_retries = 10 # in seconds, as we sleep 1 second each time
2615 cumul_degraded = False
2616 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2617 msg = rstats.fail_msg
2619 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2622 raise errors.RemoteError("Can't contact node %s for mirror data,"
2623 " aborting." % node)
2626 rstats = rstats.payload
2628 for i, mstat in enumerate(rstats):
2630 lu.LogWarning("Can't compute data for node %s/%s",
2631 node, disks[i].iv_name)
2634 cumul_degraded = (cumul_degraded or
2635 (mstat.is_degraded and mstat.sync_percent is None))
2636 if mstat.sync_percent is not None:
2638 if mstat.estimated_time is not None:
2639 rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2640 max_time = mstat.estimated_time
2642 rem_time = "no time estimate"
2643 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2644 (disks[i].iv_name, mstat.sync_percent, rem_time))
2646 # if we're done but degraded, let's do a few small retries, to
2647 # make sure we see a stable and not transient situation; therefore
2648 # we force restart of the loop
2649 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2650 logging.info("Degraded disks found, %d retries left", degr_retries)
2658 time.sleep(min(60, max_time))
2661 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2662 return not cumul_degraded
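# Minimal sketch of the polling contract implemented above (the helper
# names are hypothetical, used only for illustration):
#   while True:
#     stats = poll_mirror_status()   # rpc.call_blockdev_getmirrorstatus
#     if all(not s.is_degraded and s.sync_percent is None for s in stats):
#       break                        # fully synced, nothing left degraded
#     time.sleep(min(60, max_estimated_time(stats)))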
2665 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2666 """Check that mirrors are not degraded.
2668 The ldisk parameter, if True, will change the test from the
2669 is_degraded attribute (which represents overall non-ok status for
2670 the device(s)) to the ldisk (representing the local storage status).
2673 lu.cfg.SetDiskID(dev, node)
2677 if on_primary or dev.AssembleOnSecondary():
2678 rstats = lu.rpc.call_blockdev_find(node, dev)
2679 msg = rstats.fail_msg
2681 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2683 elif not rstats.payload:
2684 lu.LogWarning("Can't find disk on node %s", node)
2688 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2690 result = result and not rstats.payload.is_degraded
2693 for child in dev.children:
2694 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2699 class LUDiagnoseOS(NoHooksLU):
2700 """Logical unit for OS diagnose/query.
2703 _OP_REQP = ["output_fields", "names"]
2705 _FIELDS_STATIC = utils.FieldSet()
2706 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2707 # Fields that need calculation of global os validity
2708 _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2710 def ExpandNames(self):
2712 raise errors.OpPrereqError("Selective OS query not supported",
2715 _CheckOutputFields(static=self._FIELDS_STATIC,
2716 dynamic=self._FIELDS_DYNAMIC,
2717 selected=self.op.output_fields)
2719 # Lock all nodes, in shared mode
2720 # Temporary removal of locks, should be reverted later
2721 # TODO: reintroduce locks when they are lighter-weight
2722 self.needed_locks = {}
2723 #self.share_locks[locking.LEVEL_NODE] = 1
2724 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2726 def CheckPrereq(self):
2727 """Check prerequisites.
2732 def _DiagnoseByOS(rlist):
2733 """Remaps a per-node return list into a per-os, per-node dictionary
2735 @param rlist: a map with node names as keys and OS objects as values
2738 @return: a dictionary with osnames as keys and as value another map, with
2739 nodes as keys and tuples of (path, status, diagnose, variants) as values, eg::
2741 {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2742 (/srv/..., False, "invalid api")],
2743 "node2": [(/srv/..., True, "")]}
2748 # we build here the list of nodes that didn't fail the RPC (at RPC
2749 # level), so that nodes with a non-responding node daemon don't
2750 # make all OSes invalid
2751 good_nodes = [node_name for node_name in rlist
2752 if not rlist[node_name].fail_msg]
2753 for node_name, nr in rlist.items():
2754 if nr.fail_msg or not nr.payload:
2756 for name, path, status, diagnose, variants in nr.payload:
2757 if name not in all_os:
2758 # build a list of nodes for this os containing empty lists
2759 # for each node in node_list
2761 for nname in good_nodes:
2762 all_os[name][nname] = []
2763 all_os[name][node_name].append((path, status, diagnose, variants))
2766 def Exec(self, feedback_fn):
2767 """Compute the list of OSes.
2770 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2771 node_data = self.rpc.call_os_diagnose(valid_nodes)
2772 pol = self._DiagnoseByOS(node_data)
2774 calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2775 calc_variants = "variants" in self.op.output_fields
2777 for os_name, os_data in pol.items():
2782 for osl in os_data.values():
2783 valid = bool(valid and osl and osl[0][1])
2788 node_variants = osl[0][3]
2789 if variants is None:
2790 variants = set(node_variants)
2792 variants.intersection_update(node_variants)
2794 for field in self.op.output_fields:
2797 elif field == "valid":
2799 elif field == "node_status":
2800 # this is just a copy of the dict
2802 for node_name, nos_list in os_data.items():
2803 val[node_name] = nos_list
2804 elif field == "variants":
2805 val = list(variants)
2807 raise errors.ParameterError(field)
2814 class LURemoveNode(LogicalUnit):
2815 """Logical unit for removing a node.
2818 HPATH = "node-remove"
2819 HTYPE = constants.HTYPE_NODE
2820 _OP_REQP = ["node_name"]
2822 def BuildHooksEnv(self):
2825 This doesn't run on the target node in the pre phase as a failed
2826 node would then be impossible to remove.
2830 "OP_TARGET": self.op.node_name,
2831 "NODE_NAME": self.op.node_name,
2833 all_nodes = self.cfg.GetNodeList()
2835 all_nodes.remove(self.op.node_name)
2837 logging.warning("Node %s which is about to be removed not found"
2838 " in the all nodes list", self.op.node_name)
2839 return env, all_nodes, all_nodes
2841 def CheckPrereq(self):
2842 """Check prerequisites.
2845 - the node exists in the configuration
2846 - it does not have primary or secondary instances
2847 - it's not the master
2849 Any errors are signaled by raising errors.OpPrereqError.
2852 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2853 node = self.cfg.GetNodeInfo(self.op.node_name)
2854 assert node is not None
2856 instance_list = self.cfg.GetInstanceList()
2858 masternode = self.cfg.GetMasterNode()
2859 if node.name == masternode:
2860 raise errors.OpPrereqError("Node is the master node,"
2861 " you need to failover first.",
2864 for instance_name in instance_list:
2865 instance = self.cfg.GetInstanceInfo(instance_name)
2866 if node.name in instance.all_nodes:
2867 raise errors.OpPrereqError("Instance %s is still running on the node,"
2868 " please remove first." % instance_name,
2870 self.op.node_name = node.name
2873 def Exec(self, feedback_fn):
2874 """Removes the node from the cluster.
2878 logging.info("Stopping the node daemon and removing configs from node %s",
2881 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2883 # Promote nodes to master candidate as needed
2884 _AdjustCandidatePool(self, exceptions=[node.name])
2885 self.context.RemoveNode(node.name)
2887 # Run post hooks on the node before it's removed
2888 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2890 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2892 # pylint: disable-msg=W0702
2893 self.LogWarning("Errors occurred running hooks on %s" % node.name)
2895 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2896 msg = result.fail_msg
2898 self.LogWarning("Errors encountered on the remote node while leaving"
2899 " the cluster: %s", msg)
2901 # Remove node from our /etc/hosts
2902 if self.cfg.GetClusterInfo().modify_etc_hosts:
2903 # FIXME: this should be done via an rpc call to node daemon
2904 utils.RemoveHostFromEtcHosts(node.name)
2905 _RedistributeAncillaryFiles(self)
2908 class LUQueryNodes(NoHooksLU):
2909 """Logical unit for querying nodes.
2912 # pylint: disable-msg=W0142
2913 _OP_REQP = ["output_fields", "names", "use_locking"]
2916 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2917 "master_candidate", "offline", "drained"]
2919 _FIELDS_DYNAMIC = utils.FieldSet(
2921 "mtotal", "mnode", "mfree",
2923 "ctotal", "cnodes", "csockets",
2926 _FIELDS_STATIC = utils.FieldSet(*[
2927 "pinst_cnt", "sinst_cnt",
2928 "pinst_list", "sinst_list",
2929 "pip", "sip", "tags",
2931 "role"] + _SIMPLE_FIELDS
2934 def ExpandNames(self):
2935 _CheckOutputFields(static=self._FIELDS_STATIC,
2936 dynamic=self._FIELDS_DYNAMIC,
2937 selected=self.op.output_fields)
2939 self.needed_locks = {}
2940 self.share_locks[locking.LEVEL_NODE] = 1
2943 self.wanted = _GetWantedNodes(self, self.op.names)
2945 self.wanted = locking.ALL_SET
2947 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2948 self.do_locking = self.do_node_query and self.op.use_locking
2950 # if we don't request only static fields, we need to lock the nodes
2951 self.needed_locks[locking.LEVEL_NODE] = self.wanted
2953 def CheckPrereq(self):
2954 """Check prerequisites.
2957 # The validation of the node list is done in _GetWantedNodes, if it is
2958 # non-empty; if empty, there's no validation to do
2961 def Exec(self, feedback_fn):
2962 """Computes the list of nodes and their attributes.
2965 all_info = self.cfg.GetAllNodesInfo()
2967 nodenames = self.acquired_locks[locking.LEVEL_NODE]
2968 elif self.wanted != locking.ALL_SET:
2969 nodenames = self.wanted
2970 missing = set(nodenames).difference(all_info.keys())
2972 raise errors.OpExecError(
2973 "Some nodes were removed before retrieving their data: %s" % missing)
2975 nodenames = all_info.keys()
2977 nodenames = utils.NiceSort(nodenames)
2978 nodelist = [all_info[name] for name in nodenames]
2980 # begin data gathering
2982 if self.do_node_query:
2984 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2985 self.cfg.GetHypervisorType())
2986 for name in nodenames:
2987 nodeinfo = node_data[name]
2988 if not nodeinfo.fail_msg and nodeinfo.payload:
2989 nodeinfo = nodeinfo.payload
2990 fn = utils.TryConvert
2992 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2993 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2994 "mfree": fn(int, nodeinfo.get('memory_free', None)),
2995 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2996 "dfree": fn(int, nodeinfo.get('vg_free', None)),
2997 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2998 "bootid": nodeinfo.get('bootid', None),
2999 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3000 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3003 live_data[name] = {}
3005 live_data = dict.fromkeys(nodenames, {})
3007 node_to_primary = dict([(name, set()) for name in nodenames])
3008 node_to_secondary = dict([(name, set()) for name in nodenames])
3010 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3011 "sinst_cnt", "sinst_list"))
3012 if inst_fields & frozenset(self.op.output_fields):
3013 inst_data = self.cfg.GetAllInstancesInfo()
3015 for inst in inst_data.values():
3016 if inst.primary_node in node_to_primary:
3017 node_to_primary[inst.primary_node].add(inst.name)
3018 for secnode in inst.secondary_nodes:
3019 if secnode in node_to_secondary:
3020 node_to_secondary[secnode].add(inst.name)
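# e.g. an instance "inst1" with primary node "node1" and secondary "node2"
# ends up as node_to_primary["node1"] == set(["inst1"]) and
# node_to_secondary["node2"] == set(["inst1"])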
3022 master_node = self.cfg.GetMasterNode()
3024 # end data gathering
3027 for node in nodelist:
3029 for field in self.op.output_fields:
3030 if field in self._SIMPLE_FIELDS:
3031 val = getattr(node, field)
3032 elif field == "pinst_list":
3033 val = list(node_to_primary[node.name])
3034 elif field == "sinst_list":
3035 val = list(node_to_secondary[node.name])
3036 elif field == "pinst_cnt":
3037 val = len(node_to_primary[node.name])
3038 elif field == "sinst_cnt":
3039 val = len(node_to_secondary[node.name])
3040 elif field == "pip":
3041 val = node.primary_ip
3042 elif field == "sip":
3043 val = node.secondary_ip
3044 elif field == "tags":
3045 val = list(node.GetTags())
3046 elif field == "master":
3047 val = node.name == master_node
3048 elif self._FIELDS_DYNAMIC.Matches(field):
3049 val = live_data[node.name].get(field, None)
3050 elif field == "role":
3051 if node.name == master_node:
3053 elif node.master_candidate:
3062 raise errors.ParameterError(field)
3063 node_output.append(val)
3064 output.append(node_output)
3069 class LUQueryNodeVolumes(NoHooksLU):
3070 """Logical unit for getting volumes on node(s).
3073 _OP_REQP = ["nodes", "output_fields"]
3075 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3076 _FIELDS_STATIC = utils.FieldSet("node")
3078 def ExpandNames(self):
3079 _CheckOutputFields(static=self._FIELDS_STATIC,
3080 dynamic=self._FIELDS_DYNAMIC,
3081 selected=self.op.output_fields)
3083 self.needed_locks = {}
3084 self.share_locks[locking.LEVEL_NODE] = 1
3085 if not self.op.nodes:
3086 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3088 self.needed_locks[locking.LEVEL_NODE] = \
3089 _GetWantedNodes(self, self.op.nodes)
3091 def CheckPrereq(self):
3092 """Check prerequisites.
3094 This checks that the fields required are valid output fields.
3097 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3099 def Exec(self, feedback_fn):
3100 """Computes the list of nodes and their attributes.
3103 nodenames = self.nodes
3104 volumes = self.rpc.call_node_volumes(nodenames)
3106 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3107 in self.cfg.GetInstanceList()]
3109 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3112 for node in nodenames:
3113 nresult = volumes[node]
3116 msg = nresult.fail_msg
3118 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3121 node_vols = nresult.payload[:]
3122 node_vols.sort(key=lambda vol: vol['dev'])
3124 for vol in node_vols:
3126 for field in self.op.output_fields:
3129 elif field == "phys":
3133 elif field == "name":
3135 elif field == "size":
3136 val = int(float(vol['size']))
3137 elif field == "instance":
3139 if node not in lv_by_node[inst]:
3141 if vol['name'] in lv_by_node[inst][node]:
3147 raise errors.ParameterError(field)
3148 node_output.append(str(val))
3150 output.append(node_output)
3155 class LUQueryNodeStorage(NoHooksLU):
3156 """Logical unit for getting information on storage units on node(s).
3159 _OP_REQP = ["nodes", "storage_type", "output_fields"]
3161 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3163 def CheckArguments(self):
3164 _CheckStorageType(self.op.storage_type)
3166 _CheckOutputFields(static=self._FIELDS_STATIC,
3167 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3168 selected=self.op.output_fields)
3170 def ExpandNames(self):
3171 self.needed_locks = {}
3172 self.share_locks[locking.LEVEL_NODE] = 1
3175 self.needed_locks[locking.LEVEL_NODE] = \
3176 _GetWantedNodes(self, self.op.nodes)
3178 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3180 def CheckPrereq(self):
3181 """Check prerequisites.
3183 This checks that the fields required are valid output fields.
3186 self.op.name = getattr(self.op, "name", None)
3188 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3190 def Exec(self, feedback_fn):
3191 """Computes the list of nodes and their attributes.
3194 # Always get name to sort by
3195 if constants.SF_NAME in self.op.output_fields:
3196 fields = self.op.output_fields[:]
3198 fields = [constants.SF_NAME] + self.op.output_fields
3200 # Never ask for node or type as it's only known to the LU
3201 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3202 while extra in fields:
3203 fields.remove(extra)
3205 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3206 name_idx = field_idx[constants.SF_NAME]
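# For example (field names taken from constants.VALID_STORAGE_FIELDS, the
# exact list is assumed here), with fields == ["name", "size", "used", "free"]:
#   field_idx == {"name": 0, "size": 1, "used": 2, "free": 3} and name_idx == 0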
3208 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3209 data = self.rpc.call_storage_list(self.nodes,
3210 self.op.storage_type, st_args,
3211 self.op.name, fields)
3215 for node in utils.NiceSort(self.nodes):
3216 nresult = data[node]
3220 msg = nresult.fail_msg
3222 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3225 rows = dict([(row[name_idx], row) for row in nresult.payload])
3227 for name in utils.NiceSort(rows.keys()):
3232 for field in self.op.output_fields:
3233 if field == constants.SF_NODE:
3235 elif field == constants.SF_TYPE:
3236 val = self.op.storage_type
3237 elif field in field_idx:
3238 val = row[field_idx[field]]
3240 raise errors.ParameterError(field)
3249 class LUModifyNodeStorage(NoHooksLU):
3250 """Logical unit for modifying a storage volume on a node.
3253 _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3256 def CheckArguments(self):
3257 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3259 _CheckStorageType(self.op.storage_type)
3261 def ExpandNames(self):
3262 self.needed_locks = {
3263 locking.LEVEL_NODE: self.op.node_name,
3266 def CheckPrereq(self):
3267 """Check prerequisites.
3270 storage_type = self.op.storage_type
3273 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3275 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3276 " modified" % storage_type,
3279 diff = set(self.op.changes.keys()) - modifiable
3281 raise errors.OpPrereqError("The following fields can not be modified for"
3282 " storage units of type '%s': %r" %
3283 (storage_type, list(diff)),
3286 def Exec(self, feedback_fn):
3287 """Computes the list of nodes and their attributes.
3290 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3291 result = self.rpc.call_storage_modify(self.op.node_name,
3292 self.op.storage_type, st_args,
3293 self.op.name, self.op.changes)
3294 result.Raise("Failed to modify storage unit '%s' on %s" %
3295 (self.op.name, self.op.node_name))
3298 class LUAddNode(LogicalUnit):
3299 """Logical unit for adding node to the cluster.
3303 HTYPE = constants.HTYPE_NODE
3304 _OP_REQP = ["node_name"]
3306 def CheckArguments(self):
3307 # validate/normalize the node name
3308 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3310 def BuildHooksEnv(self):
3313 This will run on all nodes before, and on all nodes + the new node after.
3317 "OP_TARGET": self.op.node_name,
3318 "NODE_NAME": self.op.node_name,
3319 "NODE_PIP": self.op.primary_ip,
3320 "NODE_SIP": self.op.secondary_ip,
3322 nodes_0 = self.cfg.GetNodeList()
3323 nodes_1 = nodes_0 + [self.op.node_name, ]
3324 return env, nodes_0, nodes_1
3326 def CheckPrereq(self):
3327 """Check prerequisites.
3330 - the new node is not already in the config
3332 - its parameters (single/dual homed) match the cluster
3334 Any errors are signaled by raising errors.OpPrereqError.
3337 node_name = self.op.node_name
3340 dns_data = utils.GetHostInfo(node_name)
3342 node = dns_data.name
3343 primary_ip = self.op.primary_ip = dns_data.ip
3344 secondary_ip = getattr(self.op, "secondary_ip", None)
3345 if secondary_ip is None:
3346 secondary_ip = primary_ip
3347 if not utils.IsValidIP(secondary_ip):
3348 raise errors.OpPrereqError("Invalid secondary IP given",
3350 self.op.secondary_ip = secondary_ip
3352 node_list = cfg.GetNodeList()
3353 if not self.op.readd and node in node_list:
3354 raise errors.OpPrereqError("Node %s is already in the configuration" %
3355 node, errors.ECODE_EXISTS)
3356 elif self.op.readd and node not in node_list:
3357 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3360 self.changed_primary_ip = False
3362 for existing_node_name in node_list:
3363 existing_node = cfg.GetNodeInfo(existing_node_name)
3365 if self.op.readd and node == existing_node_name:
3366 if existing_node.secondary_ip != secondary_ip:
3367 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3368 " address configuration as before",
3370 if existing_node.primary_ip != primary_ip:
3371 self.changed_primary_ip = True
3375 if (existing_node.primary_ip == primary_ip or
3376 existing_node.secondary_ip == primary_ip or
3377 existing_node.primary_ip == secondary_ip or
3378 existing_node.secondary_ip == secondary_ip):
3379 raise errors.OpPrereqError("New node ip address(es) conflict with"
3380 " existing node %s" % existing_node.name,
3381 errors.ECODE_NOTUNIQUE)
3383 # check that the type of the node (single versus dual homed) is the
3384 # same as for the master
3385 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3386 master_singlehomed = myself.secondary_ip == myself.primary_ip
3387 newbie_singlehomed = secondary_ip == primary_ip
3388 if master_singlehomed != newbie_singlehomed:
3389 if master_singlehomed:
3390 raise errors.OpPrereqError("The master has no private ip but the"
3391 " new node has one",
3394 raise errors.OpPrereqError("The master has a private ip but the"
3395 " new node doesn't have one",
3398 # checks reachability
3399 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3400 raise errors.OpPrereqError("Node not reachable by ping",
3401 errors.ECODE_ENVIRON)
3403 if not newbie_singlehomed:
3404 # check reachability from my secondary ip to newbie's secondary ip
3405 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3406 source=myself.secondary_ip):
3407 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3408 " based ping to noded port",
3409 errors.ECODE_ENVIRON)
3416 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3419 self.new_node = self.cfg.GetNodeInfo(node)
3420 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3422 self.new_node = objects.Node(name=node,
3423 primary_ip=primary_ip,
3424 secondary_ip=secondary_ip,
3425 master_candidate=self.master_candidate,
3426 offline=False, drained=False)
3428 def Exec(self, feedback_fn):
3429 """Adds the new node to the cluster.
3432 new_node = self.new_node
3433 node = new_node.name
3435 # for re-adds, reset the offline/drained/master-candidate flags;
3436 # we need to reset here, otherwise offline would prevent RPC calls
3437 # later in the procedure; this also means that if the re-add
3438 # fails, we are left with a non-offlined, broken node
3440 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3441 self.LogInfo("Readding a node, the offline/drained flags were reset")
3442 # if we demote the node, we do cleanup later in the procedure
3443 new_node.master_candidate = self.master_candidate
3444 if self.changed_primary_ip:
3445 new_node.primary_ip = self.op.primary_ip
3447 # notify the user about any possible mc promotion
3448 if new_node.master_candidate:
3449 self.LogInfo("Node will be a master candidate")
3451 # check connectivity
3452 result = self.rpc.call_version([node])[node]
3453 result.Raise("Can't get version information from node %s" % node)
3454 if constants.PROTOCOL_VERSION == result.payload:
3455 logging.info("Communication to node %s fine, sw version %s match",
3456 node, result.payload)
3458 raise errors.OpExecError("Version mismatch master version %s,"
3459 " node version %s" %
3460 (constants.PROTOCOL_VERSION, result.payload))
3463 if self.cfg.GetClusterInfo().modify_ssh_setup:
3464 logging.info("Copy ssh key to node %s", node)
3465 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3467 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3468 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3472 keyarray.append(utils.ReadFile(i))
3474 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3475 keyarray[2], keyarray[3], keyarray[4],
3477 result.Raise("Cannot transfer ssh keys to the new node")
3479 # Add node to our /etc/hosts, and add key to known_hosts
3480 if self.cfg.GetClusterInfo().modify_etc_hosts:
3481 # FIXME: this should be done via an rpc call to node daemon
3482 utils.AddHostToEtcHosts(new_node.name)
3484 if new_node.secondary_ip != new_node.primary_ip:
3485 result = self.rpc.call_node_has_ip_address(new_node.name,
3486 new_node.secondary_ip)
3487 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3488 prereq=True, ecode=errors.ECODE_ENVIRON)
3489 if not result.payload:
3490 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3491 " you gave (%s). Please fix and re-run this"
3492 " command." % new_node.secondary_ip)
3494 node_verify_list = [self.cfg.GetMasterNode()]
3495 node_verify_param = {
3496 constants.NV_NODELIST: [node],
3497 # TODO: do a node-net-test as well?
3500 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3501 self.cfg.GetClusterName())
3502 for verifier in node_verify_list:
3503 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3504 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3506 for failed in nl_payload:
3507 feedback_fn("ssh/hostname verification failed"
3508 " (checking from %s): %s" %
3509 (verifier, nl_payload[failed]))
3510 raise errors.OpExecError("ssh/hostname verification failed.")
3513 _RedistributeAncillaryFiles(self)
3514 self.context.ReaddNode(new_node)
3515 # make sure we redistribute the config
3516 self.cfg.Update(new_node, feedback_fn)
3517 # and make sure the new node will not have old files around
3518 if not new_node.master_candidate:
3519 result = self.rpc.call_node_demote_from_mc(new_node.name)
3520 msg = result.fail_msg
3522 self.LogWarning("Node failed to demote itself from master"
3523 " candidate status: %s" % msg)
3525 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3526 self.context.AddNode(new_node, self.proc.GetECId())
3529 class LUSetNodeParams(LogicalUnit):
3530 """Modifies the parameters of a node.
3533 HPATH = "node-modify"
3534 HTYPE = constants.HTYPE_NODE
3535 _OP_REQP = ["node_name"]
3538 def CheckArguments(self):
3539 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3540 _CheckBooleanOpField(self.op, 'master_candidate')
3541 _CheckBooleanOpField(self.op, 'offline')
3542 _CheckBooleanOpField(self.op, 'drained')
3543 _CheckBooleanOpField(self.op, 'auto_promote')
3544 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3545 if all_mods.count(None) == 3:
3546 raise errors.OpPrereqError("Please pass at least one modification",
3548 if all_mods.count(True) > 1:
3549 raise errors.OpPrereqError("Can't set the node into more than one"
3550 " state at the same time",
3553 # Boolean value that tells us whether we're offlining or draining the node
3554 self.offline_or_drain = (self.op.offline == True or
3555 self.op.drained == True)
3556 self.deoffline_or_drain = (self.op.offline == False or
3557 self.op.drained == False)
3558 self.might_demote = (self.op.master_candidate == False or
3559 self.offline_or_drain)
3561 self.lock_all = self.op.auto_promote and self.might_demote
3564 def ExpandNames(self):
3566 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3568 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3570 def BuildHooksEnv(self):
3573 This runs on the master node.
3577 "OP_TARGET": self.op.node_name,
3578 "MASTER_CANDIDATE": str(self.op.master_candidate),
3579 "OFFLINE": str(self.op.offline),
3580 "DRAINED": str(self.op.drained),
3582 nl = [self.cfg.GetMasterNode(),
3586 def CheckPrereq(self):
3587 """Check prerequisites.
3589 This only checks the instance list against the existing names.
3592 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3594 if (self.op.master_candidate is not None or
3595 self.op.drained is not None or
3596 self.op.offline is not None):
3597 # we can't change the master's node flags
3598 if self.op.node_name == self.cfg.GetMasterNode():
3599 raise errors.OpPrereqError("The master role can be changed"
3600 " only via masterfailover",
3604 if node.master_candidate and self.might_demote and not self.lock_all:
3605 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3606 # check if after removing the current node, we're missing master candidates
3608 (mc_remaining, mc_should, _) = \
3609 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3610 if mc_remaining < mc_should:
3611 raise errors.OpPrereqError("Not enough master candidates, please"
3612 " pass auto_promote to allow promotion",
3615 if (self.op.master_candidate == True and
3616 ((node.offline and not self.op.offline == False) or
3617 (node.drained and not self.op.drained == False))):
3618 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3619 " to master_candidate" % node.name,
3622 # If we're being deofflined/drained, we'll MC ourself if needed
3623 if (self.deoffline_or_drain and not self.offline_or_drain and not
3624 self.op.master_candidate == True and not node.master_candidate):
3625 self.op.master_candidate = _DecideSelfPromotion(self)
3626 if self.op.master_candidate:
3627 self.LogInfo("Autopromoting node to master candidate")
3631 def Exec(self, feedback_fn):
3640 if self.op.offline is not None:
3641 node.offline = self.op.offline
3642 result.append(("offline", str(self.op.offline)))
3643 if self.op.offline == True:
3644 if node.master_candidate:
3645 node.master_candidate = False
3647 result.append(("master_candidate", "auto-demotion due to offline"))
3649 node.drained = False
3650 result.append(("drained", "clear drained status due to offline"))
3652 if self.op.master_candidate is not None:
3653 node.master_candidate = self.op.master_candidate
3655 result.append(("master_candidate", str(self.op.master_candidate)))
3656 if self.op.master_candidate == False:
3657 rrc = self.rpc.call_node_demote_from_mc(node.name)
3660 self.LogWarning("Node failed to demote itself: %s" % msg)
3662 if self.op.drained is not None:
3663 node.drained = self.op.drained
3664 result.append(("drained", str(self.op.drained)))
3665 if self.op.drained == True:
3666 if node.master_candidate:
3667 node.master_candidate = False
3669 result.append(("master_candidate", "auto-demotion due to drain"))
3670 rrc = self.rpc.call_node_demote_from_mc(node.name)
3673 self.LogWarning("Node failed to demote itself: %s" % msg)
3675 node.offline = False
3676 result.append(("offline", "clear offline status due to drain"))
3678 # we locked all nodes, we adjust the CP before updating this node
3680 _AdjustCandidatePool(self, [node.name])
3682 # this will trigger configuration file update, if needed
3683 self.cfg.Update(node, feedback_fn)
3685 # this will trigger job queue propagation or cleanup
3687 self.context.ReaddNode(node)
3692 class LUPowercycleNode(NoHooksLU):
3693 """Powercycles a node.
3696 _OP_REQP = ["node_name", "force"]
3699 def CheckArguments(self):
3700 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3701 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3702 raise errors.OpPrereqError("The node is the master and the force"
3703 " parameter was not set",
3706 def ExpandNames(self):
3707 """Locking for PowercycleNode.
3709 This is a last-resort option and shouldn't block on other
3710 jobs. Therefore, we grab no locks.
3713 self.needed_locks = {}
3715 def CheckPrereq(self):
3716 """Check prerequisites.
3718 This LU has no prereqs.
3723 def Exec(self, feedback_fn):
3727 result = self.rpc.call_node_powercycle(self.op.node_name,
3728 self.cfg.GetHypervisorType())
3729 result.Raise("Failed to schedule the reboot")
3730 return result.payload
3733 class LUQueryClusterInfo(NoHooksLU):
3734 """Query cluster configuration.
3740 def ExpandNames(self):
3741 self.needed_locks = {}
3743 def CheckPrereq(self):
3744 """No prerequisites needed for this LU.
3749 def Exec(self, feedback_fn):
3750 """Return cluster config.
3753 cluster = self.cfg.GetClusterInfo()
3756 # Filter just for enabled hypervisors
3757 for os_name, hv_dict in cluster.os_hvp.items():
3758 os_hvp[os_name] = {}
3759 for hv_name, hv_params in hv_dict.items():
3760 if hv_name in cluster.enabled_hypervisors:
3761 os_hvp[os_name][hv_name] = hv_params
3764 "software_version": constants.RELEASE_VERSION,
3765 "protocol_version": constants.PROTOCOL_VERSION,
3766 "config_version": constants.CONFIG_VERSION,
3767 "os_api_version": max(constants.OS_API_VERSIONS),
3768 "export_version": constants.EXPORT_VERSION,
3769 "architecture": (platform.architecture()[0], platform.machine()),
3770 "name": cluster.cluster_name,
3771 "master": cluster.master_node,
3772 "default_hypervisor": cluster.enabled_hypervisors[0],
3773 "enabled_hypervisors": cluster.enabled_hypervisors,
3774 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3775 for hypervisor_name in cluster.enabled_hypervisors]),
3777 "beparams": cluster.beparams,
3778 "nicparams": cluster.nicparams,
3779 "candidate_pool_size": cluster.candidate_pool_size,
3780 "master_netdev": cluster.master_netdev,
3781 "volume_group_name": cluster.volume_group_name,
3782 "file_storage_dir": cluster.file_storage_dir,
3783 "maintain_node_health": cluster.maintain_node_health,
3784 "ctime": cluster.ctime,
3785 "mtime": cluster.mtime,
3786 "uuid": cluster.uuid,
3787 "tags": list(cluster.GetTags()),
3788 "uid_pool": cluster.uid_pool,
3794 class LUQueryConfigValues(NoHooksLU):
3795 """Return configuration values.
3800 _FIELDS_DYNAMIC = utils.FieldSet()
3801 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3804 def ExpandNames(self):
3805 self.needed_locks = {}
3807 _CheckOutputFields(static=self._FIELDS_STATIC,
3808 dynamic=self._FIELDS_DYNAMIC,
3809 selected=self.op.output_fields)
3811 def CheckPrereq(self):
3812 """No prerequisites.
3817 def Exec(self, feedback_fn):
3818 """Dump a representation of the cluster config to the standard output.
3822 for field in self.op.output_fields:
3823 if field == "cluster_name":
3824 entry = self.cfg.GetClusterName()
3825 elif field == "master_node":
3826 entry = self.cfg.GetMasterNode()
3827 elif field == "drain_flag":
3828 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3829 elif field == "watcher_pause":
3830 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3832 raise errors.ParameterError(field)
3833 values.append(entry)
3837 class LUActivateInstanceDisks(NoHooksLU):
3838 """Bring up an instance's disks.
3841 _OP_REQP = ["instance_name"]
3844 def ExpandNames(self):
3845 self._ExpandAndLockInstance()
3846 self.needed_locks[locking.LEVEL_NODE] = []
3847 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3849 def DeclareLocks(self, level):
3850 if level == locking.LEVEL_NODE:
3851 self._LockInstancesNodes()
3853 def CheckPrereq(self):
3854 """Check prerequisites.
3856 This checks that the instance is in the cluster.
3859 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3860 assert self.instance is not None, \
3861 "Cannot retrieve locked instance %s" % self.op.instance_name
3862 _CheckNodeOnline(self, self.instance.primary_node)
3863 if not hasattr(self.op, "ignore_size"):
3864 self.op.ignore_size = False
3866 def Exec(self, feedback_fn):
3867 """Activate the disks.
3870 disks_ok, disks_info = \
3871 _AssembleInstanceDisks(self, self.instance,
3872 ignore_size=self.op.ignore_size)
3874 raise errors.OpExecError("Cannot activate block devices")
3879 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
3881 """Prepare the block devices for an instance.
3883 This sets up the block devices on all nodes.
3885 @type lu: L{LogicalUnit}
3886 @param lu: the logical unit on whose behalf we execute
3887 @type instance: L{objects.Instance}
3888 @param instance: the instance for whose disks we assemble
3889 @type disks: list of L{objects.Disk} or None
3890 @param disks: which disks to assemble (or all, if None)
3891 @type ignore_secondaries: boolean
3892 @param ignore_secondaries: if true, errors on secondary nodes
3893 won't result in an error return from the function
3894 @type ignore_size: boolean
3895 @param ignore_size: if true, the current known size of the disk
3896 will not be used during the disk activation, useful for cases
3897 when the size is wrong
3898 @return: False if the operation failed, otherwise a list of
3899 (host, instance_visible_name, node_visible_name)
3900 with the mapping from node devices to instance devices
3905 iname = instance.name
3906 disks = _ExpandCheckDisks(instance, disks)
3908 # With the two-pass mechanism we try to reduce the window of
3909 # opportunity for the race condition of switching DRBD to primary
3910 # before handshaking occurred, but we do not eliminate it
3912 # The proper fix would be to wait (with some limits) until the
3913 # connection has been made and drbd transitions from WFConnection
3914 # into any other network-connected state (Connected, SyncTarget,
3917 # 1st pass, assemble on all nodes in secondary mode
3918 for inst_disk in disks:
3919 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3921 node_disk = node_disk.Copy()
3922 node_disk.UnsetSize()
3923 lu.cfg.SetDiskID(node_disk, node)
3924 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3925 msg = result.fail_msg
3927 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3928 " (is_primary=False, pass=1): %s",
3929 inst_disk.iv_name, node, msg)
3930 if not ignore_secondaries:
3933 # FIXME: race condition on drbd migration to primary
3935 # 2nd pass, do only the primary node
3936 for inst_disk in disks:
3939 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3940 if node != instance.primary_node:
3943 node_disk = node_disk.Copy()
3944 node_disk.UnsetSize()
3945 lu.cfg.SetDiskID(node_disk, node)
3946 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3947 msg = result.fail_msg
3949 lu.proc.LogWarning("Could not prepare block device %s on node %s"
3950 " (is_primary=True, pass=2): %s",
3951 inst_disk.iv_name, node, msg)
3954 dev_path = result.payload
3956 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3958 # leave the disks configured for the primary node
3959 # this is a workaround that would be better fixed by
3960 # improving the logical/physical id handling
3962 lu.cfg.SetDiskID(disk, instance.primary_node)
3964 return disks_ok, device_info
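# Condensed sketch of the two-pass assembly above, stripped of the size
# handling and error reporting: pass 1 assembles every device on every node
# in secondary (read-only) mode, pass 2 re-assembles only on the primary node
# in primary mode, narrowing (but not closing) the DRBD handshake race
# described in the comments above.  Not used by the module.
def _ExampleTwoPassAssemble(lu, instance, disks):
  """Illustrative two-pass assembly."""
  for inst_disk in disks:
    # pass 1: every node, as secondary
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(node_disk, node)
      lu.rpc.call_blockdev_assemble(node, node_disk, instance.name, False)
  for inst_disk in disks:
    # pass 2: primary node only, as primary
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node == instance.primary_node:
        lu.cfg.SetDiskID(node_disk, node)
        lu.rpc.call_blockdev_assemble(node, node_disk, instance.name, True)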
3967 def _StartInstanceDisks(lu, instance, force):
3968 """Start the disks of an instance.
3971 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3972 ignore_secondaries=force)
3974 _ShutdownInstanceDisks(lu, instance)
3975 if force is not None and not force:
3976 lu.proc.LogWarning("", hint="If the message above refers to a"
3978 " you can retry the operation using '--force'.")
3979 raise errors.OpExecError("Disk consistency error")
3982 class LUDeactivateInstanceDisks(NoHooksLU):
3983 """Shutdown an instance's disks.
3986 _OP_REQP = ["instance_name"]
3989 def ExpandNames(self):
3990 self._ExpandAndLockInstance()
3991 self.needed_locks[locking.LEVEL_NODE] = []
3992 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3994 def DeclareLocks(self, level):
3995 if level == locking.LEVEL_NODE:
3996 self._LockInstancesNodes()
3998 def CheckPrereq(self):
3999 """Check prerequisites.
4001 This checks that the instance is in the cluster.
4004 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4005 assert self.instance is not None, \
4006 "Cannot retrieve locked instance %s" % self.op.instance_name
4008 def Exec(self, feedback_fn):
4009 """Deactivate the disks
4012 instance = self.instance
4013 _SafeShutdownInstanceDisks(self, instance)
4016 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4017 """Shutdown block devices of an instance.
4019 This function checks if an instance is running, before calling
4020 _ShutdownInstanceDisks.
4023 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4024 _ShutdownInstanceDisks(lu, instance, disks=disks)
4027 def _ExpandCheckDisks(instance, disks):
4028 """Return the instance disks selected by the disks list
4030 @type disks: list of L{objects.Disk} or None
4031 @param disks: selected disks
4032 @rtype: list of L{objects.Disk}
4033 @return: selected instance disks to act on
4037 return instance.disks
4039 if not set(disks).issubset(instance.disks):
4040 raise errors.ProgrammerError("Can only act on disks belonging to the"
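# Behaviour of _ExpandCheckDisks in short (derived from the code above):
#   disks=None or []             -> act on all of instance.disks
#   disks=[instance.disks[0]]    -> act on that disk only
#   disks containing a disk not owned by the instance
#                                -> errors.ProgrammerError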
4045 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4046 """Shutdown block devices of an instance.
4048 This does the shutdown on all nodes of the instance.
4050 If ignore_primary is false, errors on the primary node are
4055 disks = _ExpandCheckDisks(instance, disks)
4058 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4059 lu.cfg.SetDiskID(top_disk, node)
4060 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4061 msg = result.fail_msg
4063 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4064 disk.iv_name, node, msg)
4065 if not ignore_primary or node != instance.primary_node:
4070 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4071 """Checks if a node has enough free memory.
4073 This function checks if a given node has the needed amount of free
4074 memory. In case the node has less memory or we cannot get the
4075 information from the node, this function raises an OpPrereqError
4078 @type lu: C{LogicalUnit}
4079 @param lu: a logical unit from which we get configuration data
4081 @param node: the node to check
4082 @type reason: C{str}
4083 @param reason: string to use in the error message
4084 @type requested: C{int}
4085 @param requested: the amount of memory in MiB to check for
4086 @type hypervisor_name: C{str}
4087 @param hypervisor_name: the hypervisor to ask for memory stats
4088 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4089 we cannot check the node
4092 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4093 nodeinfo[node].Raise("Can't get data from node %s" % node,
4094 prereq=True, ecode=errors.ECODE_ENVIRON)
4095 free_mem = nodeinfo[node].payload.get('memory_free', None)
4096 if not isinstance(free_mem, int):
4097 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4098 " was '%s'" % (node, free_mem),
4099 errors.ECODE_ENVIRON)
4100 if requested > free_mem:
4101 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4102 " needed %s MiB, available %s MiB" %
4103 (node, reason, requested, free_mem),
4107 def _CheckNodesFreeDisk(lu, nodenames, requested):
4108 """Checks if nodes have enough free disk space in the default VG.
4110 This function checks if all given nodes have the needed amount of
4111 free disk. In case any node has less disk or we cannot get the
4112 information from the node, this function raises an OpPrereqError
4115 @type lu: C{LogicalUnit}
4116 @param lu: a logical unit from which we get configuration data
4117 @type nodenames: C{list}
4118 @param nodenames: the list of node names to check
4119 @type requested: C{int}
4120 @param requested: the amount of disk in MiB to check for
4121 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4122 we cannot check the node
4125 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4126 lu.cfg.GetHypervisorType())
4127 for node in nodenames:
4128 info = nodeinfo[node]
4129 info.Raise("Cannot get current information from node %s" % node,
4130 prereq=True, ecode=errors.ECODE_ENVIRON)
4131 vg_free = info.payload.get("vg_free", None)
4132 if not isinstance(vg_free, int):
4133 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4134 " result was '%s'" % (node, vg_free),
4135 errors.ECODE_ENVIRON)
4136 if requested > vg_free:
4137 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4138 " required %d MiB, available %d MiB" %
4139 (node, requested, vg_free),
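# Both _CheckNodeFreeMemory and _CheckNodesFreeDisk follow the same pattern:
# one node_info RPC, Raise() with prereq=True/ECODE_ENVIRON on RPC failure, a
# type check on the payload value, and a comparison against the requested
# amount.  A minimal sketch of the comparison step; the helper name is
# hypothetical and ECODE_NORES is assumed to be the "not enough resources"
# error code.
def _ExampleCheckNodeResource(node, what, requested, available):
  """Compare a requested amount in MiB with what a node reported."""
  if not isinstance(available, int):
    raise errors.OpPrereqError("Can't compute free %s on node %s, result"
                               " was '%s'" % (what, node, available),
                               errors.ECODE_ENVIRON)
  if requested > available:
    raise errors.OpPrereqError("Not enough %s on node %s: needed %d MiB,"
                               " available %d MiB" %
                               (what, node, requested, available),
                               errors.ECODE_NORES)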
4143 class LUStartupInstance(LogicalUnit):
4144 """Starts an instance.
4147 HPATH = "instance-start"
4148 HTYPE = constants.HTYPE_INSTANCE
4149 _OP_REQP = ["instance_name", "force"]
4152 def ExpandNames(self):
4153 self._ExpandAndLockInstance()
4155 def BuildHooksEnv(self):
4158 This runs on master, primary and secondary nodes of the instance.
4162 "FORCE": self.op.force,
4164 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4165 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4168 def CheckPrereq(self):
4169 """Check prerequisites.
4171 This checks that the instance is in the cluster.
4174 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4175 assert self.instance is not None, \
4176 "Cannot retrieve locked instance %s" % self.op.instance_name
4179 self.beparams = getattr(self.op, "beparams", {})
4181 if not isinstance(self.beparams, dict):
4182 raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4183 " dict" % (type(self.beparams), ),
4185 # fill the beparams dict
4186 utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
4187 self.op.beparams = self.beparams
4190 self.hvparams = getattr(self.op, "hvparams", {})
4192 if not isinstance(self.hvparams, dict):
4193 raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4194 " dict" % (type(self.hvparams), ),
4197 # check hypervisor parameter syntax (locally)
4198 cluster = self.cfg.GetClusterInfo()
4199 utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
4200 filled_hvp = cluster.FillHV(instance)
4201 filled_hvp.update(self.hvparams)
4202 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4203 hv_type.CheckParameterSyntax(filled_hvp)
4204 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4205 self.op.hvparams = self.hvparams
4207 _CheckNodeOnline(self, instance.primary_node)
4209 bep = self.cfg.GetClusterInfo().FillBE(instance)
4210 # check bridges existence
4211 _CheckInstanceBridgesExist(self, instance)
4213 remote_info = self.rpc.call_instance_info(instance.primary_node,
4215 instance.hypervisor)
4216 remote_info.Raise("Error checking node %s" % instance.primary_node,
4217 prereq=True, ecode=errors.ECODE_ENVIRON)
4218 if not remote_info.payload: # not running already
4219 _CheckNodeFreeMemory(self, instance.primary_node,
4220 "starting instance %s" % instance.name,
4221 bep[constants.BE_MEMORY], instance.hypervisor)
4223 def Exec(self, feedback_fn):
4224 """Start the instance.
4227 instance = self.instance
4228 force = self.op.force
4230 self.cfg.MarkInstanceUp(instance.name)
4232 node_current = instance.primary_node
4234 _StartInstanceDisks(self, instance, force)
4236 result = self.rpc.call_instance_start(node_current, instance,
4237 self.hvparams, self.beparams)
4238 msg = result.fail_msg
4240 _ShutdownInstanceDisks(self, instance)
4241 raise errors.OpExecError("Could not start instance: %s" % msg)
4244 class LURebootInstance(LogicalUnit):
4245 """Reboot an instance.
4248 HPATH = "instance-reboot"
4249 HTYPE = constants.HTYPE_INSTANCE
4250 _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
4253 def CheckArguments(self):
4254 """Check the arguments.
4257 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4258 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4260 def ExpandNames(self):
4261 if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
4262 constants.INSTANCE_REBOOT_HARD,
4263 constants.INSTANCE_REBOOT_FULL]:
4264 raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
4265 (constants.INSTANCE_REBOOT_SOFT,
4266 constants.INSTANCE_REBOOT_HARD,
4267 constants.INSTANCE_REBOOT_FULL))
4268 self._ExpandAndLockInstance()
4270 def BuildHooksEnv(self):
4273 This runs on master, primary and secondary nodes of the instance.
4277 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4278 "REBOOT_TYPE": self.op.reboot_type,
4279 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4281 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4282 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4285 def CheckPrereq(self):
4286 """Check prerequisites.
4288 This checks that the instance is in the cluster.
4291 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4292 assert self.instance is not None, \
4293 "Cannot retrieve locked instance %s" % self.op.instance_name
4295 _CheckNodeOnline(self, instance.primary_node)
4297 # check bridges existence
4298 _CheckInstanceBridgesExist(self, instance)
4300 def Exec(self, feedback_fn):
4301 """Reboot the instance.
4304 instance = self.instance
4305 ignore_secondaries = self.op.ignore_secondaries
4306 reboot_type = self.op.reboot_type
4308 node_current = instance.primary_node
4310 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4311 constants.INSTANCE_REBOOT_HARD]:
4312 for disk in instance.disks:
4313 self.cfg.SetDiskID(disk, node_current)
4314 result = self.rpc.call_instance_reboot(node_current, instance,
4316 self.shutdown_timeout)
4317 result.Raise("Could not reboot instance")
4319 result = self.rpc.call_instance_shutdown(node_current, instance,
4320 self.shutdown_timeout)
4321 result.Raise("Could not shutdown instance for full reboot")
4322 _ShutdownInstanceDisks(self, instance)
4323 _StartInstanceDisks(self, instance, ignore_secondaries)
4324 result = self.rpc.call_instance_start(node_current, instance, None, None)
4325 msg = result.fail_msg
4327 _ShutdownInstanceDisks(self, instance)
4328 raise errors.OpExecError("Could not start instance for"
4329 " full reboot: %s" % msg)
4331 self.cfg.MarkInstanceUp(instance.name)
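# Condensed view of the reboot dispatch above: soft and hard reboots are
# delegated to a single instance_reboot RPC, while a full reboot is an
# explicit shutdown, disk deactivation, disk re-activation and start.  Sketch
# only; error handling and per-disk ID setup are omitted and the helper name
# is illustrative.
def _ExampleRebootDispatch(lu, instance, reboot_type, ignore_secondaries,
                           shutdown_timeout):
  node = instance.primary_node
  if reboot_type in (constants.INSTANCE_REBOOT_SOFT,
                     constants.INSTANCE_REBOOT_HARD):
    lu.rpc.call_instance_reboot(node, instance, reboot_type,
                                shutdown_timeout)
  else:
    # full reboot
    lu.rpc.call_instance_shutdown(node, instance, shutdown_timeout)
    _ShutdownInstanceDisks(lu, instance)
    _StartInstanceDisks(lu, instance, ignore_secondaries)
    lu.rpc.call_instance_start(node, instance, None, None)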
4334 class LUShutdownInstance(LogicalUnit):
4335 """Shutdown an instance.
4338 HPATH = "instance-stop"
4339 HTYPE = constants.HTYPE_INSTANCE
4340 _OP_REQP = ["instance_name"]
4343 def CheckArguments(self):
4344 """Check the arguments.
4347 self.timeout = getattr(self.op, "timeout",
4348 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4350 def ExpandNames(self):
4351 self._ExpandAndLockInstance()
4353 def BuildHooksEnv(self):
4356 This runs on master, primary and secondary nodes of the instance.
4359 env = _BuildInstanceHookEnvByObject(self, self.instance)
4360 env["TIMEOUT"] = self.timeout
4361 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4364 def CheckPrereq(self):
4365 """Check prerequisites.
4367 This checks that the instance is in the cluster.
4370 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4371 assert self.instance is not None, \
4372 "Cannot retrieve locked instance %s" % self.op.instance_name
4373 _CheckNodeOnline(self, self.instance.primary_node)
4375 def Exec(self, feedback_fn):
4376 """Shutdown the instance.
4379 instance = self.instance
4380 node_current = instance.primary_node
4381 timeout = self.timeout
4382 self.cfg.MarkInstanceDown(instance.name)
4383 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4384 msg = result.fail_msg
4386 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4388 _ShutdownInstanceDisks(self, instance)
4391 class LUReinstallInstance(LogicalUnit):
4392 """Reinstall an instance.
4395 HPATH = "instance-reinstall"
4396 HTYPE = constants.HTYPE_INSTANCE
4397 _OP_REQP = ["instance_name"]
4400 def ExpandNames(self):
4401 self._ExpandAndLockInstance()
4403 def BuildHooksEnv(self):
4406 This runs on master, primary and secondary nodes of the instance.
4409 env = _BuildInstanceHookEnvByObject(self, self.instance)
4410 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4413 def CheckPrereq(self):
4414 """Check prerequisites.
4416 This checks that the instance is in the cluster and is not running.
4419 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4420 assert instance is not None, \
4421 "Cannot retrieve locked instance %s" % self.op.instance_name
4422 _CheckNodeOnline(self, instance.primary_node)
4424 if instance.disk_template == constants.DT_DISKLESS:
4425 raise errors.OpPrereqError("Instance '%s' has no disks" %
4426 self.op.instance_name,
4428 _CheckInstanceDown(self, instance, "cannot reinstall")
4430 self.op.os_type = getattr(self.op, "os_type", None)
4431 self.op.force_variant = getattr(self.op, "force_variant", False)
4432 if self.op.os_type is not None:
4434 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4435 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4437 self.instance = instance
4439 def Exec(self, feedback_fn):
4440 """Reinstall the instance.
4443 inst = self.instance
4445 if self.op.os_type is not None:
4446 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4447 inst.os = self.op.os_type
4448 self.cfg.Update(inst, feedback_fn)
4450 _StartInstanceDisks(self, inst, None)
4452 feedback_fn("Running the instance OS create scripts...")
4453 # FIXME: pass debug option from opcode to backend
4454 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4455 self.op.debug_level)
4456 result.Raise("Could not install OS for instance %s on node %s" %
4457 (inst.name, inst.primary_node))
4459 _ShutdownInstanceDisks(self, inst)
4462 class LURecreateInstanceDisks(LogicalUnit):
4463 """Recreate an instance's missing disks.
4466 HPATH = "instance-recreate-disks"
4467 HTYPE = constants.HTYPE_INSTANCE
4468 _OP_REQP = ["instance_name", "disks"]
4471 def CheckArguments(self):
4472 """Check the arguments.
4475 if not isinstance(self.op.disks, list):
4476 raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4477 for item in self.op.disks:
4478 if (not isinstance(item, int) or
4480 raise errors.OpPrereqError("Invalid disk specification '%s'" %
4481 str(item), errors.ECODE_INVAL)
4483 def ExpandNames(self):
4484 self._ExpandAndLockInstance()
4486 def BuildHooksEnv(self):
4489 This runs on master, primary and secondary nodes of the instance.
4492 env = _BuildInstanceHookEnvByObject(self, self.instance)
4493 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4496 def CheckPrereq(self):
4497 """Check prerequisites.
4499 This checks that the instance is in the cluster and is not running.
4502 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4503 assert instance is not None, \
4504 "Cannot retrieve locked instance %s" % self.op.instance_name
4505 _CheckNodeOnline(self, instance.primary_node)
4507 if instance.disk_template == constants.DT_DISKLESS:
4508 raise errors.OpPrereqError("Instance '%s' has no disks" %
4509 self.op.instance_name, errors.ECODE_INVAL)
4510 _CheckInstanceDown(self, instance, "cannot recreate disks")
4512 if not self.op.disks:
4513 self.op.disks = range(len(instance.disks))
4515 for idx in self.op.disks:
4516 if idx >= len(instance.disks):
4517 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4520 self.instance = instance
4522 def Exec(self, feedback_fn):
4523 """Recreate the disks.
4527 for idx, _ in enumerate(self.instance.disks):
4528 if idx not in self.op.disks: # disk idx has not been passed in
4532 _CreateDisks(self, self.instance, to_skip=to_skip)
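# The to_skip list built above (partly elided) simply inverts op.disks: every
# disk index that was not requested is excluded from recreation.  A
# standalone sketch of that selection:
def _ExampleDisksToSkip(instance, wanted_indices):
  """Return the disk indices that should NOT be recreated."""
  return [idx for idx, _ in enumerate(instance.disks)
          if idx not in wanted_indices]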
4535 class LURenameInstance(LogicalUnit):
4536 """Rename an instance.
4539 HPATH = "instance-rename"
4540 HTYPE = constants.HTYPE_INSTANCE
4541 _OP_REQP = ["instance_name", "new_name"]
4543 def BuildHooksEnv(self):
4546 This runs on master, primary and secondary nodes of the instance.
4549 env = _BuildInstanceHookEnvByObject(self, self.instance)
4550 env["INSTANCE_NEW_NAME"] = self.op.new_name
4551 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4554 def CheckPrereq(self):
4555 """Check prerequisites.
4557 This checks that the instance is in the cluster and is not running.
4560 self.op.instance_name = _ExpandInstanceName(self.cfg,
4561 self.op.instance_name)
4562 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4563 assert instance is not None
4564 _CheckNodeOnline(self, instance.primary_node)
4565 _CheckInstanceDown(self, instance, "cannot rename")
4566 self.instance = instance
4568 # new name verification
4569 name_info = utils.GetHostInfo(self.op.new_name)
4571 self.op.new_name = new_name = name_info.name
4572 instance_list = self.cfg.GetInstanceList()
4573 if new_name in instance_list:
4574 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4575 new_name, errors.ECODE_EXISTS)
4577 if not getattr(self.op, "ignore_ip", False):
4578 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4579 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4580 (name_info.ip, new_name),
4581 errors.ECODE_NOTUNIQUE)
4584 def Exec(self, feedback_fn):
4585 """Reinstall the instance.
4588 inst = self.instance
4589 old_name = inst.name
4591 if inst.disk_template == constants.DT_FILE:
4592 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4594 self.cfg.RenameInstance(inst.name, self.op.new_name)
4595 # Change the instance lock. This is definitely safe while we hold the BGL
4596 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4597 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4599 # re-read the instance from the configuration after rename
4600 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4602 if inst.disk_template == constants.DT_FILE:
4603 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4604 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4605 old_file_storage_dir,
4606 new_file_storage_dir)
4607 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4608 " (but the instance has been renamed in Ganeti)" %
4609 (inst.primary_node, old_file_storage_dir,
4610 new_file_storage_dir))
4612 _StartInstanceDisks(self, inst, None)
4614 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4615 old_name, self.op.debug_level)
4616 msg = result.fail_msg
4618 msg = ("Could not run OS rename script for instance %s on node %s"
4619 " (but the instance has been renamed in Ganeti): %s" %
4620 (inst.name, inst.primary_node, msg))
4621 self.proc.LogWarning(msg)
4623 _ShutdownInstanceDisks(self, inst)
4626 class LURemoveInstance(LogicalUnit):
4627 """Remove an instance.
4630 HPATH = "instance-remove"
4631 HTYPE = constants.HTYPE_INSTANCE
4632 _OP_REQP = ["instance_name", "ignore_failures"]
4635 def CheckArguments(self):
4636 """Check the arguments.
4639 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4640 constants.DEFAULT_SHUTDOWN_TIMEOUT)
4642 def ExpandNames(self):
4643 self._ExpandAndLockInstance()
4644 self.needed_locks[locking.LEVEL_NODE] = []
4645 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4647 def DeclareLocks(self, level):
4648 if level == locking.LEVEL_NODE:
4649 self._LockInstancesNodes()
4651 def BuildHooksEnv(self):
4654 This runs on master, primary and secondary nodes of the instance.
4657 env = _BuildInstanceHookEnvByObject(self, self.instance)
4658 env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4659 nl = [self.cfg.GetMasterNode()]
4660 nl_post = list(self.instance.all_nodes) + nl
4661 return env, nl, nl_post
4663 def CheckPrereq(self):
4664 """Check prerequisites.
4666 This checks that the instance is in the cluster.
4669 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4670 assert self.instance is not None, \
4671 "Cannot retrieve locked instance %s" % self.op.instance_name
4673 def Exec(self, feedback_fn):
4674 """Remove the instance.
4677 instance = self.instance
4678 logging.info("Shutting down instance %s on node %s",
4679 instance.name, instance.primary_node)
4681 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4682 self.shutdown_timeout)
4683 msg = result.fail_msg
4685 if self.op.ignore_failures:
4686 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4688 raise errors.OpExecError("Could not shutdown instance %s on"
4690 (instance.name, instance.primary_node, msg))
4692 logging.info("Removing block devices for instance %s", instance.name)
4694 if not _RemoveDisks(self, instance):
4695 if self.op.ignore_failures:
4696 feedback_fn("Warning: can't remove instance's disks")
4698 raise errors.OpExecError("Can't remove instance's disks")
4700 logging.info("Removing instance %s out of cluster config", instance.name)
4702 self.cfg.RemoveInstance(instance.name)
4703 self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4706 class LUQueryInstances(NoHooksLU):
4707 """Logical unit for querying instances.
4710 # pylint: disable-msg=W0142
4711 _OP_REQP = ["output_fields", "names", "use_locking"]
4713 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4714 "serial_no", "ctime", "mtime", "uuid"]
4715 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4717 "disk_template", "ip", "mac", "bridge",
4718 "nic_mode", "nic_link",
4719 "sda_size", "sdb_size", "vcpus", "tags",
4720 "network_port", "beparams",
4721 r"(disk)\.(size)/([0-9]+)",
4722 r"(disk)\.(sizes)", "disk_usage",
4723 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4724 r"(nic)\.(bridge)/([0-9]+)",
4725 r"(nic)\.(macs|ips|modes|links|bridges)",
4726 r"(disk|nic)\.(count)",
4728 ] + _SIMPLE_FIELDS +
4730 for name in constants.HVS_PARAMETERS
4731 if name not in constants.HVC_GLOBALS] +
4733 for name in constants.BES_PARAMETERS])
4734 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4737 def ExpandNames(self):
4738 _CheckOutputFields(static=self._FIELDS_STATIC,
4739 dynamic=self._FIELDS_DYNAMIC,
4740 selected=self.op.output_fields)
4742 self.needed_locks = {}
4743 self.share_locks[locking.LEVEL_INSTANCE] = 1
4744 self.share_locks[locking.LEVEL_NODE] = 1
4747 self.wanted = _GetWantedInstances(self, self.op.names)
4749 self.wanted = locking.ALL_SET
4751 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4752 self.do_locking = self.do_node_query and self.op.use_locking
4754 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4755 self.needed_locks[locking.LEVEL_NODE] = []
4756 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4758 def DeclareLocks(self, level):
4759 if level == locking.LEVEL_NODE and self.do_locking:
4760 self._LockInstancesNodes()
4762 def CheckPrereq(self):
4763 """Check prerequisites.
4768 def Exec(self, feedback_fn):
4769 """Computes the list of nodes and their attributes.
4772 # pylint: disable-msg=R0912
4773 # way too many branches here
4774 all_info = self.cfg.GetAllInstancesInfo()
4775 if self.wanted == locking.ALL_SET:
4776 # caller didn't specify instance names, so ordering is not important
4778 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4780 instance_names = all_info.keys()
4781 instance_names = utils.NiceSort(instance_names)
4783 # caller did specify names, so we must keep the ordering
4785 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4787 tgt_set = all_info.keys()
4788 missing = set(self.wanted).difference(tgt_set)
4790 raise errors.OpExecError("Some instances were removed before"
4791 " retrieving their data: %s" % missing)
4792 instance_names = self.wanted
4794 instance_list = [all_info[iname] for iname in instance_names]
4796 # begin data gathering
4798 nodes = frozenset([inst.primary_node for inst in instance_list])
4799 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4803 if self.do_node_query:
4805 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4807 result = node_data[name]
4809 # offline nodes will be in both lists
4810 off_nodes.append(name)
4812 bad_nodes.append(name)
4815 live_data.update(result.payload)
4816 # else no instance is alive
4818 live_data = dict([(name, {}) for name in instance_names])
4820 # end data gathering
4825 cluster = self.cfg.GetClusterInfo()
4826 for instance in instance_list:
4828 i_hv = cluster.FillHV(instance, skip_globals=True)
4829 i_be = cluster.FillBE(instance)
4830 i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4831 nic.nicparams) for nic in instance.nics]
4832 for field in self.op.output_fields:
4833 st_match = self._FIELDS_STATIC.Matches(field)
4834 if field in self._SIMPLE_FIELDS:
4835 val = getattr(instance, field)
4836 elif field == "pnode":
4837 val = instance.primary_node
4838 elif field == "snodes":
4839 val = list(instance.secondary_nodes)
4840 elif field == "admin_state":
4841 val = instance.admin_up
4842 elif field == "oper_state":
4843 if instance.primary_node in bad_nodes:
4846 val = bool(live_data.get(instance.name))
4847 elif field == "status":
4848 if instance.primary_node in off_nodes:
4849 val = "ERROR_nodeoffline"
4850 elif instance.primary_node in bad_nodes:
4851 val = "ERROR_nodedown"
4853 running = bool(live_data.get(instance.name))
4855 if instance.admin_up:
4860 if instance.admin_up:
4864 elif field == "oper_ram":
4865 if instance.primary_node in bad_nodes:
4867 elif instance.name in live_data:
4868 val = live_data[instance.name].get("memory", "?")
4871 elif field == "vcpus":
4872 val = i_be[constants.BE_VCPUS]
4873 elif field == "disk_template":
4874 val = instance.disk_template
4877 val = instance.nics[0].ip
4880 elif field == "nic_mode":
4882 val = i_nicp[0][constants.NIC_MODE]
4885 elif field == "nic_link":
4887 val = i_nicp[0][constants.NIC_LINK]
4890 elif field == "bridge":
4891 if (instance.nics and
4892 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4893 val = i_nicp[0][constants.NIC_LINK]
4896 elif field == "mac":
4898 val = instance.nics[0].mac
4901 elif field == "sda_size" or field == "sdb_size":
4902 idx = ord(field[2]) - ord('a')
4904 val = instance.FindDisk(idx).size
4905 except errors.OpPrereqError:
4907 elif field == "disk_usage": # total disk usage per node
4908 disk_sizes = [{'size': disk.size} for disk in instance.disks]
4909 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4910 elif field == "tags":
4911 val = list(instance.GetTags())
4912 elif field == "hvparams":
4914 elif (field.startswith(HVPREFIX) and
4915 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4916 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4917 val = i_hv.get(field[len(HVPREFIX):], None)
4918 elif field == "beparams":
4920 elif (field.startswith(BEPREFIX) and
4921 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4922 val = i_be.get(field[len(BEPREFIX):], None)
4923 elif st_match and st_match.groups():
4924 # matches a variable list
4925 st_groups = st_match.groups()
4926 if st_groups and st_groups[0] == "disk":
4927 if st_groups[1] == "count":
4928 val = len(instance.disks)
4929 elif st_groups[1] == "sizes":
4930 val = [disk.size for disk in instance.disks]
4931 elif st_groups[1] == "size":
4933 val = instance.FindDisk(st_groups[2]).size
4934 except errors.OpPrereqError:
4937 assert False, "Unhandled disk parameter"
4938 elif st_groups[0] == "nic":
4939 if st_groups[1] == "count":
4940 val = len(instance.nics)
4941 elif st_groups[1] == "macs":
4942 val = [nic.mac for nic in instance.nics]
4943 elif st_groups[1] == "ips":
4944 val = [nic.ip for nic in instance.nics]
4945 elif st_groups[1] == "modes":
4946 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4947 elif st_groups[1] == "links":
4948 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4949 elif st_groups[1] == "bridges":
4952 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4953 val.append(nicp[constants.NIC_LINK])
4958 nic_idx = int(st_groups[2])
4959 if nic_idx >= len(instance.nics):
4962 if st_groups[1] == "mac":
4963 val = instance.nics[nic_idx].mac
4964 elif st_groups[1] == "ip":
4965 val = instance.nics[nic_idx].ip
4966 elif st_groups[1] == "mode":
4967 val = i_nicp[nic_idx][constants.NIC_MODE]
4968 elif st_groups[1] == "link":
4969 val = i_nicp[nic_idx][constants.NIC_LINK]
4970 elif st_groups[1] == "bridge":
4971 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4972 if nic_mode == constants.NIC_MODE_BRIDGED:
4973 val = i_nicp[nic_idx][constants.NIC_LINK]
4977 assert False, "Unhandled NIC parameter"
4979 assert False, ("Declared but unhandled variable parameter '%s'" %
4982 assert False, "Declared but unhandled parameter '%s'" % field
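# The static field set above mixes plain names with regular expressions such
# as r"(disk)\.(size)/([0-9]+)"; Exec then uses the match groups (st_groups)
# to decode which attribute and index were requested.  A simplified,
# self-contained illustration of that matching idea -- this is not the real
# utils.FieldSet implementation.
import re  # would normally live with the imports at the top of the module

class _ExampleFieldSet(object):
  def __init__(self, *items):
    # anchor every pattern so that "disk.size/0" cannot match "disk.sizes"
    self._res = [re.compile("^%s$" % value) for value in items]

  def Matches(self, field):
    """Return the match object of the first matching pattern, or None."""
    for regex in self._res:
      match = regex.match(field)
      if match:
        return match
    return None

# _ExampleFieldSet(r"(disk)\.(size)/([0-9]+)").Matches("disk.size/0").groups()
# would yield ('disk', 'size', '0').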
4989 class LUFailoverInstance(LogicalUnit):
4990 """Failover an instance.
4993 HPATH = "instance-failover"
4994 HTYPE = constants.HTYPE_INSTANCE
4995 _OP_REQP = ["instance_name", "ignore_consistency"]
4998 def CheckArguments(self):
4999 """Check the arguments.
5002 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5003 constants.DEFAULT_SHUTDOWN_TIMEOUT)
5005 def ExpandNames(self):
5006 self._ExpandAndLockInstance()
5007 self.needed_locks[locking.LEVEL_NODE] = []
5008 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5010 def DeclareLocks(self, level):
5011 if level == locking.LEVEL_NODE:
5012 self._LockInstancesNodes()
5014 def BuildHooksEnv(self):
5017 This runs on master, primary and secondary nodes of the instance.
5020 instance = self.instance
5021 source_node = instance.primary_node
5022 target_node = instance.secondary_nodes[0]
5024 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5025 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5026 "OLD_PRIMARY": source_node,
5027 "OLD_SECONDARY": target_node,
5028 "NEW_PRIMARY": target_node,
5029 "NEW_SECONDARY": source_node,
5031 env.update(_BuildInstanceHookEnvByObject(self, instance))
5032 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5034 nl_post.append(source_node)
5035 return env, nl, nl_post
5037 def CheckPrereq(self):
5038 """Check prerequisites.
5040 This checks that the instance is in the cluster.
5043 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5044 assert self.instance is not None, \
5045 "Cannot retrieve locked instance %s" % self.op.instance_name
5047 bep = self.cfg.GetClusterInfo().FillBE(instance)
5048 if instance.disk_template not in constants.DTS_NET_MIRROR:
5049 raise errors.OpPrereqError("Instance's disk layout is not"
5050 " network mirrored, cannot failover.",
5053 secondary_nodes = instance.secondary_nodes
5054 if not secondary_nodes:
5055 raise errors.ProgrammerError("no secondary node but using "
5056 "a mirrored disk template")
5058 target_node = secondary_nodes[0]
5059 _CheckNodeOnline(self, target_node)
5060 _CheckNodeNotDrained(self, target_node)
5061 if instance.admin_up:
5062 # check memory requirements on the secondary node
5063 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5064 instance.name, bep[constants.BE_MEMORY],
5065 instance.hypervisor)
5067 self.LogInfo("Not checking memory on the secondary node as"
5068 " instance will not be started")
5070 # check bridge existence
5071 _CheckInstanceBridgesExist(self, instance, node=target_node)
5073 def Exec(self, feedback_fn):
5074 """Failover an instance.
5076 The failover is done by shutting it down on its present node and
5077 starting it on the secondary.
5080 instance = self.instance
5082 source_node = instance.primary_node
5083 target_node = instance.secondary_nodes[0]
5085 if instance.admin_up:
5086 feedback_fn("* checking disk consistency between source and target")
5087 for dev in instance.disks:
5088 # for drbd, these are drbd over lvm
5089 if not _CheckDiskConsistency(self, dev, target_node, False):
5090 if not self.op.ignore_consistency:
5091 raise errors.OpExecError("Disk %s is degraded on target node,"
5092 " aborting failover." % dev.iv_name)
5094 feedback_fn("* not checking disk consistency as instance is not running")
5096 feedback_fn("* shutting down instance on source node")
5097 logging.info("Shutting down instance %s on node %s",
5098 instance.name, source_node)
5100 result = self.rpc.call_instance_shutdown(source_node, instance,
5101 self.shutdown_timeout)
5102 msg = result.fail_msg
5104 if self.op.ignore_consistency:
5105 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5106 " Proceeding anyway. Please make sure node"
5107 " %s is down. Error details: %s",
5108 instance.name, source_node, source_node, msg)
5110 raise errors.OpExecError("Could not shutdown instance %s on"
5112 (instance.name, source_node, msg))
5114 feedback_fn("* deactivating the instance's disks on source node")
5115 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5116 raise errors.OpExecError("Can't shut down the instance's disks.")
5118 instance.primary_node = target_node
5119 # distribute new instance config to the other nodes
5120 self.cfg.Update(instance, feedback_fn)
5122 # Only start the instance if it's marked as up
5123 if instance.admin_up:
5124 feedback_fn("* activating the instance's disks on target node")
5125 logging.info("Starting instance %s on node %s",
5126 instance.name, target_node)
5128 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5129 ignore_secondaries=True)
5131 _ShutdownInstanceDisks(self, instance)
5132 raise errors.OpExecError("Can't activate the instance's disks")
5134 feedback_fn("* starting the instance on the target node")
5135 result = self.rpc.call_instance_start(target_node, instance, None, None)
5136 msg = result.fail_msg
5138 _ShutdownInstanceDisks(self, instance)
5139 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5140 (instance.name, target_node, msg))
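# Condensed failover flow, as implemented above: shut the instance down on
# the old primary, deactivate its disks everywhere, flip primary_node in the
# configuration, then (only if the instance was marked up) re-assemble the
# disks and start it on the new primary.  Consistency checks and error paths
# are omitted and the helper name is illustrative.
def _ExampleFailover(lu, instance, feedback_fn, shutdown_timeout):
  source_node = instance.primary_node
  target_node = instance.secondary_nodes[0]
  lu.rpc.call_instance_shutdown(source_node, instance, shutdown_timeout)
  _ShutdownInstanceDisks(lu, instance, ignore_primary=True)
  instance.primary_node = target_node
  lu.cfg.Update(instance, feedback_fn)
  if instance.admin_up:
    _AssembleInstanceDisks(lu, instance, ignore_secondaries=True)
    lu.rpc.call_instance_start(target_node, instance, None, None)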
5143 class LUMigrateInstance(LogicalUnit):
5144 """Migrate an instance.
5146 This is migration without shutting the instance down; compare with
5147 failover, which is done with a shutdown.
5150 HPATH = "instance-migrate"
5151 HTYPE = constants.HTYPE_INSTANCE
5152 _OP_REQP = ["instance_name", "live", "cleanup"]
5156 def ExpandNames(self):
5157 self._ExpandAndLockInstance()
5159 self.needed_locks[locking.LEVEL_NODE] = []
5160 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5162 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5163 self.op.live, self.op.cleanup)
5164 self.tasklets = [self._migrater]
5166 def DeclareLocks(self, level):
5167 if level == locking.LEVEL_NODE:
5168 self._LockInstancesNodes()
5170 def BuildHooksEnv(self):
5173 This runs on master, primary and secondary nodes of the instance.
5176 instance = self._migrater.instance
5177 source_node = instance.primary_node
5178 target_node = instance.secondary_nodes[0]
5179 env = _BuildInstanceHookEnvByObject(self, instance)
5180 env["MIGRATE_LIVE"] = self.op.live
5181 env["MIGRATE_CLEANUP"] = self.op.cleanup
5183 "OLD_PRIMARY": source_node,
5184 "OLD_SECONDARY": target_node,
5185 "NEW_PRIMARY": target_node,
5186 "NEW_SECONDARY": source_node,
5188 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5190 nl_post.append(source_node)
5191 return env, nl, nl_post
5194 class LUMoveInstance(LogicalUnit):
5195 """Move an instance by data-copying.
5198 HPATH = "instance-move"
5199 HTYPE = constants.HTYPE_INSTANCE
5200 _OP_REQP = ["instance_name", "target_node"]
5203 def CheckArguments(self):
5204 """Check the arguments.
5207 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5208 constants.DEFAULT_SHUTDOWN_TIMEOUT)
5210 def ExpandNames(self):
5211 self._ExpandAndLockInstance()
5212 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5213 self.op.target_node = target_node
5214 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5215 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5217 def DeclareLocks(self, level):
5218 if level == locking.LEVEL_NODE:
5219 self._LockInstancesNodes(primary_only=True)
5221 def BuildHooksEnv(self):
5224 This runs on master, primary and secondary nodes of the instance.
5228 "TARGET_NODE": self.op.target_node,
5229 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5231 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5232 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5233 self.op.target_node]
5236 def CheckPrereq(self):
5237 """Check prerequisites.
5239 This checks that the instance is in the cluster.
5242 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5243 assert self.instance is not None, \
5244 "Cannot retrieve locked instance %s" % self.op.instance_name
5246 node = self.cfg.GetNodeInfo(self.op.target_node)
5247 assert node is not None, \
5248 "Cannot retrieve locked node %s" % self.op.target_node
5250 self.target_node = target_node = node.name
5252 if target_node == instance.primary_node:
5253 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5254 (instance.name, target_node),
5257 bep = self.cfg.GetClusterInfo().FillBE(instance)
5259 for idx, dsk in enumerate(instance.disks):
5260 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5261 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5262 " cannot copy" % idx, errors.ECODE_STATE)
5264 _CheckNodeOnline(self, target_node)
5265 _CheckNodeNotDrained(self, target_node)
5267 if instance.admin_up:
5268 # check memory requirements on the secondary node
5269 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5270 instance.name, bep[constants.BE_MEMORY],
5271 instance.hypervisor)
5273 self.LogInfo("Not checking memory on the secondary node as"
5274 " instance will not be started")
5276 # check bridge existence
5277 _CheckInstanceBridgesExist(self, instance, node=target_node)
5279 def Exec(self, feedback_fn):
5280 """Move an instance.
5282 The move is done by shutting it down on its present node, copying
5283 the data over (slow) and starting it on the new node.
5286 instance = self.instance
5288 source_node = instance.primary_node
5289 target_node = self.target_node
5291 self.LogInfo("Shutting down instance %s on source node %s",
5292 instance.name, source_node)
5294 result = self.rpc.call_instance_shutdown(source_node, instance,
5295 self.shutdown_timeout)
5296 msg = result.fail_msg
5298 if self.op.ignore_consistency:
5299 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5300 " Proceeding anyway. Please make sure node"
5301 " %s is down. Error details: %s",
5302 instance.name, source_node, source_node, msg)
5304 raise errors.OpExecError("Could not shutdown instance %s on"
5306 (instance.name, source_node, msg))
5308 # create the target disks
5310 _CreateDisks(self, instance, target_node=target_node)
5311 except errors.OpExecError:
5312 self.LogWarning("Device creation failed, reverting...")
5314 _RemoveDisks(self, instance, target_node=target_node)
5316 self.cfg.ReleaseDRBDMinors(instance.name)
5319 cluster_name = self.cfg.GetClusterInfo().cluster_name
5322 # activate, get path, copy the data over
5323 for idx, disk in enumerate(instance.disks):
5324 self.LogInfo("Copying data for disk %d", idx)
5325 result = self.rpc.call_blockdev_assemble(target_node, disk,
5326 instance.name, True)
5328 self.LogWarning("Can't assemble newly created disk %d: %s",
5329 idx, result.fail_msg)
5330 errs.append(result.fail_msg)
5332 dev_path = result.payload
5333 result = self.rpc.call_blockdev_export(source_node, disk,
5334 target_node, dev_path,
5337 self.LogWarning("Can't copy data over for disk %d: %s",
5338 idx, result.fail_msg)
5339 errs.append(result.fail_msg)
5343 self.LogWarning("Some disks failed to copy, aborting")
5345 _RemoveDisks(self, instance, target_node=target_node)
5347 self.cfg.ReleaseDRBDMinors(instance.name)
5348 raise errors.OpExecError("Errors during disk copy: %s" %
5351 instance.primary_node = target_node
5352 self.cfg.Update(instance, feedback_fn)
5354 self.LogInfo("Removing the disks on the original node")
5355 _RemoveDisks(self, instance, target_node=source_node)
5357 # Only start the instance if it's marked as up
5358 if instance.admin_up:
5359 self.LogInfo("Starting instance %s on node %s",
5360 instance.name, target_node)
5362 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5363 ignore_secondaries=True)
5365 _ShutdownInstanceDisks(self, instance)
5366 raise errors.OpExecError("Can't activate the instance's disks")
5368 result = self.rpc.call_instance_start(target_node, instance, None, None)
5369 msg = result.fail_msg
5371 _ShutdownInstanceDisks(self, instance)
5372 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5373 (instance.name, target_node, msg))
5376 class LUMigrateNode(LogicalUnit):
5377 """Migrate all instances from a node.
5380 HPATH = "node-migrate"
5381 HTYPE = constants.HTYPE_NODE
5382 _OP_REQP = ["node_name", "live"]
5385 def ExpandNames(self):
5386 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5388 self.needed_locks = {
5389 locking.LEVEL_NODE: [self.op.node_name],
5392 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5394 # Create tasklets for migrating instances for all instances on this node
5398 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5399 logging.debug("Migrating instance %s", inst.name)
5400 names.append(inst.name)
5402 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5404 self.tasklets = tasklets
5406 # Declare instance locks
5407 self.needed_locks[locking.LEVEL_INSTANCE] = names
5409 def DeclareLocks(self, level):
5410 if level == locking.LEVEL_NODE:
5411 self._LockInstancesNodes()
5413 def BuildHooksEnv(self):
5416 This runs on the master, the primary and all the secondaries.
5420 "NODE_NAME": self.op.node_name,
5423 nl = [self.cfg.GetMasterNode()]
5425 return (env, nl, nl)
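# LUMigrateNode fans out into one TLMigrateInstance tasklet per instance
# whose primary node is the node being evacuated; the instance locks in
# ExpandNames are declared from the same name list.  Sketch of that
# expansion, using only helpers referenced above:
def _ExampleNodeMigrationTasklets(lu, node_name, live):
  names = []
  tasklets = []
  for inst in _GetNodePrimaryInstances(lu.cfg, node_name):
    names.append(inst.name)
    tasklets.append(TLMigrateInstance(lu, inst.name, live, False))
  return names, tasklets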
5428 class TLMigrateInstance(Tasklet):
5429 def __init__(self, lu, instance_name, live, cleanup):
5430 """Initializes this class.
5433 Tasklet.__init__(self, lu)
5436 self.instance_name = instance_name
5438 self.cleanup = cleanup
5440 def CheckPrereq(self):
5441 """Check prerequisites.
5443 This checks that the instance is in the cluster.
5446 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5447 instance = self.cfg.GetInstanceInfo(instance_name)
5448 assert instance is not None
5450 if instance.disk_template != constants.DT_DRBD8:
5451 raise errors.OpPrereqError("Instance's disk layout is not"
5452 " drbd8, cannot migrate.", errors.ECODE_STATE)
5454 secondary_nodes = instance.secondary_nodes
5455 if not secondary_nodes:
5456 raise errors.ConfigurationError("No secondary node but using"
5457 " drbd8 disk template")
5459 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5461 target_node = secondary_nodes[0]
5462 # check memory requirements on the secondary node
5463 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5464 instance.name, i_be[constants.BE_MEMORY],
5465 instance.hypervisor)
5467 # check bridge existence
5468 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5470 if not self.cleanup:
5471 _CheckNodeNotDrained(self.lu, target_node)
5472 result = self.rpc.call_instance_migratable(instance.primary_node,
5474 result.Raise("Can't migrate, please use failover",
5475 prereq=True, ecode=errors.ECODE_STATE)
5477 self.instance = instance
5479 def _WaitUntilSync(self):
5480 """Poll with custom rpc for disk sync.
5482 This uses our own step-based rpc call.
5485 self.feedback_fn("* wait until resync is done")
5489 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5491 self.instance.disks)
5493 for node, nres in result.items():
5494 nres.Raise("Cannot resync disks on node %s" % node)
5495 node_done, node_percent = nres.payload
5496 all_done = all_done and node_done
5497 if node_percent is not None:
5498 min_percent = min(min_percent, node_percent)
5500 if min_percent < 100:
5501 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5504 def _EnsureSecondary(self, node):
5505 """Demote a node to secondary.
5508 self.feedback_fn("* switching node %s to secondary mode" % node)
5510 for dev in self.instance.disks:
5511 self.cfg.SetDiskID(dev, node)
5513 result = self.rpc.call_blockdev_close(node, self.instance.name,
5514 self.instance.disks)
5515 result.Raise("Cannot change disk to secondary on node %s" % node)
5517 def _GoStandalone(self):
5518 """Disconnect from the network.
5521 self.feedback_fn("* changing into standalone mode")
5522 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5523 self.instance.disks)
5524 for node, nres in result.items():
5525 nres.Raise("Cannot disconnect disks node %s" % node)
5527 def _GoReconnect(self, multimaster):
5528 """Reconnect to the network.
5534 msg = "single-master"
5535 self.feedback_fn("* changing disks into %s mode" % msg)
5536 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5537 self.instance.disks,
5538 self.instance.name, multimaster)
5539 for node, nres in result.items():
5540 nres.Raise("Cannot change disks config on node %s" % node)
5542 def _ExecCleanup(self):
5543 """Try to cleanup after a failed migration.
5545 The cleanup is done by:
5546 - check that the instance is running only on one node
5547 (and update the config if needed)
5548 - change disks on its secondary node to secondary
5549 - wait until disks are fully synchronized
5550 - disconnect from the network
5551 - change disks into single-master mode
5552 - wait again until disks are fully synchronized
5555 instance = self.instance
5556 target_node = self.target_node
5557 source_node = self.source_node
5559 # check running on only one node
5560 self.feedback_fn("* checking where the instance actually runs"
5561 " (if this hangs, the hypervisor might be in"
5563 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5564 for node, result in ins_l.items():
5565 result.Raise("Can't contact node %s" % node)
5567 runningon_source = instance.name in ins_l[source_node].payload
5568 runningon_target = instance.name in ins_l[target_node].payload
5570 if runningon_source and runningon_target:
5571 raise errors.OpExecError("Instance seems to be running on two nodes,"
5572 " or the hypervisor is confused. You will have"
5573 " to ensure manually that it runs only on one"
5574 " and restart this operation.")
5576 if not (runningon_source or runningon_target):
5577 raise errors.OpExecError("Instance does not seem to be running at all."
5578 " In this case, it's safer to repair by"
5579 " running 'gnt-instance stop' to ensure disk"
5580 " shutdown, and then restarting it.")
5582 if runningon_target:
5583 # the migration has actually succeeded, we need to update the config
5584 self.feedback_fn("* instance running on secondary node (%s),"
5585 " updating config" % target_node)
5586 instance.primary_node = target_node
5587 self.cfg.Update(instance, self.feedback_fn)
5588 demoted_node = source_node
5590 self.feedback_fn("* instance confirmed to be running on its"
5591 " primary node (%s)" % source_node)
5592 demoted_node = target_node
5594 self._EnsureSecondary(demoted_node)
5596 self._WaitUntilSync()
5597 except errors.OpExecError:
5598 # we ignore errors here, since if the device is standalone, it
5599 # won't be able to sync
5601 self._GoStandalone()
5602 self._GoReconnect(False)
5603 self._WaitUntilSync()
5605 self.feedback_fn("* done")
5607 def _RevertDiskStatus(self):
5608 """Try to revert the disk status after a failed migration.
5611 target_node = self.target_node
5613 self._EnsureSecondary(target_node)
5614 self._GoStandalone()
5615 self._GoReconnect(False)
5616 self._WaitUntilSync()
5617 except errors.OpExecError, err:
5618 self.lu.LogWarning("Migration failed and I can't reconnect the"
5619 " drives: error '%s'\n"
5620 "Please look and recover the instance status" %
5623 def _AbortMigration(self):
5624 """Call the hypervisor code to abort a started migration.
5627 instance = self.instance
5628 target_node = self.target_node
5629 migration_info = self.migration_info
5631 abort_result = self.rpc.call_finalize_migration(target_node,
5635 abort_msg = abort_result.fail_msg
5637 logging.error("Aborting migration failed on target node %s: %s",
5638 target_node, abort_msg)
5639 # Don't raise an exception here, as we still have to try to revert the
5640 # disk status, even if this step failed.
5642 def _ExecMigration(self):
5643 """Migrate an instance.
5645 The migrate is done by:
5646 - change the disks into dual-master mode
5647 - wait until disks are fully synchronized again
5648 - migrate the instance
5649 - change disks on the new secondary node (the old primary) to secondary
5650 - wait until disks are fully synchronized
5651 - change disks into single-master mode
5654 instance = self.instance
5655 target_node = self.target_node
5656 source_node = self.source_node
5658 self.feedback_fn("* checking disk consistency between source and target")
5659 for dev in instance.disks:
5660 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5661 raise errors.OpExecError("Disk %s is degraded or not fully"
5662 " synchronized on target node,"
5663 " aborting migrate." % dev.iv_name)
5665 # First get the migration information from the remote node
5666 result = self.rpc.call_migration_info(source_node, instance)
5667 msg = result.fail_msg
5669 log_err = ("Failed fetching source migration information from %s: %s" %
5671 logging.error(log_err)
5672 raise errors.OpExecError(log_err)
5674 self.migration_info = migration_info = result.payload
5676 # Then switch the disks to master/master mode
5677 self._EnsureSecondary(target_node)
5678 self._GoStandalone()
5679 self._GoReconnect(True)
5680 self._WaitUntilSync()
5682 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5683 result = self.rpc.call_accept_instance(target_node,
5686 self.nodes_ip[target_node])
5688 msg = result.fail_msg
5690 logging.error("Instance pre-migration failed, trying to revert"
5691 " disk status: %s", msg)
5692 self.feedback_fn("Pre-migration failed, aborting")
5693 self._AbortMigration()
5694 self._RevertDiskStatus()
5695 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5696 (instance.name, msg))
5698 self.feedback_fn("* migrating instance to %s" % target_node)
5700 result = self.rpc.call_instance_migrate(source_node, instance,
5701 self.nodes_ip[target_node],
5703 msg = result.fail_msg
5705 logging.error("Instance migration failed, trying to revert"
5706 " disk status: %s", msg)
5707 self.feedback_fn("Migration failed, aborting")
5708 self._AbortMigration()
5709 self._RevertDiskStatus()
5710 raise errors.OpExecError("Could not migrate instance %s: %s" %
5711 (instance.name, msg))
5714 instance.primary_node = target_node
5715 # distribute new instance config to the other nodes
5716 self.cfg.Update(instance, self.feedback_fn)
5718 result = self.rpc.call_finalize_migration(target_node,
5722 msg = result.fail_msg
5724 logging.error("Instance migration succeeded, but finalization failed:"
5726 raise errors.OpExecError("Could not finalize instance migration: %s" %
5729 self._EnsureSecondary(source_node)
5730 self._WaitUntilSync()
5731 self._GoStandalone()
5732 self._GoReconnect(False)
5733 self._WaitUntilSync()
5735 self.feedback_fn("* done")
5737 def Exec(self, feedback_fn):
5738 """Perform the migration.
5741 feedback_fn("Migrating instance %s" % self.instance.name)
5743 self.feedback_fn = feedback_fn
5745 self.source_node = self.instance.primary_node
5746 self.target_node = self.instance.secondary_nodes[0]
5747 self.all_nodes = [self.source_node, self.target_node]
5749 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5750 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5754 return self._ExecCleanup()
5756 return self._ExecMigration()
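# Both code paths of this tasklet drive the DRBD devices through the same
# small set of phases, built from the helpers defined above.  Summarised from
# _ExecMigration and _ExecCleanup/_RevertDiskStatus:
#
#   forward migration:
#     _EnsureSecondary(target) -> _GoStandalone() -> _GoReconnect(True)
#     -> _WaitUntilSync() -> migrate -> _EnsureSecondary(source)
#     -> _WaitUntilSync() -> _GoStandalone() -> _GoReconnect(False)
#     -> _WaitUntilSync()
#
#   cleanup / revert after a failed migration:
#     _EnsureSecondary(demoted or target node) -> _WaitUntilSync()
#     (best effort) -> _GoStandalone() -> _GoReconnect(False)
#     -> _WaitUntilSync()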
5759 def _CreateBlockDev(lu, node, instance, device, force_create,
5761 """Create a tree of block devices on a given node.
5763 If this device type has to be created on secondaries, create it and
5766 If not, just recurse to children keeping the same 'force' value.
5768 @param lu: the lu on whose behalf we execute
5769 @param node: the node on which to create the device
5770 @type instance: L{objects.Instance}
5771 @param instance: the instance which owns the device
5772 @type device: L{objects.Disk}
5773 @param device: the device to create
5774 @type force_create: boolean
5775 @param force_create: whether to force creation of this device; this
5776 will be changed to True whenever we find a device which has the
5777 CreateOnSecondary() attribute set
5778 @param info: the extra 'metadata' we should attach to the device
5779 (this will be represented as a LVM tag)
5780 @type force_open: boolean
5781 @param force_open: this parameter will be passed to the
5782 L{backend.BlockdevCreate} function where it specifies
5783 whether we run on primary or not, and it affects both
5784 the child assembly and the device's own Open() execution
5787 if device.CreateOnSecondary():
5791 for child in device.children:
5792 _CreateBlockDev(lu, node, instance, child, force_create,
5795 if not force_create:
5798 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5801 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5802 """Create a single block device on a given node.
5804 This will not recurse over the children of the device, so they must be created in advance.
5807 @param lu: the lu on whose behalf we execute
5808 @param node: the node on which to create the device
5809 @type instance: L{objects.Instance}
5810 @param instance: the instance which owns the device
5811 @type device: L{objects.Disk}
5812 @param device: the device to create
5813 @param info: the extra 'metadata' we should attach to the device
5814 (this will be represented as a LVM tag)
5815 @type force_open: boolean
5816 @param force_open: this parameter will be passed to the
5817 L{backend.BlockdevCreate} function where it specifies
5818 whether we run on primary or not, and it affects both
5819 the child assembly and the device's own Open() execution
5822 lu.cfg.SetDiskID(device, node)
5823 result = lu.rpc.call_blockdev_create(node, device, device.size,
5824 instance.name, force_open, info)
5825 result.Raise("Can't create block device %s on"
5826 " node %s for instance %s" % (device, node, instance.name))
5827 if device.physical_id is None:
5828 device.physical_id = result.payload
5831 def _GenerateUniqueNames(lu, exts):
5832 """Generate a suitable LV name.
5834 This will generate a logical volume name for the given instance.
5839 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5840 results.append("%s%s" % (new_id, val))
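# Illustrative note (added comment; the exact id format is an assumption
# about ConfigWriter.GenerateUniqueID): for exts such as ["_data", "_meta"],
# each returned name is a freshly generated cluster-unique id with the
# extension appended, e.g. "3e8f3201-6d64-4d53-9f7e-0a1b2c3d4e5f_data".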
5844 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5846 """Generate a drbd8 device complete with its children.
5849 port = lu.cfg.AllocatePort()
5850 vgname = lu.cfg.GetVGName()
5851 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5852 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5853 logical_id=(vgname, names[0]))
5854 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5855 logical_id=(vgname, names[1]))
5856 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5857 logical_id=(primary, secondary, port,
5860 children=[dev_data, dev_meta],
5865 def _GenerateDiskTemplate(lu, template_name,
5866 instance_name, primary_node,
5867 secondary_nodes, disk_info,
5868 file_storage_dir, file_driver,
5870 """Generate the entire disk layout for a given template type.
5873 # TODO: compute space requirements
5875 vgname = lu.cfg.GetVGName()
5876 disk_count = len(disk_info)
5878 if template_name == constants.DT_DISKLESS:
5880 elif template_name == constants.DT_PLAIN:
5881 if len(secondary_nodes) != 0:
5882 raise errors.ProgrammerError("Wrong template configuration")
5884 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5885 for i in range(disk_count)])
5886 for idx, disk in enumerate(disk_info):
5887 disk_index = idx + base_index
5888 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5889 logical_id=(vgname, names[idx]),
5890 iv_name="disk/%d" % disk_index,
5892 disks.append(disk_dev)
5893 elif template_name == constants.DT_DRBD8:
5894 if len(secondary_nodes) != 1:
5895 raise errors.ProgrammerError("Wrong template configuration")
5896 remote_node = secondary_nodes[0]
5897 minors = lu.cfg.AllocateDRBDMinor(
5898 [primary_node, remote_node] * len(disk_info), instance_name)
5901 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5902 for i in range(disk_count)]):
5903 names.append(lv_prefix + "_data")
5904 names.append(lv_prefix + "_meta")
5905 for idx, disk in enumerate(disk_info):
5906 disk_index = idx + base_index
5907 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5908 disk["size"], names[idx*2:idx*2+2],
5909 "disk/%d" % disk_index,
5910 minors[idx*2], minors[idx*2+1])
5911 disk_dev.mode = disk["mode"]
5912 disks.append(disk_dev)
5913 elif template_name == constants.DT_FILE:
5914 if len(secondary_nodes) != 0:
5915 raise errors.ProgrammerError("Wrong template configuration")
5917 _RequireFileStorage()
5919 for idx, disk in enumerate(disk_info):
5920 disk_index = idx + base_index
5921 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5922 iv_name="disk/%d" % disk_index,
5923 logical_id=(file_driver,
5924 "%s/disk%d" % (file_storage_dir,
5927 disks.append(disk_dev)
5929 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
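# Illustrative example (added comment, values hypothetical): for
# template_name == constants.DT_DRBD8 and disk_info == [{"size": 10240,
# "mode": "rw"}], the result is a single LD_DRBD8 disk with iv_name "disk/0"
# whose two LD_LV children are the 10240 MB data volume and the 128 MB DRBD
# metadata volume, wired to minors allocated on both the primary and the
# secondary node.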
5933 def _GetInstanceInfoText(instance):
5934 """Compute that text that should be added to the disk's metadata.
5937 return "originstname+%s" % instance.name
5940 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5941 """Create all disks for an instance.
5943 This abstracts away some work from AddInstance.
5945 @type lu: L{LogicalUnit}
5946 @param lu: the logical unit on whose behalf we execute
5947 @type instance: L{objects.Instance}
5948 @param instance: the instance whose disks we should create
5950 @param to_skip: list of indices to skip
5951 @type target_node: string
5952 @param target_node: if passed, overrides the target node for creation
5954 @return: the success of the creation
5957 info = _GetInstanceInfoText(instance)
5958 if target_node is None:
5959 pnode = instance.primary_node
5960 all_nodes = instance.all_nodes
5965 if instance.disk_template == constants.DT_FILE:
5966 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5967 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5969 result.Raise("Failed to create directory '%s' on"
5970 " node %s" % (file_storage_dir, pnode))
5972 # Note: this needs to be kept in sync with adding of disks in
5973 # LUSetInstanceParams
5974 for idx, device in enumerate(instance.disks):
5975 if to_skip and idx in to_skip:
5977 logging.info("Creating volume %s for instance %s",
5978 device.iv_name, instance.name)
5980 for node in all_nodes:
5981 f_create = node == pnode
5982 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5985 def _RemoveDisks(lu, instance, target_node=None):
5986 """Remove all disks for an instance.
5988 This abstracts away some work from `AddInstance()` and
5989 `RemoveInstance()`. Note that in case some of the devices couldn't
5990 be removed, the removal will continue with the other ones (compare
5991 with `_CreateDisks()`).
5993 @type lu: L{LogicalUnit}
5994 @param lu: the logical unit on whose behalf we execute
5995 @type instance: L{objects.Instance}
5996 @param instance: the instance whose disks we should remove
5997 @type target_node: string
5998 @param target_node: used to override the node on which to remove the disks
6000 @return: the success of the removal
6003 logging.info("Removing block devices for instance %s", instance.name)
6006 for device in instance.disks:
6008 edata = [(target_node, device)]
6010 edata = device.ComputeNodeTree(instance.primary_node)
6011 for node, disk in edata:
6012 lu.cfg.SetDiskID(disk, node)
6013 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6015 lu.LogWarning("Could not remove block device %s on node %s,"
6016 " continuing anyway: %s", device.iv_name, node, msg)
6019 if instance.disk_template == constants.DT_FILE:
6020 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6024 tgt = instance.primary_node
6025 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6027 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6028 file_storage_dir, instance.primary_node, result.fail_msg)
6034 def _ComputeDiskSize(disk_template, disks):
6035 """Compute disk size requirements in the volume group
6038 # Required free disk space as a function of disk and swap space
6040 constants.DT_DISKLESS: None,
6041 constants.DT_PLAIN: sum(d["size"] for d in disks),
6042 # 128 MB are added for drbd metadata for each disk
6043 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6044 constants.DT_FILE: None,
6047 if disk_template not in req_size_dict:
6048 raise errors.ProgrammerError("Disk template '%s' size requirement"
6049 " is unknown" % disk_template)
6051 return req_size_dict[disk_template]
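# Illustrative sketch (added comment) of the values computed above for a
# hypothetical two-disk specification:
#
#   _ComputeDiskSize(constants.DT_PLAIN, [{"size": 1024}, {"size": 2048}])
#     -> 3072
#   _ComputeDiskSize(constants.DT_DRBD8, [{"size": 1024}, {"size": 2048}])
#     -> 3328      (128 MB of DRBD metadata added per disk)
#   _ComputeDiskSize(constants.DT_DISKLESS, [])
#     -> None      (no volume group space needed)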
6054 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6055 """Hypervisor parameter validation.
6057 This function abstracts the hypervisor parameter validation to be
6058 used in both instance create and instance modify.
6060 @type lu: L{LogicalUnit}
6061 @param lu: the logical unit for which we check
6062 @type nodenames: list
6063 @param nodenames: the list of nodes on which we should check
6064 @type hvname: string
6065 @param hvname: the name of the hypervisor we should use
6066 @type hvparams: dict
6067 @param hvparams: the parameters which we need to check
6068 @raise errors.OpPrereqError: if the parameters are not valid
6071 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6074 for node in nodenames:
6078 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6081 class LUCreateInstance(LogicalUnit):
6082 """Create an instance.
6085 HPATH = "instance-add"
6086 HTYPE = constants.HTYPE_INSTANCE
6087 _OP_REQP = ["instance_name", "disks",
6089 "wait_for_sync", "ip_check", "nics",
6090 "hvparams", "beparams"]
6093 def CheckArguments(self):
6097 # set optional parameters to None if they don't exist
6098 for attr in ["pnode", "snode", "iallocator", "hypervisor",
6099 "disk_template", "identify_defaults"]:
6100 if not hasattr(self.op, attr):
6101 setattr(self.op, attr, None)
6103 # do not require name_check to ease forward/backward compatibility
6105 if not hasattr(self.op, "name_check"):
6106 self.op.name_check = True
6107 if not hasattr(self.op, "no_install"):
6108 self.op.no_install = False
6109 if self.op.no_install and self.op.start:
6110 self.LogInfo("No-installation mode selected, disabling startup")
6111 self.op.start = False
6112 # validate/normalize the instance name
6113 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6114 if self.op.ip_check and not self.op.name_check:
6115 # TODO: make the ip check more flexible and not depend on the name check
6116 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6119 # check nics' parameter names
6120 for nic in self.op.nics:
6121 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6123 # check disks. parameter names and consistent adopt/no-adopt strategy
6124 has_adopt = has_no_adopt = False
6125 for disk in self.op.disks:
6126 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6131 if has_adopt and has_no_adopt:
6132 raise errors.OpPrereqError("Either all disks are adopted or none is",
6135 if self.op.disk_template != constants.DT_PLAIN:
6136 raise errors.OpPrereqError("Disk adoption is only supported for the"
6137 " 'plain' disk template",
6139 if self.op.iallocator is not None:
6140 raise errors.OpPrereqError("Disk adoption not allowed with an"
6141 " iallocator script", errors.ECODE_INVAL)
6142 if self.op.mode == constants.INSTANCE_IMPORT:
6143 raise errors.OpPrereqError("Disk adoption not allowed for"
6144 " instance import", errors.ECODE_INVAL)
6146 self.adopt_disks = has_adopt
6148 # verify creation mode
6149 if self.op.mode not in (constants.INSTANCE_CREATE,
6150 constants.INSTANCE_IMPORT):
6151 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6152 self.op.mode, errors.ECODE_INVAL)
6154 # instance name verification
6155 if self.op.name_check:
6156 self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6157 self.op.instance_name = self.hostname1.name
6158 # used in CheckPrereq for ip ping check
6159 self.check_ip = self.hostname1.ip
6161 self.check_ip = None
6163 # file storage checks
6164 if (self.op.file_driver and
6165 not self.op.file_driver in constants.FILE_DRIVER):
6166 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6167 self.op.file_driver, errors.ECODE_INVAL)
6169 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6170 raise errors.OpPrereqError("File storage directory path not absolute",
6173 ### Node/iallocator related checks
6174 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6175 raise errors.OpPrereqError("One and only one of iallocator and primary"
6176 " node must be given",
6179 if self.op.mode == constants.INSTANCE_IMPORT:
6180 # On import force_variant must be True, because if we forced it at
6181 # initial install, our only chance when importing it back is that it works again.
6183 self.op.force_variant = True
6185 if self.op.no_install:
6186 self.LogInfo("No-installation mode has no effect during import")
6188 else: # INSTANCE_CREATE
6189 if getattr(self.op, "os_type", None) is None:
6190 raise errors.OpPrereqError("No guest OS specified",
6192 self.op.force_variant = getattr(self.op, "force_variant", False)
6193 if self.op.disk_template is None:
6194 raise errors.OpPrereqError("No disk template specified",
6197 def ExpandNames(self):
6198 """ExpandNames for CreateInstance.
6200 Figure out the right locks for instance creation.
6203 self.needed_locks = {}
6205 instance_name = self.op.instance_name
6206 # this is just a preventive check, but someone might still add this
6207 # instance in the meantime, and creation will fail at lock-add time
6208 if instance_name in self.cfg.GetInstanceList():
6209 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6210 instance_name, errors.ECODE_EXISTS)
6212 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6214 if self.op.iallocator:
6215 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6217 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6218 nodelist = [self.op.pnode]
6219 if self.op.snode is not None:
6220 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6221 nodelist.append(self.op.snode)
6222 self.needed_locks[locking.LEVEL_NODE] = nodelist
6224 # in case of import lock the source node too
6225 if self.op.mode == constants.INSTANCE_IMPORT:
6226 src_node = getattr(self.op, "src_node", None)
6227 src_path = getattr(self.op, "src_path", None)
6229 if src_path is None:
6230 self.op.src_path = src_path = self.op.instance_name
6232 if src_node is None:
6233 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6234 self.op.src_node = None
6235 if os.path.isabs(src_path):
6236 raise errors.OpPrereqError("Importing an instance from an absolute"
6237 " path requires a source node option.",
6240 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6241 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6242 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6243 if not os.path.isabs(src_path):
6244 self.op.src_path = src_path = \
6245 utils.PathJoin(constants.EXPORT_DIR, src_path)
6247 def _RunAllocator(self):
6248 """Run the allocator based on input opcode.
6251 nics = [n.ToDict() for n in self.nics]
6252 ial = IAllocator(self.cfg, self.rpc,
6253 mode=constants.IALLOCATOR_MODE_ALLOC,
6254 name=self.op.instance_name,
6255 disk_template=self.op.disk_template,
6258 vcpus=self.be_full[constants.BE_VCPUS],
6259 mem_size=self.be_full[constants.BE_MEMORY],
6262 hypervisor=self.op.hypervisor,
6265 ial.Run(self.op.iallocator)
6268 raise errors.OpPrereqError("Can't compute nodes using"
6269 " iallocator '%s': %s" %
6270 (self.op.iallocator, ial.info),
6272 if len(ial.result) != ial.required_nodes:
6273 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6274 " of nodes (%s), required %s" %
6275 (self.op.iallocator, len(ial.result),
6276 ial.required_nodes), errors.ECODE_FAULT)
6277 self.op.pnode = ial.result[0]
6278 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6279 self.op.instance_name, self.op.iallocator,
6280 utils.CommaJoin(ial.result))
6281 if ial.required_nodes == 2:
6282 self.op.snode = ial.result[1]
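# Illustrative note (added comment, node names hypothetical): ial.result is a
# list of node names, e.g. ["node2.example.com"] for single-node disk
# templates or ["node2.example.com", "node4.example.com"] when a secondary is
# required; the first entry is used as the primary node and the optional
# second entry as the secondary.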
6284 def BuildHooksEnv(self):
6287 This runs on master, primary and secondary nodes of the instance.
6291 "ADD_MODE": self.op.mode,
6293 if self.op.mode == constants.INSTANCE_IMPORT:
6294 env["SRC_NODE"] = self.op.src_node
6295 env["SRC_PATH"] = self.op.src_path
6296 env["SRC_IMAGES"] = self.src_images
6298 env.update(_BuildInstanceHookEnv(
6299 name=self.op.instance_name,
6300 primary_node=self.op.pnode,
6301 secondary_nodes=self.secondaries,
6302 status=self.op.start,
6303 os_type=self.op.os_type,
6304 memory=self.be_full[constants.BE_MEMORY],
6305 vcpus=self.be_full[constants.BE_VCPUS],
6306 nics=_NICListToTuple(self, self.nics),
6307 disk_template=self.op.disk_template,
6308 disks=[(d["size"], d["mode"]) for d in self.disks],
6311 hypervisor_name=self.op.hypervisor,
6314 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6318 def _ReadExportInfo(self):
6319 """Reads the export information from disk.
6321 It will override the opcode source node and path with the actual
6322 information, if these two were not specified before.
6324 @return: the export information
6327 assert self.op.mode == constants.INSTANCE_IMPORT
6329 src_node = self.op.src_node
6330 src_path = self.op.src_path
6332 if src_node is None:
6333 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6334 exp_list = self.rpc.call_export_list(locked_nodes)
6336 for node in exp_list:
6337 if exp_list[node].fail_msg:
6339 if src_path in exp_list[node].payload:
6341 self.op.src_node = src_node = node
6342 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6346 raise errors.OpPrereqError("No export found for relative path %s" %
6347 src_path, errors.ECODE_INVAL)
6349 _CheckNodeOnline(self, src_node)
6350 result = self.rpc.call_export_info(src_node, src_path)
6351 result.Raise("No export or invalid export found in dir %s" % src_path)
6353 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6354 if not export_info.has_section(constants.INISECT_EXP):
6355 raise errors.ProgrammerError("Corrupted export config",
6356 errors.ECODE_ENVIRON)
6358 ei_version = export_info.get(constants.INISECT_EXP, "version")
6359 if (int(ei_version) != constants.EXPORT_VERSION):
6360 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6361 (ei_version, constants.EXPORT_VERSION),
6362 errors.ECODE_ENVIRON)
6365 def _ReadExportParams(self, einfo):
6366 """Use export parameters as defaults.
6368 In case the opcode doesn't specify (as in override) some instance
6369 parameters, then try to use them from the export information, if
6373 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6375 if self.op.disk_template is None:
6376 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6377 self.op.disk_template = einfo.get(constants.INISECT_INS,
6380 raise errors.OpPrereqError("No disk template specified and the export"
6381 " is missing the disk_template information",
6384 if not self.op.disks:
6385 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6387 # TODO: import the disk iv_name too
6388 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6389 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6390 disks.append({"size": disk_sz})
6391 self.op.disks = disks
6393 raise errors.OpPrereqError("No disk info specified and the export"
6394 " is missing the disk information",
6397 if (not self.op.nics and
6398 einfo.has_option(constants.INISECT_INS, "nic_count")):
6400 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6402 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6403 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6408 if (self.op.hypervisor is None and
6409 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6410 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6411 if einfo.has_section(constants.INISECT_HYP):
6412 # use the export parameters but do not override the ones
6413 # specified by the user
6414 for name, value in einfo.items(constants.INISECT_HYP):
6415 if name not in self.op.hvparams:
6416 self.op.hvparams[name] = value
6418 if einfo.has_section(constants.INISECT_BEP):
6419 # use the parameters, without overriding
6420 for name, value in einfo.items(constants.INISECT_BEP):
6421 if name not in self.op.beparams:
6422 self.op.beparams[name] = value
6424 # try to read the parameters old style, from the main section
6425 for name in constants.BES_PARAMETERS:
6426 if (name not in self.op.beparams and
6427 einfo.has_option(constants.INISECT_INS, name)):
6428 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6430 def _RevertToDefaults(self, cluster):
6431 """Revert the instance parameters to the default values.
6435 hv_defs = cluster.GetHVDefaults(self.op.hypervisor, self.op.os_type)
6436 for name in self.op.hvparams.keys():
6437 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6438 del self.op.hvparams[name]
6440 be_defs = cluster.beparams.get(constants.PP_DEFAULT, {})
6441 for name in self.op.beparams.keys():
6442 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6443 del self.op.beparams[name]
6445 nic_defs = cluster.nicparams.get(constants.PP_DEFAULT, {})
6446 for nic in self.op.nics:
6447 for name in constants.NICS_PARAMETERS:
6448 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6451 def CheckPrereq(self):
6452 """Check prerequisites.
6455 if self.op.mode == constants.INSTANCE_IMPORT:
6456 export_info = self._ReadExportInfo()
6457 self._ReadExportParams(export_info)
6459 _CheckDiskTemplate(self.op.disk_template)
6461 if (not self.cfg.GetVGName() and
6462 self.op.disk_template not in constants.DTS_NOT_LVM):
6463 raise errors.OpPrereqError("Cluster does not support lvm-based"
6464 " instances", errors.ECODE_STATE)
6466 if self.op.hypervisor is None:
6467 self.op.hypervisor = self.cfg.GetHypervisorType()
6469 cluster = self.cfg.GetClusterInfo()
6470 enabled_hvs = cluster.enabled_hypervisors
6471 if self.op.hypervisor not in enabled_hvs:
6472 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6473 " cluster (%s)" % (self.op.hypervisor,
6474 ",".join(enabled_hvs)),
6477 # check hypervisor parameter syntax (locally)
6478 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6479 filled_hvp = objects.FillDict(cluster.GetHVDefaults(self.op.hypervisor,
6482 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6483 hv_type.CheckParameterSyntax(filled_hvp)
6484 self.hv_full = filled_hvp
6485 # check that we don't specify global parameters on an instance
6486 _CheckGlobalHvParams(self.op.hvparams)
6488 # fill and remember the beparams dict
6489 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6490 self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
6493 # now that hvp/bep are in final format, let's reset to defaults, if told to do so
6495 if self.op.identify_defaults:
6496 self._RevertToDefaults(cluster)
6500 for idx, nic in enumerate(self.op.nics):
6501 nic_mode_req = nic.get("mode", None)
6502 nic_mode = nic_mode_req
6503 if nic_mode is None:
6504 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6506 # in routed mode, for the first nic, the default ip is 'auto'
6507 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6508 default_ip_mode = constants.VALUE_AUTO
6510 default_ip_mode = constants.VALUE_NONE
6512 # ip validity checks
6513 ip = nic.get("ip", default_ip_mode)
6514 if ip is None or ip.lower() == constants.VALUE_NONE:
6516 elif ip.lower() == constants.VALUE_AUTO:
6517 if not self.op.name_check:
6518 raise errors.OpPrereqError("IP address set to auto but name checks"
6519 " have been skipped. Aborting.",
6521 nic_ip = self.hostname1.ip
6523 if not utils.IsValidIP(ip):
6524 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6525 " like a valid IP" % ip,
6529 # TODO: check the ip address for uniqueness
6530 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6531 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6534 # MAC address verification
6535 mac = nic.get("mac", constants.VALUE_AUTO)
6536 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6537 mac = utils.NormalizeAndValidateMac(mac)
6540 self.cfg.ReserveMAC(mac, self.proc.GetECId())
6541 except errors.ReservationError:
6542 raise errors.OpPrereqError("MAC address %s already in use"
6543 " in cluster" % mac,
6544 errors.ECODE_NOTUNIQUE)
6546 # bridge verification
6547 bridge = nic.get("bridge", None)
6548 link = nic.get("link", None)
6550 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6551 " at the same time", errors.ECODE_INVAL)
6552 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6553 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6560 nicparams[constants.NIC_MODE] = nic_mode_req
6562 nicparams[constants.NIC_LINK] = link
6564 check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
6566 objects.NIC.CheckParameterSyntax(check_params)
6567 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6569 # disk checks/pre-build
6571 for disk in self.op.disks:
6572 mode = disk.get("mode", constants.DISK_RDWR)
6573 if mode not in constants.DISK_ACCESS_SET:
6574 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6575 mode, errors.ECODE_INVAL)
6576 size = disk.get("size", None)
6578 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6581 except (TypeError, ValueError):
6582 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6584 new_disk = {"size": size, "mode": mode}
6586 new_disk["adopt"] = disk["adopt"]
6587 self.disks.append(new_disk)
6589 if self.op.mode == constants.INSTANCE_IMPORT:
6591 # Check that the new instance doesn't have less disks than the export
6592 instance_disks = len(self.disks)
6593 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6594 if instance_disks < export_disks:
6595 raise errors.OpPrereqError("Not enough disks to import."
6596 " (instance: %d, export: %d)" %
6597 (instance_disks, export_disks),
6601 for idx in range(export_disks):
6602 option = 'disk%d_dump' % idx
6603 if export_info.has_option(constants.INISECT_INS, option):
6604 # FIXME: are the old os-es, disk sizes, etc. useful?
6605 export_name = export_info.get(constants.INISECT_INS, option)
6606 image = utils.PathJoin(self.op.src_path, export_name)
6607 disk_images.append(image)
6609 disk_images.append(False)
6611 self.src_images = disk_images
6613 old_name = export_info.get(constants.INISECT_INS, 'name')
6615 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6616 except (TypeError, ValueError), err:
6617 raise errors.OpPrereqError("Invalid export file, nic_count is not"
6618 " an integer: %s" % str(err),
6620 if self.op.instance_name == old_name:
6621 for idx, nic in enumerate(self.nics):
6622 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6623 nic_mac_ini = 'nic%d_mac' % idx
6624 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6626 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6628 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6629 if self.op.ip_check:
6630 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6631 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6632 (self.check_ip, self.op.instance_name),
6633 errors.ECODE_NOTUNIQUE)
6635 #### mac address generation
6636 # By generating here the mac address both the allocator and the hooks get
6637 # the real final mac address rather than the 'auto' or 'generate' value.
6638 # There is a race condition between the generation and the instance object
6639 # creation, which means that we know the mac is valid now, but we're not
6640 # sure it will be when we actually add the instance. If things go bad
6641 # adding the instance will abort because of a duplicate mac, and the
6642 # creation job will fail.
6643 for nic in self.nics:
6644 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6645 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6649 if self.op.iallocator is not None:
6650 self._RunAllocator()
6652 #### node related checks
6654 # check primary node
6655 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6656 assert self.pnode is not None, \
6657 "Cannot retrieve locked node %s" % self.op.pnode
6659 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6660 pnode.name, errors.ECODE_STATE)
6662 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6663 pnode.name, errors.ECODE_STATE)
6665 self.secondaries = []
6667 # mirror node verification
6668 if self.op.disk_template in constants.DTS_NET_MIRROR:
6669 if self.op.snode is None:
6670 raise errors.OpPrereqError("The networked disk templates need"
6671 " a mirror node", errors.ECODE_INVAL)
6672 if self.op.snode == pnode.name:
6673 raise errors.OpPrereqError("The secondary node cannot be the"
6674 " primary node.", errors.ECODE_INVAL)
6675 _CheckNodeOnline(self, self.op.snode)
6676 _CheckNodeNotDrained(self, self.op.snode)
6677 self.secondaries.append(self.op.snode)
6679 nodenames = [pnode.name] + self.secondaries
6681 req_size = _ComputeDiskSize(self.op.disk_template,
6684 # Check lv size requirements, if not adopting
6685 if req_size is not None and not self.adopt_disks:
6686 _CheckNodesFreeDisk(self, nodenames, req_size)
6688 if self.adopt_disks: # instead, we must check the adoption data
6689 all_lvs = set([i["adopt"] for i in self.disks])
6690 if len(all_lvs) != len(self.disks):
6691 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6693 for lv_name in all_lvs:
6695 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6696 except errors.ReservationError:
6697 raise errors.OpPrereqError("LV named %s used by another instance" %
6698 lv_name, errors.ECODE_NOTUNIQUE)
6700 node_lvs = self.rpc.call_lv_list([pnode.name],
6701 self.cfg.GetVGName())[pnode.name]
6702 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6703 node_lvs = node_lvs.payload
6704 delta = all_lvs.difference(node_lvs.keys())
6706 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6707 utils.CommaJoin(delta),
6709 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6711 raise errors.OpPrereqError("Online logical volumes found, cannot"
6712 " adopt: %s" % utils.CommaJoin(online_lvs),
6714 # update the size of disk based on what is found
6715 for dsk in self.disks:
6716 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6718 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6720 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6722 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6724 # memory check on primary node
6726 _CheckNodeFreeMemory(self, self.pnode.name,
6727 "creating instance %s" % self.op.instance_name,
6728 self.be_full[constants.BE_MEMORY],
6731 self.dry_run_result = list(nodenames)
6733 def Exec(self, feedback_fn):
6734 """Create and add the instance to the cluster.
6737 instance = self.op.instance_name
6738 pnode_name = self.pnode.name
6740 ht_kind = self.op.hypervisor
6741 if ht_kind in constants.HTS_REQ_PORT:
6742 network_port = self.cfg.AllocatePort()
6746 if constants.ENABLE_FILE_STORAGE:
6747 # this is needed because os.path.join does not accept None arguments
6748 if self.op.file_storage_dir is None:
6749 string_file_storage_dir = ""
6751 string_file_storage_dir = self.op.file_storage_dir
6753 # build the full file storage dir path
6754 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6755 string_file_storage_dir, instance)
6757 file_storage_dir = ""
6760 disks = _GenerateDiskTemplate(self,
6761 self.op.disk_template,
6762 instance, pnode_name,
6766 self.op.file_driver,
6769 iobj = objects.Instance(name=instance, os=self.op.os_type,
6770 primary_node=pnode_name,
6771 nics=self.nics, disks=disks,
6772 disk_template=self.op.disk_template,
6774 network_port=network_port,
6775 beparams=self.op.beparams,
6776 hvparams=self.op.hvparams,
6777 hypervisor=self.op.hypervisor,
6780 if self.adopt_disks:
6781 # rename LVs to the newly-generated names; we need to construct
6782 # 'fake' LV disks with the old data, plus the new unique_id
6783 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6785 for t_dsk, a_dsk in zip (tmp_disks, self.disks):
6786 rename_to.append(t_dsk.logical_id)
6787 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6788 self.cfg.SetDiskID(t_dsk, pnode_name)
6789 result = self.rpc.call_blockdev_rename(pnode_name,
6790 zip(tmp_disks, rename_to))
6791 result.Raise("Failed to rename adoped LVs")
6793 feedback_fn("* creating instance disks...")
6795 _CreateDisks(self, iobj)
6796 except errors.OpExecError:
6797 self.LogWarning("Device creation failed, reverting...")
6799 _RemoveDisks(self, iobj)
6801 self.cfg.ReleaseDRBDMinors(instance)
6804 feedback_fn("adding instance %s to cluster config" % instance)
6806 self.cfg.AddInstance(iobj, self.proc.GetECId())
6808 # Declare that we don't want to remove the instance lock anymore, as we've
6809 # added the instance to the config
6810 del self.remove_locks[locking.LEVEL_INSTANCE]
6811 # Unlock all the nodes
6812 if self.op.mode == constants.INSTANCE_IMPORT:
6813 nodes_keep = [self.op.src_node]
6814 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6815 if node != self.op.src_node]
6816 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6817 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6819 self.context.glm.release(locking.LEVEL_NODE)
6820 del self.acquired_locks[locking.LEVEL_NODE]
6822 if self.op.wait_for_sync:
6823 disk_abort = not _WaitForSync(self, iobj)
6824 elif iobj.disk_template in constants.DTS_NET_MIRROR:
6825 # make sure the disks are not degraded (still sync-ing is ok)
6827 feedback_fn("* checking mirrors status")
6828 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6833 _RemoveDisks(self, iobj)
6834 self.cfg.RemoveInstance(iobj.name)
6835 # Make sure the instance lock gets removed
6836 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6837 raise errors.OpExecError("There are some degraded disks for"
6840 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6841 if self.op.mode == constants.INSTANCE_CREATE:
6842 if not self.op.no_install:
6843 feedback_fn("* running the instance OS create scripts...")
6844 # FIXME: pass debug option from opcode to backend
6845 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6846 self.op.debug_level)
6847 result.Raise("Could not add os for instance %s"
6848 " on node %s" % (instance, pnode_name))
6850 elif self.op.mode == constants.INSTANCE_IMPORT:
6851 feedback_fn("* running the instance OS import scripts...")
6852 src_node = self.op.src_node
6853 src_images = self.src_images
6854 cluster_name = self.cfg.GetClusterName()
6855 # FIXME: pass debug option from opcode to backend
6856 import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
6857 src_node, src_images,
6859 self.op.debug_level)
6860 msg = import_result.fail_msg
6862 self.LogWarning("Error while importing the disk images for instance"
6863 " %s on node %s: %s" % (instance, pnode_name, msg))
6865 # also checked in the prereq part
6866 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
6870 iobj.admin_up = True
6871 self.cfg.Update(iobj, feedback_fn)
6872 logging.info("Starting instance %s on node %s", instance, pnode_name)
6873 feedback_fn("* starting instance...")
6874 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
6875 result.Raise("Could not start instance")
6877 return list(iobj.all_nodes)
6880 class LUConnectConsole(NoHooksLU):
6881 """Connect to an instance's console.
6883 This is somewhat special in that it returns the command line that
6884 you need to run on the master node in order to connect to the console.
6888 _OP_REQP = ["instance_name"]
6891 def ExpandNames(self):
6892 self._ExpandAndLockInstance()
6894 def CheckPrereq(self):
6895 """Check prerequisites.
6897 This checks that the instance is in the cluster.
6900 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6901 assert self.instance is not None, \
6902 "Cannot retrieve locked instance %s" % self.op.instance_name
6903 _CheckNodeOnline(self, self.instance.primary_node)
6905 def Exec(self, feedback_fn):
6906 """Connect to the console of an instance
6909 instance = self.instance
6910 node = instance.primary_node
6912 node_insts = self.rpc.call_instance_list([node],
6913 [instance.hypervisor])[node]
6914 node_insts.Raise("Can't get node information from %s" % node)
6916 if instance.name not in node_insts.payload:
6917 raise errors.OpExecError("Instance %s is not running." % instance.name)
6919 logging.debug("Connecting to console of %s on %s", instance.name, node)
6921 hyper = hypervisor.GetHypervisor(instance.hypervisor)
6922 cluster = self.cfg.GetClusterInfo()
6923 # beparams and hvparams are passed separately, to avoid editing the
6924 # instance and then saving the defaults in the instance itself.
6925 hvparams = cluster.FillHV(instance)
6926 beparams = cluster.FillBE(instance)
6927 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
6930 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
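# Illustrative note (added comment): the opcode result is the command line
# the client runs on the master node, typically an ssh invocation (built by
# SshRunner.BuildCmd) to the instance's primary node that executes the
# hypervisor-specific console command returned by GetShellCommandForConsole;
# the exact command depends on the hypervisor and is not fixed here.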
6933 class LUReplaceDisks(LogicalUnit):
6934 """Replace the disks of an instance.
6937 HPATH = "mirrors-replace"
6938 HTYPE = constants.HTYPE_INSTANCE
6939 _OP_REQP = ["instance_name", "mode", "disks"]
6942 def CheckArguments(self):
6943 if not hasattr(self.op, "remote_node"):
6944 self.op.remote_node = None
6945 if not hasattr(self.op, "iallocator"):
6946 self.op.iallocator = None
6947 if not hasattr(self.op, "early_release"):
6948 self.op.early_release = False
6950 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
6953 def ExpandNames(self):
6954 self._ExpandAndLockInstance()
6956 if self.op.iallocator is not None:
6957 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6959 elif self.op.remote_node is not None:
6960 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
6961 self.op.remote_node = remote_node
6963 # Warning: do not remove the locking of the new secondary here
6964 # unless DRBD8.AddChildren is changed to work in parallel;
6965 # currently it doesn't since parallel invocations of
6966 # FindUnusedMinor will conflict
6967 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6968 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6971 self.needed_locks[locking.LEVEL_NODE] = []
6972 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6974 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6975 self.op.iallocator, self.op.remote_node,
6976 self.op.disks, False, self.op.early_release)
6978 self.tasklets = [self.replacer]
6980 def DeclareLocks(self, level):
6981 # If we're not already locking all nodes in the set we have to declare the
6982 # instance's primary/secondary nodes.
6983 if (level == locking.LEVEL_NODE and
6984 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6985 self._LockInstancesNodes()
6987 def BuildHooksEnv(self):
6990 This runs on the master, the primary and all the secondaries.
6993 instance = self.replacer.instance
6995 "MODE": self.op.mode,
6996 "NEW_SECONDARY": self.op.remote_node,
6997 "OLD_SECONDARY": instance.secondary_nodes[0],
6999 env.update(_BuildInstanceHookEnvByObject(self, instance))
7001 self.cfg.GetMasterNode(),
7002 instance.primary_node,
7004 if self.op.remote_node is not None:
7005 nl.append(self.op.remote_node)
7009 class LUEvacuateNode(LogicalUnit):
7010 """Relocate the secondary instances from a node.
7013 HPATH = "node-evacuate"
7014 HTYPE = constants.HTYPE_NODE
7015 _OP_REQP = ["node_name"]
7018 def CheckArguments(self):
7019 if not hasattr(self.op, "remote_node"):
7020 self.op.remote_node = None
7021 if not hasattr(self.op, "iallocator"):
7022 self.op.iallocator = None
7023 if not hasattr(self.op, "early_release"):
7024 self.op.early_release = False
7026 TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
7027 self.op.remote_node,
7030 def ExpandNames(self):
7031 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7033 self.needed_locks = {}
7035 # Declare node locks
7036 if self.op.iallocator is not None:
7037 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7039 elif self.op.remote_node is not None:
7040 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7042 # Warning: do not remove the locking of the new secondary here
7043 # unless DRBD8.AddChildren is changed to work in parallel;
7044 # currently it doesn't since parallel invocations of
7045 # FindUnusedMinor will conflict
7046 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
7047 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7050 raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
7052 # Create tasklets for replacing disks for all secondary instances on this node.
7057 for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
7058 logging.debug("Replacing disks for instance %s", inst.name)
7059 names.append(inst.name)
7061 replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
7062 self.op.iallocator, self.op.remote_node, [],
7063 True, self.op.early_release)
7064 tasklets.append(replacer)
7066 self.tasklets = tasklets
7067 self.instance_names = names
7069 # Declare instance locks
7070 self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
7072 def DeclareLocks(self, level):
7073 # If we're not already locking all nodes in the set we have to declare the
7074 # instance's primary/secondary nodes.
7075 if (level == locking.LEVEL_NODE and
7076 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7077 self._LockInstancesNodes()
7079 def BuildHooksEnv(self):
7082 This runs on the master, the primary and all the secondaries.
7086 "NODE_NAME": self.op.node_name,
7089 nl = [self.cfg.GetMasterNode()]
7091 if self.op.remote_node is not None:
7092 env["NEW_SECONDARY"] = self.op.remote_node
7093 nl.append(self.op.remote_node)
7095 return (env, nl, nl)
7098 class TLReplaceDisks(Tasklet):
7099 """Replaces disks for an instance.
7101 Note: Locking is not within the scope of this class.
7104 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7105 disks, delay_iallocator, early_release):
7106 """Initializes this class.
7109 Tasklet.__init__(self, lu)
7112 self.instance_name = instance_name
7114 self.iallocator_name = iallocator_name
7115 self.remote_node = remote_node
7117 self.delay_iallocator = delay_iallocator
7118 self.early_release = early_release
7121 self.instance = None
7122 self.new_node = None
7123 self.target_node = None
7124 self.other_node = None
7125 self.remote_node_info = None
7126 self.node_secondary_ip = None
7129 def CheckArguments(mode, remote_node, iallocator):
7130 """Helper function for users of this class.
7133 # check for valid parameter combination
7134 if mode == constants.REPLACE_DISK_CHG:
7135 if remote_node is None and iallocator is None:
7136 raise errors.OpPrereqError("When changing the secondary either an"
7137 " iallocator script must be used or the"
7138 " new node given", errors.ECODE_INVAL)
7140 if remote_node is not None and iallocator is not None:
7141 raise errors.OpPrereqError("Give either the iallocator or the new"
7142 " secondary, not both", errors.ECODE_INVAL)
7144 elif remote_node is not None or iallocator is not None:
7145 # Not replacing the secondary
7146 raise errors.OpPrereqError("The iallocator and new node options can"
7147 " only be used when changing the"
7148 " secondary node", errors.ECODE_INVAL)
7151 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7152 """Compute a new secondary node using an IAllocator.
7155 ial = IAllocator(lu.cfg, lu.rpc,
7156 mode=constants.IALLOCATOR_MODE_RELOC,
7158 relocate_from=relocate_from)
7160 ial.Run(iallocator_name)
7163 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7164 " %s" % (iallocator_name, ial.info),
7167 if len(ial.result) != ial.required_nodes:
7168 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7169 " of nodes (%s), required %s" %
7171 len(ial.result), ial.required_nodes),
7174 remote_node_name = ial.result[0]
7176 lu.LogInfo("Selected new secondary for instance '%s': %s",
7177 instance_name, remote_node_name)
7179 return remote_node_name
7181 def _FindFaultyDisks(self, node_name):
7182 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7185 def CheckPrereq(self):
7186 """Check prerequisites.
7188 This checks that the instance is in the cluster.
7191 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7192 assert instance is not None, \
7193 "Cannot retrieve locked instance %s" % self.instance_name
7195 if instance.disk_template != constants.DT_DRBD8:
7196 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7197 " instances", errors.ECODE_INVAL)
7199 if len(instance.secondary_nodes) != 1:
7200 raise errors.OpPrereqError("The instance has a strange layout,"
7201 " expected one secondary but found %d" %
7202 len(instance.secondary_nodes),
7205 if not self.delay_iallocator:
7206 self._CheckPrereq2()
7208 def _CheckPrereq2(self):
7209 """Check prerequisites, second part.
7211 This function should always be part of CheckPrereq. It was separated and is
7212 now called from Exec because during node evacuation iallocator was only
7213 called with an unmodified cluster model, not taking planned changes into account.
7217 instance = self.instance
7218 secondary_node = instance.secondary_nodes[0]
7220 if self.iallocator_name is None:
7221 remote_node = self.remote_node
7223 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7224 instance.name, instance.secondary_nodes)
7226 if remote_node is not None:
7227 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7228 assert self.remote_node_info is not None, \
7229 "Cannot retrieve locked node %s" % remote_node
7231 self.remote_node_info = None
7233 if remote_node == self.instance.primary_node:
7234 raise errors.OpPrereqError("The specified node is the primary node of"
7235 " the instance.", errors.ECODE_INVAL)
7237 if remote_node == secondary_node:
7238 raise errors.OpPrereqError("The specified node is already the"
7239 " secondary node of the instance.",
7242 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7243 constants.REPLACE_DISK_CHG):
7244 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7247 if self.mode == constants.REPLACE_DISK_AUTO:
7248 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7249 faulty_secondary = self._FindFaultyDisks(secondary_node)
7251 if faulty_primary and faulty_secondary:
7252 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7253 " one node and can not be repaired"
7254 " automatically" % self.instance_name,
7258 self.disks = faulty_primary
7259 self.target_node = instance.primary_node
7260 self.other_node = secondary_node
7261 check_nodes = [self.target_node, self.other_node]
7262 elif faulty_secondary:
7263 self.disks = faulty_secondary
7264 self.target_node = secondary_node
7265 self.other_node = instance.primary_node
7266 check_nodes = [self.target_node, self.other_node]
7272 # Non-automatic modes
7273 if self.mode == constants.REPLACE_DISK_PRI:
7274 self.target_node = instance.primary_node
7275 self.other_node = secondary_node
7276 check_nodes = [self.target_node, self.other_node]
7278 elif self.mode == constants.REPLACE_DISK_SEC:
7279 self.target_node = secondary_node
7280 self.other_node = instance.primary_node
7281 check_nodes = [self.target_node, self.other_node]
7283 elif self.mode == constants.REPLACE_DISK_CHG:
7284 self.new_node = remote_node
7285 self.other_node = instance.primary_node
7286 self.target_node = secondary_node
7287 check_nodes = [self.new_node, self.other_node]
7289 _CheckNodeNotDrained(self.lu, remote_node)
7291 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7292 assert old_node_info is not None
7293 if old_node_info.offline and not self.early_release:
7294 # doesn't make sense to delay the release
7295 self.early_release = True
7296 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7297 " early-release mode", secondary_node)
7300 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7303 # If not specified all disks should be replaced
7305 self.disks = range(len(self.instance.disks))
7307 for node in check_nodes:
7308 _CheckNodeOnline(self.lu, node)
7310 # Check whether disks are valid
7311 for disk_idx in self.disks:
7312 instance.FindDisk(disk_idx)
7314 # Get secondary node IP addresses
7317 for node_name in [self.target_node, self.other_node, self.new_node]:
7318 if node_name is not None:
7319 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7321 self.node_secondary_ip = node_2nd_ip
7323 def Exec(self, feedback_fn):
7324 """Execute disk replacement.
7326 This dispatches the disk replacement to the appropriate handler.
7329 if self.delay_iallocator:
7330 self._CheckPrereq2()
7333 feedback_fn("No disks need replacement")
7336 feedback_fn("Replacing disk(s) %s for %s" %
7337 (utils.CommaJoin(self.disks), self.instance.name))
7339 activate_disks = (not self.instance.admin_up)
7341 # Activate the instance disks if we're replacing them on a down instance
7343 _StartInstanceDisks(self.lu, self.instance, True)
7346 # Should we replace the secondary node?
7347 if self.new_node is not None:
7348 fn = self._ExecDrbd8Secondary
7350 fn = self._ExecDrbd8DiskOnly
7352 return fn(feedback_fn)
7355 # Deactivate the instance disks if we're replacing them on a down instance.
7358 _SafeShutdownInstanceDisks(self.lu, self.instance)
7360 def _CheckVolumeGroup(self, nodes):
7361 self.lu.LogInfo("Checking volume groups")
7363 vgname = self.cfg.GetVGName()
7365 # Make sure volume group exists on all involved nodes
7366 results = self.rpc.call_vg_list(nodes)
7368 raise errors.OpExecError("Can't list volume groups on the nodes")
7372 res.Raise("Error checking node %s" % node)
7373 if vgname not in res.payload:
7374 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7377 def _CheckDisksExistence(self, nodes):
7378 # Check disk existence
7379 for idx, dev in enumerate(self.instance.disks):
7380 if idx not in self.disks:
7384 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7385 self.cfg.SetDiskID(dev, node)
7387 result = self.rpc.call_blockdev_find(node, dev)
7389 msg = result.fail_msg
7390 if msg or not result.payload:
7392 msg = "disk not found"
7393 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7396 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7397 for idx, dev in enumerate(self.instance.disks):
7398 if idx not in self.disks:
7401 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7404 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7406 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7407 " replace disks for instance %s" %
7408 (node_name, self.instance.name))
7410 def _CreateNewStorage(self, node_name):
7411 vgname = self.cfg.GetVGName()
7414 for idx, dev in enumerate(self.instance.disks):
7415 if idx not in self.disks:
7418 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7420 self.cfg.SetDiskID(dev, node_name)
7422 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7423 names = _GenerateUniqueNames(self.lu, lv_names)
7425 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7426 logical_id=(vgname, names[0]))
7427 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7428 logical_id=(vgname, names[1]))
7430 new_lvs = [lv_data, lv_meta]
7431 old_lvs = dev.children
7432 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7434 # we pass force_create=True to force the LVM creation
7435 for new_lv in new_lvs:
7436 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7437 _GetInstanceInfoText(self.instance), False)
7441 def _CheckDevices(self, node_name, iv_names):
7442 for name, (dev, _, _) in iv_names.iteritems():
7443 self.cfg.SetDiskID(dev, node_name)
7445 result = self.rpc.call_blockdev_find(node_name, dev)
7447 msg = result.fail_msg
7448 if msg or not result.payload:
7450 msg = "disk not found"
7451 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7454 if result.payload.is_degraded:
7455 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7457 def _RemoveOldStorage(self, node_name, iv_names):
7458 for name, (_, old_lvs, _) in iv_names.iteritems():
7459 self.lu.LogInfo("Remove logical volumes for %s" % name)
7462 self.cfg.SetDiskID(lv, node_name)
7464 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7466 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7467 hint="remove unused LVs manually")
7469 def _ReleaseNodeLock(self, node_name):
7470 """Releases the lock for a given node."""
7471 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7473 def _ExecDrbd8DiskOnly(self, feedback_fn):
7474 """Replace a disk on the primary or secondary for DRBD 8.
7476 The algorithm for replace is quite complicated:
7478 1. for each disk to be replaced:
7480 1. create new LVs on the target node with unique names
7481 1. detach old LVs from the drbd device
7482 1. rename old LVs to name_replaced.<time_t>
7483 1. rename new LVs to old LVs
7484 1. attach the new LVs (with the old names now) to the drbd device
7486 1. wait for sync across all devices
7488 1. for each modified disk:
7490 1. remove old LVs (which have the name name_replaced.<time_t>)
7492 Failures are not very well handled.
7497 # Step: check device activation
7498 self.lu.LogStep(1, steps_total, "Check device existence")
7499 self._CheckDisksExistence([self.other_node, self.target_node])
7500 self._CheckVolumeGroup([self.target_node, self.other_node])
7502 # Step: check other node consistency
7503 self.lu.LogStep(2, steps_total, "Check peer consistency")
7504 self._CheckDisksConsistency(self.other_node,
7505 self.other_node == self.instance.primary_node,
7508 # Step: create new storage
7509 self.lu.LogStep(3, steps_total, "Allocate new storage")
7510 iv_names = self._CreateNewStorage(self.target_node)
7512 # Step: for each lv, detach+rename*2+attach
7513 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7514 for dev, old_lvs, new_lvs in iv_names.itervalues():
7515 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7517 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7519 result.Raise("Can't detach drbd from local storage on node"
7520 " %s for device %s" % (self.target_node, dev.iv_name))
7522 #cfg.Update(instance)
7524 # ok, we created the new LVs, so now we know we have the needed
7525 # storage; as such, we proceed on the target node to rename
7526 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7527 # using the assumption that logical_id == physical_id (which in
7528 # turn is the unique_id on that node)
7530 # FIXME(iustin): use a better name for the replaced LVs
7531 temp_suffix = int(time.time())
7532 ren_fn = lambda d, suff: (d.physical_id[0],
7533 d.physical_id[1] + "_replaced-%s" % suff)
7535 # Build the rename list based on what LVs exist on the node
7536 rename_old_to_new = []
7537 for to_ren in old_lvs:
7538 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7539 if not result.fail_msg and result.payload:
7541 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7543 self.lu.LogInfo("Renaming the old LVs on the target node")
7544 result = self.rpc.call_blockdev_rename(self.target_node,
7546 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7548 # Now we rename the new LVs to the old LVs
7549 self.lu.LogInfo("Renaming the new LVs on the target node")
7550 rename_new_to_old = [(new, old.physical_id)
7551 for old, new in zip(old_lvs, new_lvs)]
7552 result = self.rpc.call_blockdev_rename(self.target_node,
7554 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7556 for old, new in zip(old_lvs, new_lvs):
7557 new.logical_id = old.logical_id
7558 self.cfg.SetDiskID(new, self.target_node)
7560 for disk in old_lvs:
7561 disk.logical_id = ren_fn(disk, temp_suffix)
7562 self.cfg.SetDiskID(disk, self.target_node)
7564 # Now that the new lvs have the old name, we can add them to the device
7565 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7566 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7568 msg = result.fail_msg
7570 for new_lv in new_lvs:
7571 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7574 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7575 hint=("cleanup manually the unused logical"
7577 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7579 dev.children = new_lvs
7581 self.cfg.Update(self.instance, feedback_fn)
7584 if self.early_release:
7585 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7587 self._RemoveOldStorage(self.target_node, iv_names)
7588 # WARNING: we release both node locks here, do not do other RPCs
7589 # than WaitForSync to the primary node
7590 self._ReleaseNodeLock([self.target_node, self.other_node])
7593 # This can fail as the old devices are degraded and _WaitForSync
7594 # reports a combined result over all disks, so we don't check its return value
7595 self.lu.LogStep(cstep, steps_total, "Sync devices")
7597 _WaitForSync(self.lu, self.instance)
7599 # Check all devices manually
7600 self._CheckDevices(self.instance.primary_node, iv_names)
7602 # Step: remove old storage
7603 if not self.early_release:
7604 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7606 self._RemoveOldStorage(self.target_node, iv_names)
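# Illustrative sketch, not part of the original module: the detach/rename/
# attach dance performed above, shown with hypothetical LV names.  Assuming
# the DRBD device currently uses "xenvg/abc.disk0_data" and the freshly
# created replacement LV is "xenvg/abc.disk0_data_new", the sequence is
# roughly:
#
#   old: xenvg/abc.disk0_data      ->  xenvg/abc.disk0_data_replaced-<time_t>
#   new: xenvg/abc.disk0_data_new  ->  xenvg/abc.disk0_data
#
# after which the renamed new LV is re-attached as a child of the DRBD
# device, the mirror resyncs, and the "_replaced" volume is deleted in the
# "Removing old storage" step.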
7608 def _ExecDrbd8Secondary(self, feedback_fn):
7609 """Replace the secondary node for DRBD 8.
7611 The algorithm for replace is quite complicated:
7612 - for all disks of the instance:
7613 - create new LVs on the new node with same names
7614 - shutdown the drbd device on the old secondary
7615 - disconnect the drbd network on the primary
7616 - create the drbd device on the new secondary
7617 - network attach the drbd on the primary, using an artifice:
7618 the drbd code for Attach() will connect to the network if it
7619 finds a device which is connected to the good local disks but
7620 not network enabled
7621 - wait for sync across all devices
7622 - remove all disks from the old secondary
7624 Failures are not very well handled.
7629 # Step: check device activation
7630 self.lu.LogStep(1, steps_total, "Check device existence")
7631 self._CheckDisksExistence([self.instance.primary_node])
7632 self._CheckVolumeGroup([self.instance.primary_node])
7634 # Step: check other node consistency
7635 self.lu.LogStep(2, steps_total, "Check peer consistency")
7636 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7638 # Step: create new storage
7639 self.lu.LogStep(3, steps_total, "Allocate new storage")
7640 for idx, dev in enumerate(self.instance.disks):
7641 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7642 (self.new_node, idx))
7643 # we pass force_create=True to force LVM creation
7644 for new_lv in dev.children:
7645 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7646 _GetInstanceInfoText(self.instance), False)
7648 # Step 4: drbd minors and drbd setup changes
7649 # after this, we must manually remove the drbd minors on both the
7650 # error and the success paths
7651 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7652 minors = self.cfg.AllocateDRBDMinor([self.new_node
7653 for dev in self.instance.disks],
7655 logging.debug("Allocated minors %r", minors)
7658 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7659 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7660 (self.new_node, idx))
7661 # create new devices on new_node; note that we create two IDs:
7662 # one without port, so the drbd will be activated without
7663 # networking information on the new node at this stage, and one
7664 # with network, for the latter activation in step 4
7665 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7666 if self.instance.primary_node == o_node1:
7667 p_minor = o_minor1
7668 else:
7669 assert self.instance.primary_node == o_node2, "Three-node instance?"
7670 p_minor = o_minor2
7672 new_alone_id = (self.instance.primary_node, self.new_node, None,
7673 p_minor, new_minor, o_secret)
7674 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7675 p_minor, new_minor, o_secret)
7677 iv_names[idx] = (dev, dev.children, new_net_id)
7678 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7680 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7681 logical_id=new_alone_id,
7682 children=dev.children,
7683 size=dev.size)
7684 try:
7685 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7686 _GetInstanceInfoText(self.instance), False)
7687 except errors.GenericError:
7688 self.cfg.ReleaseDRBDMinors(self.instance.name)
7689 raise
7691 # We have new devices, shutdown the drbd on the old secondary
7692 for idx, dev in enumerate(self.instance.disks):
7693 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7694 self.cfg.SetDiskID(dev, self.target_node)
7695 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7697 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7698 "node: %s" % (idx, msg),
7699 hint=("Please cleanup this device manually as"
7700 " soon as possible"))
7702 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7703 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7704 self.node_secondary_ip,
7705 self.instance.disks)\
7706 [self.instance.primary_node]
7708 msg = result.fail_msg
7710 # detaches didn't succeed (unlikely)
7711 self.cfg.ReleaseDRBDMinors(self.instance.name)
7712 raise errors.OpExecError("Can't detach the disks from the network on"
7713 " old node: %s" % (msg,))
7715 # if we managed to detach at least one, we update all the disks of
7716 # the instance to point to the new secondary
7717 self.lu.LogInfo("Updating instance configuration")
7718 for dev, _, new_logical_id in iv_names.itervalues():
7719 dev.logical_id = new_logical_id
7720 self.cfg.SetDiskID(dev, self.instance.primary_node)
7722 self.cfg.Update(self.instance, feedback_fn)
7724 # and now perform the drbd attach
7725 self.lu.LogInfo("Attaching primary drbds to new secondary"
7726 " (standalone => connected)")
7727 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7729 self.node_secondary_ip,
7730 self.instance.disks,
7733 for to_node, to_result in result.items():
7734 msg = to_result.fail_msg
7736 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7738 hint=("please do a gnt-instance info to see the"
7739 " status of disks"))
7741 if self.early_release:
7742 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7744 self._RemoveOldStorage(self.target_node, iv_names)
7745 # WARNING: we release all node locks here, do not do other RPCs
7746 # than WaitForSync to the primary node
7747 self._ReleaseNodeLock([self.instance.primary_node,
7752 # This can fail as the old devices are degraded and _WaitForSync
7753 # reports a combined result over all disks, so we don't check its return value
7754 self.lu.LogStep(cstep, steps_total, "Sync devices")
7756 _WaitForSync(self.lu, self.instance)
7758 # Check all devices manually
7759 self._CheckDevices(self.instance.primary_node, iv_names)
7761 # Step: remove old storage
7762 if not self.early_release:
7763 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7764 self._RemoveOldStorage(self.target_node, iv_names)
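# Illustrative sketch with hypothetical values: for a DRBD disk whose current
# logical_id is
#   ("node1.example.com", "node2.example.com", 11000, 0, 1, "secret")
# and a new secondary node3 that was allocated minor 5, the loop above first
# builds the "standalone" id without the network port,
#   ("node1.example.com", "node3.example.com", None, 0, 5, "secret")
# so the device can be brought up on node3 without networking, and then the
# networked id
#   ("node1.example.com", "node3.example.com", 11000, 0, 5, "secret")
# which is stored in the configuration and used when the primary's DRBD is
# re-attached to the network.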
7767 class LURepairNodeStorage(NoHooksLU):
7768 """Repairs the volume group on a node.
7771 _OP_REQP = ["node_name"]
7774 def CheckArguments(self):
7775 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7777 _CheckStorageType(self.op.storage_type)
7779 def ExpandNames(self):
7780 self.needed_locks = {
7781 locking.LEVEL_NODE: [self.op.node_name],
7784 def _CheckFaultyDisks(self, instance, node_name):
7785 """Ensure faulty disks abort the opcode or at least warn."""
7787 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7789 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7790 " node '%s'" % (instance.name, node_name),
7792 except errors.OpPrereqError, err:
7793 if self.op.ignore_consistency:
7794 self.proc.LogWarning(str(err.args[0]))
7798 def CheckPrereq(self):
7799 """Check prerequisites.
7802 storage_type = self.op.storage_type
7804 if (constants.SO_FIX_CONSISTENCY not in
7805 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7806 raise errors.OpPrereqError("Storage units of type '%s' can not be"
7807 " repaired" % storage_type,
7810 # Check whether any instance on this node has faulty disks
7811 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7812 if not inst.admin_up:
7814 check_nodes = set(inst.all_nodes)
7815 check_nodes.discard(self.op.node_name)
7816 for inst_node_name in check_nodes:
7817 self._CheckFaultyDisks(inst, inst_node_name)
7819 def Exec(self, feedback_fn):
7820 feedback_fn("Repairing storage unit '%s' on %s ..." %
7821 (self.op.name, self.op.node_name))
7823 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7824 result = self.rpc.call_storage_execute(self.op.node_name,
7825 self.op.storage_type, st_args,
7827 constants.SO_FIX_CONSISTENCY)
7828 result.Raise("Failed to repair storage unit '%s' on %s" %
7829 (self.op.name, self.op.node_name))
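# Illustrative note (an assumption, not stated in this module): with the LVM
# backend this LU would typically be called with op.storage_type set to
# constants.ST_LVM_VG and op.name set to the volume group name, since only
# storage types that list SO_FIX_CONSISTENCY in
# constants.VALID_STORAGE_OPERATIONS pass the CheckPrereq test above.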
7832 class LUNodeEvacuationStrategy(NoHooksLU):
7833 """Computes the node evacuation strategy.
7836 _OP_REQP = ["nodes"]
7839 def CheckArguments(self):
7840 if not hasattr(self.op, "remote_node"):
7841 self.op.remote_node = None
7842 if not hasattr(self.op, "iallocator"):
7843 self.op.iallocator = None
7844 if self.op.remote_node is not None and self.op.iallocator is not None:
7845 raise errors.OpPrereqError("Give either the iallocator or the new"
7846 " secondary, not both", errors.ECODE_INVAL)
7848 def ExpandNames(self):
7849 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
7850 self.needed_locks = locks = {}
7851 if self.op.remote_node is None:
7852 locks[locking.LEVEL_NODE] = locking.ALL_SET
7854 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7855 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
7857 def CheckPrereq(self):
7860 def Exec(self, feedback_fn):
7861 if self.op.remote_node is not None:
7863 for node in self.op.nodes:
7864 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
7867 if i.primary_node == self.op.remote_node:
7868 raise errors.OpPrereqError("Node %s is the primary node of"
7869 " instance %s, cannot use it as"
7871 (self.op.remote_node, i.name),
7873 result.append([i.name, self.op.remote_node])
7875 ial = IAllocator(self.cfg, self.rpc,
7876 mode=constants.IALLOCATOR_MODE_MEVAC,
7877 evac_nodes=self.op.nodes)
7878 ial.Run(self.op.iallocator, validate=True)
7880 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
7886 class LUGrowDisk(LogicalUnit):
7887 """Grow a disk of an instance.
7891 HTYPE = constants.HTYPE_INSTANCE
7892 _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
7895 def ExpandNames(self):
7896 self._ExpandAndLockInstance()
7897 self.needed_locks[locking.LEVEL_NODE] = []
7898 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7900 def DeclareLocks(self, level):
7901 if level == locking.LEVEL_NODE:
7902 self._LockInstancesNodes()
7904 def BuildHooksEnv(self):
7907 This runs on the master, the primary and all the secondaries.
7911 "DISK": self.op.disk,
7912 "AMOUNT": self.op.amount,
7914 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7915 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7918 def CheckPrereq(self):
7919 """Check prerequisites.
7921 This checks that the instance is in the cluster.
7924 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7925 assert instance is not None, \
7926 "Cannot retrieve locked instance %s" % self.op.instance_name
7927 nodenames = list(instance.all_nodes)
7928 for node in nodenames:
7929 _CheckNodeOnline(self, node)
7932 self.instance = instance
7934 if instance.disk_template not in constants.DTS_GROWABLE:
7935 raise errors.OpPrereqError("Instance's disk layout does not support"
7936 " growing.", errors.ECODE_INVAL)
7938 self.disk = instance.FindDisk(self.op.disk)
7940 if instance.disk_template != constants.DT_FILE:
7941 # TODO: check the free disk space for file, when that feature will be
7943 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
7945 def Exec(self, feedback_fn):
7946 """Execute disk grow.
7949 instance = self.instance
7950 disk = self.disk
7952 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
7953 if not disks_ok:
7954 raise errors.OpExecError("Cannot activate block device to grow")
7956 for node in instance.all_nodes:
7957 self.cfg.SetDiskID(disk, node)
7958 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
7959 result.Raise("Grow request failed to node %s" % node)
7961 # TODO: Rewrite code to work properly
7962 # DRBD goes into sync mode for a short amount of time after executing the
7963 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
7964 # calling "resize" in sync mode fails. Sleeping for a short amount of
7965 # time is a work-around.
7968 disk.RecordGrow(self.op.amount)
7969 self.cfg.Update(instance, feedback_fn)
7970 if self.op.wait_for_sync:
7971 disk_abort = not _WaitForSync(self, instance, disks=[disk])
7973 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
7974 " status.\nPlease check the instance.")
7975 if not instance.admin_up:
7976 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
7977 elif not instance.admin_up:
7978 self.proc.LogWarning("Not shutting down the disk even if the instance is"
7979 " not supposed to be running because no wait for"
7980 " sync mode was requested.")
7983 class LUQueryInstanceData(NoHooksLU):
7984 """Query runtime instance data.
7987 _OP_REQP = ["instances", "static"]
7990 def ExpandNames(self):
7991 self.needed_locks = {}
7992 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
7994 if not isinstance(self.op.instances, list):
7995 raise errors.OpPrereqError("Invalid argument type 'instances'",
7998 if self.op.instances:
7999 self.wanted_names = []
8000 for name in self.op.instances:
8001 full_name = _ExpandInstanceName(self.cfg, name)
8002 self.wanted_names.append(full_name)
8003 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8005 self.wanted_names = None
8006 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8008 self.needed_locks[locking.LEVEL_NODE] = []
8009 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8011 def DeclareLocks(self, level):
8012 if level == locking.LEVEL_NODE:
8013 self._LockInstancesNodes()
8015 def CheckPrereq(self):
8016 """Check prerequisites.
8018 This only checks the optional instance list against the existing names.
8021 if self.wanted_names is None:
8022 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8024 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8025 in self.wanted_names]
8028 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8029 """Returns the status of a block device
8032 if self.op.static or not node:
8035 self.cfg.SetDiskID(dev, node)
8037 result = self.rpc.call_blockdev_find(node, dev)
8041 result.Raise("Can't compute disk status for %s" % instance_name)
8043 status = result.payload
8047 return (status.dev_path, status.major, status.minor,
8048 status.sync_percent, status.estimated_time,
8049 status.is_degraded, status.ldisk_status)
8051 def _ComputeDiskStatus(self, instance, snode, dev):
8052 """Compute block device status.
8055 if dev.dev_type in constants.LDS_DRBD:
8056 # we change the snode then (otherwise we use the one passed in)
8057 if dev.logical_id[0] == instance.primary_node:
8058 snode = dev.logical_id[1]
8060 snode = dev.logical_id[0]
8062 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8064 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8067 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8068 for child in dev.children]
8073 "iv_name": dev.iv_name,
8074 "dev_type": dev.dev_type,
8075 "logical_id": dev.logical_id,
8076 "physical_id": dev.physical_id,
8077 "pstatus": dev_pstatus,
8078 "sstatus": dev_sstatus,
8079 "children": dev_children,
8086 def Exec(self, feedback_fn):
8087 """Gather and return data"""
8090 cluster = self.cfg.GetClusterInfo()
8092 for instance in self.wanted_instances:
8093 if not self.op.static:
8094 remote_info = self.rpc.call_instance_info(instance.primary_node,
8096 instance.hypervisor)
8097 remote_info.Raise("Error checking node %s" % instance.primary_node)
8098 remote_info = remote_info.payload
8099 if remote_info and "state" in remote_info:
8102 remote_state = "down"
8105 if instance.admin_up:
8108 config_state = "down"
8110 disks = [self._ComputeDiskStatus(instance, None, device)
8111 for device in instance.disks]
8114 "name": instance.name,
8115 "config_state": config_state,
8116 "run_state": remote_state,
8117 "pnode": instance.primary_node,
8118 "snodes": instance.secondary_nodes,
8120 # this happens to be the same format used for hooks
8121 "nics": _NICListToTuple(self, instance.nics),
8122 "disk_template": instance.disk_template,
8124 "hypervisor": instance.hypervisor,
8125 "network_port": instance.network_port,
8126 "hv_instance": instance.hvparams,
8127 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8128 "be_instance": instance.beparams,
8129 "be_actual": cluster.FillBE(instance),
8130 "serial_no": instance.serial_no,
8131 "mtime": instance.mtime,
8132 "ctime": instance.ctime,
8133 "uuid": instance.uuid,
8136 result[instance.name] = idict
8141 class LUSetInstanceParams(LogicalUnit):
8142 """Modifies an instances's parameters.
8145 HPATH = "instance-modify"
8146 HTYPE = constants.HTYPE_INSTANCE
8147 _OP_REQP = ["instance_name"]
8150 def CheckArguments(self):
8151 if not hasattr(self.op, 'nics'):
8152 self.op.nics = []
8153 if not hasattr(self.op, 'disks'):
8154 self.op.disks = []
8155 if not hasattr(self.op, 'beparams'):
8156 self.op.beparams = {}
8157 if not hasattr(self.op, 'hvparams'):
8158 self.op.hvparams = {}
8159 if not hasattr(self.op, "disk_template"):
8160 self.op.disk_template = None
8161 if not hasattr(self.op, "remote_node"):
8162 self.op.remote_node = None
8163 if not hasattr(self.op, "os_name"):
8164 self.op.os_name = None
8165 if not hasattr(self.op, "force_variant"):
8166 self.op.force_variant = False
8167 self.op.force = getattr(self.op, "force", False)
8168 if not (self.op.nics or self.op.disks or self.op.disk_template or
8169 self.op.hvparams or self.op.beparams or self.op.os_name):
8170 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8172 if self.op.hvparams:
8173 _CheckGlobalHvParams(self.op.hvparams)
8177 for disk_op, disk_dict in self.op.disks:
8178 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8179 if disk_op == constants.DDM_REMOVE:
8182 elif disk_op == constants.DDM_ADD:
8185 if not isinstance(disk_op, int):
8186 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8187 if not isinstance(disk_dict, dict):
8188 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8189 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8191 if disk_op == constants.DDM_ADD:
8192 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8193 if mode not in constants.DISK_ACCESS_SET:
8194 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8196 size = disk_dict.get('size', None)
8198 raise errors.OpPrereqError("Required disk parameter size missing",
8202 except (TypeError, ValueError), err:
8203 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8204 str(err), errors.ECODE_INVAL)
8205 disk_dict['size'] = size
8207 # modification of disk
8208 if 'size' in disk_dict:
8209 raise errors.OpPrereqError("Disk size change not possible, use"
8210 " grow-disk", errors.ECODE_INVAL)
8212 if disk_addremove > 1:
8213 raise errors.OpPrereqError("Only one disk add or remove operation"
8214 " supported at a time", errors.ECODE_INVAL)
8216 if self.op.disks and self.op.disk_template is not None:
8217 raise errors.OpPrereqError("Disk template conversion and other disk"
8218 " changes not supported at the same time",
8221 if self.op.disk_template:
8222 _CheckDiskTemplate(self.op.disk_template)
8223 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8224 self.op.remote_node is None):
8225 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8226 " one requires specifying a secondary node",
8231 for nic_op, nic_dict in self.op.nics:
8232 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8233 if nic_op == constants.DDM_REMOVE:
8236 elif nic_op == constants.DDM_ADD:
8239 if not isinstance(nic_op, int):
8240 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8241 if not isinstance(nic_dict, dict):
8242 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8243 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8245 # nic_dict should be a dict
8246 nic_ip = nic_dict.get('ip', None)
8247 if nic_ip is not None:
8248 if nic_ip.lower() == constants.VALUE_NONE:
8249 nic_dict['ip'] = None
8251 if not utils.IsValidIP(nic_ip):
8252 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8255 nic_bridge = nic_dict.get('bridge', None)
8256 nic_link = nic_dict.get('link', None)
8257 if nic_bridge and nic_link:
8258 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8259 " at the same time", errors.ECODE_INVAL)
8260 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8261 nic_dict['bridge'] = None
8262 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8263 nic_dict['link'] = None
8265 if nic_op == constants.DDM_ADD:
8266 nic_mac = nic_dict.get('mac', None)
8268 nic_dict['mac'] = constants.VALUE_AUTO
8270 if 'mac' in nic_dict:
8271 nic_mac = nic_dict['mac']
8272 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8273 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8275 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8276 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8277 " modifying an existing nic",
8280 if nic_addremove > 1:
8281 raise errors.OpPrereqError("Only one NIC add or remove operation"
8282 " supported at a time", errors.ECODE_INVAL)
8284 def ExpandNames(self):
8285 self._ExpandAndLockInstance()
8286 self.needed_locks[locking.LEVEL_NODE] = []
8287 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8289 def DeclareLocks(self, level):
8290 if level == locking.LEVEL_NODE:
8291 self._LockInstancesNodes()
8292 if self.op.disk_template and self.op.remote_node:
8293 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8294 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8296 def BuildHooksEnv(self):
8299 This runs on the master, primary and secondaries.
8303 if constants.BE_MEMORY in self.be_new:
8304 args['memory'] = self.be_new[constants.BE_MEMORY]
8305 if constants.BE_VCPUS in self.be_new:
8306 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8307 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8308 # information at all.
8311 nic_override = dict(self.op.nics)
8312 c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
8313 for idx, nic in enumerate(self.instance.nics):
8314 if idx in nic_override:
8315 this_nic_override = nic_override[idx]
8317 this_nic_override = {}
8318 if 'ip' in this_nic_override:
8319 ip = this_nic_override['ip']
8322 if 'mac' in this_nic_override:
8323 mac = this_nic_override['mac']
8326 if idx in self.nic_pnew:
8327 nicparams = self.nic_pnew[idx]
8329 nicparams = objects.FillDict(c_nicparams, nic.nicparams)
8330 mode = nicparams[constants.NIC_MODE]
8331 link = nicparams[constants.NIC_LINK]
8332 args['nics'].append((ip, mac, mode, link))
8333 if constants.DDM_ADD in nic_override:
8334 ip = nic_override[constants.DDM_ADD].get('ip', None)
8335 mac = nic_override[constants.DDM_ADD]['mac']
8336 nicparams = self.nic_pnew[constants.DDM_ADD]
8337 mode = nicparams[constants.NIC_MODE]
8338 link = nicparams[constants.NIC_LINK]
8339 args['nics'].append((ip, mac, mode, link))
8340 elif constants.DDM_REMOVE in nic_override:
8341 del args['nics'][-1]
8343 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8344 if self.op.disk_template:
8345 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8346 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8350 def _GetUpdatedParams(old_params, update_dict,
8351 default_values, parameter_types):
8352 """Return the new params dict for the given params.
8354 @type old_params: dict
8355 @param old_params: old parameters
8356 @type update_dict: dict
8357 @param update_dict: dict containing new parameter values,
8358 or constants.VALUE_DEFAULT to reset the
8359 parameter to its default value
8360 @type default_values: dict
8361 @param default_values: default values for the filled parameters
8362 @type parameter_types: dict
8363 @param parameter_types: dict mapping target dict keys to types
8364 in constants.ENFORCEABLE_TYPES
8365 @rtype: (dict, dict)
8366 @return: (new_parameters, filled_parameters)
8369 params_copy = copy.deepcopy(old_params)
8370 for key, val in update_dict.iteritems():
8371 if val == constants.VALUE_DEFAULT:
8373 del params_copy[key]
8377 params_copy[key] = val
8378 utils.ForceDictType(params_copy, parameter_types)
8379 params_filled = objects.FillDict(default_values, params_copy)
8380 return (params_copy, params_filled)
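# Worked example (hypothetical values) of the merge performed by
# _GetUpdatedParams, ignoring the ForceDictType type enforcement:
#   old_params     = {"memory": 512, "vcpus": 4}
#   update_dict    = {"memory": constants.VALUE_DEFAULT, "vcpus": 8}
#   default_values = {"memory": 128, "vcpus": 1}
# would return
#   new parameters   : {"vcpus": 8}                 (memory reset to default)
#   filled parameters: {"memory": 128, "vcpus": 8}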
8382 def CheckPrereq(self):
8383 """Check prerequisites.
8385 This only checks the instance list against the existing names.
8388 self.force = self.op.force
8390 # checking the new params on the primary/secondary nodes
8392 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8393 cluster = self.cluster = self.cfg.GetClusterInfo()
8394 assert self.instance is not None, \
8395 "Cannot retrieve locked instance %s" % self.op.instance_name
8396 pnode = instance.primary_node
8397 nodelist = list(instance.all_nodes)
8399 if self.op.disk_template:
8400 if instance.disk_template == self.op.disk_template:
8401 raise errors.OpPrereqError("Instance already has disk template %s" %
8402 instance.disk_template, errors.ECODE_INVAL)
8404 if (instance.disk_template,
8405 self.op.disk_template) not in self._DISK_CONVERSIONS:
8406 raise errors.OpPrereqError("Unsupported disk template conversion from"
8407 " %s to %s" % (instance.disk_template,
8408 self.op.disk_template),
8410 if self.op.disk_template in constants.DTS_NET_MIRROR:
8411 _CheckNodeOnline(self, self.op.remote_node)
8412 _CheckNodeNotDrained(self, self.op.remote_node)
8413 disks = [{"size": d.size} for d in instance.disks]
8414 required = _ComputeDiskSize(self.op.disk_template, disks)
8415 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8416 _CheckInstanceDown(self, instance, "cannot change disk template")
8418 # hvparams processing
8419 if self.op.hvparams:
8420 i_hvdict, hv_new = self._GetUpdatedParams(
8421 instance.hvparams, self.op.hvparams,
8422 cluster.hvparams[instance.hypervisor],
8423 constants.HVS_PARAMETER_TYPES)
8425 hypervisor.GetHypervisor(
8426 instance.hypervisor).CheckParameterSyntax(hv_new)
8427 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8428 self.hv_new = hv_new # the new actual values
8429 self.hv_inst = i_hvdict # the new dict (without defaults)
8431 self.hv_new = self.hv_inst = {}
8433 # beparams processing
8434 if self.op.beparams:
8435 i_bedict, be_new = self._GetUpdatedParams(
8436 instance.beparams, self.op.beparams,
8437 cluster.beparams[constants.PP_DEFAULT],
8438 constants.BES_PARAMETER_TYPES)
8439 self.be_new = be_new # the new actual values
8440 self.be_inst = i_bedict # the new dict (without defaults)
8442 self.be_new = self.be_inst = {}
8446 if constants.BE_MEMORY in self.op.beparams and not self.force:
8447 mem_check_list = [pnode]
8448 if be_new[constants.BE_AUTO_BALANCE]:
8449 # either we changed auto_balance to yes or it was from before
8450 mem_check_list.extend(instance.secondary_nodes)
8451 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8452 instance.hypervisor)
8453 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8454 instance.hypervisor)
8455 pninfo = nodeinfo[pnode]
8456 msg = pninfo.fail_msg
8458 # Assume the primary node is unreachable and go ahead
8459 self.warn.append("Can't get info from primary node %s: %s" %
8461 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8462 self.warn.append("Node data from primary node %s doesn't contain"
8463 " free memory information" % pnode)
8464 elif instance_info.fail_msg:
8465 self.warn.append("Can't get instance runtime information: %s" %
8466 instance_info.fail_msg)
8468 if instance_info.payload:
8469 current_mem = int(instance_info.payload['memory'])
8471 # Assume instance not running
8472 # (there is a slight race condition here, but it's not very probable,
8473 # and we have no other way to check)
8475 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8476 pninfo.payload['memory_free'])
8478 raise errors.OpPrereqError("This change will prevent the instance"
8479 " from starting, due to %d MB of memory"
8480 " missing on its primary node" % miss_mem,
8483 if be_new[constants.BE_AUTO_BALANCE]:
8484 for node, nres in nodeinfo.items():
8485 if node not in instance.secondary_nodes:
8489 self.warn.append("Can't get info from secondary node %s: %s" %
8491 elif not isinstance(nres.payload.get('memory_free', None), int):
8492 self.warn.append("Secondary node %s didn't return free"
8493 " memory information" % node)
8494 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8495 self.warn.append("Not enough memory to failover instance to"
8496 " secondary node %s" % node)
8501 for nic_op, nic_dict in self.op.nics:
8502 if nic_op == constants.DDM_REMOVE:
8503 if not instance.nics:
8504 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8507 if nic_op != constants.DDM_ADD:
8509 if not instance.nics:
8510 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8511 " no NICs" % nic_op,
8513 if nic_op < 0 or nic_op >= len(instance.nics):
8514 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8516 (nic_op, len(instance.nics) - 1),
8518 old_nic_params = instance.nics[nic_op].nicparams
8519 old_nic_ip = instance.nics[nic_op].ip
8524 update_params_dict = dict([(key, nic_dict[key])
8525 for key in constants.NICS_PARAMETERS
8526 if key in nic_dict])
8528 if 'bridge' in nic_dict:
8529 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8531 new_nic_params, new_filled_nic_params = \
8532 self._GetUpdatedParams(old_nic_params, update_params_dict,
8533 cluster.nicparams[constants.PP_DEFAULT],
8534 constants.NICS_PARAMETER_TYPES)
8535 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8536 self.nic_pinst[nic_op] = new_nic_params
8537 self.nic_pnew[nic_op] = new_filled_nic_params
8538 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8540 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8541 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8542 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8544 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8546 self.warn.append(msg)
8548 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8549 if new_nic_mode == constants.NIC_MODE_ROUTED:
8550 if 'ip' in nic_dict:
8551 nic_ip = nic_dict['ip']
8555 raise errors.OpPrereqError('Cannot set the nic ip to None'
8556 ' on a routed nic', errors.ECODE_INVAL)
8557 if 'mac' in nic_dict:
8558 nic_mac = nic_dict['mac']
8560 raise errors.OpPrereqError('Cannot set the nic mac to None',
8562 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8563 # otherwise generate the mac
8564 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8566 # or validate/reserve the current one
8568 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8569 except errors.ReservationError:
8570 raise errors.OpPrereqError("MAC address %s already in use"
8571 " in cluster" % nic_mac,
8572 errors.ECODE_NOTUNIQUE)
8575 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8576 raise errors.OpPrereqError("Disk operations not supported for"
8577 " diskless instances",
8579 for disk_op, _ in self.op.disks:
8580 if disk_op == constants.DDM_REMOVE:
8581 if len(instance.disks) == 1:
8582 raise errors.OpPrereqError("Cannot remove the last disk of"
8583 " an instance", errors.ECODE_INVAL)
8584 _CheckInstanceDown(self, instance, "cannot remove disks")
8586 if (disk_op == constants.DDM_ADD and
8587 len(instance.nics) >= constants.MAX_DISKS):
8588 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8589 " add more" % constants.MAX_DISKS,
8591 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8593 if disk_op < 0 or disk_op >= len(instance.disks):
8594 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8596 (disk_op, len(instance.disks)),
8600 if self.op.os_name and not self.op.force:
8601 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8602 self.op.force_variant)
8606 def _ConvertPlainToDrbd(self, feedback_fn):
8607 """Converts an instance from plain to drbd.
8610 feedback_fn("Converting template to drbd")
8611 instance = self.instance
8612 pnode = instance.primary_node
8613 snode = self.op.remote_node
8615 # create a fake disk info for _GenerateDiskTemplate
8616 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8617 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8618 instance.name, pnode, [snode],
8619 disk_info, None, None, 0)
8620 info = _GetInstanceInfoText(instance)
8621 feedback_fn("Creating aditional volumes...")
8622 # first, create the missing data and meta devices
8623 for disk in new_disks:
8624 # unfortunately this is... not too nice
8625 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8627 for child in disk.children:
8628 _CreateSingleBlockDev(self, snode, instance, child, info, True)
8629 # at this stage, all new LVs have been created, we can rename the
8631 feedback_fn("Renaming original volumes...")
8632 rename_list = [(o, n.children[0].logical_id)
8633 for (o, n) in zip(instance.disks, new_disks)]
8634 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8635 result.Raise("Failed to rename original LVs")
8637 feedback_fn("Initializing DRBD devices...")
8638 # all child devices are in place, we can now create the DRBD devices
8639 for disk in new_disks:
8640 for node in [pnode, snode]:
8641 f_create = node == pnode
8642 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8644 # at this point, the instance has been modified
8645 instance.disk_template = constants.DT_DRBD8
8646 instance.disks = new_disks
8647 self.cfg.Update(instance, feedback_fn)
8649 # disks are created, waiting for sync
8650 disk_abort = not _WaitForSync(self, instance)
8652 raise errors.OpExecError("There are some degraded disks for"
8653 " this instance, please cleanup manually")
8655 def _ConvertDrbdToPlain(self, feedback_fn):
8656 """Converts an instance from drbd to plain.
8659 instance = self.instance
8660 assert len(instance.secondary_nodes) == 1
8661 pnode = instance.primary_node
8662 snode = instance.secondary_nodes[0]
8663 feedback_fn("Converting template to plain")
8665 old_disks = instance.disks
8666 new_disks = [d.children[0] for d in old_disks]
8668 # copy over size and mode
8669 for parent, child in zip(old_disks, new_disks):
8670 child.size = parent.size
8671 child.mode = parent.mode
8673 # update instance structure
8674 instance.disks = new_disks
8675 instance.disk_template = constants.DT_PLAIN
8676 self.cfg.Update(instance, feedback_fn)
8678 feedback_fn("Removing volumes on the secondary node...")
8679 for disk in old_disks:
8680 self.cfg.SetDiskID(disk, snode)
8681 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8683 self.LogWarning("Could not remove block device %s on node %s,"
8684 " continuing anyway: %s", disk.iv_name, snode, msg)
8686 feedback_fn("Removing unneeded volumes on the primary node...")
8687 for idx, disk in enumerate(old_disks):
8688 meta = disk.children[1]
8689 self.cfg.SetDiskID(meta, pnode)
8690 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8692 self.LogWarning("Could not remove metadata for disk %d on node %s,"
8693 " continuing anyway: %s", idx, pnode, msg)
8696 def Exec(self, feedback_fn):
8697 """Modifies an instance.
8699 All parameters take effect only at the next restart of the instance.
8702 # Process here the warnings from CheckPrereq, as we don't have a
8703 # feedback_fn there.
8704 for warn in self.warn:
8705 feedback_fn("WARNING: %s" % warn)
8708 instance = self.instance
8710 for disk_op, disk_dict in self.op.disks:
8711 if disk_op == constants.DDM_REMOVE:
8712 # remove the last disk
8713 device = instance.disks.pop()
8714 device_idx = len(instance.disks)
8715 for node, disk in device.ComputeNodeTree(instance.primary_node):
8716 self.cfg.SetDiskID(disk, node)
8717 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8719 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8720 " continuing anyway", device_idx, node, msg)
8721 result.append(("disk/%d" % device_idx, "remove"))
8722 elif disk_op == constants.DDM_ADD:
8724 if instance.disk_template == constants.DT_FILE:
8725 file_driver, file_path = instance.disks[0].logical_id
8726 file_path = os.path.dirname(file_path)
8728 file_driver = file_path = None
8729 disk_idx_base = len(instance.disks)
8730 new_disk = _GenerateDiskTemplate(self,
8731 instance.disk_template,
8732 instance.name, instance.primary_node,
8733 instance.secondary_nodes,
8738 instance.disks.append(new_disk)
8739 info = _GetInstanceInfoText(instance)
8741 logging.info("Creating volume %s for instance %s",
8742 new_disk.iv_name, instance.name)
8743 # Note: this needs to be kept in sync with _CreateDisks
8745 for node in instance.all_nodes:
8746 f_create = node == instance.primary_node
8748 _CreateBlockDev(self, node, instance, new_disk,
8749 f_create, info, f_create)
8750 except errors.OpExecError, err:
8751 self.LogWarning("Failed to create volume %s (%s) on"
8753 new_disk.iv_name, new_disk, node, err)
8754 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8755 (new_disk.size, new_disk.mode)))
8757 # change a given disk
8758 instance.disks[disk_op].mode = disk_dict['mode']
8759 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8761 if self.op.disk_template:
8762 r_shut = _ShutdownInstanceDisks(self, instance)
8764 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8765 " proceed with disk template conversion")
8766 mode = (instance.disk_template, self.op.disk_template)
8768 self._DISK_CONVERSIONS[mode](self, feedback_fn)
8770 self.cfg.ReleaseDRBDMinors(instance.name)
8772 result.append(("disk_template", self.op.disk_template))
8775 for nic_op, nic_dict in self.op.nics:
8776 if nic_op == constants.DDM_REMOVE:
8777 # remove the last nic
8778 del instance.nics[-1]
8779 result.append(("nic.%d" % len(instance.nics), "remove"))
8780 elif nic_op == constants.DDM_ADD:
8781 # mac and bridge should be set by now
8782 mac = nic_dict['mac']
8783 ip = nic_dict.get('ip', None)
8784 nicparams = self.nic_pinst[constants.DDM_ADD]
8785 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8786 instance.nics.append(new_nic)
8787 result.append(("nic.%d" % (len(instance.nics) - 1),
8788 "add:mac=%s,ip=%s,mode=%s,link=%s" %
8789 (new_nic.mac, new_nic.ip,
8790 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8791 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8794 for key in 'mac', 'ip':
8796 setattr(instance.nics[nic_op], key, nic_dict[key])
8797 if nic_op in self.nic_pinst:
8798 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8799 for key, val in nic_dict.iteritems():
8800 result.append(("nic.%s/%d" % (key, nic_op), val))
8803 if self.op.hvparams:
8804 instance.hvparams = self.hv_inst
8805 for key, val in self.op.hvparams.iteritems():
8806 result.append(("hv/%s" % key, val))
8809 if self.op.beparams:
8810 instance.beparams = self.be_inst
8811 for key, val in self.op.beparams.iteritems():
8812 result.append(("be/%s" % key, val))
8816 instance.os = self.op.os_name
8818 self.cfg.Update(instance, feedback_fn)
8822 _DISK_CONVERSIONS = {
8823 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8824 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8827 class LUQueryExports(NoHooksLU):
8828 """Query the exports list
8831 _OP_REQP = ['nodes']
8834 def ExpandNames(self):
8835 self.needed_locks = {}
8836 self.share_locks[locking.LEVEL_NODE] = 1
8837 if not self.op.nodes:
8838 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8840 self.needed_locks[locking.LEVEL_NODE] = \
8841 _GetWantedNodes(self, self.op.nodes)
8843 def CheckPrereq(self):
8844 """Check prerequisites.
8847 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8849 def Exec(self, feedback_fn):
8850 """Compute the list of all the exported system images.
8853 @return: a dictionary with the structure node->(export-list)
8854 where export-list is a list of the instances exported on
8855 that node.
8858 rpcresult = self.rpc.call_export_list(self.nodes)
8860 for node in rpcresult:
8861 if rpcresult[node].fail_msg:
8862 result[node] = False
8864 result[node] = rpcresult[node].payload
8869 class LUExportInstance(LogicalUnit):
8870 """Export an instance to an image in the cluster.
8873 HPATH = "instance-export"
8874 HTYPE = constants.HTYPE_INSTANCE
8875 _OP_REQP = ["instance_name", "target_node", "shutdown"]
8878 def CheckArguments(self):
8879 """Check the arguments.
8882 self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
8883 constants.DEFAULT_SHUTDOWN_TIMEOUT)
8885 def ExpandNames(self):
8886 self._ExpandAndLockInstance()
8887 # FIXME: lock only instance primary and destination node
8889 # Sad but true, for now we have to lock all nodes, as we don't know where
8890 # the previous export might be, and in this LU we search for it and
8891 # remove it from its current node. In the future we could fix this by:
8892 # - making a tasklet to search (share-lock all), then create the new one,
8893 # then one to remove, after
8894 # - removing the removal operation altogether
8895 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8897 def DeclareLocks(self, level):
8898 """Last minute lock declaration."""
8899 # All nodes are locked anyway, so nothing to do here.
8901 def BuildHooksEnv(self):
8904 This will run on the master, primary node and target node.
8908 "EXPORT_NODE": self.op.target_node,
8909 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
8910 "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
8912 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8913 nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
8914 self.op.target_node]
8917 def CheckPrereq(self):
8918 """Check prerequisites.
8920 This checks that the instance and node names are valid.
8923 instance_name = self.op.instance_name
8924 self.instance = self.cfg.GetInstanceInfo(instance_name)
8925 assert self.instance is not None, \
8926 "Cannot retrieve locked instance %s" % self.op.instance_name
8927 _CheckNodeOnline(self, self.instance.primary_node)
8929 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8930 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
8931 assert self.dst_node is not None
8933 _CheckNodeOnline(self, self.dst_node.name)
8934 _CheckNodeNotDrained(self, self.dst_node.name)
8936 # instance disk type verification
8937 for disk in self.instance.disks:
8938 if disk.dev_type == constants.LD_FILE:
8939 raise errors.OpPrereqError("Export not supported for instances with"
8940 " file-based disks", errors.ECODE_INVAL)
8942 def _CreateSnapshots(self, feedback_fn):
8943 """Creates an LVM snapshot for every disk of the instance.
8945 @return: List of snapshots as L{objects.Disk} instances
8948 instance = self.instance
8949 src_node = instance.primary_node
8951 vgname = self.cfg.GetVGName()
8955 for idx, disk in enumerate(instance.disks):
8956 feedback_fn("Creating a snapshot of disk/%s on node %s" %
8959 # result.payload will be a snapshot of an lvm leaf of the one we
8961 result = self.rpc.call_blockdev_snapshot(src_node, disk)
8962 msg = result.fail_msg
8964 self.LogWarning("Could not snapshot disk/%s on node %s: %s",
8966 snap_disks.append(False)
8968 disk_id = (vgname, result.payload)
8969 new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
8970 logical_id=disk_id, physical_id=disk_id,
8971 iv_name=disk.iv_name)
8972 snap_disks.append(new_dev)
8976 def _RemoveSnapshot(self, feedback_fn, snap_disks, disk_index):
8977 """Removes an LVM snapshot.
8979 @type snap_disks: list
8980 @param snap_disks: The list of all snapshots as returned by
8982 @type disk_index: number
8983 @param disk_index: Index of the snapshot to be removed
8985 @return: Whether removal was successful or not
8988 disk = snap_disks[disk_index]
8990 src_node = self.instance.primary_node
8992 feedback_fn("Removing snapshot of disk/%s on node %s" %
8993 (disk_index, src_node))
8995 result = self.rpc.call_blockdev_remove(src_node, disk)
8996 if not result.fail_msg:
8999 self.LogWarning("Could not remove snapshot for disk/%d from node"
9000 " %s: %s", disk_index, src_node, result.fail_msg)
9004 def _CleanupExports(self, feedback_fn):
9005 """Removes exports of current instance from all other nodes.
9007 If an instance in a cluster with nodes A..D was exported to node C, its
9008 exports will be removed from the nodes A, B and D.
9011 nodelist = self.cfg.GetNodeList()
9012 nodelist.remove(self.dst_node.name)
9014 # on one-node clusters nodelist will be empty after the removal
9015 # if we proceed the backup would be removed because OpQueryExports
9016 # substitutes an empty list with the full cluster node list.
9017 iname = self.instance.name
9019 feedback_fn("Removing old exports for instance %s" % iname)
9020 exportlist = self.rpc.call_export_list(nodelist)
9021 for node in exportlist:
9022 if exportlist[node].fail_msg:
9024 if iname in exportlist[node].payload:
9025 msg = self.rpc.call_export_remove(node, iname).fail_msg
9027 self.LogWarning("Could not remove older export for instance %s"
9028 " on node %s: %s", iname, node, msg)
9030 def Exec(self, feedback_fn):
9031 """Export an instance to an image in the cluster.
9034 instance = self.instance
9035 dst_node = self.dst_node
9036 src_node = instance.primary_node
9038 if self.op.shutdown:
9039 # shutdown the instance, but not the disks
9040 feedback_fn("Shutting down instance %s" % instance.name)
9041 result = self.rpc.call_instance_shutdown(src_node, instance,
9042 self.shutdown_timeout)
9043 result.Raise("Could not shutdown instance %s on"
9044 " node %s" % (instance.name, src_node))
9046 # set the disks ID correctly since call_instance_start needs the
9047 # correct drbd minor to create the symlinks
9048 for disk in instance.disks:
9049 self.cfg.SetDiskID(disk, src_node)
9051 activate_disks = (not instance.admin_up)
9054 # Activate the instance disks if we're exporting a stopped instance
9055 feedback_fn("Activating disks for %s" % instance.name)
9056 _StartInstanceDisks(self, instance, None)
9061 removed_snaps = [False] * len(instance.disks)
9066 snap_disks = self._CreateSnapshots(feedback_fn)
9068 if self.op.shutdown and instance.admin_up:
9069 feedback_fn("Starting instance %s" % instance.name)
9070 result = self.rpc.call_instance_start(src_node, instance,
9072 msg = result.fail_msg
9074 _ShutdownInstanceDisks(self, instance)
9075 raise errors.OpExecError("Could not start instance: %s" % msg)
9077 assert len(snap_disks) == len(instance.disks)
9078 assert len(removed_snaps) == len(instance.disks)
9080 # TODO: check for size
9082 cluster_name = self.cfg.GetClusterName()
9083 for idx, dev in enumerate(snap_disks):
9084 feedback_fn("Exporting snapshot %s from %s to %s" %
9085 (idx, src_node, dst_node.name))
9087 # FIXME: pass debug from opcode to backend
9088 result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
9089 instance, cluster_name,
9090 idx, self.op.debug_level)
9091 msg = result.fail_msg
9093 self.LogWarning("Could not export disk/%s from node %s to"
9094 " node %s: %s", idx, src_node, dst_node.name, msg)
9095 dresults.append(False)
9097 dresults.append(True)
9100 if self._RemoveSnapshot(feedback_fn, snap_disks, idx):
9101 removed_snaps[idx] = True
9103 dresults.append(False)
9105 assert len(dresults) == len(instance.disks)
9107 # Check for backwards compatibility
9108 assert compat.all(isinstance(i, bool) for i in dresults), \
9109 "Not all results are boolean: %r" % dresults
9111 feedback_fn("Finalizing export on %s" % dst_node.name)
9112 result = self.rpc.call_finalize_export(dst_node.name, instance,
9114 msg = result.fail_msg
9117 self.LogWarning("Could not finalize export for instance %s"
9118 " on node %s: %s", instance.name, dst_node.name, msg)
9121 # Remove all snapshots
9122 assert len(removed_snaps) == len(instance.disks)
9123 for idx, removed in enumerate(removed_snaps):
9125 self._RemoveSnapshot(feedback_fn, snap_disks, idx)
9129 feedback_fn("Deactivating disks for %s" % instance.name)
9130 _ShutdownInstanceDisks(self, instance)
9132 self._CleanupExports(feedback_fn)
9134 return fin_resu, dresults
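# Illustrative note: the value returned above is a (finalize_ok, per_disk)
# pair; a hypothetical (True, [True, False]) would mean the export was
# finalized on the destination node but the transfer of disk/1 failed, so
# the caller should treat the backup as incomplete.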
9137 class LURemoveExport(NoHooksLU):
9138 """Remove exports related to the named instance.
9141 _OP_REQP = ["instance_name"]
9144 def ExpandNames(self):
9145 self.needed_locks = {}
9146 # We need all nodes to be locked in order for RemoveExport to work, but we
9147 # don't need to lock the instance itself, as nothing will happen to it (and
9148 # we can remove exports also for a removed instance)
9149 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9151 def CheckPrereq(self):
9152 """Check prerequisites.
9156 def Exec(self, feedback_fn):
9157 """Remove any export.
9160 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9161 # If the instance was not found we'll try with the name that was passed in.
9162 # This will only work if it was an FQDN, though.
9164 if not instance_name:
9166 instance_name = self.op.instance_name
9168 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9169 exportlist = self.rpc.call_export_list(locked_nodes)
9171 for node in exportlist:
9172 msg = exportlist[node].fail_msg
9174 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9176 if instance_name in exportlist[node].payload:
9178 result = self.rpc.call_export_remove(node, instance_name)
9179 msg = result.fail_msg
9181 logging.error("Could not remove export for instance %s"
9182 " on node %s: %s", instance_name, node, msg)
9184 if fqdn_warn and not found:
9185 feedback_fn("Export not found. If trying to remove an export belonging"
9186 " to a deleted instance please use its Fully Qualified"
9190 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9193 This is an abstract class which is the parent of all the other tags LUs.
9197 def ExpandNames(self):
9198 self.needed_locks = {}
9199 if self.op.kind == constants.TAG_NODE:
9200 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9201 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9202 elif self.op.kind == constants.TAG_INSTANCE:
9203 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9204 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9206 def CheckPrereq(self):
9207 """Check prerequisites.
9210 if self.op.kind == constants.TAG_CLUSTER:
9211 self.target = self.cfg.GetClusterInfo()
9212 elif self.op.kind == constants.TAG_NODE:
9213 self.target = self.cfg.GetNodeInfo(self.op.name)
9214 elif self.op.kind == constants.TAG_INSTANCE:
9215 self.target = self.cfg.GetInstanceInfo(self.op.name)
9217 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9218 str(self.op.kind), errors.ECODE_INVAL)
9221 class LUGetTags(TagsLU):
9222 """Returns the tags of a given object.
9225 _OP_REQP = ["kind", "name"]
9228 def Exec(self, feedback_fn):
9229 """Returns the tag list.
9232 return list(self.target.GetTags())
9235 class LUSearchTags(NoHooksLU):
9236 """Searches the tags for a given pattern.
9239 _OP_REQP = ["pattern"]
9242 def ExpandNames(self):
9243 self.needed_locks = {}
9245 def CheckPrereq(self):
9246 """Check prerequisites.
9248 This checks the pattern passed for validity by compiling it.
9252 self.re = re.compile(self.op.pattern)
9253 except re.error, err:
9254 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9255 (self.op.pattern, err), errors.ECODE_INVAL)
9257 def Exec(self, feedback_fn):
9258 """Returns the tag list.
9262 tgts = [("/cluster", cfg.GetClusterInfo())]
9263 ilist = cfg.GetAllInstancesInfo().values()
9264 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9265 nlist = cfg.GetAllNodesInfo().values()
9266 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9268 for path, target in tgts:
9269 for tag in target.GetTags():
9270 if self.re.search(tag):
9271 results.append((path, tag))
9275 class LUAddTags(TagsLU):
9276 """Sets a tag on a given object.
9279 _OP_REQP = ["kind", "name", "tags"]
9282 def CheckPrereq(self):
9283 """Check prerequisites.
9285 This checks the type and length of the tag name and value.
9288 TagsLU.CheckPrereq(self)
9289 for tag in self.op.tags:
9290 objects.TaggableObject.ValidateTag(tag)
9292 def Exec(self, feedback_fn):
9297 for tag in self.op.tags:
9298 self.target.AddTag(tag)
9299 except errors.TagError, err:
9300 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9301 self.cfg.Update(self.target, feedback_fn)
9304 class LUDelTags(TagsLU):
9305 """Delete a list of tags from a given object.
9308 _OP_REQP = ["kind", "name", "tags"]
9311 def CheckPrereq(self):
9312 """Check prerequisites.
9314 This checks that we have the given tag.
9317 TagsLU.CheckPrereq(self)
9318 for tag in self.op.tags:
9319 objects.TaggableObject.ValidateTag(tag)
9320 del_tags = frozenset(self.op.tags)
9321 cur_tags = self.target.GetTags()
9322 if not del_tags <= cur_tags:
9323 diff_tags = del_tags - cur_tags
9324 diff_names = ["'%s'" % tag for tag in diff_tags]
9326 raise errors.OpPrereqError("Tag(s) %s not found" %
9327 (",".join(diff_names)), errors.ECODE_NOENT)
9329 def Exec(self, feedback_fn):
9330 """Remove the tag from the object.
9333 for tag in self.op.tags:
9334 self.target.RemoveTag(tag)
9335 self.cfg.Update(self.target, feedback_fn)
9338 class LUTestDelay(NoHooksLU):
9339 """Sleep for a specified amount of time.
9341 This LU sleeps on the master and/or nodes for a specified amount of time.
9345 _OP_REQP = ["duration", "on_master", "on_nodes"]
9348 def ExpandNames(self):
9349 """Expand names and set required locks.
9351 This expands the node list, if any.
9354 self.needed_locks = {}
9355 if self.op.on_nodes:
9356 # _GetWantedNodes can be used here, but is not always appropriate to use
9357 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9358 # more information.
9359 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9360 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9362 def CheckPrereq(self):
9363 """Check prerequisites.
9367 def Exec(self, feedback_fn):
9368 """Do the actual sleep.
9371 if self.op.on_master:
9372 if not utils.TestDelay(self.op.duration):
9373 raise errors.OpExecError("Error during master delay test")
9374 if self.op.on_nodes:
9375 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9376 for node, node_result in result.items():
9377 node_result.Raise("Failure during rpc call to node %s" % node)
9380 class IAllocator(object):
9381 """IAllocator framework.
9383 An IAllocator instance has four sets of attributes:
9384 - cfg that is needed to query the cluster
9385 - input data (all members of the _KEYS class attribute are required)
9386 - four buffer attributes (in|out_data|text), which represent the
9387 input (to the external script) in text and data structure format,
9388 and the output from it, again in two formats
9389 - the result variables from the script (success, info, nodes) for easy usage
9393 # pylint: disable-msg=R0902
9394 # lots of instance attributes
9396 "name", "mem_size", "disks", "disk_template",
9397 "os", "tags", "nics", "vcpus", "hypervisor",
9400 "name", "relocate_from",

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)
            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr.update({
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          })

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                    }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
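
  # A minimal well-formed reply (values hypothetical) that passes
  # _ValidateResult would look like:
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node1.example.com", "node2.example.com"]}
  # "success", "info" and "result" are mandatory and "result" must be a list.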


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                     " parameter", errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                   errors.ECODE_INVAL)
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text